In [1]:
#Import package pandas for data analysis
import pandas as pd

# Import package numpy for numeric computing
import numpy as np

# Import package matplotlib for visualisation/plotting
import matplotlib.pyplot as plt

In [2]:
dynamic = pd.read_csv('db_backup/07.04/BikeData_DynamicData.csv', names=['StationNumber' , 'StationName', 'AvailableBikes' , 'AvailableBikeStands', 'Date', 'Time'])
static = pd.read_csv('db_backup/07.04/BikeData_StaticData.csv', names=['StationNumber', 'StationName', 'Address', 'Latitude', 'Longitude', 'Banking'])
weather = pd.read_csv('db_backup/07.04/BikeData_WeatherData.csv', names=['Date', 'Time', 'Rainfall', 'Temperature', 'Icon', 'WindSpeed'])

# Static Data

In [3]:
#Displaying the first few and last rows of the static table
static.head()

Unnamed: 0,StationNumber,StationName,Address,Latitude,Longitude,Banking
0,42,SMITHFIELD NORTH,Smithfield North,53.349562,-6.278198,1
1,30,PARNELL SQUARE NORTH,Parnell Square North,53.353462,-6.265305,1
2,54,CLONMEL STREET,Clonmel Street,53.336021,-6.26298,0
3,108,AVONDALE ROAD,Avondale Road,53.359405,-6.276142,0
4,56,MOUNT STREET LOWER,Mount Street Lower,53.33796,-6.24153,0


In [4]:
static.shape

(110, 6)

- There are 110 stations.

In [5]:
# Number of empty cells for each column
static.isnull().sum()

StationNumber    0
StationName      0
Address          0
Latitude         0
Longitude        0
Banking          0
dtype: int64

In [6]:
# Creating an array with the station names
station_numbers = static["StationNumber"].to_numpy()
station_numbers.sort()

Unnamed: 0,StationNumber,StationName,Address,Latitude,Longitude,Banking
0,2,SMITHFIELD NORTH,Smithfield North,53.349562,-6.278198,1
1,3,PARNELL SQUARE NORTH,Parnell Square North,53.353462,-6.265305,1
2,4,CLONMEL STREET,Clonmel Street,53.336021,-6.262980,0
3,5,AVONDALE ROAD,Avondale Road,53.359405,-6.276142,0
4,6,MOUNT STREET LOWER,Mount Street Lower,53.337960,-6.241530,0
...,...,...,...,...,...,...
95,103,UPPER SHERRARD STREET,Upper Sherrard Street,53.358437,-6.260641,0
96,104,FITZWILLIAM SQUARE EAST,Fitzwilliam Square East,53.335211,-6.250900,1
97,105,GRATTAN STREET,Grattan Street,53.339629,-6.243778,0
98,106,ST JAMES HOSPITAL (LUAS),St James Hospital (Luas),53.341359,-6.292951,0


# Weather

In [7]:
#Displaying the first few and last rows of the dynamic table
weather

Unnamed: 0,Date,Time,Rainfall,Temperature,Icon,WindSpeed
0,2020-02-21,13:00:05,0.0,10.1,,
1,2020-02-21,13:30:02,0.0,10.2,,
2,2020-02-21,14:00:02,0.0,10.2,,
3,2020-02-22,14:30:01,0.0,7.4,,
4,2020-02-22,15:00:02,0.0,7.4,,
...,...,...,...,...,...,...
2063,2020-04-07,07:30:02,0.0,8.0,partly-cloudy-day,10.86
2064,2020-04-07,08:00:02,0.0,8.2,partly-cloudy-day,12.09
2065,2020-04-07,08:30:02,0.0,8.4,partly-cloudy-day,13.23
2066,2020-04-07,09:00:02,0.0,8.7,partly-cloudy-day,14.26


In [8]:
# Number of empty cells for each column
weather.isnull().sum()

Date             0
Time             0
Rainfall         0
Temperature      0
Icon           387
WindSpeed      388
dtype: int64

# Dynamic Data

In [9]:
# Converting StationNumber to object so it does not appear during descriptive stats
dynamic['StationNumber'] = dynamic['StationNumber'].astype('object')

In [10]:
# Converting Date to datetime 
dynamic['Date'] = dynamic['Date'].astype('datetime64')
dynamic['Time'] = dynamic['Time'].astype('datetime64')

In [11]:
#Displaying the first few and last rows of the dynamic table
dynamic

Unnamed: 0,StationNumber,StationName,AvailableBikes,AvailableBikeStands,Date,Time
0,42,SMITHFIELD NORTH,2,28,2020-02-21,2020-04-08 12:52:58
1,30,PARNELL SQUARE NORTH,1,19,2020-02-21,2020-04-08 12:54:29
2,54,CLONMEL STREET,25,8,2020-02-21,2020-04-08 12:54:39
3,108,AVONDALE ROAD,4,36,2020-02-21,2020-04-08 12:50:23
4,56,MOUNT STREET LOWER,27,13,2020-02-21,2020-04-08 12:47:41
...,...,...,...,...,...,...
1343418,39,WILTON TERRACE,5,15,2020-04-07,2020-04-08 09:34:05
1343419,83,EMMET ROAD,19,20,2020-04-07,2020-04-08 09:30:33
1343420,92,HEUSTON BRIDGE (NORTH),19,20,2020-04-07,2020-04-08 09:30:36
1343421,21,LEINSTER STREET SOUTH,15,15,2020-04-07,2020-04-08 09:32:02


In [12]:
print('Number of duplicate (excluding first) rows in the table is: ', dynamic.duplicated().sum())

Number of duplicate (excluding first) rows in the table is:  575827


### Checking for duplicate features

In [13]:
dupl = dynamic.duplicated()
new = dynamic.loc[(dupl == True)]

new.head(200)

Unnamed: 0,StationNumber,StationName,AvailableBikes,AvailableBikeStands,Date,Time
110,30,PARNELL SQUARE NORTH,1,19,2020-02-21,2020-04-08 12:54:29
111,54,CLONMEL STREET,25,8,2020-02-21,2020-04-08 12:54:39
112,108,AVONDALE ROAD,4,36,2020-02-21,2020-04-08 12:50:23
119,13,FITZWILLIAM SQUARE WEST,21,9,2020-02-21,2020-04-08 12:53:25
120,43,PORTOBELLO ROAD,0,30,2020-02-21,2020-04-08 12:50:26
...,...,...,...,...,...,...
791,63,FENIAN STREET,13,21,2020-02-21,2020-04-08 13:21:23
792,113,MERRION SQUARE SOUTH,19,21,2020-02-21,2020-04-08 13:22:39
803,73,FRANCIS STREET,0,30,2020-02-21,2020-04-08 13:23:05
804,4,GREEK STREET,4,16,2020-02-21,2020-04-08 13:20:37


In [14]:
dynamic.head(200)

Unnamed: 0,StationNumber,StationName,AvailableBikes,AvailableBikeStands,Date,Time
0,42,SMITHFIELD NORTH,2,28,2020-02-21,2020-04-08 12:52:58
1,30,PARNELL SQUARE NORTH,1,19,2020-02-21,2020-04-08 12:54:29
2,54,CLONMEL STREET,25,8,2020-02-21,2020-04-08 12:54:39
3,108,AVONDALE ROAD,4,36,2020-02-21,2020-04-08 12:50:23
4,56,MOUNT STREET LOWER,27,13,2020-02-21,2020-04-08 12:47:41
...,...,...,...,...,...,...
195,10,DAME STREET,6,10,2020-02-21,2020-04-08 12:59:42
196,100,HEUSTON BRIDGE (SOUTH),13,12,2020-02-21,2020-04-08 12:58:07
197,24,CATHAL BRUGHA STREET,3,17,2020-02-21,2020-04-08 12:59:36
198,64,SANDWITH STREET,20,19,2020-02-21,2020-04-08 12:58:51


### Dropping duplicates

In [15]:
dynamic = dynamic.drop_duplicates()

In [16]:
dynamic

Unnamed: 0,StationNumber,StationName,AvailableBikes,AvailableBikeStands,Date,Time
0,42,SMITHFIELD NORTH,2,28,2020-02-21,2020-04-08 12:52:58
1,30,PARNELL SQUARE NORTH,1,19,2020-02-21,2020-04-08 12:54:29
2,54,CLONMEL STREET,25,8,2020-02-21,2020-04-08 12:54:39
3,108,AVONDALE ROAD,4,36,2020-02-21,2020-04-08 12:50:23
4,56,MOUNT STREET LOWER,27,13,2020-02-21,2020-04-08 12:47:41
...,...,...,...,...,...,...
1343414,40,JERVIS STREET,3,18,2020-04-07,2020-04-08 09:34:23
1343415,29,ORMOND QUAY UPPER,14,15,2020-04-07,2020-04-08 09:37:12
1343416,103,GRANGEGORMAN LOWER (SOUTH),2,38,2020-04-07,2020-04-08 09:38:12
1343417,28,MOUNTJOY SQUARE WEST,7,23,2020-04-07,2020-04-08 09:35:51


In [17]:
print('Number of duplicate (excluding first) rows in the table is: ', dynamic.duplicated().sum())

Number of duplicate (excluding first) rows in the table is:  0


In [18]:
## Reseting the index count
dynamic = dynamic.reset_index(drop=True) 

In [19]:
# Number of empty cells for each column
dynamic.isnull().sum()

StationNumber          0
StationName            0
AvailableBikes         0
AvailableBikeStands    0
Date                   0
Time                   0
dtype: int64

In [20]:
dynamic.dtypes

StationNumber                  object
StationName                    object
AvailableBikes                  int64
AvailableBikeStands             int64
Date                   datetime64[ns]
Time                   datetime64[ns]
dtype: object

In [21]:
# Displaying descriptive stats for numeric columns
numeric_cols = dynamic.select_dtypes(['int64']).columns
dynamic[numeric_cols].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
AvailableBikes,767596.0,11.820447,8.876169,0.0,5.0,11.0,17.0,40.0
AvailableBikeStands,767596.0,20.187855,10.415964,0.0,13.0,20.0,28.0,41.0


# Makeof Json files for the weekly and hourly charts

### Adding DayOfWeek column to the dataframe


In [22]:
date = dynamic['Date']
day_of_week_arr=[]
for i in date:
    day_of_week_arr.append(i.dayofweek)
day_of_week=pd.Series(day_of_week_arr, dtype='category')
dynamic['DayOfWeek']=day_of_week

In [23]:
#dynamic

### Adding Hour column to the dataframe

In [24]:
hour = dynamic['Time']
hour_arr=[]

for i in hour:
    hour_arr.append(i.hour)
    
hour=pd.Series(hour_arr, dtype='category')
dynamic['Hour']=hour

In [25]:
#dynamic

### Filtering the dynamic df and creating subsets by day of the week 

In [26]:
monday=dynamic.loc[(dynamic['DayOfWeek'] == 0)]
tuesday=dynamic.loc[(dynamic['DayOfWeek'] == 1)]
wednesday=dynamic.loc[(dynamic['DayOfWeek'] == 2)]
thursday=dynamic.loc[(dynamic['DayOfWeek'] == 3)]
friday=dynamic.loc[(dynamic['DayOfWeek'] == 4)]
saturday=dynamic.loc[(dynamic['DayOfWeek'] == 5)]
sunday=dynamic.loc[(dynamic['DayOfWeek'] == 6)]

#days_of_week contains the subsets
days_of_week=[monday, tuesday, wednesday, thursday, friday, saturday, sunday]

In [27]:
#Creating a DataFrame only with the station_numbers to populate with the avges by day of week for each station
avg_station_weekly=pd.DataFrame(station_numbers, dtype='category', columns=['StationNumber'])

In [46]:
week_days=['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
count=0

for i in days_of_week:
    #Grouping the subsets by day of the week created by station number and performing sum() and count() of rows
    sum_day_of_week=i.groupby('StationNumber',as_index=False).sum()
    count_day_of_week=i.groupby('StationNumber',as_index=False).count()
    
    #Selecting only the available bikes columns
    sum_day_of_week=sum_day_of_week['AvailableBikes']
    count_day_of_week=count_day_of_week['AvailableBikes']
    #Computting the avg of available bikes from the sum and count columns
    avg_day_of_week=round(sum_day_of_week/count_day_of_week,0)
    
    #Adding the columns: sum, count and average to the new dataframe created: avg_station_weekly
    avg_station_weekly['sum_'+ str(week_days[count])]=sum_day_of_week
    avg_station_weekly['count_'+ str(week_days[count])]=count_day_of_week
    avg_station_weekly['avg_'+ str(week_days[count])]=avg_day_of_week
    count+=1
    
#avg_station_weekly

### Converting the dataframe into a dictionary

In [29]:
dict_avg_station={}
for ind in avg_station_weekly.index: 
    dict2={}
    dict2['monday']=avg_station_weekly['avg_monday'][ind]
    dict2['tuesday']=avg_station_weekly['avg_tuesday'][ind]
    dict2['wednesday']=avg_station_weekly['avg_wednesday'][ind]
    dict2['thursday']=avg_station_weekly['avg_thursday'][ind]
    dict2['friday']=avg_station_weekly['avg_friday'][ind]
    dict2['saturday']=avg_station_weekly['avg_saturday'][ind]
    dict2['sunday']=avg_station_weekly['avg_sunday'][ind] 
    dict_avg_station[str(avg_station_weekly['StationNumber'][ind])]=dict2

In [30]:
#dict_avg_station

### Exporting average by weekday to a json file

In [31]:
#import json
#with open('avg_weekday.json', 'w') as fp:
#    json.dump(dict_avg_station, fp)

In [62]:
#days_of_week

## Hourly chart

In [55]:
#hours_of_day will contain the subsets
weekdays=['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
hours_of_day=['six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fiveteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty', 'twentyOne']
in_numbers=[6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21]

count=0
for j in days_of_week:
    arr=[]
    for i in range(len(hours_of_day)):
        aux=j.loc[(j['Hour']==in_numbers[i])]
        arr.append(aux)
    weekdays[count]=arr
    count+=1

In [64]:
weekdays

[[       StationNumber         StationName  AvailableBikes  AvailableBikeStands  \
  130447            42    SMITHFIELD NORTH              27                    3   
  130448            54      CLONMEL STREET               9                   24   
  130449            56  MOUNT STREET LOWER              20                   20   
  130450             6  CHRISTCHURCH PLACE              12                    8   
  130451            18     GRANTHAM STREET              10                   20   
  ...              ...                 ...             ...                  ...   
  748949           116          BROADSTONE               6                   24   
  748953            73      FRANCIS STREET               3                   27   
  748958            66    NEW CENTRAL BANK              22                   18   
  748975            50        GEORGES LANE               2                   38   
  748991             3       BOLTON STREET               8                   12   
  
 

In [71]:
#Creating a DataFrame only with the station_numbers to populate with the avges by day of week for each station
avg_station_2=pd.DataFrame(station_numbers, dtype='category', columns=['StationNumber'])

In [79]:
for i in weekdays:
    for j in i:
        #Grouping the subsets by day of the week created by station number and performing sum() and count() of rows
        sum_1=j.groupby('StationNumber',as_index=False).sum()
        count_1=j.groupby('StationNumber',as_index=False).count()
        
        #Selecting only the available bikes columns
        sum_1=sum_1['AvailableBikes']
        count_1=count_1['AvailableBikes']
        #Computting the avg of available bikes from the sum and count columns
        avg_1=round(sum_1/count_1,0)
        
        

In [81]:
for ind in avg_1.index:
    print(avg_1[ind])
    print(ind)
    

8.0
0
7.0
1
12.0
2
13.0
3
11.0
4
13.0
5
12.0
6
7.0
7
10.0
8
7.0
9
5.0
10
6.0
11
4.0
12
7.0
13
8.0
14
15.0
15
14.0
16
13.0
17
9.0
18
11.0
19
7.0
20
3.0
21
8.0
22
4.0
23
11.0
24
19.0
25
2.0
26
6.0
27
11.0
28
7.0
29
19.0
30
13.0
31
11.0
32
33.0
33
7.0
34
9.0
35
6.0
36
23.0
37
13.0
38
13.0
39
18.0
40
7.0
41
18.0
42
12.0
43
9.0
44
13.0
45
8.0
46
6.0
47
10.0
48
3.0
49
15.0
50
8.0
51
15.0
52
6.0
53
12.0
54
11.0
55
13.0
56
15.0
57
6.0
58
15.0
59
20.0
60
18.0
61
26.0
62
19.0
63
13.0
64
9.0
65
12.0
66
8.0
67
14.0
68
14.0
69
22.0
70
17.0
71
14.0
72
16.0
73
11.0
74
20.0
75
13.0
76
10.0
77
17.0
78
17.0
79
20.0
80
7.0
81
10.0
82
24.0
83
29.0
84
23.0
85
22.0
86
12.0
87
13.0
88
14.0
89
11.0
90
4.0
91
9.0
92
16.0
93
9.0
94
6.0
95
8.0
96
9.0
97
10.0
98
11.0
99
20.0
100
16.0
101
7.0
102
11.0
103
13.0
104
5.0
105
6.0
106
21.0
107
4.0
108
6.0
109


In [77]:
dict_final={}

        
for ind in sum_1.index:
    for i in weekdays:
        dict_1={}
        for j in in_numbers:
            
    print(ind)
print(sum_1)


    dict3={}
    for i in in_numbers:
        dict3[int(i)]=avg_station_hourly['avg_'+str(i)][ind]
    
    dict_avg_station_hourly[str(avg_station_hourly['StationNumber'][ind])]=dict3

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
0      270
1      244
2      474
3      503
4      421
      ... 
105    177
106    246
107    790
108    151
109    225
Name: AvailableBikes, Length: 110, dtype: int64


In [67]:
for i in weekdays:
    for j in i:
        #Grouping the subsets by day of the week created by station number and performing sum() and count() of rows
        sum_1=j.groupby('StationNumber',as_index=False).sum()
        count_1=j.groupby('StationNumber',as_index=False).count()

        #Selecting only the available bikes columns
        sum_1=sum_1['AvailableBikes']
        count_1=count_1['AvailableBikes']
        #Computting the avg of available bikes from the sum and count columns
        avg_1=round(sum_1/count_1,0)

        #Adding the columns: sum, count and average to the new dataframe created: avg_station_weekly
        avg_station_2['sum_'+ str(week_days[count])]=sum_1
        avg_station_2['count_'+ str(week_days[count])]=count_day_of_week
        avg_station_2['avg_'+ str(week_days[count])]=avg_day_of_week
        count+=1

In [None]:
{StationNumb1:
     {monday:
          {6h:1, 7h:2, 8h:4,...}, 
      tuedays:
          {6h:1, 7h:2, 8h:4,...}}, 
 StationNumb2:
     {monday:
          {6h:1, 7h:2, 8h:4,...}, 
      tuedays:
          {6h:1, 7h:2, 8h:4,...}},
 ...}


{StationNumb1:
     {6h:1, 7h:2, 8h:4,...},
 StationNumb2:
     {6h:1, 7h:2, 8h:4,...},
 ...}

# Do not consider below
All the below work results in a json that groups the stations availability by time of day without considering the day of the week.

### Filtering the dynamic df and creating subsets by hour of day from 6am until 9pm

In [32]:
#hours_of_day will contain the subsets
hours_of_day=['six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fiveteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty', 'twentyOne']
in_numbers=[6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21]

#Changed hours_of_day
for i in range(len(hours_of_day)):
    hours_of_day[i]=dynamic.loc[(dynamic['Hour']==in_numbers[i])]

In [33]:
#Creating a DataFrame only with the station_numbers to populate with the avges by hour of the day for each station
avg_station_hourly=pd.DataFrame(station_numbers, dtype='category', columns=['StationNumber'])

In [34]:
hours_of_day_str=['six', 'seven', 'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen', 'fiveteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty', 'twentyOne']
count=0
for i in hours_of_day:
    #Grouping the subsets created by station number and performing sum() and count() of rows
    sum_hours=i.groupby('StationNumber',as_index=False).sum()
    count_hours=i.groupby('StationNumber',as_index=False).count()
    
    #Selecting only the available bikes columns
    sum_hours=sum_hours['AvailableBikes']
    count_hours=count_hours['AvailableBikes']
    #Computting the avg of available bikes from the sum and count columns
    avg_hours=round(sum_hours/count_hours,0)
    
    #Adding the columns: sum, count and average to the new dataframe created: avg_station_hourly
    avg_station_hourly['sum_'+ str(in_numbers[count])]=sum_hours
    avg_station_hourly['count_'+ str(in_numbers[count])]=count_hours
    avg_station_hourly['avg_'+ str(in_numbers[count])]=avg_hours
    count+=1
    
avg_station_hourly

Unnamed: 0,StationNumber,sum_6,count_6,avg_6,sum_7,count_7,avg_7,sum_8,count_8,avg_8,...,avg_18,sum_19,count_19,avg_19,sum_20,count_20,avg_20,sum_21,count_21,avg_21
0,2,2554,277,9.0,2383,295,8.0,2067,303,7.0,...,8.0,2334,286,8.0,2280,270,8.0,2397,267,9.0
1,3,1787,287,6.0,1741,283,6.0,1705,313,5.0,...,6.0,1969,293,7.0,2219,299,7.0,2136,281,8.0
2,4,2561,280,9.0,2660,299,9.0,2872,333,9.0,...,7.0,2114,285,7.0,2132,270,8.0,2302,262,9.0
3,5,6801,327,21.0,5642,352,16.0,4185,383,11.0,...,11.0,5508,376,15.0,5480,329,17.0,5575,333,17.0
4,6,1185,281,4.0,1364,292,5.0,1422,315,5.0,...,6.0,1941,297,7.0,1875,280,7.0,1840,282,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,113,1089,262,4.0,1421,271,5.0,2144,292,7.0,...,7.0,1336,265,5.0,1293,259,5.0,1266,255,5.0
106,114,1773,292,6.0,2343,318,7.0,4191,344,12.0,...,5.0,1597,284,6.0,1385,268,5.0,1368,262,5.0
107,115,6462,309,21.0,5677,326,17.0,4277,338,13.0,...,18.0,6280,317,20.0,6458,306,21.0,5976,276,22.0
108,116,1284,243,5.0,1232,239,5.0,1157,241,5.0,...,5.0,1227,239,5.0,1266,237,5.0,1235,235,5.0


### Converting the dataframe into a dictionary

In [39]:
dict_avg_station_hourly={}

for ind in avg_station_hourly.index:
    dict3={}
    for i in in_numbers:
        dict3[int(i)]=avg_station_hourly['avg_'+str(i)][ind]
    
    dict_avg_station_hourly[str(avg_station_hourly['StationNumber'][ind])]=dict3

In [40]:
in_numbers

[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]

In [41]:
dict_avg_station_hourly

{'2': {6: 9.0,
  7: 8.0,
  8: 7.0,
  9: 6.0,
  10: 5.0,
  11: 6.0,
  12: 6.0,
  13: 6.0,
  14: 6.0,
  15: 6.0,
  16: 6.0,
  17: 7.0,
  18: 8.0,
  19: 8.0,
  20: 8.0,
  21: 9.0},
 '3': {6: 6.0,
  7: 6.0,
  8: 5.0,
  9: 5.0,
  10: 6.0,
  11: 6.0,
  12: 6.0,
  13: 8.0,
  14: 8.0,
  15: 8.0,
  16: 6.0,
  17: 5.0,
  18: 6.0,
  19: 7.0,
  20: 7.0,
  21: 8.0},
 '4': {6: 9.0,
  7: 9.0,
  8: 9.0,
  9: 10.0,
  10: 11.0,
  11: 10.0,
  12: 9.0,
  13: 8.0,
  14: 8.0,
  15: 8.0,
  16: 8.0,
  17: 7.0,
  18: 7.0,
  19: 7.0,
  20: 8.0,
  21: 9.0},
 '5': {6: 21.0,
  7: 16.0,
  8: 11.0,
  9: 13.0,
  10: 14.0,
  11: 13.0,
  12: 12.0,
  13: 11.0,
  14: 11.0,
  15: 11.0,
  16: 11.0,
  17: 10.0,
  18: 11.0,
  19: 15.0,
  20: 17.0,
  21: 17.0},
 '6': {6: 4.0,
  7: 5.0,
  8: 5.0,
  9: 5.0,
  10: 6.0,
  11: 6.0,
  12: 6.0,
  13: 6.0,
  14: 7.0,
  15: 8.0,
  16: 7.0,
  17: 6.0,
  18: 6.0,
  19: 7.0,
  20: 7.0,
  21: 7.0},
 '7': {6: 12.0,
  7: 11.0,
  8: 9.0,
  9: 8.0,
  10: 9.0,
  11: 9.0,
  12: 10.0,
  13: 10.0

### Exporting average hourly to a json file

In [42]:
#import json
#with open('avg_hourly.json', 'w') as fp:
#    json.dump(dict_avg_station_hourly, fp)