## Build the Model on Hour Data

In [93]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from sklearn.preprocessing import OneHotEncoder, StandardScaler
OHE = OneHotEncoder(sparse=False)
pd.set_option('display.max_rows', 30)

In [135]:
# 月日如果用起来要one-hot，之后产生的feature就太多了而且感觉没必要，可能考虑is_special更合适
# 但是前后依赖这里怎么办呢？
# train[['month','day']] = train.date.str.split('-', expand=True)
def get_x_y(df):
    data = df.drop(columns=['Unnamed: 0', 'route', 'calculated_day', 'date'])
    ohe = OHE.fit_transform(data[['hr', 'weekday']])
    ohe_df = pd.DataFrame(ohe, columns= OHE.get_feature_names(input_features=['hr', 'weekday']))
    data = pd.concat([data, ohe_df],axis = 1)
    data = data.drop(columns=['hr', 'weekday'])
    x = data.drop(columns=['label'])
    y = data.label
    return x, y

In [136]:
def mape_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true))


def scoring(reg, x, y):
    pred = reg.predict(x)
    return -mape_error(pred, y)

In [226]:

train_path = '/Users/vayne/Desktop/dm_pro_engin/data_feature/A_2.csv'
test_path = '/Users/vayne/Desktop/dm_pro_engin/data_testing/A_2.csv'

train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

train_x, train_y = get_x_y(train)
test_x, test_y = get_x_y(test)

gdbt = GradientBoostingRegressor(n_estimators=20, learning_rate=0.1, 
                                 max_depth=5, random_state=0, loss='ls').fit(train_x, train_y)

print(mape_error(train_y, gdbt.predict(train_x)))
print(mape_error(test_y, gdbt.predict(test_x)))
result = gdbt.predict(train_x)
train_compare = pd.concat([train_y, pd.Series(result)],axis = 1)
print(train_compare)

result = gdbt.predict(test_x)
test_compare = pd.concat([test_y, pd.Series(result)],axis = 1)
print(test_compare)


0.10156371858535118
0.09358656422206517
          label           0
0     75.086667   77.751887
1    148.053333  109.006251
2    109.863333  103.148080
3     74.633333   85.061149
4     88.636667   95.207010
..          ...         ...
318   67.673333   70.154831
319   67.823333   70.357398
320   69.840000   72.479203
321   62.610000   67.267237
322   58.156667   67.834917

[323 rows x 2 columns]
        label          0
0   75.250000  86.068210
1   95.980000  96.529341
2   80.773333  91.905146
3   62.000000  71.983099
4   63.230000  72.099899
5   62.546667  68.287137
6   73.080000  92.636122
7   80.073333  76.974327
8   80.103333  76.510126
9   89.450000  76.820165
10  85.500000  72.537016
11  71.763333  76.740076
12  71.486667  67.862360
13  68.963333  76.842057
14  68.896667  77.791250
15  61.296667  68.313042
16  77.910000  67.574247
17  81.890000  76.628883
18  70.700000  73.814270
19  75.013333  71.590816
20  69.283333  73.953121
21  66.796667  77.621574
22  67.853333  70.478879


In [222]:
train_path = '/Users/vayne/Desktop/dm_pro_engin/data_feature/A_3.csv'
test_path = '/Users/vayne/Desktop/dm_pro_engin/data_testing/A_3.csv'

train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

train_x, train_y = get_x_y(train)
test_x, test_y = get_x_y(test)

gdbt = GradientBoostingRegressor(n_estimators=28, learning_rate=0.1, 
                                 max_depth=6, random_state=0, loss='ls').fit(train_x, train_y)

print(mape_error(train_y, gdbt.predict(train_x)))
print(mape_error(test_y, gdbt.predict(test_x)))
result = gdbt.predict(train_x)
train_compare = pd.concat([train_y, pd.Series(result)],axis = 1)
print(train_compare)

result = gdbt.predict(test_x)
test_compare = pd.concat([test_y, pd.Series(result)],axis = 1)
print(test_compare)

0.07052904103188902
0.1630436689624688
          label           0
0    146.513333  139.636393
1    203.340000  205.794151
2    168.586667  168.009667
3    139.636667  138.003207
4    108.560000  125.801834
..          ...         ...
314  110.613333  118.334056
315  140.670000  124.462189
316  257.630000  227.953294
317  137.190000  135.407250
318  129.810000  134.114770

[319 rows x 2 columns]
         label           0
0   206.073333  212.201100
1   324.796667  240.431903
2   217.020000  162.959171
3   256.776667  155.864105
4   143.293333  154.573919
5    97.700000  118.381734
6   227.373333  249.755215
7   185.443333  214.593862
8   285.266667  236.655786
9   215.743333  174.013771
10  227.686667  151.456359
11  129.380000  156.659107
12  105.200000  149.517053
13  167.313333  229.883268
14  126.723333  134.515129
15  129.363333  122.711113
16  130.300000  131.593388
17  121.636667  120.550261
18  116.326667  120.891190
19  110.940000  124.104691
20  113.813333  145.210328
21  118

In [224]:
train_path = '/Users/vayne/Desktop/dm_pro_engin/data_feature/B_1.csv'
test_path = '/Users/vayne/Desktop/dm_pro_engin/data_testing/B_1.csv'

train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

train_x, train_y = get_x_y(train)
test_x, test_y = get_x_y(test)

gdbt = GradientBoostingRegressor(n_estimators=30, learning_rate=0.1, 
                                 max_depth=5, random_state=0, loss='ls').fit(train_x, train_y)

print(mape_error(train_y, gdbt.predict(train_x)))
print(mape_error(test_y, gdbt.predict(test_x)))
result = gdbt.predict(train_x)
train_compare = pd.concat([train_y, pd.Series(result)],axis = 1)
print(train_compare)

result = gdbt.predict(test_x)
test_compare = pd.concat([test_y, pd.Series(result)],axis = 1)
print(test_compare)

0.10428539027773352
0.14568259200451397
          label           0
0    100.320000  119.374242
1    212.310000  184.451290
2    129.945000  130.144409
3    118.616667  119.082394
4    118.785000  113.599178
..          ...         ...
260  148.810000  138.090744
261  203.920000  218.369759
262  111.363333  122.043876
263  117.240000  129.959749
264  120.330000  126.618026

[265 rows x 2 columns]
         label           0
0   111.033333  127.406938
1   127.316667  132.188309
2   110.166667  121.923471
3   135.886667  123.471223
4   109.650000  130.303506
5   125.720000  106.196324
6   112.630000  122.692248
7   111.406667  135.758423
8   132.586667  135.248170
9   119.023333  119.962195
10  111.993333  125.193656
11  133.820000  133.932577
12  148.145000  117.459167
13  110.536667  124.414681
14  141.666667  111.134857
15  117.930000  125.560530
16  133.916667  144.363266
17  198.110000  204.250712
18  142.356667  122.499129
19  105.203333  137.226967
20  122.140000  123.066782
21  14

In [227]:
train_path = '/Users/vayne/Desktop/dm_pro_engin/data_feature/B_3.csv'
test_path = '/Users/vayne/Desktop/dm_pro_engin/data_testing/B_3.csv'

train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

train_x, train_y = get_x_y(train)
test_x, test_y = get_x_y(test)

gdbt = GradientBoostingRegressor(n_estimators=30, learning_rate=0.1, 
                                 max_depth=5, random_state=0, loss='ls').fit(train_x, train_y)

print(mape_error(train_y, gdbt.predict(train_x)))
print(mape_error(test_y, gdbt.predict(test_x)))
result = gdbt.predict(train_x)
train_compare = pd.concat([train_y, pd.Series(result)],axis = 1)
print(train_compare)

result = gdbt.predict(test_x)
test_compare = pd.concat([test_y, pd.Series(result)],axis = 1)
print(test_compare)

0.08672401539461208
0.14524607907523132
          label           0
0    147.723333  137.732257
1    148.253333  146.078392
2    107.110000  112.224554
3    155.416667  138.515923
4    103.586667  123.204382
..          ...         ...
316  102.470000  106.390585
317  118.543333  111.845883
318   94.883333  102.511725
319  113.926667  105.777372
320   79.440000  101.432433

[321 rows x 2 columns]
         label           0
0   113.593333  103.096451
1   111.976667   95.736438
2    94.890000  108.295287
3    95.633333  114.879182
4    93.423333  121.955908
5    94.453333   99.932858
6   108.876667  112.820409
7   133.596667  107.484395
8   116.496667   99.574284
9    93.263333  112.701927
10   97.956667  120.150317
11  107.416667  136.190645
12   96.690000  104.732387
13  136.133333  120.775673
14   94.173333  103.642846
15  104.376667  103.822312
16   99.193333  136.795616
17  107.746667  103.891503
18  104.146667  117.863137
19   94.710000  100.700345
20   95.560000  101.847586
21  11

In [204]:
train_path = '/Users/vayne/Desktop/dm_pro_engin/data_feature/C_1.csv'
test_path = '/Users/vayne/Desktop/dm_pro_engin/data_testing/C_1.csv'

train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

train_x, train_y = get_x_y(train)
test_x, test_y = get_x_y(test)

gdbt = GradientBoostingRegressor(n_estimators=8, learning_rate=0.1, 
                                 max_depth=5, random_state=1, loss='ls').fit(train_x, train_y)

print(mape_error(train_y, gdbt.predict(train_x)))
print(mape_error(test_y, gdbt.predict(test_x)))
result = gdbt.predict(train_x)
train_compare = pd.concat([train_y, pd.Series(result)],axis = 1)
print(train_compare)

result = gdbt.predict(test_x)
test_compare = pd.concat([test_y, pd.Series(result)],axis = 1)
print(test_compare)

0.13230485192406172
0.19585555171623992
          label           0
0    140.245000  176.741228
1    231.923333  196.696115
2    146.070000  174.691842
3    190.573333  169.631021
4    207.370000  198.804284
..          ...         ...
270  282.260000  207.299104
271  230.510000  216.360554
272  198.123333  186.413262
273  266.680000  223.140409
274  267.110000  213.325970

[275 rows x 2 columns]
         label           0
0   158.785000  184.290946
1   162.683333  178.591640
2   193.376667  180.628192
3   170.245000  170.812322
4   171.540000  187.730217
5   166.753333  177.119906
6   158.060000  185.567379
7   146.846667  184.290946
8   210.020000  178.591640
9   157.720000  203.614958
10  168.473333  170.812322
11  187.665000  187.730217
12  270.883333  177.119906
13  171.320000  185.567379
14  280.493333  195.236339
15  207.370000  173.943350
16  269.365000  190.148895
17  404.243333  194.380886
18  266.843333  195.969729
19  226.350000  203.376835
20  168.280000  177.004005
21  27

In [237]:
train_path = '/Users/vayne/Desktop/dm_pro_engin/data_feature/C_3.csv'
test_path = '/Users/vayne/Desktop/dm_pro_engin/data_testing/C_3.csv'

train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

train_x, train_y = get_x_y(train)
test_x, test_y = get_x_y(test)

gdbt = GradientBoostingRegressor(n_estimators=30, learning_rate=0.1, 
                                 max_depth=5, random_state=1, loss='ls').fit(train_x, train_y)

print(mape_error(train_y, gdbt.predict(train_x)))
print(mape_error(test_y, gdbt.predict(test_x)))
result = gdbt.predict(train_x)
train_compare = pd.concat([train_y, pd.Series(result)],axis = 1)
print(train_compare)

result = gdbt.predict(test_x)
test_compare = pd.concat([test_y, pd.Series(result)],axis = 1)
print(test_compare)

0.10786627146418085
0.1694844851921848
          label           0
0    132.050000  152.067457
1    219.150000  191.975288
2    166.150000  172.776136
3    161.786667  191.049192
4    207.950000  204.496302
..          ...         ...
251  178.660000  186.444201
252  152.910000  188.984838
253  250.595000  195.149026
254  142.770000  175.902284
255  350.115000  230.203302

[256 rows x 2 columns]
         label           0
0   194.430000  190.127255
1   188.860000  198.920339
2   168.170000  202.238012
3   169.465000  175.393228
4   196.430000  169.789290
5          NaN  193.531890
6   141.630000  181.997898
7   179.806667  181.322388
8   173.563333  201.185904
9   178.855000  191.573192
10  131.273333  176.444455
11  147.816667  173.413913
12  152.610000  193.303559
13  155.760000  183.049125
14  189.830000  168.015589
15  147.035000  181.965882
16  151.936667  193.960496
17  224.380000  178.851561
18  204.046667  188.254823
19  199.775000  180.766729
20  146.396667  176.583275
21  144

## Read the Real Value at Each Time Window

In [239]:


import math
from datetime import datetime,timedelta
import numpy as np
import pandas as pd
names=locals()

link_time={}
time_train={}
time_predict={}
time_check={}
test_time_train={}
test_time_check={}
weathers={}
rainingTotalTime={}

file_path='/Users/vayne/Desktop/DM_Project_0522due/dataSet_phase2/table5.csv'
# Step 1: Load trajectories
fr = open(file_path, 'r')
fr.readline()  # skip the header
traj_data = fr.readlines()
fr.close()

# 建立各Link時間的dictionary
for i in range(24):
    link_time[str(i+100)]={}
    # Step 2: Create a dictionary to store travel time for each route per time window
    travel_times = {}  # key: route_id. Value is also a dictionary of which key is the start time for the time window and value is a list of travel times
    for i in range(len(traj_data)):
        each_traj = traj_data[i].replace('"', '').split(',')
        intersection_id = each_traj[0]
        tollgate_id = each_traj[1]

        route_id = intersection_id + '_' + tollgate_id
        if route_id not in travel_times.keys():
            travel_times[route_id] = {}

        trace_start_time = each_traj[3]
        travel_seq = each_traj[4]
        trace_start_time = datetime.strptime(trace_start_time, "%Y-%m-%d %H:%M:%S")
        time_window_minute = math.floor(trace_start_time.minute / 20) * 20
        start_time_window = datetime(trace_start_time.year, trace_start_time.month, trace_start_time.day,
                                     trace_start_time.hour, time_window_minute, 0)
        time=start_time_window
        tt = float(each_traj[-1])
        # 國慶不管，因道路狀態不一樣
        if(time.month==10 and time.day in [1,2,3,4,5,6,7]):
            continue
        # 中秋節不管，因道路狀態不一樣
        if(time.month==9 and time.day in [15,16,17]):
            continue
        if start_time_window not in travel_times[route_id].keys():
            travel_times[route_id][start_time_window] = [tt]
        else:
            travel_times[route_id][start_time_window].append(tt)

In [240]:
real_value=[]
real_value_dict={}
fw = open('/Users/vayne/Desktop/dm_pro_engin/real_value.csv', 'w')
fw.writelines(','.join(['"intersection_id"', '"tollgate_id"', '"time_window"', '"avg_travel_time"']) + '\n')
for route in travel_times.keys():
    if route not in real_value_dict.keys():
        real_value_dict[route] = {}
    route_time_windows = list(travel_times[route].keys())
    route_time_windows.sort()
    for time_window_start in route_time_windows:
        time_window_end = time_window_start + timedelta(minutes=20)
        tt_set = travel_times[route][time_window_start]
        avg_tt = round(sum(tt_set) / float(len(tt_set)), 2)
        out_line =[str(route),str(time_window_start), str(avg_tt)]
        real_value.append(out_line)    
        
        

In [243]:
real_value

[['B_3', datetime.datetime(2016, 10, 18, 0, 0), '64.69'],
 ['B_3', datetime.datetime(2016, 10, 18, 0, 40), '75.37'],
 ['B_3', datetime.datetime(2016, 10, 18, 1, 40), '70.14'],
 ['B_3', datetime.datetime(2016, 10, 18, 2, 0), '96.03'],
 ['B_3', datetime.datetime(2016, 10, 18, 3, 40), '76.88'],
 ['B_3', datetime.datetime(2016, 10, 18, 6, 0), '54.75'],
 ['B_3', datetime.datetime(2016, 10, 18, 6, 20), '121.02'],
 ['B_3', datetime.datetime(2016, 10, 18, 6, 40), '94.04'],
 ['B_3', datetime.datetime(2016, 10, 18, 7, 0), '120.6'],
 ['B_3', datetime.datetime(2016, 10, 18, 7, 20), '73.4'],
 ['B_3', datetime.datetime(2016, 10, 18, 7, 40), '89.91'],
 ['B_3', datetime.datetime(2016, 10, 18, 8, 0), '126.4'],
 ['B_3', datetime.datetime(2016, 10, 18, 8, 20), '115.52'],
 ['B_3', datetime.datetime(2016, 10, 18, 8, 40), '98.86'],
 ['B_3', datetime.datetime(2016, 10, 18, 9, 0), '131.15'],
 ['B_3', datetime.datetime(2016, 10, 18, 9, 20), '151.95'],
 ['B_3', datetime.datetime(2016, 10, 18, 9, 40), '117.69'],

In [246]:
real_value_needed=[]
for i in real_value:
    if i[1].hour in [8,9,17,18]:
        real_value_needed.append(i)
        

In [249]:
#convert to dataframe
feature=['route','time','label']
real_value_df=pd.DataFrame(real_value_needed,columns=feature)


In [250]:
real_value_df

Unnamed: 0,route,time,label
0,B_3,2016-10-18 08:00:00,126.4
1,B_3,2016-10-18 08:20:00,115.52
2,B_3,2016-10-18 08:40:00,98.86
3,B_3,2016-10-18 09:00:00,131.15
4,B_3,2016-10-18 09:20:00,151.95
...,...,...,...
456,B_1,2016-10-24 17:00:00,114.14
457,B_1,2016-10-24 17:20:00,128.66
458,B_1,2016-10-24 17:40:00,123.62
459,B_1,2016-10-24 18:00:00,146.24


## To Find the Ratio

In [436]:
#find the travel time at each time window
file_path='/Users/vayne/Desktop/dm_pro_engin/phase1_training/table5.csv'

# Step 1: Load trajectories
fr = open(file_path, 'r')
fr.readline()  # skip the header
traj_data = fr.readlines()
fr.close()

# 建立各Link時間的dictionary
for i in range(24):
    link_time[str(i+100)]={}
# Step 2: Create a dictionary to store travel time for each route per time window
travel_times = {}  # key: route_id. Value is also a dictionary of which key is the start time for the time window and value is a list of travel times
for i in range(len(traj_data)):
    each_traj = traj_data[i].replace('"', '').split(',')
    intersection_id = each_traj[0]
    tollgate_id = each_traj[1]

    route_id = intersection_id + '_' + tollgate_id
    if route_id not in travel_times.keys():
        travel_times[route_id] = {}

    trace_start_time = each_traj[3]
    travel_seq = each_traj[4]
    trace_start_time = datetime.strptime(trace_start_time, "%Y-%m-%d %H:%M:%S")
    time_window_minute = math.floor(trace_start_time.minute / 20) * 20
    start_time_window = datetime(trace_start_time.year, trace_start_time.month, trace_start_time.day,
                                     trace_start_time.hour, time_window_minute, 0)
    time=start_time_window
    tt = float(each_traj[-1])
    # 國慶不管，因道路狀態不一樣
    if(time.month==10 and time.day in [1,2,3,4,5,6,7]):
        continue
    # 中秋節不管，因道路狀態不一樣
    if(time.month==9 and time.day in [15,16,17]):
        continue
    if start_time_window not in travel_times[route_id].keys():
        travel_times[route_id][start_time_window] = [tt]
    else:
        travel_times[route_id][start_time_window].append(tt)
    


In [437]:
travel_times.keys()

dict_keys(['B_3', 'B_1', 'A_2', 'C_3', 'A_3', 'C_1'])

In [438]:
B_1_travel_times=travel_times['B_1']

In [439]:
B_1_travel_times

{datetime.datetime(2016, 7, 19, 0, 20): [79.76],
 datetime.datetime(2016, 7, 19, 0, 40): [137.98],
 datetime.datetime(2016, 7, 19, 1, 20): [176.7],
 datetime.datetime(2016, 7, 19, 4, 20): [94.06],
 datetime.datetime(2016, 7, 19, 5, 20): [66.98],
 datetime.datetime(2016, 7, 19, 5, 40): [87.83],
 datetime.datetime(2016, 7, 19, 6, 0): [170.09],
 datetime.datetime(2016, 7, 19, 7, 0): [65.41],
 datetime.datetime(2016, 7, 19, 8, 20): [128.75],
 datetime.datetime(2016, 7, 19, 8, 40): [59.25, 73.48, 82.95],
 datetime.datetime(2016, 7, 19, 9, 0): [164.9, 68.96000000000001, 297.46],
 datetime.datetime(2016, 7, 19, 9, 20): [104.33],
 datetime.datetime(2016, 7, 19, 10, 0): [73.45, 173.09, 151.49],
 datetime.datetime(2016, 7, 19, 10, 20): [120.52, 220.67000000000002],
 datetime.datetime(2016, 7, 19, 10, 40): [79.57],
 datetime.datetime(2016, 7, 19, 11, 0): [54.46],
 datetime.datetime(2016, 7, 19, 12, 0): [145.29],
 datetime.datetime(2016, 7, 19, 12, 20): [77.44, 65.28999999999999],
 datetime.dateti

In [440]:
B_1_travel_times_need={}
for i in B_1_travel_times:
    if i.hour in [7,8,9,16,17,18]:
        if i not in B_1_travel_times_need:
            B_1_travel_times_need[i]=[]
        if i in B_1_travel_times_need:
            B_1_travel_times_need[i].extend(B_1_travel_times[i])
        

In [441]:
B_1_travel_times_need

{datetime.datetime(2016, 7, 19, 7, 0): [65.41],
 datetime.datetime(2016, 7, 19, 8, 20): [128.75],
 datetime.datetime(2016, 7, 19, 8, 40): [59.25, 73.48, 82.95],
 datetime.datetime(2016, 7, 19, 9, 0): [164.9, 68.96000000000001, 297.46],
 datetime.datetime(2016, 7, 19, 9, 20): [104.33],
 datetime.datetime(2016, 7, 19, 16, 20): [105.64],
 datetime.datetime(2016, 7, 19, 16, 40): [122.31],
 datetime.datetime(2016, 7, 19, 18, 20): [382.03],
 datetime.datetime(2016, 7, 20, 7, 0): [188.9, 86.75],
 datetime.datetime(2016, 7, 20, 7, 20): [82.38, 114.02],
 datetime.datetime(2016, 7, 20, 7, 40): [94.11],
 datetime.datetime(2016, 7, 20, 8, 0): [212.31],
 datetime.datetime(2016, 7, 20, 9, 0): [137.38, 158.96, 178.29, 131.13],
 datetime.datetime(2016, 7, 20, 9, 20): [115.11],
 datetime.datetime(2016, 7, 20, 9, 40): [157.07, 153.74],
 datetime.datetime(2016, 7, 20, 16, 0): [141.65, 148.01],
 datetime.datetime(2016, 7, 21, 7, 40): [88.63, 100.94, 109.31],
 datetime.datetime(2016, 7, 21, 8, 0): [124.710

In [442]:
B_1_ratio_8_9={}
B_1_ratio_9_10={}
B_1_ratio_17_18={}
B_1_ratio_18_19={}

date_key=[236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327]

In [443]:
for i in date_key:
    B_1_ratio_8_9[i]={}
    B_1_ratio_9_10[i]={}
    B_1_ratio_17_18[i]={}
    B_1_ratio_18_19[i]={}
    
    for j in [7*60+40,8*60+0,8*60+20,8*60+40,9*60]:
        B_1_ratio_8_9[i][j]=[]
    
    for j in [8*60+40,9*60,9*60+20,9*60+40,10*60]:
        B_1_ratio_9_10[i][j]=[]
        
    for j in [16*60+40,17*60+0,17*60+20,17*60+40,18*60+0]:
        B_1_ratio_17_18[i][j]=[]
        
    for j in [17*60+40,18*60+0,18*60+20,18*60+40,19*60+0]:
        B_1_ratio_18_19[i][j]=[]
  

In [444]:
B_1_ratio_8_9

{236: {460: [], 480: [], 500: [], 520: [], 540: []},
 237: {460: [], 480: [], 500: [], 520: [], 540: []},
 238: {460: [], 480: [], 500: [], 520: [], 540: []},
 239: {460: [], 480: [], 500: [], 520: [], 540: []},
 240: {460: [], 480: [], 500: [], 520: [], 540: []},
 241: {460: [], 480: [], 500: [], 520: [], 540: []},
 242: {460: [], 480: [], 500: [], 520: [], 540: []},
 243: {460: [], 480: [], 500: [], 520: [], 540: []},
 244: {460: [], 480: [], 500: [], 520: [], 540: []},
 245: {460: [], 480: [], 500: [], 520: [], 540: []},
 246: {460: [], 480: [], 500: [], 520: [], 540: []},
 247: {460: [], 480: [], 500: [], 520: [], 540: []},
 248: {460: [], 480: [], 500: [], 520: [], 540: []},
 249: {460: [], 480: [], 500: [], 520: [], 540: []},
 250: {460: [], 480: [], 500: [], 520: [], 540: []},
 251: {460: [], 480: [], 500: [], 520: [], 540: []},
 252: {460: [], 480: [], 500: [], 520: [], 540: []},
 253: {460: [], 480: [], 500: [], 520: [], 540: []},
 254: {460: [], 480: [], 500: [], 520: [], 540

In [445]:
for i in B_1_travel_times_need:
    temp_date=i.month*31+i.day
    temp_hour=i.hour
    temp_min=i.minute
    
    
    if temp_hour*60+temp_min>=460 and temp_hour*60+temp_min<=540:
        B_1_ratio_8_9[temp_date][int(temp_hour*60+temp_min)].append(B_1_travel_times_need[i])
        
    if temp_hour*60+temp_min>=520 and temp_hour*60+temp_min<=600:
        B_1_ratio_9_10[temp_date][int(temp_hour*60+temp_min)].append(B_1_travel_times_need[i])
    
    if temp_hour*60+temp_min>=16*60+40 and temp_hour*60+temp_min<=18*60:
        B_1_ratio_17_18[temp_date][int(temp_hour*60+temp_min)].append(B_1_travel_times_need[i])
        
    if temp_hour*60+temp_min>=17*60+40 and temp_hour*60+temp_min<=19*60:
        B_1_ratio_18_19[temp_date][int(temp_hour*60+temp_min)].append(B_1_travel_times_need[i])
        
  
    
    
        
    
    #print(B_1_travel_times_need[i])
    
    
    
    
    

In [446]:
 B_1_ratio_8_9

{236: {460: [],
  480: [],
  500: [[128.75]],
  520: [[59.25, 73.48, 82.95]],
  540: [[164.9, 68.96000000000001, 297.46]]},
 237: {460: [[94.11]],
  480: [[212.31]],
  500: [],
  520: [],
  540: [[137.38, 158.96, 178.29, 131.13]]},
 238: {460: [[88.63, 100.94, 109.31]],
  480: [[124.71000000000001, 230.72, 97.64]],
  500: [[108.87]],
  520: [],
  540: [[168.18, 178.55]]},
 239: {460: [[124.84]],
  480: [],
  500: [],
  520: [],
  540: [[100.83, 79.24, 98.4]]},
 240: {460: [[162.62, 112.34]],
  480: [[75.11]],
  500: [[96.02]],
  520: [[184.72]],
  540: [[124.46000000000001]]},
 241: {460: [],
  480: [],
  500: [[124.26, 130.29, 162.4]],
  520: [[120.06, 77.12]],
  540: [[80.05, 147.34]]},
 242: {460: [[134.92000000000002]],
  480: [[98.76]],
  500: [[89.67, 111.07]],
  520: [[153.2, 55.85]],
  540: [[130.32999999999998, 31.52]]},
 243: {460: [], 480: [], 500: [], 520: [], 540: [[123.18]]},
 244: {460: [],
  480: [[154.18, 263.5]],
  500: [],
  520: [[149.16]],
  540: [[102.28]]},
 245:

In [447]:
for i in date_key:
    k=0
    if B_1_ratio_8_9[i][480]==[]:k+=1
    if B_1_ratio_8_9[i][500]==[]:k+=1
    if B_1_ratio_8_9[i][520]==[]:k+=1 
    if k>1:del B_1_ratio_8_9[i]

In [448]:
for i in date_key:
    k=0
    if B_1_ratio_9_10[i][540]==[]:k+=1
    if B_1_ratio_9_10[i][560]==[]:k+=1
    if B_1_ratio_9_10[i][580]==[]:k+=1 
    if k>1:del B_1_ratio_9_10[i]

In [449]:
for i in date_key:
    k=0
    if B_1_ratio_17_18[i][17*60]==[]:k+=1 
    if B_1_ratio_17_18[i][17*60+20]==[]:k+=1 
    if B_1_ratio_17_18[i][17*60+40]==[]:k+=1
    if k>1:del B_1_ratio_17_18[i]

In [450]:
for i in date_key:
    k=0
    if B_1_ratio_18_19[i][18*60]==[]:k+=1 
    if B_1_ratio_18_19[i][18*60+20]==[]:k+=1 
    if B_1_ratio_18_19[i][18*60+40]==[]:k+=1
    if k>1:del B_1_ratio_18_19[i]

In [451]:
for i in B_1_ratio_8_9:
    for k in [480,500,520]:
        if B_1_ratio_8_9[i][k]==[] and B_1_ratio_8_9[i][k-20]!=[] and B_1_ratio_8_9[i][k+20]!=[]:
            B_1_ratio_8_9[i][k]=[np.mean([np.mean(B_1_ratio_8_9[i][k-20]),np.mean(B_1_ratio_8_9[i][k+20])])]
            
   

In [452]:
for i in B_1_ratio_9_10:
    for k in [9*60,9*60+20,9*60+40]:
        if B_1_ratio_9_10[i][k]==[] and B_1_ratio_9_10[i][k-20]!=[] and B_1_ratio_9_10[i][k+20]!=[]:
            B_1_ratio_9_10[i][k]=[np.mean([np.mean(B_1_ratio_9_10[i][k-20]),np.mean(B_1_ratio_9_10[i][k+20])])]

In [453]:
for i in B_1_ratio_17_18:
    for k in [17*60,17*60+20,17*60+40]:
        if B_1_ratio_17_18[i][k]==[] and B_1_ratio_17_18[i][k-20]!=[] and B_1_ratio_17_18[i][k+20]!=[]:
            B_1_ratio_17_18[i][k]=[np.mean([np.mean(B_1_ratio_17_18[i][k-20]),np.mean(B_1_ratio_17_18[i][k+20])])]

In [454]:
for i in B_1_ratio_18_19:
    for k in [18*60,18*60+20,18*60+40]:
        if B_1_ratio_18_19[i][k]==[] and B_1_ratio_18_19[i][k-20]!=[] and B_1_ratio_18_19[i][k+20]!=[]:
            B_1_ratio_18_19[i][k]=[np.mean([np.mean(B_1_ratio_18_19[i][k-20]),np.mean(B_1_ratio_18_19[i][k+20])])]

In [455]:
B_1_ratio_8_9

{236: {460: [],
  480: [],
  500: [[128.75]],
  520: [[59.25, 73.48, 82.95]],
  540: [[164.9, 68.96000000000001, 297.46]]},
 238: {460: [[88.63, 100.94, 109.31]],
  480: [[124.71000000000001, 230.72, 97.64]],
  500: [[108.87]],
  520: [141.1175],
  540: [[168.18, 178.55]]},
 240: {460: [[162.62, 112.34]],
  480: [[75.11]],
  500: [[96.02]],
  520: [[184.72]],
  540: [[124.46000000000001]]},
 241: {460: [],
  480: [],
  500: [[124.26, 130.29, 162.4]],
  520: [[120.06, 77.12]],
  540: [[80.05, 147.34]]},
 242: {460: [[134.92000000000002]],
  480: [[98.76]],
  500: [[89.67, 111.07]],
  520: [[153.2, 55.85]],
  540: [[130.32999999999998, 31.52]]},
 244: {460: [],
  480: [[154.18, 263.5]],
  500: [179.0],
  520: [[149.16]],
  540: [[102.28]]},
 246: {460: [],
  480: [[200.01]],
  500: [[140.15]],
  520: [[75.93, 97.32]],
  540: [[132.95]]},
 247: {460: [[247.96]],
  480: [[139.21, 55.93]],
  500: [[107.16]],
  520: [165.345],
  540: [[223.53]]},
 248: {460: [[101.74]],
  480: [99.1687499999

In [456]:
for i in B_1_ratio_8_9:
    del B_1_ratio_8_9[i][8*60-20]
    del B_1_ratio_8_9[i][9*60]

In [457]:
B_1_ratio_8_9

{236: {480: [], 500: [[128.75]], 520: [[59.25, 73.48, 82.95]]},
 238: {480: [[124.71000000000001, 230.72, 97.64]],
  500: [[108.87]],
  520: [141.1175]},
 240: {480: [[75.11]], 500: [[96.02]], 520: [[184.72]]},
 241: {480: [], 500: [[124.26, 130.29, 162.4]], 520: [[120.06, 77.12]]},
 242: {480: [[98.76]], 500: [[89.67, 111.07]], 520: [[153.2, 55.85]]},
 244: {480: [[154.18, 263.5]], 500: [179.0], 520: [[149.16]]},
 246: {480: [[200.01]], 500: [[140.15]], 520: [[75.93, 97.32]]},
 247: {480: [[139.21, 55.93]], 500: [[107.16]], 520: [165.345]},
 248: {480: [99.16874999999999],
  500: [[111.86, 138.77, 89.78, 45.980000000000004]],
  520: [[80.03999999999999]]},
 250: {480: [[67.84]], 500: [[108.21000000000001]], 520: [[98.22, 137.35]]},
 252: {480: [[110.57]], 500: [102.7825], 520: [[83.38, 106.61]]},
 253: {480: [[126.9]], 500: [112.445], 520: [[97.99]]},
 254: {480: [[112.75]], 500: [[83.89, 93.26]], 520: [[73.56, 113.23]]},
 255: {480: [[105.0]], 500: [[174.95]], 520: [163.2749999999999

In [458]:
for i in B_1_ratio_9_10:
    del B_1_ratio_9_10[i][9*60-20]
    del B_1_ratio_9_10[i][10*60]

In [459]:
for i in B_1_ratio_17_18:
    del B_1_ratio_17_18[i][17*60-20]
    del B_1_ratio_17_18[i][18*60]

In [460]:
for i in B_1_ratio_18_19:
    del B_1_ratio_18_19[i][18*60-20]
    del B_1_ratio_18_19[i][19*60]