## Project : CabEdge

## # Training for all zones

In [1]:
import pandas as pd
import numpy as np

import datetime as pydt

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from fbprophet import Prophet

In [3]:
from fbprophet.diagnostics import cross_validation

In [4]:
import import_ipynb
import cabedge_utils

importing Jupyter notebook from cabedge_utils.ipynb


In [5]:
dropoff_df = pd.read_csv('/home/jupyter/capstone/database/complete_dropoff_2018.csv')

In [6]:
df = pd.read_csv('/home/jupyter/capstone/database/complete_pickup_2018.csv')
zone_distance_matrix = pd.read_csv('/home/jupyter/capstone/database/zone_distance_matrix.csv')
zone_df = pd.read_csv('/home/jupyter/capstone/database/zone_df.csv')

In [7]:
def modelling_train_pickup_zone(df):
    
    pickup_model_object_list = []
    df.rename(columns={'datetime':'ds','Count' : 'y'},inplace=True)
    for zone in range(1,264):
       
        train_df = df[df['PULocationID'] == zone]
        train_df = train_df[['ds','apparentTemperature','windSpeed','y']]
        train_df['ds']= pd.to_datetime(train_df['ds'])
        train_df['cap'] = train_df['y'].max() + 1
        train_df['floor'] = 0
        
        print('Creating Object')
        print('--------------------------------------------')
        m = Prophet(growth='logistic',changepoint_prior_scale = 30,seasonality_prior_scale=35,holidays_prior_scale=20,seasonality_mode='multiplicative',yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True)
        m.add_country_holidays(country_name='US')
        m.add_seasonality(name='hourly', period=24, fourier_order=5)
        
        m.add_regressor('apparentTemperature')
        m.add_regressor('windSpeed')
        try:
            zone_fit = m.fit(train_df)
            pickup_model_object_list.append(zone_fit)
        except:
            print('Problem zone id: ', zone)
            pickup_model_object_list.append(pickup_model_object_list[zone-2])
        
        del(m)
        print('Training for Zone: ',zone,'completed')
        print('--------------------------------------------')
    
    return pickup_model_object_list
        

In [8]:
def modelling_train_dropoff_zone(df):
    
    dropoff_model_object_list = []
    df.rename(columns={'datetime':'ds','Count' : 'y'},inplace=True)
    for zone in range(1,264):
       
        train_df = df[df['DOLocationID'] == zone]
        train_df = train_df[['ds','apparentTemperature','windSpeed','y']]
        train_df['ds']= pd.to_datetime(train_df['ds'])
        train_df['cap'] = train_df['y'].max() + 1
        train_df['floor'] = 0
        
        print('Creating Object')
        print('--------------------------------------------')
        m = Prophet(growth='logistic',changepoint_prior_scale = 30,seasonality_prior_scale=35,holidays_prior_scale=20,seasonality_mode='multiplicative',yearly_seasonality=True,
        weekly_seasonality=True,
        daily_seasonality=True)
        m.add_country_holidays(country_name='US')
        m.add_seasonality(name='hourly', period=24, fourier_order=5)
        
        m.add_regressor('apparentTemperature')
        m.add_regressor('windSpeed')
        
        try:
            zone_fit = m.fit(train_df)
            dropoff_model_object_list.append(zone_fit)
        except:
            print('Problem zone id: ', zone)
            dropoff_model_object_list.append(dropoff_model_object_list[zone-2])
        
        del(m)
        print('Training for Zone: ',zone,'completed')
        print('--------------------------------------------')
    
    return dropoff_model_object_list

In [15]:
#pick_up_model_object_list = modelling_train_pickup_zone(df)

In [16]:
#dropoff_model_object_list = modelling_train_dropoff_zone(dropoff_df)

In [20]:
len(dropoff_model_object_list)

263

In [21]:
len(pick_up_model_object_list)

263

In [25]:
%store dropoff_model_object_list

Stored 'dropoff_model_object_list' (list)


In [26]:
%store pick_up_model_object_list

Stored 'pick_up_model_object_list' (list)


In [19]:
test_1 = df[df['PULocationID'] == 263]
cap = test_1['y'].max()
test_1 = test_1[['ds','apparentTemperature','windSpeed']]
test_1['cap'] = cap
test_1['floor'] = 0

In [123]:
test_1 = pd.DataFrame(columns=['ds','apparentTemperature','windSpeed','cap','floor'])
test_1.loc[0,'ds'] = '2020-03-10 11:00:00'
test_1.loc[0,'apparentTemperature'] = 14.25
test_1.loc[0,'windSpeed'] = 1.56
test_1.loc[0,'cap'] = 1000
test_1.loc[0,'floor'] =0
test_1['ds'] = pd.to_datetime(test_1['ds'])

Unnamed: 0,ds,apparentTemperature,windSpeed,cap,floor
0,2020-03-10 11:00:00,14.25,1.56,1000,0


In [125]:
test_1['apparentTemperature'] = test_1['apparentTemperature'].apply(float)
test_1['windSpeed'] = test_1['windSpeed'].apply(float)
test_1['cap'] = test_1['cap'].apply(int)
test_1['floor'] = test_1['floor'].apply(int)

In [26]:
zone_pickup_list = []
for pickup in pick_up_model_object_list:
    predicted_pickup = pickup.predict(test_1)['yhat'].values[0]
    if predicted_pickup < 0 :
        predicted_pickup = 0
    
    zone_pickup_list.append(predicted_pickup)

In [27]:
zone_pickup_list

[21.3544059114053,
 8.368234795860279e-07,
 5.537418950316151,
 4.5614498477520815,
 0.04770434529535394,
 0.2284346023589372,
 9.737725822826738,
 771.0831460553989,
 1583.3255809648915,
 4.117389725620795,
 0.5535997726987533,
 735.1137862164654,
 77.25084424097489,
 16.379021760515638,
 1099.3275285552888,
 514.1860365516193,
 37.45257731725181,
 163.54808248370279,
 5.44978602467659,
 2559.8133488772337,
 2.4253004723994738,
 3.6595037971394153,
 0.035515625019093826,
 32.23820668474912,
 2.1482721103641427,
 352.7288509459785,
 0.0,
 0.7151193676797841,
 2.467216998310076,
 4.907668407950067e-55,
 259.3045171263062,
 308.2440722123989,
 3.4879868391954147,
 0.06599910832620223,
 81.4978378095271,
 0,
 0,
 186.72487663253023,
 231.3907501716344,
 1.1602277209180862,
 17.547814241876836,
 7.08171734022469,
 367.36507628224035,
 52494.24547963137,
 194.78329434679281,
 15.948093804277594,
 0.6095921291434531,
 25.677764619515813,
 2.70903101757785,
 8.403576369405624,
 40.73996659748

In [28]:
zone_dropoff_list = []

In [29]:
zone_dropoff_list = []
for dropoff in dropoff_model_object_list:
    predicted_dropoff = dropoff.predict(test_1)['yhat'].values[0]
    if predicted_dropoff < 0 :
        predicted_dropoff = 0
    
    zone_dropoff_list.append(predicted_dropoff)

In [30]:
zone_demand_list = cabedge_utils.model_demand_supply(zone_pickup_list,zone_dropoff_list,2)

demand zone : 8 ,  3091.5579558884797
demand zone : 9 ,  1178.938944772106
demand zone : 12 ,  1.4119297372878898
demand zone : 15 ,  192.64366327234754
demand zone : 16 ,  11.990960907873243
demand zone : 17 ,  4.918706426774082
demand zone : 20 ,  229.94804048424186
demand zone : 26 ,  22.849724054905025
demand zone : 31 ,  20.489235218797088
demand zone : 38 ,  47.725501712265995
demand zone : 39 ,  2.745719321160462
demand zone : 44 ,  28.669337379338838
demand zone : 53 ,  212.155919402924
demand zone : 55 ,  3.76554865795931
demand zone : 56 ,  1.5200808475261658
demand zone : 61 ,  2.6354636415235726
demand zone : 62 ,  7.655390157335066
demand zone : 71 ,  1.3346399539301952
demand zone : 72 ,  2.763487148058788
demand zone : 73 ,  3.8117666608159033
demand zone : 75 ,  1.222823613468846
demand zone : 77 ,  1.850564132651345
demand zone : 85 ,  5.631250423482975
demand zone : 87 ,  1.5556022225799981
demand zone : 88 ,  1.2875994190909834
demand zone : 89 ,  59.98975652157786
d


divide by zero encountered in double_scalars


invalid value encountered in double_scalars



In [40]:
best_zone = cabedge_utils.find_best_zone_distance(zone_distance_matrix,262,zone_demand_list)

140


In [73]:
def closest_ten_zones(current_zone,zone_distance_matrix):
    best_zone_distance_list = []
    zone_demand_list =[]
    for zone in range(1,264):
        dist = zone_distance_matrix.loc[current_zone-1][zone-1]
        best_zone_distance_list.append(dist)
        zone_demand_list.append(zone)
    
    close_zone_distance = pd.DataFrame()
    close_zone_distance['zone'] = zone_demand_list
    close_zone_distance['Distance'] = best_zone_distance_list
    closest_zones = close_zone_distance.sort_values(by='Distance').iloc[0:9,0].values
    
    return closest_zones
      

In [76]:
def predict_nb_pickup(test,closest_zones,pick_up_model_object_list):
    
    zone_pickup_dict = {}
    
    for pickup in closest_zones:
        predicted_pickup = pick_up_model_object_list[pickup-1].predict(test)['yhat'].values[0]
        if predicted_pickup < 0 :
            predicted_pickup = 0
    
        zone_pickup_list[pickup] = predicted_pickup
    return zone_pickup_dict

In [77]:
def predict_nb_dropoff(test,closest_zones,dropoff_model_object_list):
    
    zone_dropoff_dict = {}
    
    for dropoff in closest_zones:
        predicted_dropoff = dropoff_model_object_list[dropoff-1].predict(test)['yhat'].values[0]
        if predicted_dropoff < 0 :
            predicted_dropoff = 0
    
        zone_dropoff_dict[dropoff] = predicted_dropoff
    return zone_dropoff_dict

In [126]:
zone_pickup_dict = predict_nb_pickup(test_1,closest_zones,pick_up_model_object_list)

In [79]:
zone_dropoff_dict = predict_nb_dropoff(test_1,closest_zones,dropoff_model_object_list)

In [80]:
zone_demand_list = cabedge_utils.model_demand_supply(zone_pickup_dict,zone_dropoff_dict,263)

demand zone : 4 ,  1.3122568340266998
demand zone : 7 ,  11.184454624603347
demand zone : 9 ,  1.222823613468846


In [82]:
best_zone = cabedge_utils.find_best_zone_distance(zone_distance_matrix,262,zone_demand_list)

7


In [86]:
zone_df[zone_df['LocationID'] == 4]

Unnamed: 0,LocationID,Borough,Zone,service_zone,New_Name,Latitude,Longitude
3,4,Manhattan,Alphabet City,Yellow Zone,Manhattan_Alphabet_City,40.725102,-73.979583


In [130]:
date_str = "2019-02-23 09"

In [131]:
date_object = pydt.datetime.strptime(date_str,'%Y-%m-%d %H')

In [132]:
print(date_object)

2019-02-23 09:00:00


In [133]:
test_1 = pd.DataFrame(columns=['ds','apparentTemperature','windSpeed','floor','cap'])
test_1.loc[0,'ds'] = date_object
test_1.loc[0,'apparentTemperature'] = 14.25
test_1.loc[0,'windSpeed'] = 1.56
test_1.loc[0,'floor'] = 0
test_1.loc[0, 'cap'] = 1000

test_1['apparentTemperature'] = test_1['apparentTemperature'].apply(float)
test_1['windSpeed'] = test_1['windSpeed'].apply(float)
test_1['cap'] = test_1['cap'].apply(int)
test_1['floor'] = test_1['floor'].apply(int)

In [134]:
test_1['ds'] = pd.to_datetime(test_1['ds'])

In [135]:
test_1

Unnamed: 0,ds,apparentTemperature,windSpeed,floor,cap
0,2019-02-23 09:00:00,14.25,1.56,0,1000


In [136]:
current_zone = 201

In [137]:
closest_zones = closest_ten_zones(current_zone,zone_distance_matrix)
print(closest_zones)
zone_pickup_list = predict_nb_pickup(test_1,closest_zones,pick_up_model_object_list)
zone_dropoff_list = predict_nb_dropoff(test_1,closest_zones,dropoff_model_object_list)
zone_demand_list = cabedge_utils.model_demand_supply(zone_pickup_list,zone_dropoff_list,current_zone)
best_zone = cabedge_utils.find_best_zone_distance(zone_distance_matrix,current_zone,zone_demand_list)

[201   2 117  30 154  27 124 222 132]
demand zone : 5 ,  2.8723158245539544
demand zone : 7 ,  1.3057884957035275
7


In [85]:
zone_df

Unnamed: 0,LocationID,Borough,Zone,service_zone,New_Name,Latitude,Longitude
0,1,EWR,Newark Airport,EWR,EWR_Newark_Airport,40.689064,-74.177255
1,2,Queens,Jamaica Bay,Boro Zone,Queens_Jamaica_Bay,40.603994,-73.835412
2,3,Bronx,Allerton/Pelham Gardens,Boro Zone,Bronx_Allerton/Pelham_Gardens,40.862773,-73.843439
3,4,Manhattan,Alphabet City,Yellow Zone,Manhattan_Alphabet_City,40.725102,-73.979583
4,5,Staten Island,Arden Heights,Boro Zone,Staten_Island_Arden_Heights,40.556413,-74.173504
...,...,...,...,...,...,...,...
258,259,Bronx,Woodlawn/Wakefield,Boro Zone,Bronx_Woodlawn/Wakefield,40.895589,-73.862713
259,260,Queens,Woodside,Boro Zone,Queens_Woodside,40.745380,-73.905415
260,261,Manhattan,World Trade Center,Yellow Zone,Manhattan_World_Trade_Center,40.711891,-74.012613
261,262,Manhattan,Yorkville East,Yellow Zone,Manhattan_Yorkville_East,40.778007,-73.948202


In [41]:
zone_distance_matrix.drop(columns=['Unnamed: 0'],inplace=True)

In [44]:
zone_distance_matrix

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,254,255,256,257,258,259,260,261,262,263
0,0.000000,0.352269,0.376308,0.200930,0.132704,0.148186,0.260545,0.269987,0.394234,0.394605,...,0.358806,0.225259,0.213953,0.203451,0.319342,0.376283,0.277612,0.166217,0.245715,0.245715
1,0.352269,0.000000,0.258903,0.188288,0.341424,0.222523,0.192947,0.195380,0.163953,0.096981,...,0.269489,0.161776,0.166297,0.150335,0.088195,0.292870,0.157767,0.207466,0.207369,0.207369
2,0.376308,0.258903,0.000000,0.193619,0.450332,0.338254,0.125603,0.115401,0.114679,0.187517,...,0.026314,0.184529,0.195438,0.246091,0.174105,0.038058,0.132748,0.226682,0.134761,0.134761
3,0.200930,0.188288,0.193619,0.000000,0.257024,0.146579,0.068065,0.078298,0.193308,0.200884,...,0.183885,0.028156,0.022133,0.069384,0.126837,0.206698,0.076891,0.035574,0.061511,0.061511
4,0.132704,0.341424,0.450332,0.257024,0.000000,0.123964,0.325036,0.335273,0.435119,0.411565,...,0.438666,0.271024,0.259068,0.220608,0.342416,0.460034,0.327995,0.223740,0.316014,0.316014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258,0.376283,0.292870,0.038058,0.206698,0.460034,0.353218,0.140834,0.131235,0.152709,0.224937,...,0.024750,0.202440,0.212297,0.265449,0.206375,0.000000,0.156160,0.237096,0.145375,0.145375
259,0.277612,0.157767,0.132748,0.076891,0.327995,0.209862,0.036429,0.037617,0.116802,0.136629,...,0.131448,0.057039,0.069095,0.114397,0.073517,0.156160,0.000000,0.112307,0.053808,0.053808
260,0.166217,0.207466,0.226682,0.035574,0.223740,0.119593,0.101959,0.112152,0.228332,0.231470,...,0.215173,0.059226,0.048445,0.066700,0.156345,0.237096,0.112307,0.000000,0.092304,0.092304
261,0.245715,0.207369,0.134761,0.061511,0.316014,0.208072,0.018909,0.025589,0.159341,0.189690,...,0.122981,0.063602,0.070706,0.125465,0.126595,0.145375,0.053808,0.092304,0.000000,0.000000


In [54]:
zone_distance_matrix.loc[262][0]

0.24571510255371795