#### Author: Kubam Ivo

#### Date: 1/26/2021

#### Project: NY Taxi Time Series Forecast

In [1]:
# Importing the necessary packages
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import pandas as pd
import numpy as np
import pmdarima as pm
import numpy as np
import calendar
from matplotlib import pyplot as plt

In [2]:
# Load the two input tables used by the rest of the notebook:
#   df     - records later filtered on pulocationid / dolocationid / taxi_type
#   df_loc - zone lookup providing the Borough, Zone and LocationID columns
# NOTE(review): the first path is an absolute location under one user's Windows
# profile, so this cell only runs unchanged on that machine. Consider keeping
# the file beside ./Data/df_loc.csv and loading it through a relative path.
df = pd.read_csv(
    filepath_or_buffer="C:\\Users\\ivomb\\OneDrive\\Personal_Projects\\df_ts.csv"
)
df_loc = pd.read_csv(
    filepath_or_buffer='./Data/df_loc.csv'
)

In [3]:
# Defining the helper functions
def time_series(start,end,field,borough,taxi,df=df, df_zone = df_loc):
    '''Build the per-month median series of ``field`` for one trip selection.

    Rows of ``df`` are kept when ``pulocationid == start``,
    ``dolocationid == end`` and ``taxi_type == taxi``. The remaining rows are
    grouped by (``trip_month``, ``year``) and the median of ``field`` is taken
    for each group.

    Parameters
    ----------
    start, end
        Values compared against the ``pulocationid`` / ``dolocationid`` columns.
    field
        Name of the column of ``df`` to aggregate.
    borough, df_zone
        Accepted so existing callers keep working, but they do not influence
        the returned series. Previously they were only used to build a zone
        lookup that was never read, at the cost of a row-by-row loop and of
        shadowing the built-in ``dict``.
    taxi
        Value compared against the ``taxi_type`` column.
    df
        Source DataFrame; defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        Columns ``trip_month``, ``year`` and ``value`` (the median), ordered by
        year then month, with a fresh 0..n-1 index.
    '''
    # Keep only the records matching the requested route and taxi type.
    selected = (
        (df['pulocationid'] == start)
        & (df['dolocationid'] == end)
        & (df['taxi_type'] == taxi)
    )

    # Aggregate, then order chronologically; no in-place mutation so the
    # function can be re-run safely from the widget callback.
    return (
        df.loc[selected]
        .groupby(['trip_month', 'year'])
        .agg(value=(field, 'median'))
        .reset_index()
        .sort_values(by=['year', 'trip_month'])
        .reset_index(drop=True)
    )

In [4]:
def model_pred(df,period=6):
    '''Fit an auto-ARIMA model on ``df['value']`` and forecast ahead.

    Parameters
    ----------
    df
        Object whose ``'value'`` entry holds the observed series.
    period
        Number of future periods to predict (default 6). This value is passed
        to ``model.predict`` as ``n_periods``; it used to be ignored in favour
        of a hard-coded 6.

    Returns
    -------
    tuple
        ``(pred, CI, model)``: the point forecasts and the confidence-interval
        array returned by ``model.predict(..., return_conf_int=True)``, and the
        fitted model itself.
    '''
    model = pm.auto_arima(
        df['value'],
        error_action='ignore',
        trace=False,
        suppress_warnings=True,
        stepwise=True,
        maxiter=10,
        seasonal=True,
        m=12,  # seasonal cycle of 12 periods
    )

    pred, CI = model.predict(n_periods=period, return_conf_int=True)
    return pred, CI, model


def plot(df,pred,CI,model,model_show,pred_show,field,taxi):
    '''Plot the observed series with its forecast and print a short summary.

    Parameters
    ----------
    df
        DataFrame with ``trip_month``, ``year`` and ``value`` columns.
    pred
        Forecast values; drawn at the x positions that directly follow the
        last observation. The horizon is taken from ``len(pred)``.
    CI
        2-D array; columns 0 and 1 bound the shaded band around the forecast.
    model
        Fitted model; only used for ``model.summary()`` when ``model_show`` is
        set.
    model_show
        When truthy, print the model summary after the figure.
    pred_show
        When truthy, print the head/tail of ``df`` and the forecast values.
    field
        Text appended to the figure title.
    taxi
        Unused; kept so existing callers keep working.
    '''
    n_obs = df.shape[0]
    # Derive the horizon from the forecast itself instead of assuming 6, so the
    # x positions always line up with ``pred`` and ``CI``.
    x_values = np.arange(n_obs, n_obs + len(pred))

    plt.plot(df.index, df['value'])
    plt.title('Time Series forecast for ' + field)
    plt.plot(pd.Series(x_values), pred, alpha=0.4, marker='x', c='red')
    plt.fill_between(x_values, CI[:, 0], CI[:, 1], alpha=0.1, color='b')

    # Positional access per column: row 0 is the first observation (the label
    # lookup [1] previously reported the second one), and reading each column
    # separately keeps the integer month/year from being upcast to float.
    months, years, values = df['trip_month'], df['year'], df['value']
    print('Number of observations: ', n_obs)
    print('First Observation: ', 'month:', calendar.month_name[months.iloc[0]],
          ', year:', years.iloc[0], ', value:', '{:,.2f}'.format(values.iloc[0]))
    print('Last Observation: ', 'month:', calendar.month_name[months.iloc[-1]],
          ', year:', years.iloc[-1], 'value:', '{:,.2f}'.format(values.iloc[-1]))

    if pred_show:
        print('first 5 observed values:')
        print(df.head())
        print('last 5 observed values:')
        print(df.tail())
        print('Forecast values are')
        # Plain loop: a list comprehension used only for printing builds a
        # throwaway list of None values.
        for x in pred:
            print('{:,.2f}'.format(x), end=', ')
    plt.show()

    if model_show:
        print(model.summary())
        

In [6]:
def forecast(start,end,field,borough,taxi, model_show,pred_show, df=df, df_zone = df_loc,period = 6):
    '''Build the selected time series, fit a model on it and plot the forecast.

    This is the callback wired to the notebook widgets.

    Parameters
    ----------
    start, end, field, borough, taxi
        Forwarded unchanged to ``time_series``.
    model_show, pred_show
        Forwarded to ``plot``.
    df, df_zone
        Source tables forwarded to ``time_series``; default to the
        notebook-level ``df`` and ``df_loc``. ``df_zone`` is passed through
        as given (the global ``df_loc`` used to be passed regardless).
    period
        Forecast horizon forwarded to ``model_pred`` (a literal 6 used to be
        passed regardless of this argument).

    Prints ``'Not enough data to make forecast'`` instead of forecasting when
    the series has 12 or fewer points.
    '''
    series = time_series(start, end, field, borough, taxi, df=df, df_zone=df_zone)

    # Require more than 12 aggregated points before attempting a fit
    # (presumably tied to the 12-period seasonality used by model_pred).
    if series.shape[0] > 12:
        pred, CI, model = model_pred(series, period=period)
        plot(series, pred, CI, model, model_show, pred_show, field, taxi)
    else:
        print('Not enough data to make forecast')
    

In [7]:
# (label, column name) pairs offered by the "Field" selector.
field_list = [('Trip duration','trip_duration'),('Trip Amount','total_amount')]

# (zone name, LocationID) pairs for the Start/End dropdowns.
# Going through a mapping keeps a single entry per zone name, with the last
# LocationID winning when a name repeats. It is built with a comprehension so
# the built-in name ``dict`` is no longer rebound at notebook scope, which
# would break any later ``dict(...)`` call in the kernel.
loc_list = list(
    {
        zone: loc_id
        for zone, loc_id in zip(df_loc['Zone'], df_loc['LocationID'])
    }.items()
)

In [8]:
# --- Location panel: boroughs plus the pickup ("Start") and drop-off ("End")
# zones; the dropdowns start on LocationIDs 132 and 43.
f = widgets.SelectMultiple(
    description='Borough',
    options=['Bronx','Brooklyn','EWR','Manhattan','Staten Island', 'Queens'],
    value=['Manhattan','Queens'],
)
a = widgets.Dropdown(
    description='Start',
    options=loc_list,
    value=132,
)
b = widgets.Dropdown(
    description='End',
    options=loc_list,
    value=43,
)
loc_box = widgets.HBox([f, a,b])

# --- Statistics panel: taxi type, field to aggregate, forecast horizon.
e = widgets.RadioButtons(
    description='Taxi',
    options=['green','yellow'],
    value='yellow',
)
d = widgets.Select(
    description='Field',
    options=field_list,
    value='trip_duration',
)
g = widgets.IntSlider(
    value=6,
    min=3,
    max=12,
    step=1,
    description='Periods:',
    orientation='horizontal',
    readout=True,
    readout_format='d',
)
stat_box = widgets.HBox([e,d,g])

# --- Options panel: toggles for the printed forecast and the model summary.
h = widgets.Checkbox(
    value=True,
    description='Show Pred',
    disabled=False,
)
i = widgets.Checkbox(
    value=False,
    description='Model details',
    disabled=False,
)
opt_box = widgets.HBox([h,i])

# Stack the three panels in an accordion and title each section.
accordion = widgets.Accordion(children=[loc_box,stat_box, opt_box])
accordion.set_title(0, 'Location')
accordion.set_title(1, 'Statistics')
accordion.set_title(2, 'Options')

# Re-run ``forecast`` whenever a control changes; each key is the name of the
# keyword argument fed by that widget.
out = widgets.interactive_output(
    forecast,
    {
        'start':a,
        'end':b,
        'taxi':e,
        'field':d,
        'borough':f,
        'period':g,
        'model_show':i,
        'pred_show':h,
    },
)
display(accordion, out)

Accordion(children=(HBox(children=(SelectMultiple(description='Borough', index=(3, 5), options=('Bronx', 'Broo…

Output()