# Time Series Forecasting

## 0. Import Data

In [None]:
import pandas as pd
from fbprophet.plot import plot_plotly, plot_components_plotly
from fbprophet import Prophet

In [None]:
# Colab-only setup: `google.colab` is imported here, so this cell is expected
# to run inside Google Colab. It mounts Google Drive at /gdrive and then makes
# that directory the working directory.
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Mounted at /gdrive
/gdrive


In [None]:
# Load the ride-count table from the mounted Drive (absolute, Colab-specific
# path).
# NOTE(review): the displayed columns include "Unnamed: 0.1" and "Unnamed: 0",
# which look like index columns written out by earlier to_csv calls - confirm
# and drop them if they are not needed.
df_4 = pd.read_csv("/gdrive/MyDrive/Geo/daily_member.csv")
df_4

Unnamed: 0.1,Unnamed: 0,start_date,h3_index,lat,lng,geometry,daily_ride_type_count
0,0,2020-01-01,872664c10ffffff,41.931865,-87.638686,POINT (-87.63868639644498 41.8661),4
1,1,2020-01-01,872664ca9ffffff,41.870611,-87.664571,POINT (-87.66457059524387 41.8856),4
2,3,2020-01-01,872664ca9ffffff,41.870611,-87.664571,POINT (-87.66457059524387 41.8834),2
3,4,2020-01-01,872664ca9ffffff,41.870611,-87.664571,POINT (-87.66457059524387 41.881),2
4,5,2020-01-01,872664ca9ffffff,41.870611,-87.664571,POINT (-87.66457059524387 41.8804),2
...,...,...,...,...,...,...,...
2455508,4148577,2022-04-30,872664c1affffff,41.874988,-87.635029,POINT (-87.63502864308958 41.86532116666667),2
2455509,4148579,2022-04-30,872664c1affffff,41.874988,-87.635029,POINT (-87.63502864308958 41.8653105),2
2455510,4148580,2022-04-30,872664c1affffff,41.874988,-87.635029,POINT (-87.63502864308958 41.870257),4
2455511,4148587,2022-04-30,872664c1affffff,41.874988,-87.635029,POINT (-87.63502864308958 41.864883),2


In [None]:
# Build the daily series for one example hexagon: keep only that hexagon's
# rows, then add up `daily_ride_type_count` per `start_date`.
hex_index_example = "872664c1effffff"
df_ts = (
    df_4.loc[
        df_4['h3_index'] == hex_index_example,
        ['start_date', 'daily_ride_type_count'],
    ]
    .groupby("start_date")
    .sum()
    .reset_index()
)
df_ts

Unnamed: 0,start_date,daily_ride_type_count
0,2020-01-01,552
1,2020-01-02,2270
2,2020-01-03,2160
3,2020-01-04,1076
4,2020-01-05,836
...,...,...
844,2022-04-26,2898
845,2022-04-27,2572
846,2022-04-28,2538
847,2022-04-29,2600


## 1. Using Prophet

In [None]:
# Rename the date and target columns to `ds` and `y`, the names the Prophet
# model fitted below reads from its input frame.
ts = df_ts.rename(columns={'start_date':'ds', 'daily_ride_type_count':'y'})
ts

Unnamed: 0,ds,y
0,2020-01-01,552
1,2020-01-02,2270
2,2020-01-03,2160
3,2020-01-04,1076
4,2020-01-05,836
...,...,...
844,2022-04-26,2898
845,2022-04-27,2572
846,2022-04-28,2538
847,2022-04-29,2600


In [None]:
# Fit a Prophet model on the daily series `ts` (columns `ds` and `y`).
# NOTE(review): these two imports repeat the ones in the first cell of the
# notebook; they are harmless but redundant.
from fbprophet.plot import plot_plotly, plot_components_plotly
from fbprophet import Prophet
# Yearly seasonality is switched on explicitly instead of relying on the
# default `Prophet()` configuration.
m = Prophet(yearly_seasonality = True)
m.fit(ts)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<fbprophet.forecaster.Prophet at 0x7f56bbd07390>

In [None]:
# Extend the training dates by 7 daily steps (periods=7, freq='D') and show the
# last rows of the resulting date frame.
future = m.make_future_dataframe(periods=7, freq='D')
future.tail()

Unnamed: 0,ds
851,2022-05-03
852,2022-05-04
853,2022-05-05
854,2022-05-06
855,2022-05-07


In [None]:
# Predict over the historical dates plus the future horizon, and show the point
# forecast (`yhat`) with its lower/upper uncertainty bounds.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]


Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2020-01-01,1825.776218,1115.321970,2540.129565
1,2020-01-02,1696.748200,998.481449,2389.442515
2,2020-01-03,1551.541752,814.573848,2305.611534
3,2020-01-04,1194.529628,515.693868,1882.470530
4,2020-01-05,924.075465,273.642497,1653.698893
...,...,...,...,...
851,2022-05-03,2319.550915,1590.608311,2968.102888
852,2022-05-04,2330.531370,1609.556456,3014.652709
853,2022-05-05,2242.656170,1556.312749,2989.042843
854,2022-05-06,2133.725647,1452.854304,2882.704383


In [None]:

# Visualize the forecasted daily ride counts
plot_plotly(m, forecast)


In [None]:
# Visualize the individual components of the fitted model's forecast
plot_components_plotly(m, forecast)


In [None]:
from fbprophet.diagnostics import cross_validation

# Cross-validation of the fitted model `m`.
#   initial - length of the first training window
#   period  - spacing between successive cutoffs
#   horizon - how far ahead each fold forecasts
# With initial='820 days' and a 30-day horizon the log below reports a single
# cutoff (2022-03-31), so the metrics that follow come from one fold only.
df_cv = cross_validation(
    model=m, 
    initial='820 days', 
    period='7 days', 
    horizon = '30 days'
)

df_cv.head()

INFO:fbprophet:Making 1 forecasts with cutoffs between 2022-03-31 00:00:00 and 2022-03-31 00:00:00


  0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper,y,cutoff
0,2022-04-01,1499.391412,852.12696,2189.428824,1980,2022-03-31
1,2022-04-02,1158.825265,485.805289,1769.420274,1006,2022-03-31
2,2022-04-03,908.92337,250.640766,1596.793844,1344,2022-03-31
3,2022-04-04,1437.801008,724.931709,2115.950972,2314,2022-03-31
4,2022-04-05,1714.723726,1054.354211,2439.008612,2610,2022-03-31


In [None]:
from fbprophet.diagnostics import performance_metrics

# Summarise the cross-validation forecasts by horizon (mse, rmse, mae, mape,
# mdape, coverage).
# NOTE(review): rolling_window=0.3 presumably averages each metric over 30% of
# the forecast rows, which would explain why the first reported horizon is
# 9 days - confirm against the Prophet diagnostics documentation.
df_p = performance_metrics(df=df_cv, rolling_window=0.3)

df_p.head()

Unnamed: 0,horizon,mse,rmse,mae,mape,mdape,coverage
0,9 days,279974.151959,529.125838,461.601426,0.239303,0.233341,0.777778
1,10 days,390498.057213,624.898438,531.213056,0.271587,0.233341,0.666667
2,11 days,612383.445971,782.549325,672.163697,0.308757,0.323718,0.555556
3,12 days,925511.206615,962.03493,816.510478,0.327623,0.343018,0.444444
4,13 days,844679.885431,919.06468,741.444595,0.296618,0.233341,0.555556


## 2. Univariate Deep Learning 

In [None]:
import numpy as np
import pandas as pd
import datetime

import math
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, SimpleRNN, Conv1D, GlobalMaxPooling1D, Flatten, MaxPooling1D, Bidirectional
# from keras.layers.wrappers import Bidirectional

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error

from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
# target variable
ts

Unnamed: 0,ds,y
0,2020-01-01,552
1,2020-01-02,2270
2,2020-01-03,2160
3,2020-01-04,1076
4,2020-01-05,836
...,...,...
844,2022-04-26,2898
845,2022-04-27,2572
846,2022-04-28,2538
847,2022-04-29,2600


In [None]:
# Index the series by date for the deep-learning pipeline.
# The guard keeps this cell idempotent: after the first run `ds` is already the
# index, and calling set_index('ds') a second time would raise KeyError.
if 'ds' in ts.columns:
    ts = ts.set_index('ds')
ts.head()

Unnamed: 0_level_0,y
ds,Unnamed: 1_level_1
2020-01-01,552
2020-01-02,2270
2020-01-03,2160
2020-01-04,1076
2020-01-05,836


In [None]:
# Chronological hold-out split: the first `split_pct` share of rows becomes the
# training set and the rest the validation set (row order is left untouched).
split_pct = 0.8
split = int(len(ts) * split_pct)

train, valid = ts[:split], ts[split:]

# Row counts of each part, one per line.
print(len(train), len(valid), sep="\n")

train.shape, valid.shape

679
170


((679, 1), (170, 1))

In [None]:
# Standardise the target. The scaler is fitted on the training part only and
# then applied to the validation part, so validation values do not influence
# the fitted statistics.
# Alternative scaler: MinMaxScaler(feature_range=(0, 1))
# NOTE(review): `train` and `valid` are overwritten with the scaled arrays, so
# re-running this cell without re-running the split cell scales the data twice.
scaler = StandardScaler(with_mean=True, with_std=True)
train = scaler.fit_transform(train)
valid = scaler.transform(valid)

train.shape, valid.shape

((679, 1), (170, 1))

In [None]:
# Generate Timeseries Sequences
# The same array is passed as both data and targets, with windows of `lags`
# time steps; see the TimeseriesGenerator documentation for how each window is
# paired with its target.

# Window length: number of past days fed to the network per sample.
lags = 7

# Training batches hold 5 windows each; validation uses batch_size=1 so that
# each batch carries exactly one window and one target.
train_generator = TimeseriesGenerator(train, train, length=lags, batch_size=5)
valid_generator = TimeseriesGenerator(valid, valid, length=lags, batch_size=1)

In [None]:
train_generator[0][1]

array([[-0.15770124],
       [ 0.22824876],
       [-0.13164554],
       [-1.60542087],
       [-1.38394745]])

In [None]:
type(train_generator)

keras.preprocessing.sequence.TimeseriesGenerator

In [None]:
# Univariate recurrent model: one SimpleRNN layer (256 units, dropout 0.2)
# reading windows of shape (lags, 1), followed by a single-unit Dense output.
# NOTE(review): no random seed is set in the visible cells, so the training
# result presumably differs between runs - confirm and seed if reproducibility
# matters.
model = Sequential()
model.add(SimpleRNN(256, dropout=0.2, input_shape=(lags, 1)))
model.add(Dense(1))
# Mean squared error loss, Adam optimizer, 10 epochs; `valid_generator` is
# passed as validation data.
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(train_generator, epochs=10, verbose=1, validation_data=valid_generator)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f56bde8dc50>

In [None]:
# Map targets and predictions back through the fitted scaler so the error is
# measured in the original units of `y`.
# `valid_generator` uses batch_size=1, so `t[1][0]` is the single target of
# each batch.
# NOTE: the RMSE cell that follows reads `y_valid` and `y_preds`, so this cell
# must run; without scaling, the two inverse_transform calls would have to be
# removed rather than the whole cell skipped.
y_valid = scaler.inverse_transform(np.array([t[1][0] for t in valid_generator]))
y_preds=scaler.inverse_transform(model.predict(valid_generator))

y_preds.shape , y_valid.shape

((163, 1), (163, 1))

In [None]:
# Root mean squared error between the validation targets and the predictions.
math.sqrt(mean_squared_error(y_valid.reshape(-1), y_preds.reshape(-1)))

561.9432563287535

In [None]:
def ts_predict(data, lags, num_periods, model, scaler=None):
    """Forecast `num_periods` future values by feeding predictions back in.

    The last `lags` observations of `data` seed a sliding window. At each step
    the model predicts the next value from the current window, and that
    prediction replaces the oldest entry of the window.

    Parameters
    ----------
    data : sliceable sequence (e.g. DataFrame or ndarray)
        Observed series in original units; only `data[-lags:]` is used. When a
        scaler is given, this slice is passed to `scaler.transform` unchanged.
    lags : int
        Window length; must be at least 1 and no longer than `data`.
    num_periods : int
        Number of steps to forecast.
    model : object
        Must expose `predict(x)` accepting an array of shape (1, lags, 1) and
        returning an array whose `[0][0]` element is the next value.
    scaler : object, optional
        If given, `transform` is applied to the seed window and
        `inverse_transform` to the forecasts.

    Returns
    -------
    numpy.ndarray
        1-D array of `num_periods` forecasts, in original units when a scaler
        is supplied.

    Raises
    ------
    ValueError
        If `lags` is smaller than 1 or `data` has fewer than `lags` rows.
    """
    if lags < 1:
        raise ValueError("lags must be a positive integer")

    history = data[-lags:]
    if len(history) < lags:
        raise ValueError("data must contain at least `lags` observations")

    if scaler is not None:
        history = scaler.transform(history)

    # Flatten to a plain 1-D float window. This also lets DataFrame input work
    # in the unscaled case, where a DataFrame slice has no `.reshape`.
    window = np.asarray(history, dtype=float).reshape(-1)

    forecasts = []
    for _ in range(num_periods):
        step = model.predict(window.reshape((1, lags, 1)))[0][0]
        forecasts.append(float(step))
        # Slide the window: drop the oldest value, append the new prediction.
        window = np.append(window[1:], step)

    preds = np.asarray(forecasts, dtype=float)
    if scaler is None or preds.size == 0:
        return preds

    # Scalers such as MinMaxScaler reject 1-D input, so pass a column vector
    # and flatten the result back to 1-D.
    restored = scaler.inverse_transform(preds.reshape(-1, 1))
    return np.asarray(restored).reshape(-1)

In [None]:
ts[-lags:]

Unnamed: 0_level_0,y
ds,Unnamed: 1_level_1
2022-04-24,1432
2022-04-25,2606
2022-04-26,2898
2022-04-27,2572
2022-04-28,2538
2022-04-29,2600
2022-04-30,1340


# 3. Conclusion

* In the above experiments, I found that Facebook Prophet, forecasting for a single hexagon, reaches only about 70% accuracy, which is not the best solution.
* Univariate deep learning seems to perform better, reaching roughly 80% accuracy.




# 4. Limitations
* Prophet and deep learning may not be the best forecasting models here, as spatial correlation is not well analyzed in either model.
* Using Hexagon for data aggregation could reduce computation cost, but may not be the best solution. 