## Exponential Smoothing Model

Using seasonality because the time of year/month/day may have an impact on the number of trips. 

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.tsa.api import ExponentialSmoothing
## Imported for testing
import numpy as np

# Get data.
data = pd.read_csv("https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_train.csv")
data['Timestamp'] = pd.to_datetime(data['Timestamp'])
#data['trips'] = data['trips'].astype(int)

trips = data['trips']
trips.index = data['Timestamp']
trips.index.freq = trips.index.inferred_freq

# Linear trend
##trend = ExponentialSmoothing(trips, trend='add', seasonal='add').fit()
# Linear trend with damping
model = ExponentialSmoothing(trips, trend='mul', seasonal='add', use_boxcox=True, damped_trend=True).fit(use_brute=True)

#forecast_t = trend.forecast(744)
modelFit = model.forecast(744)

smoothData = pd.DataFrame([trips.values, model.fittedvalues.values]).T
smoothData.columns = ['Truth', 'Damped Trend']
smoothData.index = trips.index

print(modelFit)
print(smoothData)

# calculate RMSE (root mean squared error)
rmse = np.sqrt(np.mean((trips.values - model.fittedvalues.values)**2))

#print(rmse)
'''
fig = px.line(smoothData, y = ['Truth', 'Damped Trend'], 
        x = smoothData.index,
        color_discrete_map={"Truth": 'blue',
                            'Damped Trend': 'green'
                           },
              title='With Seasonality'
       )

fig.update_xaxes(range=[smoothData.index[-50], modelFit.index[-1]])
fig.update_yaxes(range=[0, 30000])


# Incorporating the Forecasts

#fig.add_trace(go.Scatter(x=forecast_t.index, y = forecast_t.values, name='Forecast Trend', line={'color':'red'}))
fig.add_trace(go.Scatter(x=modelFit.index, y = modelFit.values, name='Forecast Damped Trend', line={'color':'green'}))
'''


overflow encountered in matmul



2019-01-01 00:00:00     5493.071181
2019-01-01 01:00:00     1876.581794
2019-01-01 02:00:00             NaN
2019-01-01 03:00:00             NaN
2019-01-01 04:00:00             NaN
                           ...     
2019-01-31 19:00:00    12558.519926
2019-01-31 20:00:00    11457.413771
2019-01-31 21:00:00    11488.694720
2019-01-31 22:00:00    10615.225990
2019-01-31 23:00:00     8611.289287
Freq: h, Length: 744, dtype: float64
                       Truth  Damped Trend
Timestamp                                 
2018-01-01 00:00:00  16714.0   4380.283242
2018-01-01 01:00:00  19041.0  13851.078675
2018-01-01 02:00:00  16590.0  17715.174543
2018-01-01 03:00:00  12626.0  16212.046097
2018-01-01 04:00:00   8739.0  12947.665285
...                      ...           ...
2018-12-31 19:00:00  14876.0  14240.515224
2018-12-31 20:00:00  14434.0  13731.988164
2018-12-31 21:00:00  14116.0  14500.654485
2018-12-31 22:00:00  10729.0  13528.477099
2018-12-31 23:00:00   8599.0   8727.864338

[8760 r

'\nfig = px.line(smoothData, y = [\'Truth\', \'Damped Trend\'], \n        x = smoothData.index,\n        color_discrete_map={"Truth": \'blue\',\n                            \'Damped Trend\': \'green\'\n                           },\n              title=\'With Seasonality\'\n       )\n\nfig.update_xaxes(range=[smoothData.index[-50], modelFit.index[-1]])\nfig.update_yaxes(range=[0, 30000])\n\n\n# Incorporating the Forecasts\n\n#fig.add_trace(go.Scatter(x=forecast_t.index, y = forecast_t.values, name=\'Forecast Trend\', line={\'color\':\'red\'}))\nfig.add_trace(go.Scatter(x=modelFit.index, y = modelFit.values, name=\'Forecast Damped Trend\', line={\'color\':\'green\'}))\n'

## Prophet Model

Using Prophet because we only have one variable we need to forecast.

In [85]:
import pandas as pd
from prophet import Prophet
## Imported for testing
import numpy as np

# Get data.
data = pd.read_csv("https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_train.csv")
data['Timestamp'] = pd.to_datetime(data['Timestamp'])

# Get only timestamp and time series data from the dataframe.
data_p = data[['Timestamp', 'trips']]
data_p.columns = ['ds', 'y'] # Renaming the columns per Prophet's requirements.

model = Prophet()
modelFit = model.fit(data_p)

# Create an empty dataframe with dates for future periods
future = modelFit.make_future_dataframe(periods=744, freq='h')
# Fill in dataframe wtih forecasts of `y` for the future periods
forecast = modelFit.predict(future)

# Get only forecasted variables.
pred = forecast.loc[(forecast['ds'] >= '2019-01-01 00:00:00')]
pred = pred[['ds', 'trend']]
pred['trend'] = pred['trend'].astype(int)
pred = pred.reset_index(drop=True)

print(pred)

#greebo

# calculate RMSE (root mean squared error)
rmse = np.sqrt(np.mean((data_p['y'] - forecast['trend'])**2))

print(rmse)


14:37:25 - cmdstanpy - INFO - Chain [1] start processing
14:37:26 - cmdstanpy - INFO - Chain [1] done processing


                     ds  trend
0   2019-01-01 00:00:00  10425
1   2019-01-01 01:00:00  10424
2   2019-01-01 02:00:00  10423
3   2019-01-01 03:00:00  10422
4   2019-01-01 04:00:00  10421
..                  ...    ...
739 2019-01-31 19:00:00   9732
740 2019-01-31 20:00:00   9731
741 2019-01-31 21:00:00   9730
742 2019-01-31 22:00:00   9729
743 2019-01-31 23:00:00   9728

[744 rows x 2 columns]
5535.064099162959
