In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import statsmodels.api as sm
%matplotlib widget
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('dataset.csv')
df['Datetime'] = pd.to_datetime(df['Datetime'])
df.set_index('Datetime')
df=df.resample('D',on='Datetime').sum()
df

Unnamed: 0_level_0,sID,Count
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-01-09,4308,112
2012-01-10,21588,248
2012-01-11,39444,304
2012-01-12,56724,210
2012-01-13,0,0
...,...,...
2013-12-27,281940,3868
2013-12-28,282516,3084
2013-12-29,283092,2330
2013-12-30,283668,4928


In [3]:
df.reset_index(level=0, inplace=True)

In [4]:
train = df[(df['Datetime'] >= '2012-01-09') & (df['Datetime'] < '2013-12-01')]
test = df[(df['Datetime'] >= '2013-12-01') & (df['Datetime'] < '2013-12-31')]
test

Unnamed: 0,Datetime,sID,Count
692,2013-12-01,80916,222
693,2013-12-02,98772,490
694,2013-12-03,114900,530
695,2013-12-04,132756,852
696,2013-12-05,150036,484
697,2013-12-06,167892,1822
698,2013-12-07,185172,1292
699,2013-12-08,203028,2304
700,2013-12-09,220884,2844
701,2013-12-10,238164,2330


In [5]:
dd= np.asarray(train.Count)
y_hat = test.copy()
y_hat['naive'] = dd[len(dd)-1]
plt.figure(figsize=(12,8))
plt.plot(train.index, train['Count'], label='Train')
plt.plot(test.index,test['Count'], label='Test')
plt.plot(y_hat.index,y_hat['naive'], label='Naive Forecast')
plt.legend(loc='best')
plt.title("Naive Approach")
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [6]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,mean_absolute_percentage_error
from math import sqrt
mae = mean_absolute_error(test.Count, y_hat.naive)
mape = mean_absolute_percentage_error(test.Count, y_hat.naive)
mse = mean_squared_error(test.Count, y_hat.naive)
rmse = sqrt(mean_squared_error(test.Count, y_hat.naive))
print(mae)
print(mape)
print(mse)
print(rmse)


1192.3333333333333
1.1445220207595501
2010050.8
1417.7626035412277


In [7]:
y_hat_avg = test.copy()
y_hat_avg['avg_forecast'] = train['Count'].mean()
plt.figure(figsize=(12,8))
plt.plot(train['Count'], label='Train')
plt.plot(test['Count'], label='Test')
plt.plot(y_hat_avg['avg_forecast'], label='Average Forecast')
plt.legend(loc='best')
plt.title("Simple Average")
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [8]:
mae = mean_absolute_error(test.Count, y_hat_avg.avg_forecast)
mape = mean_absolute_percentage_error(test.Count, y_hat_avg.avg_forecast)
mse = mean_squared_error(test.Count, y_hat_avg.avg_forecast)
rmse = sqrt(mean_squared_error(test.Count, y_hat_avg.avg_forecast))
print(mae)
print(mape)
print(mse)
print(rmse)

2017.6539499036614
0.7567901764140926
5555628.871192378
2357.0381564990366


In [9]:
y_hat_avg = test.copy()
y_hat_avg['moving_avg_forecast'] = train['Count'].rolling(60).mean().iloc[-1]
plt.figure(figsize=(16,8))
plt.plot(train['Count'], label='Train')
plt.plot(test['Count'], label='Test')
plt.plot(y_hat_avg['moving_avg_forecast'], label='Simple Moving Average Forecast')
plt.title("Simple Mov'ng Average")
plt.legend(loc='best')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [10]:
mae = mean_absolute_error(test.Count,y_hat_avg.moving_avg_forecast)
mape = mean_absolute_percentage_error(test.Count, y_hat_avg.moving_avg_forecast)
mse = mean_squared_error(test.Count, y_hat_avg.moving_avg_forecast)
rmse = sqrt(mean_squared_error(test.Count, y_hat_avg.moving_avg_forecast))
print(mae)
print(mape)
print(mse)
print(rmse)

1183.6
1.191545393332227
2003156.7333333334
1415.3291961000923


In [11]:
y_hat_avg = test.copy()
weights = np.arange(1,61)
sum_weights = np.sum(weights)
y_hat_avg['weighted_moving_avg_forecast'] = train['Count'].rolling(window=60, center=True).apply(lambda x: np.sum(weights*x) / sum_weights, raw=False).mean()
plt.figure(figsize=(16,8))
plt.plot(train['Count'], label='Train')
plt.plot(test['Count'], label='Test')
plt.plot(y_hat_avg['weighted_moving_avg_forecast'], label='Weighted Moving Average Forecast')
plt.title("Weighted Moving Average")
plt.legend(loc='best')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [12]:
mae = mean_absolute_error(test.Count,y_hat_avg.weighted_moving_avg_forecast)
mape = mean_absolute_percentage_error(test.Count, y_hat_avg.weighted_moving_avg_forecast)
mse = mean_squared_error(test.Count, y_hat_avg.weighted_moving_avg_forecast)
rmse = sqrt(mean_squared_error(test.Count, y_hat_avg.weighted_moving_avg_forecast))
print(mae)
print(mape)
print(mse)
print(rmse)

2029.0463977301831
0.7551041035960193
5620346.81290499
2370.727064194651


In [13]:
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
y_hat_avg = test.copy()
fit2 = SimpleExpSmoothing(np.asarray(train['Count'])).fit(smoothing_level=0.6,optimized=False)
y_hat_avg['SES'] = fit2.forecast(len(test))
plt.figure(figsize=(16,8))
plt.plot(train['Count'], label='Train')
plt.plot(test['Count'], label='Test')
plt.plot(y_hat_avg['SES'], label='SES')
plt.title("Simple Exponential Smoothing")
plt.legend(loc='best')
plt.show()



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [14]:
mae = mean_absolute_error(test.Count,y_hat_avg.SES)
mape = mean_absolute_percentage_error(test.Count, y_hat_avg.SES)
mse = mean_squared_error(test.Count, y_hat_avg.SES)
rmse = sqrt(mean_squared_error(test.Count, y_hat_avg.SES))
print(mae)
print(mape)
print(mse)
print(rmse)

1193.9118655927266
1.361935378787002
2184871.5533030825
1478.1311015275614


In [15]:
y_hat_avg = test.copy()

fit1 = Holt(np.asarray(train['Count'])).fit(smoothing_level = 0.3,smoothing_slope = 0.1)
y_hat_avg['Holt_linear'] = fit1.forecast(len(test))

plt.figure(figsize=(16,8))
plt.plot(train['Count'], label='Train')
plt.plot(test['Count'], label='Test')
plt.plot(y_hat_avg['Holt_linear'], label='Holt_linear')
plt.legend(loc='best')
plt.title("Double Exponential Smoothing")
plt.show()

  fit1 = Holt(np.asarray(train['Count'])).fit(smoothing_level = 0.3,smoothing_slope = 0.1)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [16]:
mae = mean_absolute_error(test.Count,y_hat_avg.Holt_linear)
mape = mean_absolute_percentage_error(test.Count, y_hat_avg.Holt_linear)
mse = mean_squared_error(test.Count, y_hat_avg.Holt_linear)
rmse = sqrt(mean_squared_error(test.Count, y_hat_avg.Holt_linear))
print(mae)
print(mape)
print(mse)
print(rmse)

1572.1115908366069
1.7386245902798483
3511140.266108458
1873.8036893197905


In [17]:
y_hat_avg = test.copy()
fit1 = ExponentialSmoothing(np.asarray(train['Count']) ,seasonal_periods=7 ,trend='add', seasonal='add',).fit()
y_hat_avg['Holt_Winter'] = fit1.forecast(len(test))
plt.figure(figsize=(16,8))
plt.plot( train['Count'], label='Train')
plt.plot(test['Count'], label='Test')
plt.plot(y_hat_avg['Holt_Winter'], label='Holt_Winter')
plt.title("Triple Exponential Smoothing")
plt.legend(loc='best')
plt.show()



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [18]:
mae = mean_absolute_error(test.Count,y_hat_avg.Holt_Winter)
mape = mean_absolute_percentage_error(test.Count, y_hat_avg.Holt_Winter)
mse = mean_squared_error(test.Count, y_hat_avg.Holt_Winter)
rmse = sqrt(mean_squared_error(test.Count, y_hat_avg.Holt_Winter))
print(mae)
print(mape)
print(mse)
print(rmse)

1197.3017087266371
1.474278293113066
2421135.2823967594
1555.999769407682
