# Introduction to Time Series

## Forecasting Performance Metrics

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels as sm

%matplotlib inline

# Read the daily bike-sharing CSV, parse the `dteday` column into datetimes
# and use it as the index, all in one chain so the cell is idempotent.
bike_sharing_df = (
    pd.read_csv('bike_sharing_day.csv', index_col=0)
    .assign(dteday=lambda frame: pd.to_datetime(frame.dteday))
    .set_index('dteday')
)
bike_sharing_df.head()

In [None]:
def time_series_train_test_split(dataframe, target, pct=30):
    """Split a frame into leading (train) and trailing (test) rows, by position.

    Rows containing missing values are dropped first, then the remaining rows
    are cut in their existing order: no shuffling or sorting is done here, so
    the caller is presumably expected to pass rows already in time order.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input data holding both the feature columns and the target column.
    target : str
        Name of the column to predict; every other column is a feature.
    pct : int or float, default 30
        Percentage of the (NaN-free) rows assigned to the test split.

    Returns
    -------
    tuple
        ``(train_features, test_features, train_target, test_target)``.

    Raises
    ------
    ValueError
        If ``target`` is not one of the columns of ``dataframe``.
    """
    dataframe = dataframe.dropna().copy()
    # Count rows *after* dropping NaNs; counting before would place the
    # cut-off too late and shrink the test split whenever rows are dropped.
    n = dataframe.shape[0]
    train_n = int(n*(100-pct)/100)
    train_feats = list(dataframe.columns)
    train_feats.remove(target)
    # .iloc keeps the slicing positional whatever the index type is.
    train_rows = dataframe.iloc[:train_n]
    test_rows = dataframe.iloc[train_n:]
    return (train_rows[train_feats],
            test_rows[train_feats],
            train_rows[target],
            test_rows[target])

In [None]:
# Drop `casual` and `registered` so only `cnt` and the remaining columns are kept.
features = bike_sharing_df.drop(columns=['casual', 'registered'])

In [None]:
# Positional train/test split with `cnt` as the target (default test share).
split_parts = time_series_train_test_split(features, 'cnt')
feature_tr_df, feature_ts_df, target_tr, target_ts = split_parts

In [None]:
# Preview the first rows of the training features.
feature_tr_df.head(n=5)

In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.svm import SVR, LinearSVR

from sklearn.metrics import mean_absolute_error, \
                            mean_squared_error, \
                            r2_score

def fit_predict_score(model):
    """Fit `model` on the training split and score it on both splits.

    Reads the notebook-level `feature_tr_df`, `feature_ts_df`, `target_tr`
    and `target_ts` variables. Returns a two-element list of dicts (train
    first, then test), each holding the fitted model, the dataset label and
    the MAE, MSE, RMSE and R2 scores.
    """
    model.fit(feature_tr_df, target_tr)

    def _score(dataset, inputs, actual):
        # Score one split; RMSE is the square root of the MSE computed here.
        predicted = model.predict(inputs)
        mse = mean_squared_error(actual, predicted)
        return {
            'model': model,
            'dataset': dataset,
            'mae': mean_absolute_error(actual, predicted),
            'mse': mse,
            'rmse': np.sqrt(mse),
            'r2': r2_score(actual, predicted),
        }

    return [
        _score('train', feature_tr_df, target_tr),
        _score('test', feature_ts_df, target_ts),
    ]
    

In [None]:
# Fit and score every candidate regressor, in a fixed order.
# Estimators that draw random numbers while fitting get a pinned seed so that
# Restart Kernel -> Run All reproduces the same scores.
RANDOM_STATE = 42

candidate_models = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    DecisionTreeRegressor(random_state=RANDOM_STATE),
    KNeighborsRegressor(),
    GradientBoostingRegressor(random_state=RANDOM_STATE),
    RandomForestRegressor(random_state=RANDOM_STATE),
    SVR(),
    LinearSVR(random_state=RANDOM_STATE),
]

# Each call returns [train_scores, test_scores]; flatten them into one list.
results = [
    scores
    for candidate in candidate_models
    for scores in fit_predict_score(candidate)
]

In [None]:
# Tabulate the score dicts, add the estimator class name and put the
# identifying columns first.
results_df = (
    pd.DataFrame(results)
    .assign(model_name=lambda frame: frame.model.apply(lambda est: type(est).__name__))
    [['model_name', 'model', 'dataset', 'mae', 'rmse', 'mse', 'r2']]
)

results_df

In [None]:
# Five-letter labels used as compact tick labels for each estimator class.
short_names = {
    'LinearRegression': 'linrg',
    'Ridge': 'ridge',
    'Lasso': 'lasso',
    'DecisionTreeRegressor': 'dtree',
    'KNeighborsRegressor': 'knnrg',
    'GradientBoostingRegressor': 'grbst',
    'RandomForestRegressor': 'rndfr',
    'SVR': 'svreg',
    'LinearSVR': 'svlin',
}
# Direct indexing (not .get) so an unmapped class name raises KeyError.
results_df['model_short_name'] = [
    short_names[class_name] for class_name in results_df.model_name
]

In [None]:
# Compare the models on the held-out split: one bar chart per metric.
_, ax = plt.subplots(1, 4, figsize=(20, 6))

# Build a new frame instead of calling set_index(inplace=True) on a filtered
# slice, which triggers pandas' SettingWithCopyWarning.
test_results_df = (
    results_df[results_df.dataset == 'test']
    .set_index('model_short_name', drop=True)
)

# (column, title) per panel; the last panel shows R2 and is titled as such.
metric_panels = [
    ('mae', 'MAE by Model'),
    ('mse', 'MSE by Model'),
    ('rmse', 'RMSE by Model'),
    ('r2', 'R2 by Model'),
]
for panel_ax, (metric, panel_title) in zip(ax, metric_panels):
    test_results_df[metric].plot(kind='bar', rot=45, title=panel_title, ax=panel_ax)