# Introduction to Time Series

## Feature Engineering

In [None]:
import pandas as pd
import statsmodels as sm

%matplotlib inline

# Load the daily bike-sharing data; the first CSV column is used as a
# temporary index until the date column replaces it below.
bike_sharing_df = pd.read_csv('bike_sharing_day.csv', index_col=0)

# Parse the date strings and index the frame by date so that later cells can
# select rows by time.
bike_sharing_df['dteday'] = pd.to_datetime(bike_sharing_df['dteday'])
bike_sharing_df = bike_sharing_df.set_index('dteday')

bike_sharing_df.head()

### Predict Temperature From Date Information

In [None]:
# Independent single-column frame: feature columns added later never touch
# bike_sharing_df.
temp_df = bike_sharing_df.loc[:, ['temp']].copy()
temp_df.head()

In [None]:
# Line plot of the temperature series against the date index.
temp_df.plot()

### Create Lag Features

In [None]:
# Lag-1 feature: each row carries the previous row's temperature, so the
# first row has no value (NaN).
temp_df['temp_shift_1'] = temp_df['temp'].shift(periods=1)

In [None]:
# Preview the first rows to check the new lag column lines up as expected.
temp_df.head()

#### Ridge Regression with Lag Feature

In [None]:
def time_series_train_test_split(dataframe, target, pct=30):
    """Split a time-ordered frame chronologically into train and test parts.

    Rows containing any NaN (for example the leading rows produced by lag or
    rolling-window features) are dropped first.  The split point is then
    computed from the rows that remain, so the last ``pct`` percent of the
    *usable* rows form the test set.  Rows are never shuffled.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Feature columns plus the target column, rows in chronological order.
    target : str
        Name of the column to predict; every other column is a feature.
    pct : int or float, default 30
        Percentage (0-100) of the usable rows held out for testing.

    Returns
    -------
    tuple
        ``(train_features, test_features, train_target, test_target)``.

    Raises
    ------
    ValueError
        If ``pct`` is outside [0, 100] or ``target`` is not a column.
    """
    if not 0 <= pct <= 100:
        raise ValueError(
            "pct must be between 0 and 100, got {!r}".format(pct))
    if target not in dataframe.columns:
        raise ValueError(
            "target column {!r} not found in dataframe".format(target))

    # Drop incomplete rows *before* sizing the split.  Sizing from the raw
    # row count would push the boundary too far right and shrink (or even
    # empty) the test set whenever rows are dropped.
    clean = dataframe.dropna()
    train_n = int(len(clean) * (100 - pct) / 100)
    feature_cols = [col for col in clean.columns if col != target]

    # Positional slicing keeps the chronological order: oldest rows train,
    # newest rows test.
    train = clean.iloc[:train_n]
    test = clean.iloc[train_n:]
    return (train[feature_cols],
            test[feature_cols],
            train[target],
            test[target])

In [None]:
# Chronological split with the helper's default hold-out; the only feature
# at this point is the lag-1 temperature.
(feature_tr_df,
 feature_ts_df,
 target_tr,
 target_ts) = time_series_train_test_split(temp_df, target='temp')

In [None]:
# (rows, columns) of the train and test feature frames.
feature_tr_df.shape, feature_ts_df.shape

In [None]:
from sklearn.linear_model import Ridge

# Ridge regression with default settings, fitted on the training rows only.
ridge = Ridge()
ridge.fit(feature_tr_df, target_tr)

# Display the model score on the training split, then on the test split.
tuple(ridge.score(features, actual)
      for features, actual in ((feature_tr_df, target_tr),
                               (feature_ts_df, target_ts)))

In [None]:
# Start the comparison frame from the observed temperatures only; each model
# adds its own prediction column later.
predictions_df = temp_df.loc[:, ['temp']].copy()

In [None]:
# Predict on both splits and stitch the results into one date-indexed series.
# Assignment aligns on the index, so dates missing from the splits stay NaN.
predictions_df['temp_shift_1_model'] = pd.concat(
    [pd.Series(ridge.predict(split_df), index=split_df.index)
     for split_df in (feature_tr_df, feature_ts_df)]
)

In [None]:
# Overlay observed temperature and model predictions over the full date range
# on a wide, short figure.
predictions_df.plot(figsize=(20,3))

In [None]:
# Zoom in on January 2012.  Row selection by a partial date string has to go
# through .loc: plain [] on a DataFrame is a column lookup and raises KeyError
# on current pandas.
predictions_df.loc['2012-01'].plot(figsize=(20,3))

In [None]:
# Earliest date in the test split, i.e. where out-of-sample predictions begin.
# Index.min() is the vectorised form of the builtin min() over the index.
begin_testing = feature_ts_df.index.min()

In [None]:
# Show the first test-set date.
begin_testing

In [None]:
# Zoom in on June 2012.  Row selection by a partial date string has to go
# through .loc: plain [] on a DataFrame is a column lookup and raises KeyError
# on current pandas.
predictions_df.loc['06/2012'].plot(figsize=(20,3))

#### Add more Lag Features

In [None]:
# Lags 2 through 5: one column per lag, each holding the temperature from
# that many rows earlier.
for lag in (2, 3, 4, 5):
    temp_df['temp_shift_{}'.format(lag)] = temp_df['temp'].shift(lag)

In [None]:
# Preview ten rows so the NaN staircase left by the five lag columns is visible.
temp_df.head(10)

In [None]:
# Re-split now that temp_df carries lags 1 through 5 as features.
(feature_tr_df,
 feature_ts_df,
 target_tr,
 target_ts) = time_series_train_test_split(temp_df, target='temp')

In [None]:
# (rows, columns) of the train and test feature frames.
feature_tr_df.shape, feature_ts_df.shape

In [None]:
from sklearn.linear_model import Ridge

# Fresh ridge model with default settings, fitted on the five lag features.
ridge = Ridge()
ridge.fit(feature_tr_df, target_tr)

# Display the model score on the training split, then on the test split.
tuple(ridge.score(features, actual)
      for features, actual in ((feature_tr_df, target_tr),
                               (feature_ts_df, target_ts)))

In [None]:
# Predict on both splits and stitch the results into one date-indexed series.
# Assignment aligns on the index, so dates missing from the splits stay NaN.
predictions_df['temp_shift_5_model'] = pd.concat(
    [pd.Series(ridge.predict(split_df), index=split_df.index)
     for split_df in (feature_tr_df, feature_ts_df)]
)

In [None]:
# Overlay observed temperature and all model predictions so far over the full
# date range.
predictions_df.plot(figsize=(20,3))

In [None]:
# Zoom in on January 2012.  Row selection by a partial date string has to go
# through .loc: plain [] on a DataFrame is a column lookup and raises KeyError
# on current pandas.
predictions_df.loc['2012-01'].plot(figsize=(20,3))

In [None]:
# Zoom in on June 2012.  Row selection by a partial date string has to go
# through .loc: plain [] on a DataFrame is a column lookup and raises KeyError
# on current pandas.
predictions_df.loc['06/2012'].plot(figsize=(20,3))

##### Depending on the domain, you may also need lag values from further back, such as the same day last week, last month, or last year.

### Rolling Window Statistics

In [None]:
# Roll over the lag-1 series rather than temp itself, so each window is built
# only from values that precede the row being predicted.
width = 3
window = temp_df['temp_shift_1'].rolling(width)

In [None]:
# One feature column per summary statistic of the trailing window.
for stat_name in ('min', 'mean', 'max'):
    temp_df[stat_name] = getattr(window, stat_name)()

In [None]:
# Preview enough rows to see where the rolling statistics first become non-NaN.
temp_df.head(15)

In [None]:
# Re-split with the lag columns plus the rolling min/mean/max as features.
(feature_tr_df,
 feature_ts_df,
 target_tr,
 target_ts) = time_series_train_test_split(temp_df, target='temp')

In [None]:
# (rows, columns) of the train and test feature frames.
feature_tr_df.shape, feature_ts_df.shape

In [None]:
from sklearn.linear_model import Ridge

# Fresh ridge model with default settings, fitted on the lag and
# rolling-window features.
ridge = Ridge()
ridge.fit(feature_tr_df, target_tr)

# Display the model score on the training split, then on the test split.
tuple(ridge.score(features, actual)
      for features, actual in ((feature_tr_df, target_tr),
                               (feature_ts_df, target_ts)))

In [None]:
# Predict on both splits and stitch the results into one date-indexed series.
# Assignment aligns on the index, so dates missing from the splits stay NaN.
predictions_df['temp_rolling_window'] = pd.concat(
    [pd.Series(ridge.predict(split_df), index=split_df.index)
     for split_df in (feature_tr_df, feature_ts_df)]
)

In [None]:
# Overlay observed temperature and all model predictions so far over the full
# date range.
predictions_df.plot(figsize=(20,3))

In [None]:
# Zoom in on January 2012.  Row selection by a partial date string has to go
# through .loc: plain [] on a DataFrame is a column lookup and raises KeyError
# on current pandas.
predictions_df.loc['2012-01'].plot(figsize=(20,3))

In [None]:
# Zoom in on June 2012.  Row selection by a partial date string has to go
# through .loc: plain [] on a DataFrame is a column lookup and raises KeyError
# on current pandas.
predictions_df.loc['06/2012'].plot(figsize=(20,3))

#### Just the Moving Average

In [None]:
# Split using the rolling mean as the only feature alongside the target.
(feature_tr_df,
 feature_ts_df,
 target_tr,
 target_ts) = time_series_train_test_split(temp_df[['mean', 'temp']],
                                           target='temp')

In [None]:
# (rows, columns) of the train and test feature frames.
feature_tr_df.shape, feature_ts_df.shape

In [None]:
from sklearn.linear_model import Ridge

# Fresh ridge model with default settings, fitted on the moving average alone.
ridge = Ridge()
ridge.fit(feature_tr_df, target_tr)

# Display the model score on the training split, then on the test split.
tuple(ridge.score(features, actual)
      for features, actual in ((feature_tr_df, target_tr),
                               (feature_ts_df, target_ts)))

In [None]:
# Predict on both splits and stitch the results into one date-indexed series.
# Assignment aligns on the index, so dates missing from the splits stay NaN.
predictions_df['temp_ma'] = pd.concat(
    [pd.Series(ridge.predict(split_df), index=split_df.index)
     for split_df in (feature_tr_df, feature_ts_df)]
)

In [None]:
# Overlay observed temperature and every model's predictions over the full
# date range.
predictions_df.plot(figsize=(20,3))

In [None]:
# Zoom in on January 2012.  Row selection by a partial date string has to go
# through .loc: plain [] on a DataFrame is a column lookup and raises KeyError
# on current pandas.
predictions_df.loc['2012-01'].plot(figsize=(20,3))

In [None]:
# Zoom in on June 2012.  Row selection by a partial date string has to go
# through .loc: plain [] on a DataFrame is a column lookup and raises KeyError
# on current pandas.
predictions_df.loc['06/2012'].plot(figsize=(20,3))