# Roadmap

1. Get data
2. Create train-test split
3. Clean training set (write functions): missing values, text, categorical attributes, scaling
4. Select models and scoring metrics, then train
5. Compare them: clean test set, make predictions, score
6. Fine-tune models

## 1. Load data

In [1]:
from functions import load_data

# Location of the Excel workbook that holds the raw time series.
DATA_PATH = 'data/time_series.xlsx'

# Load the workbook through the project helper, then print the frame's
# structural summary (index range, column count, dtypes, memory usage).
data_df = load_data(DATA_PATH)
data_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 292 entries, 2012-04-08 to 2017-11-05
Columns: 1833 entries, 012 to TRUHONE
dtypes: int64(1833)
memory usage: 4.1 MB


## 2. Train-Test Split

In [2]:
# NOTE: this is the project's own train_test_split from functions.py,
# not the scikit-learn function of the same name.
from functions import train_test_split

# Share of the observations assigned to the training set (66 / 34 split).
TRAIN_FRACTION = 0.66

train_df, test_df = train_test_split(data_df, TRAIN_FRACTION)

# Preview the first rows of the training set.
train_df.head()

Observations: 292
Training Observations: 192
Testing Observations: 100


Unnamed: 0_level_0,012,017,03008944ST-1,03008944ST-3,0300ST1550-1,0300ST15X9-1,0300ST15X9-2,0300ST15X9-3,0300ST1605-1,0300ST1605-2,...,9920-2,9920-3,9920-4,9920-5,9920-6,9920-7,9997-25,HW220D15,HW240DIA,TRUHONE
EntDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-04-08,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2012-04-15,0,0,0,0,0,0,0,0,0,0,...,20,19,7,166,98,0,0,0,0,0
2012-04-22,0,0,0,0,0,0,0,0,0,0,...,41,15,10,207,87,0,0,0,0,0
2012-04-29,0,0,0,0,0,0,0,0,0,0,...,38,44,13,101,21,10,0,0,0,0
2012-05-06,0,0,0,0,0,0,0,0,0,0,...,29,47,21,75,43,0,0,0,0,0


### Exponential Smoothing with Homebrewed Class

In [None]:
# Scratch cell: look at the first observation of the '9920-2' training series.
testing_now = train_df['9920-2']

# Use .iloc for positional access. The series is indexed by dates, so an
# integer key passed to [] is not a label; it only works through pandas'
# deprecated positional fallback and is slated to become a label lookup.
smooth = testing_now.iloc[0]

# Same value read straight from the frame; as the last expression of the
# cell this is what the notebook displays.
train_df['9920-2'].iloc[0]

In [3]:
from HoltWinter_class import HoltWinterExponentialSmoothing

In [4]:
# Hyperparameters for the homebrewed Holt-Winters class
# (presumably level / trend / seasonal smoothing factors -- confirm in
# HoltWinter_class).
smoothing_params = {'alpha': 0.51, 'beta': 0.015, 'gamma': 0.1}

model = HoltWinterExponentialSmoothing(**smoothing_params)

# Fit on the training index and the '9920-2' series.
model.fit(train_df.index, train_df['9920-2'])

# Ask the fitted model for 100 predictions with slen=10
# (presumably the season length -- confirm in HoltWinter_class).
preds = model.predict(
    X=train_df['9920-2'],
    slen=10,
    n_preds=100,
)

In [5]:
# Score the predictions produced by the homebrewed model.
# NOTE(review): the recorded run of this cell raised
# "TypeError: predict() missing 1 required positional argument: 'n_preds'"
# even though n_preds is passed here -- presumably score() calls predict()
# internally without forwarding it; confirm and fix in HoltWinter_class.
model.score(y=preds, X=train_df['9920-2'], n_preds=100)

TypeError: predict() missing 1 required positional argument: 'n_preds'

### Exponential Smoothing with Homebrewed Functions

In [None]:
from exponential_smoothing import initial_trend, initial_seasonal_components, triple_exponential_smoothing

In [None]:
# Exploratory cell: display what initial_trend returns for the '9920-2'
# training series with slen=10 (presumably the season length -- confirm in
# exponential_smoothing).
what_is_it = initial_trend(train_df['9920-2'], slen=10)
what_is_it

In [None]:
# Forecast horizon: one prediction per held-out observation, matching the
# len(test_df) horizon used by the statsmodels cells.
horizon = len(test_df)

# Run the homebrewed triple exponential smoothing on the '9920-2' training
# series, asking for `horizon` predictions.
predictions = triple_exponential_smoothing(
    train_df['9920-2'],
    slen=10,
    alpha=0.51,
    beta=0.015,
    gamma=0.1,
    n_preds=horizon,
)

# y_hat collects one forecast column per model. No visible cell creates it,
# so build it from the test set on first use and leave an existing frame
# (with forecasts from other models) untouched.
if 'y_hat' not in globals():
    y_hat = test_df.copy()

# Keep only the trailing `horizon` values of the returned sequence
# (presumably the out-of-sample forecasts -- confirm in exponential_smoothing).
# Slicing from an explicit start index stays correct when horizon is 0.
y_hat['HW_new'] = predictions[len(predictions) - horizon:]

In [None]:
# Score the 'HW_new' forecast column of y_hat against the '9920-2' column of
# the test set (presumably root-mean-squared error -- confirm in functions.py).
from functions import RMSE

RMSE(test_df, '9920-2', y_hat, 'HW_new')

### Exponential Smoothing with Statsmodels

In [None]:
# No trend, additive seasonality, no damping.
# Additive rather than multiplicative: the '9920-2' series contains zeros,
# and statsmodels requires strictly positive data for multiplicative
# components.
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing

# y_hat collects one forecast column per model; create it from the test set
# if no earlier cell has done so, without clobbering existing forecasts.
if 'y_hat' not in globals():
    y_hat = test_df.copy()

# Fit with fixed smoothing parameters and a seasonal period of 10
# observations, then forecast one value per test observation.
fit1 = ExponentialSmoothing(
    np.asarray(train_df['9920-2']),
    seasonal_periods=10,
    trend=None,
    seasonal='additive',
).fit(smoothing_level=0.51, smoothing_seasonal=0.1)
y_hat['DES'] = fit1.forecast(len(test_df))

# NOTE(review): plot_time_series is not imported in any visible cell --
# presumably it lives in functions.py alongside RMSE; confirm and import it.
plot_time_series(train_df, test_df, '9920-2', y_hat, 'DES', 'Double ES Forecast')

In [None]:
# Score the 'DES' forecast column of y_hat against the '9920-2' column of
# the test set (presumably root-mean-squared error -- confirm in functions.py).
from functions import RMSE

RMSE(test_df, '9920-2', y_hat, 'DES')

In [None]:
# Here we model both trend and seasonality (both additive), i.e. Holt-Winters.
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing

# y_hat collects one forecast column per model; create it from the test set
# if no earlier cell has done so, without clobbering existing forecasts.
if 'y_hat' not in globals():
    y_hat = test_df.copy()

# Fit with fixed smoothing parameters and a seasonal period of 10
# observations, then forecast one value per test observation.
# NOTE(review): newer statsmodels releases rename the `smoothing_slope`
# keyword to `smoothing_trend` and deprecate the old spelling -- check the
# installed version before switching.
fit1 = ExponentialSmoothing(
    np.asarray(train_df['9920-2']),
    seasonal_periods=10,
    trend='add',
    seasonal='add',
).fit(smoothing_level=0.51, smoothing_slope=0.015, smoothing_seasonal=0.1)
y_hat['Holt_Winter'] = fit1.forecast(len(test_df))

# NOTE(review): plot_time_series is not imported in any visible cell --
# presumably it lives in functions.py alongside RMSE; confirm and import it.
plot_time_series(train_df, test_df, '9920-2', y_hat, 'Holt_Winter', 'Holt_Winter Forecast')

In [None]:
# Score the 'Holt_Winter' forecast column of y_hat against the '9920-2'
# column of the test set (presumably root-mean-squared error -- confirm in
# functions.py).
from functions import RMSE

RMSE(test_df, '9920-2', y_hat, 'Holt_Winter')