In [153]:
import pandas as pd
import numpy as np
from pandas import read_csv
from pmdarima.arima import auto_arima
from datetime import datetime
import matplotlib.pyplot as plt
import tsfresh
from pmdarima.arima import ADFTest
from tsfresh import extract_features
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

In [154]:
# Load the raw series from a CSV located relative to the notebook's working directory.
values = pd.read_csv("../../Time_series_data.csv")

In [155]:
# Preview the first rows to confirm the column layout before renaming columns by position.
values.head(13)

Unnamed: 0,Month,Sales
0,2013-01,2815
1,2013-02,2672
2,2013-03,2755
3,2013-04,2721
4,2013-05,2946
5,2013-06,3036
6,2013-07,2282
7,2013-08,2212
8,2013-09,2922
9,2013-10,4301


In [156]:

# Standardise column names by position (first column -> 'Time', second -> 'Data')
# so the rest of the notebook does not depend on the CSV's own headers.
values = values.rename(
    columns={values.columns[0]: 'Time', values.columns[1]: 'Data'}
)
# errors='coerce' turns unparseable entries into NaT instead of raising.
values['Time'] = pd.to_datetime(values['Time'], errors='coerce')

In [157]:
# Infer the sampling frequency from the parsed timestamps; it is reused later
# to build the date index for the forecast horizon.
freq = pd.infer_freq(values["Time"])

In [158]:
# end_date = pd.to_datetime(x) + pd.DateOffset(months=10)
# print(end_date)

In [159]:
# Index the frame by timestamp; after this, 'Data' is the only remaining renamed column.
values = values.set_index('Time')

In [160]:
from sklearn.model_selection import train_test_split

# Chronological hold-out: shuffle=False preserves row order, so the last 20%
# of the rows become the test set.
train, test = train_test_split(values, shuffle=False, test_size=0.2)

In [161]:
# Independent copy of the indexed frame; the moving-average cells below add a
# 'moving_avg_forecast' column to it without touching `values`.
df1 = values.copy()

In [162]:
# Score rolling-mean windows of 2..12 periods against the held-out rows.
# NOTE(review): rolling(i).mean() at time t includes the observation at t, so
# this measures a smoother rather than a strict forecast - confirm intent.
for i in range(2, 13):
    df1['moving_avg_forecast'] = values['Data'].rolling(i).mean()
    # Same chronological split as `train`/`test`, so rows line up with `test`.
    train1, test1 = train_test_split(df1, shuffle=False, test_size=0.2)
    y_hat_avg = test1.copy()

    actual = test['Data']
    abs_error = np.abs(actual - y_hat_avg['moving_avg_forecast'])
    rmse = np.sqrt(mean_squared_error(actual, y_hat_avg['moving_avg_forecast']))
    mape = np.round(np.mean(abs_error / actual), 4)

    print('Rolling window : ', i, 'RMSE : ', np.round(rmse, 2), 'MAPE : ', mape)

Rolling window :  2 RMSE :  1658.37 MAPE :  0.2819
Rolling window :  3 RMSE :  2247.53 MAPE :  0.4473
Rolling window :  4 RMSE :  2488.44 MAPE :  0.5157
Rolling window :  5 RMSE :  2612.0 MAPE :  0.5448
Rolling window :  6 RMSE :  2551.73 MAPE :  0.5382
Rolling window :  7 RMSE :  2461.33 MAPE :  0.5095
Rolling window :  8 RMSE :  2442.75 MAPE :  0.5134
Rolling window :  9 RMSE :  2543.02 MAPE :  0.5612
Rolling window :  10 RMSE :  2554.72 MAPE :  0.5549
Rolling window :  11 RMSE :  2521.28 MAPE :  0.5259
Rolling window :  12 RMSE :  2414.18 MAPE :  0.4928


In [163]:
def ma_optimizer(data):
    """Pick the rolling-mean window (2 to 12 periods) with the lowest hold-out error.

    The input is split chronologically (last 20% of rows held out). For each
    candidate window the rolling mean of the ``'Data'`` column is scored
    against the held-out actuals.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a numeric ``'Data'`` column in chronological order.

    Returns
    -------
    tuple
        ``(best_window, best_score)``. Despite the local name ``best_mae``,
        the score is a mean *squared* error (``mean_squared_error``).
    """
    series = data['Data']
    # shuffle=False keeps row order, so the hold-out is the tail of the series.
    _, holdout = train_test_split(series, test_size=0.2, shuffle=False)
    n_holdout = len(holdout)

    best_alpha, best_mae = None, float("inf")
    for alpha in range(2, 13):
        # The window must be the candidate `alpha`; relying on a notebook-level
        # loop variable would give every candidate the same score.
        smoothed = series.rolling(alpha).mean()
        # NOTE(review): the rolling mean at time t includes the value at t, so
        # this scores a smoother rather than a strict forecast - confirm intent.
        mae = mean_squared_error(holdout, smoothed.iloc[-n_holdout:])

        # Strict '<' keeps the smallest window when scores tie.
        if mae < best_mae:
            best_alpha, best_mae = alpha, mae

    return best_alpha, best_mae

In [164]:
def ma_model_tuning(data, title="Model Tuning - Single Exponential Smoothing"):
    """Evaluate the best rolling-mean window on the chronological hold-out.

    The window is chosen by ``ma_optimizer(data)``. The rolling mean of the
    ``'Data'`` column is then computed on a copy of ``data`` and scored on
    the last 20% of rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a numeric ``'Data'`` column in chronological order.
        It is not modified.
    title : str, optional
        Not used by the computation; kept so existing calls keep working.

    Returns
    -------
    tuple
        ``(y_hat_avg, mae, mse)`` where ``y_hat_avg`` is the hold-out frame
        with an added ``'moving_avg_forecast'`` column, ``mae`` is the mean
        absolute error and ``mse`` the mean squared error on the hold-out.
    """
    best_alpha, _ = ma_optimizer(data)

    # Work on a copy so neither the caller's frame nor notebook globals change.
    scored = data.copy()
    scored['moving_avg_forecast'] = scored['Data'].rolling(best_alpha).mean()
    # shuffle=False keeps row order, so the hold-out is the tail of the frame.
    _, holdout = train_test_split(scored, test_size=0.2, shuffle=False)

    y_hat_avg = holdout.copy()
    mae = mean_absolute_error(y_hat_avg['Data'], y_hat_avg['moving_avg_forecast'])
    mse = mean_squared_error(y_hat_avg['Data'], y_hat_avg['moving_avg_forecast'])

    return y_hat_avg, mae, mse

In [165]:
# Run the moving-average tuning defined above; `ma_model_tuning` is the only
# tuning function defined in this notebook, so call it by that name.
y_pred, mae, mse = ma_model_tuning(values)

In [167]:
# Hold-out mean squared error of the selected moving-average window.
mse

2750196.9166666665

In [None]:
# Seasonal ARIMA with order (1, 1, 1) and seasonal order (1, 1, 1, 12),
# fitted on the training window only.
model = ARIMA(
    train,
    exog=None,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
    trend='n',
    enforce_stationarity=False,
    enforce_invertibility=True,
    concentrate_scale=False,
    trend_offset=1,
    dates=None,
    freq=None,
    missing='none',
    validate_specification=False,
)
model_fit = model.fit()
model_fit.summary()

In [None]:
# Inspect the last training rows to see where the training window ends.
train.tail(2)

In [None]:
# First timestamp after the training window, used as the start of the forecast index.
x = values.index[len(train)]
print(x)

In [None]:
# Forecast horizon: one step per held-out row.
n_periods = len(test)

In [None]:
# Date index for the forecast horizon, starting at the first held-out
# timestamp and stepping at the inferred frequency.
index_future_dates = pd.date_range(start=x, freq=freq, periods=n_periods)
index_future_dates

In [None]:
# Out-of-sample forecast covering the whole hold-out horizon.
predicted = model_fit.forecast(steps=n_periods)

In [None]:
# Put the forecast into a one-column frame on the forecast dates, then move
# the dates into a regular column.
prediction = pd.DataFrame(predicted, index=index_future_dates)
prediction.columns = ['predicted_values']
prediction = prediction.reset_index()

In [None]:
# Show the forecast table. The disabled line below would restrict it to the last 12 rows.
# prediction=prediction.tail(12)
print(prediction)

In [None]:
# Count occurrences of each column label (a quick duplicate-column check).
# The method has to be called; the bare attribute only shows a bound-method repr.
prediction.columns.value_counts()

In [None]:
# Overlay the forecast on the full observed series; labels and a legend make
# the two lines distinguishable when the figure is read on its own.
fig, ax = plt.subplots()
ax.plot(values.index, values['Data'], label='Actual')
ax.plot(index_future_dates, predicted, label='ARIMA forecast')
ax.set_title('Actual series vs. ARIMA forecast')
ax.set_xlabel('Time')
ax.set_ylabel('Data')
ax.legend();

In [None]:
# Mean absolute error of the ARIMA forecast on the hold-out
# (this rebinds the `mae` name used earlier in the notebook).
mae = mean_absolute_error(
    y_true=test['Data'],
    y_pred=prediction['predicted_values'],
)
mae

In [None]:
# Mean absolute percentage error of the ARIMA forecast on the hold-out
# (this rebinds the `mape` name used earlier in the notebook).
mape = mean_absolute_percentage_error(
    y_true=test['Data'],
    y_pred=prediction['predicted_values'],
)
mape

In [None]:
# Mean squared error of the ARIMA forecast on the hold-out
# (this rebinds the `mse` name used earlier in the notebook).
mse = mean_squared_error(
    y_true=test['Data'],
    y_pred=prediction['predicted_values'],
)
mse

In [None]:
import itertools