In [1]:
# Default values
# filename: location of the CSV that is handed to pd.read_csv.
filename = "https://www.salesanalytics.co.jp/591h"
# forecast: size of the hold-out test set and number of periods to predict.
# seasonal: seasonal period passed to auto_arima as `m`.
forecast, seasonal = 12, 12

In [7]:
# import packages
import numpy as np
import pandas as pd

import pmdarima as pm
from pmdarima import utils
from pmdarima import arima
from pmdarima import model_selection

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error

import plotly.graph_objects as go

import warnings
# Install a blanket 'ignore' filter: no category or module is given, so it
# matches every warning raised from this point on.
warnings.filterwarnings('ignore')

### Dataset

In [8]:
# get data
# Load the CSV found at `filename`; the 'Month' column becomes the index and
# pandas is asked to parse that index as dates.
df = pd.read_csv(filename, parse_dates=True, index_col='Month')

In [13]:
# view records
# Bare expression as the last line of the cell, so the notebook renders the
# loaded frame as the cell output.
df

Unnamed: 0_level_0,Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121
...,...
1960-08-01,606
1960-09-01,508
1960-10-01,461
1960-11-01,390


In [14]:
# graph display
# Line chart of the first data column plotted against the frame's index.
observed_trace = go.Scatter(
    x=df.index,
    y=df.iloc[:, 0],
    mode="lines",
    name='observed data',
)
fig = go.Figure()
fig.add_trace(observed_trace)
fig.show()

### Validation

In [15]:
# Train Test Split
# Split the frame into train and test parts; `forecast` is the number of
# rows requested for the test part.
df_train, df_test = model_selection.train_test_split(df, test_size=forecast)

In [16]:
# graph display
# One line per split, each drawn from the first column of its frame.
fig = go.Figure()
for frame, label in (
    (df_train, 'observed data (train data)'),
    (df_test, 'observed data (test data)'),
):
    fig.add_trace(
        go.Scatter(x=frame.index, y=frame.iloc[:, 0], mode="lines", name=label)
    )
fig.show()


In [17]:
# Train the Model
# Seasonal auto-ARIMA fit on the training split only; `seasonal` supplies the
# seasonal period `m`.
arima_model = pm.auto_arima(
    df_train, seasonal=True, m=seasonal, n_jobs=-1, maxiter=10
)

# Display the selected model as the cell output.
arima_model

In [18]:
# Evaluation
# In-sample fit and an out-of-sample forecast covering `forecast` periods.
train_pred = arima_model.predict_in_sample()
test_pred = arima_model.predict(n_periods=forecast)

# Score the forecast against the held-out test rows.
rmse = np.sqrt(mean_squared_error(df_test, test_pred))
mae = mean_absolute_error(df_test, test_pred)
mape = mean_absolute_percentage_error(df_test, test_pred)

# Each label and each value goes on its own line.
print('RMSE:', rmse, 'MAE:', mae, 'MAPE:', mape, sep='\n')

RMSE:
18.536468695501895
MAE:
14.898208209798137
MAPE:
0.03097561514550175


### Forecasting with ARIMA

In [22]:
# Train the Model
# Seasonal auto-ARIMA fit on the full data set (no hold-out); `seasonal`
# supplies the seasonal period `m`.
arima_model = pm.auto_arima(
    df, seasonal=True, m=seasonal, n_jobs=-1, maxiter=10
)

# Display the selected model as the cell output.
arima_model

In [23]:
# Forecasting
# In-sample predictions from the model fitted in the previous cell.
train_pred = arima_model.predict_in_sample()

# With return_conf_int=True the call yields a pair that is unpacked into the
# point forecast and its confidence intervals.
test_pred, conf_int = arima_model.predict(return_conf_int=True, n_periods=forecast)


In [32]:
# create datatable
# Forecast table: one row per forecast step, holding the point forecast and
# the two bounds of its confidence interval.
df_pred = pd.DataFrame(index=range(forecast))
# np.asarray accepts both a pandas Series and a plain array.
df_pred['forecast value'] = np.asarray(test_pred)
df_pred[['conf_inf', 'conf_sup']] = conf_int

# The first forecast period is the first day of the month that follows the
# last observation. Reading the scalar last index value keeps year and month
# as plain integers, and DateOffset takes care of the December -> January
# rollover, so no month == 12 special case is needed.
last_obs = df.index[-1]
ts_yyyymm = (
    pd.Timestamp(year=last_obs.year, month=last_obs.month, day=1)
    + pd.DateOffset(months=1)
)

# Re-index the table with month-start dates beginning at that period.
df_pred.index = pd.date_range(ts_yyyymm, periods=forecast, freq='MS')

# view records
df_pred


Unnamed: 0,forecast value,conf_inf,conf_sup
1961-01-01,451.422001,430.341621,472.502381
1961-02-01,425.757024,401.311678,450.20237
1961-03-01,465.757467,438.35733,493.157603
1961-04-01,495.9926,465.926672,526.058527
1961-05-01,512.947426,480.433544,545.461308
1961-06-01,574.038882,539.248868,608.828896
1961-07-01,664.73889,627.812781,701.665
1961-08-01,656.244788,617.299569,695.190008
1961-09-01,551.81053,510.945843,592.675217
1961-10-01,500.161784,457.463831,542.859736


In [21]:
# graph display
# Observed series followed by the forecast, each taken from the first column
# of its frame.
fig = go.Figure()
for frame, label in ((df, 'observed'), (df_pred, 'forecast')):
    fig.add_trace(
        go.Scatter(x=frame.index, y=frame.iloc[:, 0], mode="lines", name=label)
    )
fig.show()