In [1]:
import pandas as pd
import numpy as np
from pandas import read_csv
from pmdarima.arima import auto_arima
from datetime import datetime
import matplotlib.pyplot as plt
import tsfresh
from pmdarima.arima import ADFTest
from tsfresh import extract_features
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

In [2]:
# Load the raw series; the path is resolved relative to the current working directory.
values = pd.read_csv("../../Time_series_data.csv")

In [3]:
# Preview the first 13 rows of the loaded frame.
values.head(n=13)

Unnamed: 0,Month,Sales
0,2013-01,2815
1,2013-02,2672
2,2013-03,2755
3,2013-04,2721
4,2013-05,2946
5,2013-06,3036
6,2013-07,2282
7,2013-08,2212
8,2013-09,2922
9,2013-10,4301


In [4]:

# Standardise the column labels by position: the first column becomes 'Time'
# and the second becomes 'Data'. Both labels are swapped in a single rename so
# the mapping is applied simultaneously; two chained renames mislabel the frame
# when the first column is already called 'Data' (both columns end up 'Time').
values = values.rename(
    columns={values.columns[0]: 'Time', values.columns[1]: 'Data'})

# Parse the timestamps. errors='coerce' converts unparseable entries to NaT
# instead of raising, so malformed rows surface as missing dates.
values['Time'] = pd.to_datetime(values['Time'], errors='coerce')

In [5]:
# Infer the sampling frequency of the parsed timestamps; it is needed to
# build the forecast date index with pd.date_range.
freq = pd.infer_freq(values["Time"])

# infer_freq returns None when the spacing is irregular. Fail here with a
# clear message rather than later inside pd.date_range, which would complain
# about an under-specified range instead of the real cause.
if freq is None:
    raise ValueError(
        "Could not infer a regular frequency from the 'Time' column; "
        "check for missing, duplicated or unparseable timestamps.")

In [6]:
# end_date = pd.to_datetime(x) + pd.DateOffset(months=10)
# print(end_date)

In [7]:
# Promote 'Time' to the index. Reassigning (rather than inplace=True) plus the
# guard keeps the cell safe to re-run: a second execution would otherwise
# raise KeyError because 'Time' is no longer a column.
if values.index.name != 'Time':
    values = values.set_index('Time')

In [8]:
from sklearn.model_selection import train_test_split

# Chronological hold-out: shuffle=False keeps row order, so the first 80% of
# the rows form the training set and the last 20% the test set.
train, test = train_test_split(values, shuffle=False, test_size=0.2)

In [9]:
# Seasonal ARIMA(1, 1, 1)x(1, 1, 1, 12) fitted on the training split only.
# The frequency inferred earlier is passed explicitly so statsmodels does not
# have to re-infer it from the index (which triggers a warning) and so the
# model uses the same frequency as the forecast date index built with `freq`.
# Arguments omitted here (exog, concentrate_scale, trend_offset, dates,
# missing) were previously spelled out at their library default values.
model = ARIMA(
    train,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
    trend='n',
    enforce_stationarity=False,
    enforce_invertibility=True,
    freq=freq,
    validate_specification=False,
)
model_fit = model.fit()

# Last expression of the cell: renders the fit summary table.
model_fit.summary()



0,1,2,3
Dep. Variable:,Data,No. Observations:,84.0
Model:,"ARIMA(1, 1, 1)x(1, 1, 1, 12)",Log Likelihood,-463.29
Date:,"Fri, 25 Aug 2023",AIC,936.581
Time:,14:47:58,BIC,946.796
Sample:,01-01-2013,HQIC,940.551
,- 12-01-2019,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,0.1642,0.115,1.426,0.154,-0.061,0.390
ma.L1,-0.9091,0.086,-10.534,0.000,-1.078,-0.740
ar.S.L12,-0.0694,0.460,-0.151,0.880,-0.972,0.833
ma.S.L12,-0.0875,0.443,-0.197,0.844,-0.957,0.782
sigma2,6.698e+05,1.08e+05,6.187,0.000,4.58e+05,8.82e+05

0,1,2,3
Ljung-Box (L1) (Q):,0.02,Jarque-Bera (JB):,4.88
Prob(Q):,0.89,Prob(JB):,0.09
Heteroskedasticity (H):,2.77,Skew:,-0.07
Prob(H) (two-sided):,0.03,Kurtosis:,4.43


In [None]:
# Show the last two training rows to check where the training window ends.
train.tail(n=2)

In [None]:
# Index label at position len(train): with an unshuffled split this is the
# first timestamp that follows the training rows.
x = values.index[len(train)]
print(x)

In [None]:
# Forecast horizon: one step per row of the test split.
n_periods = len(test)

In [None]:
# Date labels for the forecast horizon: n_periods stamps starting at x,
# spaced by the frequency inferred from the data.
index_future_dates = pd.date_range(x, periods=n_periods, freq=freq)

index_future_dates

In [None]:
# Out-of-sample forecast covering n_periods steps past the end of the training data.
predicted = model_fit.forecast(steps=n_periods)

In [None]:
# Build the forecast table, attaching the date labels by position.
# Passing a Series together with index= makes pandas *reindex* the Series by
# label, so any mismatch between the forecast's own index and
# index_future_dates would silently yield NaN. Converting to a plain array
# first pairs the i-th forecast with the i-th date regardless of labels.
prediction = pd.DataFrame(
    {'predicted_values': np.asarray(predicted)},
    index=index_future_dates,
).reset_index()  # dates move into an 'index' column; rows get a 0..n-1 index

In [None]:
# Display the forecast table through the notebook's rich DataFrame renderer
# (a bare last expression) instead of print(), which only gives plain text.
prediction

In [None]:
# Count how often each column label occurs (a quick check for duplicated
# names). The call parentheses are required: without them the cell only
# displays the bound method object instead of the counts.
prediction.columns.value_counts()

In [None]:
# Plot the full observed series with the forecast overlaid on the hold-out
# period. Labels, title and legend make the two lines distinguishable when
# the figure is read on its own.
fig, ax = plt.subplots()
ax.plot(values.index, values['Data'], label='Observed')
ax.plot(index_future_dates, predicted, label='Forecast')
ax.set(title='Observed series vs. ARIMA forecast', xlabel='Time', ylabel='Data')
ax.legend();  # trailing semicolon suppresses the Legend repr in the cell output

In [None]:
# Mean absolute error of the forecast against the held-out observations.
mae = mean_absolute_error(
    y_true=test['Data'],
    y_pred=prediction['predicted_values'],
)
mae

In [None]:
# Mean absolute percentage error of the forecast against the held-out observations.
mape = mean_absolute_percentage_error(
    y_true=test['Data'],
    y_pred=prediction['predicted_values'],
)
mape

In [None]:
# Mean squared error of the forecast against the held-out observations.
mse = mean_squared_error(
    y_true=test['Data'],
    y_pred=prediction['predicted_values'],
)
mse

In [None]:
import itertools