In [1]:

import warnings
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
# IPython magic: render matplotlib figures inline, directly below the cell.
%matplotlib inline
# Switch on seaborn's default styling for the plots drawn later on.
sns.set()

#######
# Fetch the raw CSV from its GitHub location and load it into a DataFrame.

data = 'https://github.com/base2guild/generic/raw/main/dataset.csv'

df = pd.read_csv(filepath_or_buffer=data)
#######
# Dimensions of the frame as (rows, columns).

df.shape
#######
# Peek at the leading five records.

df.head(n=5)
#######
# Column names, non-null counts and dtypes.

df.info()
#######
# Parse the LastUpdated column into proper datetime values.

df['LastUpdated'] = df['LastUpdated'].pipe(pd.to_datetime)
#######
# Re-inspect the frame summary to confirm the new dtype.

df.info()
#######
# Split every timestamp into its calendar-date part and its time-of-day part.
df['Date'], df['Time'] = df['LastUpdated'].dt.date, df['LastUpdated'].dt.time
#######

# Show the leading ten records, now including the Date and Time columns.

df.head(n=10)
#######
# Columns that are not used by the rest of the analysis.

cols = ['SystemCodeNumber', 'Capacity', 'LastUpdated']

# Rebind df to a copy without those columns.
df = df.drop(columns=cols)
#######
# Inspect the dtypes of the columns that remain.

df.dtypes
#######
# Date currently holds plain date objects; turn it into datetime64 values.

df['Date'] = df['Date'].pipe(pd.to_datetime)
#######
# Inspect the dtypes once more to confirm the conversion.

df.dtypes
#######
# Use Date as the row index so the frame can be resampled by time.

df = df.set_index(keys='Date')

df.index
#######
# Aggregate the readings to one value per calendar day: the mean Utilization
# of all rows that share a date. Days without any rows come out as NaN.
y = df['Utilization'].resample('D').mean()

y.head(10)
#######
# Count the days that have no observation.
y.isnull().sum()
#######
# Back-fill each gap with the next available daily mean. Series.bfill()
# replaces fillna(method='bfill'), whose `method` argument is deprecated since
# pandas 2.1; rebinding `y` also avoids mutating the series in place.
y = y.bfill()
#######

# Re-count the missing days to verify the fill.
y.isnull().sum()
#######
# Draw the daily series on a 15 x 6 inch figure.

fig, ax = plt.subplots(figsize=(15, 6))
y.plot(ax=ax)

plt.show()
#######
# Default figure size for the plots that follow. plt.rcParams is the same
# settings object the pylab import used to expose; going through the already
# imported pyplot module avoids a mid-notebook import of the legacy pylab
# interface.
plt.rcParams['figure.figsize'] = 12, 8

# Decompose the daily series with an additive model.
decomposition = sm.tsa.seasonal_decompose(y, model='additive')

# Plot the components returned by the decomposition.
fig = decomposition.plot()

plt.show()
#######
# Candidate values for each of the p, d and q orders. range(0, 2) yields only
# 0 and 1, so every order is either 0 or 1.
p = d = q = range(0, 2)


# All (p, d, q) triplets: 2 * 2 * 2 = 8 combinations.
pdq = list(itertools.product(p, d, q))


# All seasonal (P, D, Q, s) quadruplets: the same eight triplets, each with a
# fixed seasonal period of 4 appended.
seasonal_pdq = [(*triplet, 4) for triplet in itertools.product(p, d, q)]


# Print a handful of example pairings from the two grids.
print('Examples of parameter combinations for Seasonal ARIMA are as follows:-')
for order_idx, seasonal_idx in ((1, 1), (1, 2), (2, 3), (2, 4)):
    print('SARIMAX: {} x {}'.format(pdq[order_idx], seasonal_pdq[seasonal_idx]))
#######
# Silence warnings so that those raised while fitting the candidate models do
# not clutter the output. NOTE: this filter is global and stays active for
# every cell executed afterwards.
warnings.filterwarnings("ignore") # specify to ignore warning messages

# Fit one SARIMAX model per (order, seasonal_order) pairing and print its AIC.
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)

            results = mod.fit()
        except Exception as exc:
            # A combination that cannot be built or fitted is skipped, but it
            # is reported instead of being dropped silently. Catching
            # Exception (not a bare except) lets KeyboardInterrupt through so
            # the search can still be stopped by hand.
            print('ARIMA{}x{} - skipped ({}: {})'.format(
                param, param_seasonal, type(exc).__name__, exc))
            continue

        print('ARIMA{}x{}4 - AIC:{}'.format(param, param_seasonal, results.aic))
#######
# Build the SARIMAX(1, 1, 1)x(0, 1, 1, 4) model on the daily series, with the
# stationarity and invertibility constraints switched off.
model = sm.tsa.statespace.SARIMAX(
    endog=y,
    order=(1, 1, 1),
    seasonal_order=(0, 1, 1, 4),
    enforce_stationarity=False,
    enforce_invertibility=False,
)

results = model.fit()

# The second table of the fit summary is the one printed here.
coefficient_table = results.summary().tables[1]
print(coefficient_table)
#######
# Draw the fitted model's diagnostic plots on a 15 x 12 inch figure.
results.plot_diagnostics(figsize=(15, 12))

plt.show()
#######
# Forecast 100 steps past the end of the observed series.
pred_uc = results.get_forecast(steps=100)

# Confidence bounds of the forecast; the first column is used as the lower
# edge of the shaded band and the second as the upper edge.
pred_ci = pred_uc.conf_int()
lower_bound = pred_ci.iloc[:, 0]
upper_bound = pred_ci.iloc[:, 1]

# Observed history first, then the forecast mean on the same axes.
ax = y.plot(label='observed', figsize=(12, 8))
pred_uc.predicted_mean.plot(ax=ax, label='forecast')

# Shade the confidence band between the two bounds.
ax.fill_between(pred_ci.index, lower_bound, upper_bound, color='k', alpha=.25)

ax.set(xlabel='Date', ylabel='Utilization')

ax.legend()
plt.show()


ModuleNotFoundError: No module named 'numpy'