In [None]:
from statsmodels.tsa.stattools import adfuller

In [None]:
from numpy import log

In [None]:
import pandas as pd
import numpy as np
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
# Default size (9x7) and resolution (120 dpi) for figures created from here on,
# until rcParams is updated again further down.
plt.rcParams.update({'figure.figsize': (9,7), 'figure.dpi': 120})

In [None]:
# Load the series from a remote CSV. header=0 combined with names= replaces the
# file's own header row with the column name 'value'.
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/wwwusage.csv', names=['value'], header=0)

In [None]:
# Augmented Dickey-Fuller test on the undifferenced series (NaNs dropped first).
result = adfuller(df.value.dropna())

In [None]:
# result[0] is printed as the ADF test statistic.
print("ADF Statistics: %f" %result[0])

In [None]:
# result[1] is printed as the p-value of the test.
print("p-values: %f" %result[1])

In [None]:
# 3x2 grid of axes: the left column holds series plots, the right column the
# matching autocorrelation (ACF) plots. The following cells fill it row by row.
fig, axes = plt.subplots(3,2, sharex = True)

In [None]:
# Row 0: the undifferenced series and its ACF.
axes[0,0].plot(df.value); axes[0,0].set_title('Original Series')
plot_acf(df.value, ax = axes[0,1])

In [None]:
# Row 1: first difference. diff() leaves a leading NaN, which is dropped
# before the ACF is computed.
axes[1,0].plot(df.value.diff()); axes[1,0].set_title("1st Order Difference")
plot_acf(df.value.diff().dropna(), ax = axes[1,1])

In [None]:
# Row 2: second difference, i.e. the first-differenced series differenced once
# more. The series plot and its ACF must both use .diff().diff() so the left
# panel matches its "2nd Order Difference" title and the ACF beside it.
axes[2,0].plot(df.value.diff().diff()); axes[2,0].set_title("2nd Order Difference")
plot_acf(df.value.diff().diff().dropna(), ax = axes[2,1])

In [None]:
# Render the series/ACF grid populated by the preceding cells.
plt.show()

In [None]:
from pmdarima.arima.utils import ndiffs

In [None]:
# Same read_csv call as at the top of the notebook; rebinds df to a fresh copy.
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/wwwusage.csv', names=['value'], header=0)

In [None]:
# Short alias for the series passed to ndiffs below.
y = df.value

In [None]:
# Number of differences suggested by the Augmented Dickey-Fuller test.
ndiffs(y, test = "adf")

In [None]:
# Number of differences suggested by the KPSS test.
ndiffs(y, test = "kpss")

In [None]:
# Number of differences suggested by the Phillips-Perron test.
ndiffs(y, test = "pp")

In [None]:
#PACF Plots
# Switch the default figure size to a wide, short 9x3 for the side-by-side plots below.
plt.rcParams.update({'figure.figsize': (9,3), 'figure.dpi' : 120})

In [None]:
# Left: first-differenced series. Right: its partial autocorrelation (PACF).
fig, axes = plt.subplots(1,2, sharex = True)
axes[0].plot(df.value.diff()); axes[0].set_title('1st Differencing')
# NOTE(review): sharex=True ties the x-axis of the series plot (observation
# index) to that of the PACF plot (lags), and this y-limit of (0, 5) is set
# before plot_pacf draws on the axes -- confirm both are intended.
axes[1].set(ylim = (0,5))
plot_pacf(df.value.diff().dropna(), ax = axes[1])
plt.show()

In [None]:
from statsmodels.tsa.arima_model import ARIMA

In [None]:
# NOTE(review): ARIMA is imported from statsmodels.tsa.arima_model, which was
# deprecated in statsmodels 0.12 and removed in 0.13 -- confirm the pinned
# statsmodels version, or migrate to statsmodels.tsa.arima.model.ARIMA (whose
# fit() does not accept disp).
# First candidate: order (p, d, q) = (1, 1, 2) on the full series.
model = ARIMA(df.value, order = (1,1,2))

In [None]:
# disp = 0 is presumably meant to silence the optimizer's convergence output.
model_fit = model.fit(disp = 0)

In [None]:
print(model_fit.summary())

In [None]:
# Second candidate: order (1, 1, 1). This rebinds model, so the first
# candidate is no longer reachable from here on.
model = ARIMA(df.value, order = (1,1,1))

In [None]:
# Rebinds model_fit as well; the cells that follow use this (1, 1, 1) fit.
model_fit = model.fit(disp = 0)

In [None]:
print(model_fit.summary())

In [None]:
# Residuals of the current model_fit, wrapped in a DataFrame so the pandas
# plotting helpers below can be used.
residuals = pd.DataFrame(model_fit.resid)

In [None]:
# Left panel: residuals as a line plot. Right panel: kernel density estimate
# of the same residuals.
fig, ax = plt.subplots(1,2)
residuals.plot(title = "Residuals", ax = ax[0])
residuals.plot(kind = 'kde', title = "Density", ax = ax[1])
plt.show()

In [None]:
# Plot the model's predictions; dynamic = False presumably requests
# one-step-ahead in-sample predictions -- TODO confirm against the
# statsmodels version in use.
model_fit.plot_predict(dynamic = False)
plt.show()

In [None]:
from statsmodels.tsa.stattools import acf
# Positional hold-out split: the first 85 observations are used for fitting,
# everything after them is kept back for evaluating the forecast.
train  = df.value[:85]
test = df.value[85:]

# With order (1,1,1) the model already gives a reasonable forecast, but increasing the order to (3,2,1) can improve the prediction. Try both...

In [None]:
# Fit on the training window only, with order (p, d, q) = (3, 2, 1).
model = ARIMA(train, order = (3,2,1))

# Alternative order to compare against (swap it with the line above):
# model = ARIMA(train, order = (1,1,1))

# Unlike the earlier fits (disp = 0), this one passes disp = 1.
fitted = model.fit(disp = 1)

In [None]:
# Forecast exactly as many steps as there are held-out observations, so the
# result always lines up one-to-one with test.index when it is wrapped in a
# Series below (a hard-coded horizon breaks as soon as the split point changes).
# alpha = 0.05 requests a 95% confidence interval.
fc, se, conf = fitted.forecast(len(test), alpha = 0.05)
print(fc)

In [None]:
# Put the forecast on the test period's index so it plots at the same
# x positions as the actual values.
fc_series = pd.Series(fc, index = test.index)

In [None]:
# Column 0 of conf is used as the lower bound of the interval.
lower_series = pd.Series(conf[:,0], index = test.index)

In [None]:
# Column 1 of conf is used as the upper bound of the interval.
upper_series = pd.Series(conf[:,1], index = test.index)

In [None]:
# Dedicated figure for the comparison plot (overrides the rcParams defaults).
plt.figure(figsize = (12,5), dpi = 100)

In [None]:
plt.plot(train, label = "training")
plt.plot(test, label = "actual")
plt.plot(fc_series, label = "forecast")
# Translucent band between the lower and upper bounds.
plt.fill_between(lower_series.index, lower_series, upper_series, color = 'k', alpha = .15)
plt.title('Forecast Vs Actuals')
plt.legend(loc = 'upper left', fontsize = 8)
plt.show()

# Accuracy Metrics for the Forecast

In [None]:
def forecast_accuracy(forecast, actual):
    mape = np.mean(np.abs(forecast- actual)/np.abs(actual))
    me = np.mean(forecast- actual)
    mae = np.mean(np.abs(forecast - actual))
    mpe = np.mean((forecast- actual)/actual)
    rmse = np.mean((forecast - actual)**2)**.5
    corr = np.corrcoef(forecast, actual)[0,1]
    mins = np.amin(np.hstack([forecast[:, None], actual[:,None]]), axis = 1)
    maxs = np.amax(np.hstack([forecast[:, None], actual[:,None]]), axis = 1)
    minmax = 1 - np.mean(mins/maxs)
    acf1 = acf(fc-test)[1]
    return({'mape': mape,
           'me': me,
           'mae': mae,
           'mpe': mpe,
           'rmse': rmse,
           'corr': corr,
           'acf1': acf1,
           'minmax': minmax})
# Score the forecast against the held-out observations; test.values passes the
# underlying array of the test Series. The returned dict is the cell's output.
forecast_accuracy(fc, test.values)

In [None]:
from statsmodels.tsa.arima_model import ARIMA
import pmdarima as pm

In [None]:
# Same read_csv call as earlier in the notebook; rebinds df once more.
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/wwwusage.csv', names=['value'], header=0)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Automatic order selection with pmdarima. As configured here:
#   - p and q start at 1 and are capped at 3;
#   - d = None leaves the differencing order to auto_arima, using test = 'adf';
#   - seasonal = False with m = 1 requests a non-seasonal model;
#   - stepwise = True selects the stepwise search, and trace = True prints the
#     candidates as they are tried;
#   - error_action = 'ignore' and suppress_warnings = True keep failed fits and
#     warnings out of the output.
# This rebinds model, replacing the statsmodels ARIMA object from earlier cells.
model = pm.auto_arima(df.value, start_p = 1, start_q = 1, test = 'adf', max_p = 3, max_q = 3, m = 1, d = None, seasonal = False, 
                     start_P = 0, D = 0, trace = True, error_action = 'ignore', suppress_warnings = True, stepwise = True)

In [None]:
print(model.summary())

In [None]:
# Diagnostic plots for the model selected by auto_arima.
model.plot_diagnostics(figsize=(7,5))
plt.show()