In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Read data

In [None]:
# Column of the dataset that is analysed and forecast in the rest of the notebook.
feature = 'Ua'

df = pd.read_csv('2021/MG.csv')

# Compare the raw shape with the shape after dropping every row that has a
# missing value in any column, to see how many rows a blanket dropna() removes.
print(df.shape)
print(df.dropna().shape)

# Keep the preview as the last expression of the cell so the notebook renders it.
df.head()

In [None]:
# Plot the non-missing values of the selected feature to eyeball trend and
# variance before running the formal stationarity test.
ax = df[feature].dropna().plot(figsize=(12, 5), title=feature)
ax.set_xlabel('Row index')
ax.set_ylabel(feature);  # trailing ';' hides the text repr of the return value

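## Check for stationarity with the Augmented Dickey-Fuller test (a p-value below 0.05 rejects the unit-root null hypothesis, i.e. the series can be treated as stationary)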

In [None]:
from statsmodels.tsa.stattools import adfuller

def adf_test(dataset):
    """Run the Augmented Dickey-Fuller test on ``dataset`` and print a report.

    The lag length is chosen automatically (``autolag='AIC'``). The report
    lists, in order: the test statistic, the p-value, the number of lags,
    the number of observations used, and the critical values. Nothing is
    returned; the function only prints.
    """
    # Only the first five entries of the adfuller result are reported.
    adf_stat, p_value, used_lags, n_obs, critical_values = adfuller(dataset, autolag='AIC')[:5]

    report = (
        ("1. ADF : ", adf_stat),
        ("2. P-Value : ", p_value),
        ("3. Num Of Lags : ", used_lags),
        ("4. Num Of Observations Used For ADF Regression and Critical Values Calculation :", n_obs),
    )
    for label, value in report:
        print(label, value)

    print("5. Critical Values :")
    for level in critical_values:
        print("\t", level, ": ", critical_values[level])
        
# Run the ADF report on the selected feature; missing values are dropped
# before the series is handed to the test.
adf_test(df[feature].dropna())

## Figure out order for ARIMA model

In [None]:
from pmdarima import auto_arima

# Let auto_arima pick the model on the non-missing values of the selected
# feature; warnings raised while fitting are suppressed.
stepwise_fit = auto_arima(y=df[feature].dropna(), suppress_warnings=True)

# Show the summary of the selected model as the cell output.
stepwise_fit.summary()

## Define order based on the output of auto_arima

In [None]:
# Keep the order selected by auto_arima for the ARIMA fits further down, and echo it.
order = stepwise_fit.order
print(order)

## Split Data into Training and Testing

In [None]:
# Fraction of the NaN-free rows held out at the end of the series for testing.
TEST_FRACTION = 0.04

# NOTE(review): this drops rows with a missing value in *any* column, whereas
# the stationarity test and auto_arima above only drop rows where `feature`
# is missing -- confirm that the stricter filter is intended.
df_dropna = df.dropna()
print(df_dropna.shape)

test_samples_num = int(TEST_FRACTION * df_dropna.shape[0])

# Split on an explicit position instead of negative slices: when
# test_samples_num is 0, iloc[:-0] would return an empty training set and
# iloc[-0:] would return the whole frame as the test set.
split_at = df_dropna.shape[0] - test_samples_num
train = df_dropna.iloc[:split_at]
test = df_dropna.iloc[split_at:]
print(train.shape, test.shape)

## Train the ARIMA model and perform rolling predictions on test set

In [None]:
from math import sqrt

from sklearn.metrics import mean_squared_error
# ARIMA was used below without ever being imported, which raises NameError on
# a fresh kernel.
from statsmodels.tsa.arima.model import ARIMA

# Walk-forward validation: for every test observation, refit the model on all
# values seen so far, forecast the next value, then add the *observed* value
# (not the prediction) to the history before moving on.
history = list(train[feature])
predictions = list()
print(len(test))
for expected in test[feature]:
    model = ARIMA(history, order=order)
    model_fit = model.fit()
    output = model_fit.forecast()
    predictions.append(output[0])
    history.append(expected)
    print('predicted=%f, expected=%f' % (output[0], expected))

# evaluate forecasts
rmse = sqrt(mean_squared_error(test[feature], predictions))
print('Test RMSE: %.3f' % rmse)

In [None]:
# plot forecasts against actual outcomes
plt.plot(np.arange(len(test)), test[feature])
plt.plot(predictions, color='red')
plt.show()

## Test residuals

In [None]:
# Plot residual errors
# `model_fit` is whatever the rolling-forecast loop left behind, i.e. the
# model fitted in its last iteration.
model_fit.plot_diagnostics(figsize=(15, 15))
# plot_diagnostics returns the Figure; ending the cell with plt.show() instead
# of that return value keeps the notebook from rendering the figure twice.
plt.show()