### Import the data set

In [None]:
import warnings
warnings.filterwarnings('ignore' )

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import time

# The bundled seaborn styles were renamed to "seaborn-v0_8*" in matplotlib 3.6
# and the old "seaborn" alias was later removed, so pick whichever name this
# installation actually provides instead of failing on newer versions.
plt.style.use(
    style="seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
)
%matplotlib inline

: 

In [None]:
# Read the raw data set from the working directory and preview the first rows.
df = pd.read_csv(filepath_or_buffer="D1_data.csv")
df.head()

: 

In [None]:
# Parse the "Date" column and promote it to the index.
# The guard makes this cell safe to re-run: once "Date" has become the index it
# is no longer a column, and an unconditional df["Date"] lookup would raise
# KeyError on the second execution.
if "Date" in df.columns:
    df = df.assign(Date=pd.to_datetime(df["Date"])).set_index("Date")

: 

In [None]:
df.head()

: 

In [None]:
# Line plot of the full "Power" series on a wide canvas.
df["Power"].plot(figsize=(24, 8))

: 

### Testing for stationarity: ADF test

In [None]:
from statsmodels.tsa.stattools import adfuller

: 

In [None]:
# Raw result of the Augmented Dickey-Fuller test on the "Power" column.
test_result = adfuller(df["Power"])

: 

In [None]:
# H0: the series is non-stationary
# H1: the series is stationary

def adfuller_test(Power, significance=0.05):
    """Run the Augmented Dickey-Fuller test on a series and print a verdict.

    Parameters
    ----------
    Power : array-like
        The observations to test; passed unchanged to ``adfuller``.
    significance : float, optional
        p-value threshold at or below which the null hypothesis is rejected.
        Defaults to 0.05, the value that was previously hard-coded.

    Raises
    ------
    ValueError
        If ``significance`` is not strictly between 0 and 1.

    Notes
    -----
    Nothing is returned; the four labelled statistics and the conclusion are
    written to standard output.
    """
    if not 0 < significance < 1:
        raise ValueError("significance must be strictly between 0 and 1")

    result = adfuller(Power)
    labels = ['ADF Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    # zip stops at the shorter sequence, so only the first four entries of the
    # result are printed even if it holds more.
    for value, label in zip(result, labels):
        print(label + ' : ' + str(value))

    # result[1] is the entry labelled 'p-value' above.
    if result[1] <= significance:
        print("strong evidence against the null hypothesis(Ho), reject the null hypothesis. Data has no unit root and is stationary")
    else:
        print("weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")
    

: 

In [None]:
# Print the ADF statistics and the stationarity verdict for the "Power" column.
adfuller_test(Power=df["Power"])

: 

So the series needs to be differenced (the `d` term of the ARIMA model) before it can be modelled.

### Determine the order of the ARIMA model

In [None]:
from pmdarima import auto_arima

# Let auto_arima search for an order on the whole "Power" series; trace=True
# prints every candidate it evaluates, and the summary of the selected model
# is shown as the cell output.
stepwise_fit = auto_arima(
    df["Power"],
    trace=True,
    suppress_warnings=True,
)
stepwise_fit.summary()

: 

### Start to fit the model 

In [None]:
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings('ignore' )

: 

#### Split data into Training and Testing

In [None]:
# Number of leading observations used for fitting; everything after that is
# held out. The split is chronological: no shuffling.
TRAIN_SIZE = 300

print(df.shape)
train = df.iloc[:TRAIN_SIZE]
test = df.iloc[TRAIN_SIZE:]

# Fail fast with a readable message instead of letting an empty hold-out set
# surface later as an obscure error in the prediction cells.
assert len(train) > 0 and len(test) > 0, (
    f"TRAIN_SIZE={TRAIN_SIZE} leaves an empty split for {len(df)} rows"
)
print(train.shape, test.shape)

: 

#### Train the model

In [None]:
# Fit an ARIMA(2, 1, 1) on the training portion of "Power"; `model` ends up
# holding the fitted results object, whose summary is the cell output.
model = ARIMA(train["Power"], order=(2, 1, 1)).fit()
model.summary()

: 

### Plot residual errors 

In [None]:
# Residuals of the fitted training model, shown side by side as a line plot
# (left) and as a kernel density estimate (right).
residuals = pd.DataFrame(model.resid)
fig, ax = plt.subplots(nrows=1, ncols=2)
residuals.plot(ax=ax[0], title="Residuals", figsize=(24, 8))
residuals.plot(ax=ax[1], kind="kde", title="Density")
plt.show()

: 

The residual errors seem fine with near zero mean and uniform variance.

### Make prediction on Test Set 

In [None]:
# Forecast exactly the positions covered by the hold-out set.
start = len(train)
end = len(train) + len(test) - 1

# `typ` is not an argument of the state-space ARIMA results' predict(); it was
# passed through as an unknown keyword, which only produces a FutureWarning
# (hidden by the warnings filter) and is announced to become a TypeError.
# It is dropped here; no replacement argument is needed.
pred = model.predict(start=start, end=end, dynamic=False)

# Label the forecasts with the corresponding dates from the original frame so
# they align with `test` when compared or plotted.
pred.index = df.index[start:end + 1]
print(pred)

: 

In [None]:
# Actual vs Fitted Plot
actual = test.Power
pd.DataFrame({"actual": actual, "predicted": pred}).plot(
    title="Actuals vs Predicted", figsize=(16, 4)
)

: 

In [None]:
# Write the test-set forecasts to a semicolon-separated file in the working directory.
pred.to_csv("sample.csv", sep=";")

: 

In [None]:
# Mean of the held-out "Power" values.
test["Power"].mean()

: 

For ARIMA forecasts, scale-independent accuracy metrics such as MAPE, correlation and min-max error are typically used when comparing forecasts of two different series.

### Retrain the model on entire data set

In [None]:
# Refit the same ARIMA(2, 1, 1) specification on every available observation,
# then show the last rows of the data the forecast will continue from.
model2 = ARIMA(df["Power"], order=(2, 1, 1)).fit()
df.tail()

: 

### For future dates 

In [None]:
# Forecast beyond the end of the data. The keyword must be `start`: the
# previous spelling `starts` was not recognised, so `start` stayed None and
# the prediction ran from the first observation rather than from the first
# future step. `typ` is dropped as well, because predict() on this results
# object does not accept it.
# start=len(df) is the first position after the sample; end=len(df)+30 is
# inclusive, so this yields 31 forecast steps.
pred2 = model2.predict(start=len(df), end=len(df) + 30).rename('ARIMA Prediction')
print(pred2)

: 

: 