# Assignment 1: Time Series Forecast With Python (Seasonal ARIMA)

**Lecturer**: Vincent Claes<br>
**Authors:** Bryan Honof, Jeffrey Gorissen<br>
**Start Date:** 19/10/2018
    
**Objective:** Visualize and predict the future temperatures via ARIMA

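**Description:** In this notebook we tune our Seasonal ARIMA model: we fit every candidate combination of (p, d, q) and seasonal (P, D, Q, s) orders and keep the one with the lowest AIC.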

In [1]:
import math
import warnings
import datetime

import pandas            as pd
import itertools         as it
import statsmodels.api   as sm
import matplotlib.pyplot as plt

warnings.filterwarnings("ignore") # specify to ignore warning messages

In [2]:
# Read the rolling-mean temperature export from disk
data_csv = pd.read_csv('./data/rolmean_data.csv')

# Build a frame holding only the two columns we need, with explicit dtypes:
# the dateTime column parsed to datetime64 and the temperature column to numeric
data = pd.DataFrame({
    'dateTime': pd.to_datetime(data_csv['dateTime']),
    'temperature': pd.to_numeric(data_csv['temperature']),
})

# Use the timestamps as index, put the rows in chronological order
# and discard rows with missing values
data = data.set_index(['dateTime']).sort_index().dropna()

# Double check the result (index range, dtypes, non-null counts)
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 834 entries, 2018-11-11 03:00:00 to 2018-11-19 19:15:00
Data columns (total 1 columns):
temperature    834 non-null float64
dtypes: float64(1)
memory usage: 13.0 KB


In [3]:
# Show the five most recent readings
data.iloc[-5:]

Unnamed: 0_level_0,temperature
dateTime,Unnamed: 1_level_1
2018-11-19 18:15:00,16.81
2018-11-19 18:30:00,16.7
2018-11-19 18:45:00,16.52
2018-11-19 19:00:00,16.44
2018-11-19 19:15:00,16.48


In [4]:
# Tune Seasonal ARIMA model
# Candidate values for the non-seasonal orders p, d and q: each one is 0 or 1
# (range(0, 2) excludes its upper bound, so 2 itself is never tried)
p = d = q = range(0, 2)

# Generate all different combinations of (p, d, q) triplets
pdq = list(it.product(p, d, q))
print(pdq)

# Seasonal period, expressed in number of samples.
# The timestamps of the loaded data are 15 minutes apart, i.e. 4 samples per
# hour, so 24 * 4 = 96 samples span one day (not one week).
SAMPLES_PER_HOUR = 4
SEASONAL_PERIOD = 24 * SAMPLES_PER_HOUR

# Generate all different combinations of seasonal (P, D, Q, s) quadruplets,
# reusing the same 0/1 candidates and the fixed seasonal period
seasonal_pdq = [(sp, sd, sq, SEASONAL_PERIOD) for sp, sd, sq in it.product(p, d, q)]

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

[(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 0, 0), (1, 0, 1), (1, 1, 0), (1, 1, 1)]
Examples of parameter combinations for Seasonal ARIMA...
SARIMAX: (0, 0, 1) x (0, 0, 1, 96)
SARIMAX: (0, 0, 1) x (0, 1, 0, 96)
SARIMAX: (0, 1, 0) x (0, 1, 1, 96)
SARIMAX: (0, 1, 0) x (1, 0, 0, 96)


In [5]:
# Grid search: fit one SARIMAX model per (order, seasonal_order) combination
# and record its AIC so the best-scoring combination can be selected afterwards.
result_list = []  # flat log: param, param_seasonal, rounded AIC, repeated for every fit
AIC         = []  # AIC of every successful fit
parm_       = []  # non-seasonal order of the fit stored at the same position in AIC
parm_s      = []  # seasonal order of the fit stored at the same position in AIC

# it.product walks the combinations in the same order as two nested loops
# (every seasonal order for the first param, then for the second, ...)
for param, param_seasonal in it.product(pdq, seasonal_pdq):
    try:
        mod = sm.tsa.statespace.SARIMAX(data,
                                        order=param,
                                        seasonal_order=param_seasonal,
                                        enforce_stationarity=False,
                                        enforce_invertibility=False)
        results = mod.fit()
    except Exception as err:
        # Best effort: a combination that cannot be fitted is skipped, but we
        # report which one failed and why. Catching Exception (instead of a
        # bare except) keeps Ctrl-C / kernel interrupt working during the search.
        print('error: ARIMA{}x{} - {}: {}'.format(param, param_seasonal,
                                                  type(err).__name__, err))
        continue
    else:
        AIC.append(results.aic)
        parm_.append(param)
        parm_s.append(param_seasonal)

        print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, round(results.aic,2)))
        # NOTE: extend() adds the three values as separate items, so result_list
        # stays a flat list rather than a list of triples
        result_list.extend([param, param_seasonal, round(results.aic,2)])

print('Done!')

ARIMA(0, 0, 0)x(0, 0, 0, 96) - AIC:7206.12
ARIMA(0, 0, 0)x(0, 0, 1, 96) - AIC:5650.0
ARIMA(0, 0, 0)x(0, 1, 0, 96) - AIC:2578.82
ARIMA(0, 0, 0)x(0, 1, 1, 96) - AIC:2219.09
ARIMA(0, 0, 0)x(1, 0, 0, 96) - AIC:2459.97
ARIMA(0, 0, 0)x(1, 0, 1, 96) - AIC:2238.85
ARIMA(0, 0, 0)x(1, 1, 0, 96) - AIC:2200.11
ARIMA(0, 0, 0)x(1, 1, 1, 96) - AIC:2128.36
ARIMA(0, 0, 1)x(0, 0, 0, 96) - AIC:6059.01
ARIMA(0, 0, 1)x(0, 0, 1, 96) - AIC:4639.05
ARIMA(0, 0, 1)x(0, 1, 0, 96) - AIC:1643.19
ARIMA(0, 0, 1)x(0, 1, 1, 96) - AIC:1401.57
ARIMA(0, 0, 1)x(1, 0, 0, 96) - AIC:1531.1
ARIMA(0, 0, 1)x(1, 0, 1, 96) - AIC:1306.82
ARIMA(0, 0, 1)x(1, 1, 0, 96) - AIC:1381.72
ARIMA(0, 0, 1)x(1, 1, 1, 96) - AIC:1324.79
ARIMA(0, 1, 0)x(0, 0, 0, 96) - AIC:-1247.57
ARIMA(0, 1, 0)x(0, 0, 1, 96) - AIC:-1227.61
ARIMA(0, 1, 0)x(0, 1, 0, 96) - AIC:-886.18
ARIMA(0, 1, 0)x(0, 1, 1, 96) - AIC:-967.66
ARIMA(0, 1, 0)x(1, 0, 0, 96) - AIC:-1231.65
ARIMA(0, 1, 0)x(1, 0, 1, 96) - AIC:-1232.24
ARIMA(0, 1, 0)x(1, 1, 0, 96) - AIC:-907.52
ARIMA(0, 

In [6]:
# Select the parameter combination whose fit achieved the lowest AIC.
if not AIC:
    # Without this guard min() fails with "arg is an empty sequence", which
    # says nothing about the real cause: no model could be fitted.
    raise ValueError('No SARIMAX fit succeeded, so there is no AIC to compare')

best_aic = min(AIC)
pos = AIC.index(best_aic)  # position of the first fit that reached the minimum
print(parm_[pos], parm_s[pos], best_aic)

(1, 1, 1) (0, 0, 0, 96) -1695.3828175455367


[next notebook](./5_fitting_and_predicting.ipynb)