# Assignment 1: Time Series Forecast With Python (Seasonal ARIMA)

**Lecturer**: Vincent Claes<br>
**Authors:** Bryan Honof, Jeffrey Gorissen<br>
**Start Date:** 19/10/2018
    
**Objective:** Visualize and predict the future temperatures via ARIMA

**Description:** In this notebook we train our model

In [1]:
import warnings

warnings.filterwarnings("ignore") # specify to ignore warning messages

import pandas as pd
import matplotlib.pyplot as plt
import itertools as it
import datetime
import math

import statsmodels.api as sm

In [2]:
data_csv = pd.read_csv('data.csv')
data = pd.DataFrame()

# Convert the creation_date column to datetime64
data['dateTime'] = pd.to_datetime(data_csv['dateTime'])
# Convert the value column to float
data['temperature'] = pd.to_numeric(data_csv['temperature'])

# Set the dateTime column as index
data = data.set_index(['dateTime'])

# Sort the dataFrame just to be sure...
data = data.sort_index()

# Double check the results
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 760 entries, 2018-11-10 23:30:00 to 2018-11-18 21:15:00
Data columns (total 1 columns):
temperature    760 non-null float64
dtypes: float64(1)
memory usage: 11.9 KB


In [3]:
data.tail()

Unnamed: 0_level_0,temperature
dateTime,Unnamed: 1_level_1
2018-11-18 20:15:00,16.07
2018-11-18 20:30:00,15.36
2018-11-18 20:45:00,15.44
2018-11-18 21:00:00,16.07
2018-11-18 21:15:00,15.57


In [4]:
# Tune Seasonal ARIMA model
# Define the p, d and q parameters to take any value between 0 and 2
p = d = q = range(0,2)

# Generate all different combinations of p, q and q triplets
pdq = list(it.product(p, d, q))
print(pdq)

# Generate all different combinations of seasonal p, q and q triplets
# Seasonality is one week (24*7 = 168 hours)
seasonal_pdq = [(x[0], x[1], x[2], 15) for x in list(it.product(p, d, q))]

print('Examples of parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[1]))
print('SARIMAX: {} x {}'.format(pdq[1], seasonal_pdq[2]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[3]))
print('SARIMAX: {} x {}'.format(pdq[2], seasonal_pdq[4]))

[(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1), (1, 0, 0), (1, 0, 1), (1, 1, 0), (1, 1, 1)]
Examples of parameter combinations for Seasonal ARIMA...
SARIMAX: (0, 0, 1) x (0, 0, 1, 15)
SARIMAX: (0, 0, 1) x (0, 1, 0, 15)
SARIMAX: (0, 1, 0) x (0, 1, 1, 15)
SARIMAX: (0, 1, 0) x (1, 0, 0, 15)


In [5]:
train = data

train.index = pd.DatetimeIndex(train.index.values,
                               freq='15T')

result_list = []

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(data,
                                            
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)
            results = mod.fit(maxiter=2000)

            print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, round(results.aic,2)))
            result_list.extend([param, param_seasonal, round(results.aic,2)])
        except:
            print('error')
            continue
            
print('Done!')

ARIMA(0, 0, 0)x(0, 0, 0, 15) - AIC:6592.6
ARIMA(0, 0, 0)x(0, 0, 1, 15) - AIC:5568.19
ARIMA(0, 0, 0)x(0, 1, 0, 15) - AIC:2929.69
ARIMA(0, 0, 0)x(0, 1, 1, 15) - AIC:2702.88
ARIMA(0, 0, 0)x(1, 0, 0, 15) - AIC:2929.52




ARIMA(0, 0, 0)x(1, 0, 1, 15) - AIC:2706.31
ARIMA(0, 0, 0)x(1, 1, 0, 15) - AIC:2809.92
ARIMA(0, 0, 0)x(1, 1, 1, 15) - AIC:2675.8
ARIMA(0, 0, 1)x(0, 0, 0, 15) - AIC:5623.2
ARIMA(0, 0, 1)x(0, 0, 1, 15) - AIC:4853.17
ARIMA(0, 0, 1)x(0, 1, 0, 15) - AIC:2782.54
ARIMA(0, 0, 1)x(0, 1, 1, 15) - AIC:2458.93
ARIMA(0, 0, 1)x(1, 0, 0, 15) - AIC:2784.66
ARIMA(0, 0, 1)x(1, 0, 1, 15) - AIC:2782.39
ARIMA(0, 0, 1)x(1, 1, 0, 15) - AIC:2612.79
ARIMA(0, 0, 1)x(1, 1, 1, 15) - AIC:2460.51
ARIMA(0, 1, 0)x(0, 0, 0, 15) - AIC:2393.32
ARIMA(0, 1, 0)x(0, 0, 1, 15) - AIC:2343.32
ARIMA(0, 1, 0)x(0, 1, 0, 15) - AIC:2867.97
ARIMA(0, 1, 0)x(0, 1, 1, 15) - AIC:2324.94
ARIMA(0, 1, 0)x(1, 0, 0, 15) - AIC:2345.43
ARIMA(0, 1, 0)x(1, 0, 1, 15) - AIC:2343.16
ARIMA(0, 1, 0)x(1, 1, 0, 15) - AIC:2556.29
ARIMA(0, 1, 0)x(1, 1, 1, 15) - AIC:2336.39
ARIMA(0, 1, 1)x(0, 0, 0, 15) - AIC:2092.84
ARIMA(0, 1, 1)x(0, 0, 1, 15) - AIC:2042.54
ARIMA(0, 1, 1)x(0, 1, 0, 15) - AIC:2564.74
ARIMA(0, 1, 1)x(0, 1, 1, 15) - AIC:2042.74
ARIMA(0, 1, 1

In [9]:
print_result = zip(*[iter(result_list)]*3) 
print_result = sorted(print_result, key=lambda x: x[1])

print('Result summary:\n')
print('((p, d, q), (P, D, Q, S), AIC)')
print('------------------------------')
for item in print_result:
    print(item)

Result summary:

((p, d, q), (P, D, Q, S), AIC)
------------------------------
((0, 0, 0), (0, 0, 0, 15), 6592.6)
((0, 0, 1), (0, 0, 0, 15), 5623.2)
((0, 1, 0), (0, 0, 0, 15), 2393.32)
((0, 1, 1), (0, 0, 0, 15), 2092.84)
((1, 0, 0), (0, 0, 0, 15), 2397.35)
((1, 0, 1), (0, 0, 0, 15), 2096.6)
((1, 1, 0), (0, 0, 0, 15), 2181.52)
((1, 1, 1), (0, 0, 0, 15), 2090.07)
((0, 0, 0), (0, 0, 1, 15), 5568.19)
((0, 0, 1), (0, 0, 1, 15), 4853.17)
((0, 1, 0), (0, 0, 1, 15), 2343.32)
((0, 1, 1), (0, 0, 1, 15), 2042.54)
((1, 0, 0), (0, 0, 1, 15), 2346.5)
((1, 0, 1), (0, 0, 1, 15), 2047.16)
((1, 1, 0), (0, 0, 1, 15), 2133.37)
((1, 1, 1), (0, 0, 1, 15), 2039.23)
((0, 0, 0), (0, 1, 0, 15), 2929.69)
((0, 0, 1), (0, 1, 0, 15), 2782.54)
((0, 1, 0), (0, 1, 0, 15), 2867.97)
((0, 1, 1), (0, 1, 0, 15), 2564.74)
((1, 0, 0), (0, 1, 0, 15), 2677.7)
((1, 0, 1), (0, 1, 0, 15), 2542.07)
((1, 1, 0), (0, 1, 0, 15), 2648.41)
((1, 1, 1), (0, 1, 0, 15), 2558.01)
((0, 0, 0), (0, 1, 1, 15), 2702.88)
((0, 0, 1), (0, 1, 1, 15),