# Augoregresion for One Step Prediction of All Cells

## 1. Load the dataset

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error

In [2]:
df = pd.read_csv('dataset.csv', index_col='Timestamp')
df.head()

Unnamed: 0_level_0,Cell_000111,Cell_000112,Cell_000113,Cell_000231,Cell_000232,Cell_000233,Cell_000461,Cell_000462,Cell_000463,Cell_000821,...,Cell_006653,Cell_006661,Cell_006662,Cell_006663,Cell_007371,Cell_007372,Cell_007373,Cell_039871,Cell_039872,Cell_039873
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-10-23 00:00:00,29.2649,59.15291,59.01721,2.63658,14.11635,3.37565,7.69811,0.62042,0.74651,0.0,...,36.89798,148.60854,98.66328,6.6083,0.82699,3.27327,80.90784,18.22045,6.99716,1.73569
2017-10-23 01:00:00,19.2617,60.22876,40.34814,13.16293,0.96246,7.47019,31.07216,0.52249,14.31437,19.78196,...,35.80842,11.13813,0.43016,1.45869,0.73014,0.35795,1.58017,38.19131,9.75174,7.38484
2017-10-23 02:00:00,18.37236,14.839,1.27435,25.36683,1.07481,3.88256,26.1419,1.10567,27.31817,0.0,...,42.86074,39.93543,0.28038,1.09835,0.38237,0.66306,0.75476,1.29435,3.66695,2.5709
2017-10-23 03:00:00,18.69268,0.98166,2.11746,1.66842,0.85055,14.04557,0.56444,0.5009,12.28002,0.0,...,55.43529,10.91845,4.04031,5.95218,0.50307,0.26729,1.32703,1.01926,0.17117,11.62664
2017-10-23 04:00:00,21.62559,4.28219,3.51484,1.78151,0.71691,12.94846,27.45815,17.99582,5.18661,0.00033,...,16.57459,46.98406,0.89364,5.47769,4.58068,0.30167,1.20668,0.82393,123.40943,28.72087


## 2. Fill NaNs

In [3]:
ndf = df.interpolate(method='linear', axis=1) # linear interpolate
#ndf = df.fillna(method='ffill', axis=1) # forward the last valid observation
ndf = df.fillna(0) # fill remaned nans with 0s
ndf.isna().sum().sum()

0

## 3. Train-Test Split

In [4]:
data = ndf.values
data = data.astype('float32')
ratio = 0.8
train_size = int(ratio*len(data))
train = data[:train_size]
test = data[train_size:]
print(train.shape)
print(test.shape)

(6990, 57)
(1748, 57)


## 4.Normalization

In [5]:
scaler = MinMaxScaler()
train = scaler.fit_transform(train)
test = scaler.transform(test)

## 5.Time Series Generators

In [34]:
days_back = 12 # back days for prediction
input_steps = 24*days_back 
output_steps = 1 
batch_size = 1
train_gen = tf.keras.preprocessing.sequence.TimeseriesGenerator(train,
                                                                train,
                                                                length=input_steps,
                                                                batch_size=batch_size)
test_gen = tf.keras.preprocessing.sequence.TimeseriesGenerator(test,
                                                               test,
                                                               length=input_steps,
                                                               batch_size=batch_size)
print("Number of train samples: ", len(train_gen))
print("Number of test samples: ", len(test_gen))

Number of train samples:  6702
Number of test samples:  1460


## 6. Vector AutoRegression

In [35]:
mse_list = list()
for i in range(len(test_gen)):
    tr, y = test_gen[i]
    tr = tr.reshape((input_steps, test.shape[1]))
    model = sm.tsa.VAR(tr)
    model_fit = model.fit(maxlags=1) # lag order
    y_predict = model_fit.forecast(tr, steps=1)
    mse = mean_squared_error(y, y_predict)
    mse_list.append(mse)
# print results
mse_array = np.array(mse_list)
print(f"MSE of VAR Model is {mse_array.mean()}")

MSE of VAR Model is 0.016938257560481423


## 7. Vector AutoRegression Moving Average

order = (1,1) # model order (p,q)
mse_list = list()
for i in range(len(test_gen)):
    tr, y = test_gen[i]
    tr = tr.reshape((input_steps, test.shape[1]))
    model = sm.tsa.VARMAX(tr, order=order, trend='c', error_cov_type='diagonal')
    model_fit = model.fit(maxiter=100, disp=False)
    y_predict = model_fit.forecast()
    mse = mean_squared_error(y, y_predict)
    print(i, mse)
    mse_list.append(mse)

mse_array = np.array(mse_list)
print(f"MSE of VARMA Model is {mse_array.mean()}")