In [35]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error

In [14]:
dataset = pd.read_csv("C:\\Users\\kevin\\ziknet-trends-rolling\\data\\Mexico\\processed_data\\Chiapas_2016-2017.csv")
print(dataset.head())
print("Number of observations: ", len(dataset))

         Date  Searches  Cases
0  02/01/2016       100      9
1  09/01/2016       100      0
2  16/01/2016       100      3
3  23/01/2016        55     11
4  30/01/2016        98     11
Number of observations:  104


In [12]:
def series_to_supervised(df, outputColumn, n_in=1, n_out=1, dropnan=True):
  n_vars = df.shape[1]
  cols, names = list(), list()
  # input sequence (t-n, ... t-1)
  for i in range(n_in, 0, -1):
    cols.append(df.shift(i))
    names += [("{}(t-{})".format(col, i)) for col in df.columns]
  
  # Append next observation[outputColumn] at n_out obs
  cols.append(df[outputColumn].shift(-n_out+1))
  names+=[outputColumn + "(t+{})".format(n_out-1)]

  # put it all together
  agg = pd.concat(cols, axis=1)
  agg.columns = names
  # drop rows with NaN values
  if dropnan:
    agg.dropna(inplace=True)
  return agg

In [16]:
def getXY(dataset, state, weeksAhead):
    n_features = dataset.shape[1]
    
    n_weeks = 4
    reframed = series_to_supervised(dataset, "Cases",  n_weeks, weeksAhead)
    values = reframed.values
    
    totalFeatures = values.shape[1]

    x,y = values[:, :totalFeatures-1], values[:, totalFeatures-1] #Y is the last column, X is all the previous columns 

    x = x.reshape((x.shape[0], n_weeks, n_features)) # Reshape as 3-D
    return x, y

In [25]:
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Flatten, Dense, LSTM
from keras.optimizers import Adam
from keras.layers.merge import concatenate

def LSTM_NN_Model():
    input_layer = Input(shape=(4,2))
    b1_out = LSTM(64, return_sequences=False)(input_layer)

    b2_out = Dense(32, activation="relu", kernel_regularizer="l2")(input_layer)
    b2_out = Flatten()(b2_out)

    concatenated = concatenate([b1_out, b2_out])
    out = Dense(4, activation="relu", kernel_regularizer="l2")(concatenated)
    out = Dense(4, activation="relu", kernel_regularizer="l2")(out)
    # out = Dense(1, activation="linear", kernel_constraint=non_neg(), name='output_layer')(out)
    out = Dense(1, activation="linear", name='output_layer')(out)

    model = Model([input_layer], out)
    model.compile(loss=["mse"], optimizer=Adam(0.0001), metrics=["mae"])

    return model

Using TensorFlow backend.


In [22]:
x, y = getXY(dataset[["Searches", "Cases"]], "", 4)

In [26]:
model = LSTM_NN_Model()

In [37]:
xDim1 = x[0].shape[0]
xDim2 = x[0].shape[1]
splitIndex = len(x) - 52

train_X = x[:splitIndex]
train_y = y[:splitIndex]

test_y = y[splitIndex:]
outDataset = pd.DataFrame()
outDataset["Observerd"] = test_y

model.fit(
    train_X,
    train_y,
    epochs=200,
    batch_size=32,
    verbose=0,
    shuffle=False)

predicted_y_history = []

while(splitIndex < len(y)):
    predicted_y = \
        model.predict(x[splitIndex].reshape(1, xDim1, xDim2))[0]

    model.fit(
        x[:splitIndex+1],
        y[:splitIndex+1],
        epochs=1,
        batch_size=32,
        verbose=0,
        shuffle=False)

    predicted_y_history.append(predicted_y[0])
    splitIndex += 1
    
outDataset["PREDICTED"] = predicted_y_history
print(mean_squared_error(test_y, predicted_y_history))
print(outDataset)

0.2618866904392326
    Observerd  PREDICTED
0         0.0   0.234046
1         0.0   0.234254
2         0.0   0.234461
3         0.0   0.234665
4         0.0   0.104889
5         0.0   0.235063
6         0.0   0.235257
7         0.0   0.111564
8         0.0  -0.074355
9         0.0  -0.262311
10        0.0  -0.087333
11        0.0   0.236171
12        0.0  -0.364461
13        0.0  -0.149571
14        0.0  -0.141648
15        0.0   0.236848
16        0.0   0.237011
17        0.0   0.237172
18        0.0   0.093658
19        0.0   0.237487
20        0.0   0.092985
21        0.0   0.237912
22        0.0  -0.617185
23        0.0  -0.382445
24        0.0  -0.398536
25        0.0   0.238609
26        0.0   0.238769
27        0.0  -0.056774
28        0.0   0.239080
29        3.0   0.239234
30        0.0   0.239386
31        2.0   0.239538
32        0.0   0.239690
33        0.0   0.239842
34        0.0   0.239994
35        0.0   0.240146
36        0.0   0.122759
37        0.0  -0.140281
38    