In [257]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import datetime

In [294]:
numpy.random.seed(7)

In [295]:
def normalize(column):
    new_column = []
    min_val = min(column)
    max_val = max(column)
    range_val = max_val-min_val
    for entry in column:
        new_column.append((entry-min_val)/range_val)
    return new_column

In [307]:
total_labels = pd.read_csv('./input/dengue_labels_train.csv', engine='python')
total_test_features = pd.read_csv('./input/dengue_features_test.csv')
total_labels = np.ravel(total_labels.drop(total_labels.columns[[0, 1, 2]], axis=1).values.astype('float32'))
sj_final_test = total_labels[:260]
iq_final_test = total_labels[261:]

In [299]:
dataset = sj_labels
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit_transform(dataset)
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
sj_train, sj_test = dataset[0:train_size], dataset[train_size:len(dataset)]

dataset = iq_labels
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit_transform(dataset)
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
iq_train, iq_test = dataset[0:train_size], dataset[train_size:len(dataset)]



In [300]:
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back)]
        dataX.append(a)
        dataY.append(dataset[i + look_back])
    return numpy.array(dataX), numpy.array(dataY)

In [314]:
look_back = 1
sj_trainX, sj_trainY = create_dataset(sj_train, look_back)
sj_testX, sj_testY = create_dataset(sj_test, look_back)
iq_trainX, iq_trainY = create_dataset(iq_train, look_back)
iq_testX, iq_testY = create_dataset(iq_test, look_back)
sj_x, sj_y = create_dataset(sj_final_test, look_back)
iq_x, iq_y = create_dataset(iq_final_test, look_back)

In [317]:
# reshape input to be [samples, time steps, features]
sj_trainX = numpy.reshape(sj_trainX, (sj_trainX.shape[0], 1, sj_trainX.shape[1]))
sj_testX = numpy.reshape(sj_testX, (sj_testX.shape[0], 1, sj_testX.shape[1]))

# reshape input to be [samples, time steps, features]
iq_trainX = numpy.reshape(iq_trainX, (iq_trainX.shape[0], 1, iq_trainX.shape[1]))
iq_testX = numpy.reshape(iq_testX, (iq_testX.shape[0], 1, iq_testX.shape[1]))

sj_x = numpy.reshape(sj_x, (sj_x.shape[0], 1, sj_x.shape[1]))
iq_x = numpy.reshape(iq_x, (iq_x.shape[0], 1, iq_x.shape[1]))

In [344]:
print(sj_trainY)

[   5.    4.    3.    6.    2.    4.    5.   10.    6.    8.    2.    6.
   17.   23.   13.   21.   28.   24.   20.   40.   27.   42.   33.   43.
   37.   57.   71.   44.   56.   53.   52.   47.   26.   27.   21.   21.
   26.   34.   37.   17.   19.   25.   18.   21.   17.   17.   16.   16.
   15.   23.   16.   17.   12.   17.   10.   15.   19.   21.   14.   18.
   13.   14.   18.   23.   25.   62.   60.   76.   66.   64.   68.   89.
   92.  140.  116.  142.  129.  140.  140.  127.  129.  169.  141.  108.
   78.   70.   81.  104.   90.   85.   55.   53.   65.   33.   38.   59.
   40.   37.   29.   30.   30.   28.   23.   24.   29.   26.   23.   20.
   19.   20.   26.   29.   31.   28.   26.   32.   35.   33.   30.   52.
   59.   67.   65.   74.   70.   61.   53.   76.   61.   57.   44.   34.
   47.   60.   60.   53.   36.   31.   30.   32.   28.   33.   33.   35.
   22.   13.   13.   21.   17.   11.    8.    8.    6.    6.    7.   12.
   17.   10.   10.   18.   19.   12.   22.   12.   

In [304]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(sj_trainX, sj_trainY, epochs=100, batch_size=1, verbose=2)

Epoch 1/100
5s - loss: 5086.3319
Epoch 2/100
4s - loss: 4888.6072
Epoch 3/100
4s - loss: 4736.6310
Epoch 4/100
4s - loss: 4639.1163
Epoch 5/100
4s - loss: 4553.5422
Epoch 6/100
5s - loss: 4474.0251
Epoch 7/100
4s - loss: 4398.5189
Epoch 8/100
4s - loss: 4327.1965
Epoch 9/100
4s - loss: 4259.5929
Epoch 10/100
4s - loss: 4194.9315
Epoch 11/100
4s - loss: 4133.2580
Epoch 12/100
4s - loss: 4074.5755
Epoch 13/100
4s - loss: 4019.4687
Epoch 14/100
4s - loss: 3967.8469
Epoch 15/100
4s - loss: 3916.7937
Epoch 16/100
4s - loss: 3868.1681
Epoch 17/100
4s - loss: 3821.2586
Epoch 18/100
4s - loss: 3775.9085
Epoch 19/100
4s - loss: 3732.9784
Epoch 20/100
4s - loss: 3692.4698
Epoch 21/100
4s - loss: 3652.6625
Epoch 22/100
4s - loss: 3613.5952
Epoch 23/100
4s - loss: 3575.8650
Epoch 24/100
4s - loss: 3539.9864
Epoch 25/100
4s - loss: 3504.5799
Epoch 26/100
4s - loss: 3471.0620
Epoch 27/100
4s - loss: 3438.5150
Epoch 28/100
4s - loss: 3407.2355
Epoch 29/100
4s - loss: 3376.2545
Epoch 30/100
4s - loss:

<keras.callbacks.History at 0x11f821d30>

In [330]:
# make predictions
trainPredict = model.predict(sj_trainX)
testPredict = model.predict(sj_testX)
sj_predictions = [int(i) for i in model.predict(sj_x)]
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([sj_trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([sj_testY])
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

Train Score: 7230.12 RMSE
Test Score: 2788.00 RMSE


In [306]:
# create and fit the LSTM network
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(iq_trainX, iq_trainY, epochs=100, batch_size=1, verbose=2)

Epoch 1/100
3s - loss: 146.3292
Epoch 2/100
2s - loss: 132.1529
Epoch 3/100
2s - loss: 119.0504
Epoch 4/100
2s - loss: 111.8936
Epoch 5/100
2s - loss: 107.9576
Epoch 6/100
2s - loss: 104.0583
Epoch 7/100
2s - loss: 98.3277
Epoch 8/100
2s - loss: 95.5136
Epoch 9/100
2s - loss: 93.5305
Epoch 10/100
2s - loss: 91.8790
Epoch 11/100
2s - loss: 90.3886
Epoch 12/100
2s - loss: 89.0408
Epoch 13/100
2s - loss: 87.7940
Epoch 14/100
2s - loss: 86.6599
Epoch 15/100
2s - loss: 85.6179
Epoch 16/100
2s - loss: 84.6891
Epoch 17/100
2s - loss: 83.7484
Epoch 18/100
2s - loss: 82.9185
Epoch 19/100
2s - loss: 82.1246
Epoch 20/100
2s - loss: 81.4252
Epoch 21/100
2s - loss: 80.6598
Epoch 22/100
2s - loss: 79.9635
Epoch 23/100
2s - loss: 79.4307
Epoch 24/100
2s - loss: 78.7654
Epoch 25/100
2s - loss: 78.1697
Epoch 26/100
2s - loss: 77.6217
Epoch 27/100
2s - loss: 77.0622
Epoch 28/100
2s - loss: 76.5583
Epoch 29/100
2s - loss: 76.0560
Epoch 30/100
2s - loss: 75.6292
Epoch 31/100
2s - loss: 75.1266
Epoch 32/10

<keras.callbacks.History at 0x120f20cf8>

In [331]:
# make predictions
trainPredict = model.predict(iq_trainX)
testPredict = model.predict(iq_testX)
iq_predictions = [int(i) for i in model.predict(iq_x)]
# invert predictions
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([iq_trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([iq_testY])
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

Train Score: 919.51 RMSE
Test Score: 912.47 RMSE


In [342]:
print(iq_trainX)

[[[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   1.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   1.]]

 [[   1.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   1.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   1.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   1.]]

 [[   1.]]

 [[   0.]]

 [[   0.]]

 [[   1.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   0.]]

 [[   1.]]

 [[   1.]]

 [[   1.]]

 [[   2.]]

 [[   4.]]

 [[   1.]]

 [[   4.]]

 [[  11.]]

 [[  16.]]

 [[  23.]]

 [[  12.]]

 [[ 

In [340]:
print(iq_testX)

[[[  6.]]

 [[  7.]]

 [[  6.]]

 [[  5.]]

 [[  9.]]

 [[  5.]]

 [[  8.]]

 [[  3.]]

 [[  4.]]

 [[ 11.]]

 [[  5.]]

 [[  8.]]

 [[  4.]]

 [[  3.]]

 [[  1.]]

 [[  2.]]

 [[  3.]]

 [[  4.]]

 [[  1.]]

 [[  8.]]

 [[  5.]]

 [[  3.]]

 [[  2.]]

 [[  7.]]

 [[  1.]]

 [[  6.]]

 [[  7.]]

 [[  5.]]

 [[  2.]]

 [[  6.]]

 [[ 11.]]

 [[  6.]]

 [[  3.]]

 [[ 11.]]

 [[ 11.]]

 [[  5.]]

 [[  4.]]

 [[  9.]]

 [[ 23.]]

 [[ 28.]]

 [[ 26.]]

 [[  7.]]

 [[ 29.]]

 [[ 58.]]

 [[ 26.]]

 [[ 38.]]

 [[ 35.]]

 [[ 37.]]

 [[ 20.]]

 [[ 29.]]

 [[ 25.]]

 [[ 23.]]

 [[  9.]]

 [[  3.]]

 [[  6.]]

 [[  6.]]

 [[  3.]]

 [[  1.]]

 [[  3.]]

 [[  1.]]

 [[  1.]]

 [[  0.]]

 [[  2.]]

 [[  1.]]

 [[  1.]]

 [[  0.]]

 [[  0.]]

 [[  1.]]

 [[  0.]]

 [[  3.]]

 [[  3.]]

 [[  1.]]

 [[  5.]]

 [[  2.]]

 [[  5.]]

 [[  5.]]

 [[  5.]]

 [[  9.]]

 [[ 17.]]

 [[ 19.]]

 [[ 25.]]

 [[ 45.]]

 [[ 34.]]

 [[ 63.]]

 [[ 44.]]

 [[ 50.]]

 [[ 35.]]

 [[ 16.]]

 [[ 16.]]

 [[ 13.]]

 [[  9.]]


In [339]:
print((len(iq_x.ravel())))

518


In [335]:
print(len(iq_predictions))

518


In [334]:
submission = pd.read_csv('./input/submission_format.csv',
                         index_col=[0, 1, 2])

submission.total_cases = np.concatenate([sj_predictions, iq_predictions])
submission.to_csv('./input/lstm_submission.csv')

ValueError: Length of values does not match length of index