# Imports

In [1]:
%load_ext autoreload
%autoreload 2
from tensorflow import keras
from models import CLSTM_classifier, LSTM_classifier
from keras.utils import to_categorical
from keras.callbacks import History 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


# Data setup

In [2]:
# Load the Denver weather series (one row per time step; presumably hourly,
# judging by `number_of_hours_to_predict` -- TODO confirm against the CSV).
series_data = pd.read_csv("../data/denver_series_data.csv")

# Columns fed to the models as input features.
X_variables = [
    "humidity_Denver",
    "pressure_Denver",
    "temperature_Denver",
    "wind_direction_Denver",
    "wind_speed_Denver",
]
# Column holding the weather class to predict.
y_variables = ["weather_description_Denver"]

X_data = series_data[X_variables]
# Explicit copy: the label frame is rewritten in place by the merge step, and
# writing into a column slice of `series_data` raises SettingWithCopyWarning.
y_data = series_data[y_variables].copy()

# Number of consecutive rows used as the target of each sequence.
number_of_hours_to_predict = 1

## Normalize Data

In [3]:
# Min/max statistics come only from rows whose temperature is non-zero
# (presumably a zero temperature marks a missing reading -- TODO confirm).
has_temperature = X_data["temperature_Denver"] != 0
non_zero_X_data = X_data.loc[has_temperature]
min_X_data = non_zero_X_data.min()
max_X_data = non_zero_X_data.max()

# Min-max scale each feature column, then append the unscaled label column.
feature_range = max_X_data - min_X_data
scaled_features = (X_data - min_X_data) / feature_range
normalized_X_data = pd.concat([scaled_features, y_data], axis=1)

# Rows left out of the statistics above can scale below zero; floor them at 0.
normalized_X_data = normalized_X_data.clip(lower=0)

# Merge similar weather types

In [None]:
def merge_similar(data, similarities, new_value, column="weather_description_Denver"):
    """Collapse several label codes into a single code.

    Args:
        data: DataFrame containing the label column. It is modified in place.
        similarities: Iterable of codes that should be replaced.
        new_value: Code written in place of every matching entry.
        column: Name of the label column to inspect and rewrite.

    Returns:
        The same DataFrame object that was passed in.
    """
    # Select the label column explicitly: a row-only `.loc[mask] = value`
    # assignment would overwrite every column of the matching rows.
    matches = data[column].isin(list(similarities))
    data.loc[matches, column] = new_value
    return data
    
# Groups of raw weather codes that are treated as one class.
light_rain = [3, 16, 19, 22, 23, 30, 32, 33]
mist = [13]
haze = [24, 26, 7]
snow = [9, 31]
moderate_rain = [12, 17, 18, 20, 25, 27, 28, 29, 34]

# (codes to merge, code that replaces them), applied in this order.
merge_plan = (
    (light_rain, 1),
    (mist, 10),
    (haze, 11),
    (snow, 15),
    (moderate_rain, 21),
)
for similar_codes, merged_code in merge_plan:
    y_data = merge_similar(y_data, similar_codes, merged_code)

## Generate All Possible Sequences for LSTM models.

In [5]:
# Number of past rows in each input window; must stay equal to the
# `sequence_size` hyperparameter used when the models are built.
window_size = 24
total_rows = series_data.shape[0]

# Extract the columns once. Slicing these arrays per window yields the same
# values as slicing the frames row by row, without per-iteration DataFrame work.
feature_values = normalized_X_data[X_variables + y_variables].values
target_values = y_data[y_variables].values

X_data_arrays = []
y_data_arrays = []

# Only rows with a non-zero weather code are used as prediction targets.
# NOTE: index labels are used as row positions below, which assumes the
# default 0..n-1 index produced by read_csv.
for index in series_data.loc[series_data["weather_description_Denver"] != 0].index:
    # Require a full window behind the row and a full prediction horizon from
    # the row onwards; a short trailing slice would make the stacked targets ragged.
    if index >= window_size and index + number_of_hours_to_predict <= total_rows:
        X_data_arrays.append(feature_values[index - window_size: index])
        y_data_arrays.append(target_values[index: index + number_of_hours_to_predict])

X_data_arrays = np.array(X_data_arrays)
y_data_arrays = np.array(y_data_arrays)

## Convert Y Targets to Categorical 1-Hot Encoded Vectors

In [6]:
# Drop the prediction-horizon axis (axis 1) so each target row holds one label.
squeezed_y_data = y_data_arrays.squeeze(axis=1)

def transform_labels_0_n(y_data):
    """Re-index labels to consecutive integers starting at 0.

    Each distinct value found in the first element of a row is given the next
    free integer, in order of first appearance. The rows of `y_data` are
    rewritten in place and the same object is returned.
    """
    label_ids = {}

    for row in y_data:
        raw_label = row[0]
        # len(label_ids) is evaluated before insertion, so an unseen label
        # receives the next unused id.
        row[0] = label_ids.setdefault(raw_label, len(label_ids))

    return y_data

# Re-index the labels (the array is rewritten in place and returned), then
# one-hot encode the re-indexed labels.
transformed_y_data = transform_labels_0_n(squeezed_y_data)
categorical_y_data = to_categorical(transformed_y_data)

## Split Data Into Training and Testing Sets

In [7]:
# Hold out 20% of the sequences for testing. The fixed seed keeps the split
# identical across kernel restarts so results can be reproduced.
# NOTE(review): neighbouring windows overlap, so a shuffled split shares rows
# between train and test -- consider a chronological split.
X_train, X_test, y_train, y_test = train_test_split(
    X_data_arrays,
    categorical_y_data,
    test_size=0.2,
    random_state=42,
)

### Hyperparameters

In [8]:
# Training schedule.
epochs = 30
batch_size = 500

# Length of each input window; must equal the window length used when the
# sequences were generated.
sequence_size = 24

# Features per time step: every X column plus the label column that is
# appended to the inputs. Derived from the column lists so it cannot drift
# when they change (evaluates to 6 for the current lists).
inputs = len(X_variables) + len(y_variables)

# Train LSTM classifier.

In [9]:
# Callback object handed to model.fit in the next cell.
history = History()
# Build the LSTM classifier from the project's `models` module
# (presumably an already-compiled Keras model -- TODO confirm in models.py).
model = LSTM_classifier(sequence_size,inputs)
# Print the layer-by-layer architecture.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking_1 (Masking)          (None, 24, 6)             0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 24, 128)           69120     
_________________________________________________________________
activation_1 (Activation)    (None, 24, 128)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 128)               131584    
_________________________________________________________________
activation_2 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               33024     
_________________________________________________________________
activation_3 (Activation)    (None, 256)               0         
__________

In [10]:
# Fit the LSTM on the X_train / y_train globals; `history` is registered as a
# callback for the run.
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=[history])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f97674def60>

### Cross Validation LSTM

In [None]:
from sklearn.model_selection import KFold

n_folds = 5
# Fixed seed so the folds are reproducible across runs.
cross_val = KFold(n_splits=n_folds, shuffle=True, random_state=42)

# One `model.evaluate` result per fold.
lstm_evaluations = []
for train_index, test_index in cross_val.split(X_data_arrays[:, 0]):
    # Fold-local names on purpose: assigning to y_train / y_test / model here
    # would overwrite the hold-out split and the trained model that other
    # cells read, leaving X_train paired with labels from a different split.
    fold_x_train = X_data_arrays[train_index]
    fold_y_train = categorical_y_data[train_index]

    fold_x_test = X_data_arrays[test_index]
    fold_y_test = categorical_y_data[test_index]

    # A fresh model per fold so no weights carry over between folds.
    fold_model = LSTM_classifier(sequence_size, inputs)
    fold_model.fit(
        fold_x_train,
        fold_y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(fold_x_test, fold_y_test),
    )

    lstm_evaluations.append(fold_model.evaluate(fold_x_test, fold_y_test))

# Train CLSTM.

In [11]:
# Callback object handed to model2.fit in the next cell.
history2 = History()
# Build the CLSTM classifier from the project's `models` module
# (presumably an already-compiled Keras model -- TODO confirm in models.py).
model2 = CLSTM_classifier(sequence_size, inputs)
# Print the layer-by-layer architecture.
model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 22, 128)           2432      
_________________________________________________________________
activation_5 (Activation)    (None, 22, 128)           0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 22, 128)           131584    
_________________________________________________________________
activation_6 (Activation)    (None, 22, 128)           0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 128)               131584    
_________________________________________________________________
activation_7 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               33024     
__________

In [12]:
# Fit the CLSTM on the X_train / y_train globals; `history2` is registered as
# a callback for the run.
model2.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, callbacks=[history2])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f975c073198>

### Cross Validation CLSTM

In [None]:
from sklearn.model_selection import KFold

n_folds = 5
# Fixed seed so the folds are reproducible across runs.
cross_val = KFold(n_splits=n_folds, shuffle=True, random_state=42)

# One `model.evaluate` result per fold.
clstm_evaluations = []
for train_index, test_index in cross_val.split(X_data_arrays[:, 0]):
    # Fold-local names on purpose: assigning to y_train / y_test / model here
    # would overwrite the hold-out split and the trained model that other
    # cells read.
    fold_x_train = X_data_arrays[train_index]
    fold_y_train = categorical_y_data[train_index]

    fold_x_test = X_data_arrays[test_index]
    fold_y_test = categorical_y_data[test_index]

    # A fresh model per fold so no weights carry over between folds.
    fold_model = CLSTM_classifier(sequence_size, inputs)
    fold_model.fit(
        fold_x_train,
        fold_y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(fold_x_test, fold_y_test),
    )

    clstm_evaluations.append(fold_model.evaluate(fold_x_test, fold_y_test))