In [1]:
from numpy.random import seed
seed(42)

import tensorflow as tf
tf.random.set_seed(42)

import numpy as np
np.random.seed(42)

In [2]:
from tensorflow.python.keras.callbacks import TensorBoard

from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize

from skopt.plots import plot_objective, plot_evaluations
from skopt.plots import plot_objective_2D #, plot_histogram
from tensorflow.python.keras import backend as K

from sklearn.preprocessing import OneHotEncoder

import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras.optimizers import Adam
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import os
import pandas as pd
import datetime as dt

from sklearn.preprocessing import StandardScaler

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False



In [3]:
def plot_metrics(history):
    metrics =  ['loss', 'auc', 'precision', 'recall', 'fp', 'fn']
    
    for n, metric in enumerate(metrics):
        name = metric.replace("_"," ").capitalize()
        plt.subplot(3,2,n+1)
        plt.plot(history.epoch,  history.history[metric], color=colors[0], label='Train')
        plt.plot(history.epoch, history.history['val_'+metric],
             color=colors[0], linestyle="--", label='Val')
        plt.xlabel('Epoch')
        plt.ylabel(name)
        
    if metric == 'loss':
        plt.ylim([0, plt.ylim()[1]])
        
    elif metric == 'auc':
        plt.ylim([0.8,1])
        
    else:
        #plt.ylim([0,1])
        plt.legend()

def multivariate_data(
    dataset,
    target, 
    start_index, 
    end_index, 
    history_size,
    target_size, 
    step
):
    
    data = []
    labels = []

    start_index = start_index + history_size
    
    if end_index is None:
        end_index = len(dataset) - target_size

    for i in range(start_index, end_index):
        indices = range(i-history_size, i, step)
        data.append(dataset[indices])

        labels.append(target[i+target_size])

    return np.array(data), np.array(labels)

def log_dir_name(
    learning_rate,
    past_history,
    lstm_units,
    hidden_layers,
    hidden_units,
    #lstm_l2_lambda,
    hidden_l2_lambda,
    class_0_weight,
    class_1_weight
):

    # The dir-name for the TensorBoard log-dir.
    s = "./LSTM_logs/past_history_{1}_hidden_layers_{3}/"

    # Insert all the hyper-parameters in the dir-name.
    log_dir = s.format(
        learning_rate,
        past_history,
        lstm_units,
        hidden_layers,
        hidden_units,
        #lstm_l2_lambda,
        hidden_l2_lambda,
        class_0_weight,
        class_1_weight
    )

    return log_dir

def f1(y_true, y_pred): #taken from old keras source code
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    f1_val = 2 * ((precision*10) * recall) / ((precision*10) + recall + K.epsilon())
    
    return f1_val

In [4]:
data_file = '../data/training_data/1992-2015_training_data_added_features.csv'

# Datatypes for dataframe loading
dtypes = {
    'lat': float,
    'lon': float,
    'weather_bin_year': int,
    'weather_bin_month': int,
    'weather_bin_day': int,
    'air.2m': float,
    'apcp': float,
    'rhum.2m': float,
    'dpt.2m': float,
    'pres.sfc': float,
    'uwnd.10m': float,
    'vwnd.10m': float,
    'veg': float,
    'vis': float,
    'ignition': float,
    'mean.air.2m': float,
    'mean.apcp': float,
    'mean.rhum.2m': float,
    'mean.dpt.2m': float,
    'mean.pres.sfc': float,
    'mean.uwnd.10m': float,
    'mean.vwnd.10m': float,
    'mean.veg': float,
    'mean.vis': float,
    'max.air.2m': float,
    'max.apcp': float,
    'max.rhum.2m': float,
    'max.dpt.2m': float,
    'max.pres.sfc': float,
    'max.uwnd.10m': float,
    'max.vwnd.10m': float,
    'max.veg': float,
    'max.vis': float,
    'min.air.2m': float,
    'min.apcp': float,
    'min.rhum.2m': float,
    'min.dpt.2m': float,
    'min.pres.sfc': float,
    'min.uwnd.10m': float,
    'min.vwnd.10m': float,
    'min.veg': float,
    'min.vis': float,
    'total_fires': float

}

# Features to use during training 
features = [
    'lat',
    'lon',
    'weather_bin_month',
    'veg',
    'ignition',
    'mean.air.2m',
    'mean.apcp',
    'mean.rhum.2m',
    'mean.dpt.2m',
    'mean.pres.sfc',
    'mean.uwnd.10m',
    'mean.vwnd.10m',
    'mean.veg',
    'mean.vis',
    'mean.air.2m',
    'mean.apcp',
    'mean.rhum.2m',
    'mean.dpt.2m',
    'mean.pres.sfc',
    'mean.uwnd.10m',
    'mean.vwnd.10m',
    'mean.vis',
    'max.air.2m',
    'max.apcp',
    'max.rhum.2m',
    'max.dpt.2m',
    'max.pres.sfc',
    'max.uwnd.10m',
    'max.vwnd.10m',
    'max.vis',
    'min.air.2m',
    'min.apcp',
    'min.rhum.2m',
    'min.dpt.2m',
    'min.pres.sfc',
    'min.uwnd.10m',
    'min.vwnd.10m',
    'min.vis',
    'total_fires'
]

features_to_scale = [
    'veg',
    'mean.air.2m',
    'mean.apcp',
    'mean.rhum.2m',
    'mean.dpt.2m',
    'mean.pres.sfc',
    'mean.uwnd.10m',
    'mean.vwnd.10m',
    'mean.vis',
    'max.air.2m',
    'max.apcp',
    'max.rhum.2m',
    'max.dpt.2m',
    'max.pres.sfc',
    'max.uwnd.10m',
    'max.vwnd.10m',
    'max.vis',
    'min.air.2m',
    'min.apcp',
    'min.rhum.2m',
    'min.dpt.2m',
    'min.pres.sfc',
    'min.uwnd.10m',
    'min.vwnd.10m',
    'min.vis',
    'total_fires'
]

In [5]:
raw_data = pd.read_csv(data_file, index_col=0, parse_dates=True, dtype=dtypes)

In [6]:
# Pull out columns of intrest
data = raw_data[features]

In [7]:
# Pick one spatial bin with fires
data = data[(data['lat'] == 39.42233) & (data['lon'] == -120.6546)]

In [8]:
# Also drop lat, lon, day and year columns (unnecessary)
data.drop(['lat', 'lon'], axis=1, inplace=True)

In [9]:
# One hot encode month
column_names = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'Septermber',
    'October',
    'November',
    'December'
]


onehot_encoder = OneHotEncoder(sparse=False)

# Training data
month = np.array(data['weather_bin_month']).reshape(-1, 1)
onehot_month = onehot_encoder.fit_transform(month)

data.drop('weather_bin_month', axis=1, inplace=True)
onehot_month_df = pd.DataFrame(onehot_month, columns=column_names)

onehot_month_df['datetime'] = pd.to_datetime(data.index)
onehot_month_df = onehot_month_df.set_index('datetime')
data = pd.concat([data, onehot_month_df], axis=1)

In [10]:
# Scale data
scaler = StandardScaler()
scaled_features = scaler.fit_transform(data[features_to_scale])
data[features_to_scale] = scaled_features

In [11]:
# Sort by date time index
# one_bin_training_data = one_bin_training_data.sort_index()
data = data.sort_index()

In [12]:
data.head()

Unnamed: 0,veg,ignition,mean.air.2m,mean.apcp,mean.rhum.2m,mean.dpt.2m,mean.pres.sfc,mean.uwnd.10m,mean.vwnd.10m,mean.veg,...,March,April,May,June,July,August,Septermber,October,November,December
1992-01-01,0.503702,0.0,-1.138237,-0.368419,-0.485131,-1.69495,0.575796,-0.787113,0.389269,70.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1992-01-02,0.503702,0.0,-0.91678,-0.359414,-0.717171,-1.648949,-0.455567,-0.61032,-0.105418,70.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1992-01-03,0.503702,0.0,-1.00312,0.155398,0.521572,-0.76331,-1.945359,0.40643,0.978409,70.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1992-01-04,0.503702,0.0,-1.013071,1.580237,1.798763,0.13006,-2.642089,0.746722,2.634629,70.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1992-01-05,0.503702,0.0,-1.245384,0.64508,1.815982,-0.175384,-3.348068,0.378398,1.619435,70.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Split data up into training, testing and validation sets
test_data = data.tail(int(len(data)*0.1))
leftover_data = data.iloc[:-int(len(data)*0.1)]

validation_data = data.tail(int(len(leftover_data)*0.3))
training_data = data.iloc[:-int(len(leftover_data)*0.3)]

In [14]:
# Convert to numpy arrays
training_data = np.array(training_data)
validation_data = np.array(validation_data)
test_data = np.array(test_data)

In [15]:
future_target = 1
step = 1

initial_bias = -1.4
output_bias = tf.keras.initializers.Constant(initial_bias)

# weight_for_0 = 0.5 
# weight_for_1 = 13
# class_weight = {0: weight_for_0, 1: weight_for_1}

EPOCHS = 15
BATCH_SIZE = 100
STEPS_PER_EPOCH = (len(training_data) * 0.25) // BATCH_SIZE
VALIDATION_STEPS = (len(validation_data) * 0.25) // BATCH_SIZE

path_best_model = 'best_LTSM.keras'
best_fraction_incorrect = 1.0

metrics = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'), 
    keras.metrics.BinaryAccuracy(name='accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
    f1
]

dim_learning_rate = Real(
    low=0.00001, 
    high=0.1, 
    prior='log-uniform',
    name='learning_rate'
)

dim_past_history = Integer(
    low=1,
    high=30, 
    name='past_history'
)

dim_lstm_units = Integer(
    low=5, 
    high=500,
    name='lstm_units'
)

dim_hidden_layers = Integer(
    low=1, 
    high=10,
    name='hidden_layers'
)

dim_hidden_units = Integer(
    low=5, 
    high=500,
    name='hidden_units'
)

# dim_lstm_l2_lambda = Real(
#     low=0.0001, 
#     high=0.1,
#     prior='log-uniform',
#     name='lstm_l2_lambda'
# )

dim_hidden_l2_lambda = Real(
    low=0.0001, 
    high=0.1,
    prior='log-uniform',
    name='hidden_l2_lambda'
)

dim_class_0_weight = Real(
    low=0.1, 
    high=1,
    name='class_0_weight'
)

dim_class_1_weight = Integer(
    low=10, 
    high=20,
    name='class_1_weight'
)

default_parameters = [0.001, 3, 50, 2, 50, 0.1, 0.5, 15]

dimensions = [
    dim_learning_rate,
    dim_past_history,
    dim_lstm_units,
    dim_hidden_layers,
    dim_hidden_units,
    #dim_lstm_l2_lambda,
    dim_hidden_l2_lambda,
    dim_class_0_weight,
    dim_class_1_weight
]

# Use early stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_auc', 
    verbose=1,
    patience=10,
    mode='max',
    restore_best_weights=True
)

In [16]:
def make_model(
    input_dim,
    learning_rate,
    lstm_units,
    hidden_layers,
    hidden_units,
    #lstm_l2_lambda,
    hidden_l2_lambda
):    
    input_shape = (100, input_dim[0], input_dim[1])

    model = tf.keras.models.Sequential()

    model.add(tf.keras.layers.LSTM(
        lstm_units,
        batch_input_shape=input_shape,
#         bias_initializer=keras.initializers.VarianceScaling(
#             scale=1.0,
#             mode='fan_in', 
#             distribution='normal', 
#             seed=None
#         ),
#         kernel_regularizer=keras.regularizers.l2(lstm_l2_lambda),
#         activation = 'relu',
         stateful = True
    ))
    for i in range(hidden_layers):
        model.add(keras.layers.Dense(
            hidden_units,
            bias_initializer=keras.initializers.VarianceScaling(
                scale=1.0,
                mode='fan_in', 
                distribution='normal', 
                seed=None
            ),
            kernel_regularizer=keras.regularizers.l2(hidden_l2_lambda),
            activation = 'relu'
        ))

    model.add(tf.keras.layers.Dense(
        1,
        activation = 'sigmoid',
        bias_initializer = output_bias)
    )

    model.compile(
        optimizer=tf.keras.optimizers.Adam(lr = learning_rate), 
        loss=keras.losses.BinaryCrossentropy(),
        metrics=metrics
    )

    return model

@use_named_args(dimensions=dimensions)
def fitness(
    learning_rate,
    past_history,
    lstm_units,
    hidden_layers,
    hidden_units,
    #lstm_l2_lambda,
    hidden_l2_lambda,
    class_0_weight,
    class_1_weight
):

    # Print the hyper-parameters.
    print('learning rate: {0:.1e}'.format(learning_rate))
    print('past history:', past_history)
    print('LSTM units:', lstm_units)
    print('hidden layers:', hidden_layers)
    print('hidden units:', hidden_units)
    #print('lstm l2 lambda: {0:.1e}'.format(lstm_l2_lambda))
    print('hidden l2 lambda: {0:.1e}'.format(hidden_l2_lambda))
    print('class 0 weight:', class_0_weight)
    print('class 1 weight:', class_1_weight)
    print()
    
    # create data stream
    x_train, y_train = multivariate_data(
        training_data, 
        training_data[:, 1], 
        0,
        None,
        past_history,
        future_target, 
        step
    )
    
    x_validation, y_validation = multivariate_data(
        validation_data, 
        validation_data[:, 1], 
        0,
        None,
        past_history,
        future_target, 
        step
    )
    
    start_index = (x_train.shape[0] - (x_train.shape[0] % 100))
    end_index = x_train.shape[0]
    
    x_train = np.delete(x_train, range(start_index, end_index), axis=0)
    y_train = np.delete(y_train, range(start_index, end_index), axis=0)
    
    start_index = (x_validation.shape[0] - (x_validation.shape[0] % 100))
    end_index = x_validation.shape[0]
    
    x_validation = np.delete(x_validation, range(start_index, end_index), axis=0)
    y_validation = np.delete(y_validation, range(start_index, end_index), axis=0)
    
    input_dim = x_train.shape[-2:]
    
    class_weight = {0: class_0_weight, 1: class_1_weight}
    
    # Create the neural network with these hyper-parameters.
    model = make_model(
        input_dim,
        learning_rate = learning_rate,
        lstm_units = lstm_units,
        hidden_layers = hidden_layers,
        hidden_units = hidden_units,
        #lstm_l2_lambda = lstm_l2_lambda,
        hidden_l2_lambda = hidden_l2_lambda,
    )
    
    model.summary()
    print()

    # Dir-name for the TensorBoard log-files.
    log_dir = log_dir_name(
        learning_rate,
        past_history,
        lstm_units,
        hidden_layers,
        hidden_units,
        #lstm_l2_lambda,
        hidden_l2_lambda,
        class_0_weight,
        class_1_weight
    )
    
    # Create a callback-function for Keras which will be
    # run after each epoch has ended during training.
    # This saves the log-files for TensorBoard.
    # Note that there are complications when histogram_freq=1.
    # It might give strange errors and it also does not properly
    # support Keras data-generators for the validation-set.
    callback_log = TensorBoard(
        log_dir=log_dir,
        histogram_freq=0,
        write_graph=True,
        write_grads=False,
        write_images=False
    )
   
    # Use Keras to train the model.
    history = model.fit(
        x_train,
        y_train,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        steps_per_epoch=STEPS_PER_EPOCH,
        callbacks = [early_stopping],
        validation_data=(x_validation, y_validation),
        validation_steps=VALIDATION_STEPS,
        class_weight=class_weight,
        workers=8
    )

    # Get fraction incorrect on the validation-set
    # after the last training-epoch.
          
    val_fp = history.history['val_fp'][-1]
    val_fn = history.history['val_fn'][-1]
    val_tp = history.history['val_tp'][-1]
    val_tn = history.history['val_tn'][-1]
          
    fraction_incorrect = (val_fn /(val_fn + val_tp + K.epsilon())) + (val_fp / (val_fp + val_tn + K.epsilon()))
    
    print()
    print("Validation fraction incorrect: {0:.2}".format(fraction_incorrect))
    print()

    # Save the model if it improves on the best-found performance.
    # We use the global keyword so we update the variable outside
    # of this function.
    global best_fraction_incorrect

    # If the classification accuracy of the saved model is improved ...
    if fraction_incorrect < best_fraction_incorrect:
        # Save the new model to harddisk.
        model.save(path_best_model)
        
        # Update the classification accuracy.
        best_fraction_incorrect = fraction_incorrect

    # Delete the Keras model with these hyper-parameters from memory.
    del model
    
    # Clear the Keras session, otherwise it will keep adding new
    # models to the same TensorFlow graph each time we create
    # a model with a different set of hyper-parameters.
    K.clear_session()
    
    # NOTE: Scikit-optimize does minimization so it tries to
    # find a set of hyper-parameters with the LOWEST fitness-value.
    # Because we are interested in the HIGHEST classification
    # accuracy, we need to negate this number so it can be minimized.
    return fraction_incorrect

In [None]:
search_result = gp_minimize(
    func=fitness,
    dimensions=dimensions,
    acq_func='EI', # Expected Improvement.
    n_calls=40,
    x0=default_parameters
)

learning rate: 1.0e-03
past history: 3
LSTM units: 50
hidden layers: 2
hidden units: 50
hidden l2 lambda: 1.0e-01
class 0 weight: 0.5
class 1 weight: 15

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 50)                 19800     
_________________________________________________________________
dense (Dense)                (100, 50)                 2550      
_________________________________________________________________
dense_1 (Dense)              (100, 50)                 2550      
_________________________________________________________________
dense_2 (Dense)              (100, 1)                  51        
Total params: 24,951
Trainable params: 24,951
Non-trainable params: 0
_________________________________________________________________

Train on 6300 samples, validate on 2300 samples
Epoch 1/15
Validation fraction incorrect: 0.51



Validation fraction incorrect: 1.0

learning rate: 3.7e-05
past history: 20
LSTM units: 33
hidden layers: 7
hidden units: 470
hidden l2 lambda: 1.0e-04
class 0 weight: 0.992990403362096
class 1 weight: 16

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 33)                 10824     
_________________________________________________________________
dense (Dense)                (100, 470)                15980     
_________________________________________________________________
dense_1 (Dense)              (100, 470)                221370    
_________________________________________________________________
dense_2 (Dense)              (100, 470)                221370    
_________________________________________________________________
dense_3 (Dense)              (100, 470)                221370    
_______________________________________________

Validation fraction incorrect: 0.73

learning rate: 2.8e-03
past history: 1
LSTM units: 16
hidden layers: 6
hidden units: 203
hidden l2 lambda: 1.4e-04
class 0 weight: 0.9763799669573134
class 1 weight: 12

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 16)                 4160      
_________________________________________________________________
dense (Dense)                (100, 203)                3451      
_________________________________________________________________
dense_1 (Dense)              (100, 203)                41412     
_________________________________________________________________
dense_2 (Dense)              (100, 203)                41412     
_________________________________________________________________
dense_3 (Dense)              (100, 203)                41412     
_______________________________________________

1400/6300 [=====>........................] - ETA: 0s - loss: 0.8650 - tp: 37.0000 - fp: 364.0000 - tn: 970.0000 - fn: 29.0000 - accuracy: 0.7193 - precision: 0.0923 - recall: 0.5606 - auc: 0.7346 - f1: 0.5539               Epoch 7/15
1100/6300 [====>.........................] - ETA: 0s - loss: 0.7564 - tp: 41.0000 - fp: 340.0000 - tn: 715.0000 - fn: 4.0000 - accuracy: 0.6873 - precision: 0.1076 - recall: 0.9111 - auc: 0.8212 - f1: 0.9531    Restoring model weights from the end of the best epoch.

Validation fraction incorrect: 0.57

learning rate: 2.3e-05
past history: 19
LSTM units: 194
hidden layers: 10
hidden units: 236
hidden l2 lambda: 3.8e-02
class 0 weight: 0.7122767847290018
class 1 weight: 15

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 194)                188568    
_________________________________________________________________
dens

Validation fraction incorrect: 1.0

learning rate: 1.1e-05
past history: 28
LSTM units: 284
hidden layers: 4
hidden units: 13
hidden l2 lambda: 4.9e-04
class 0 weight: 0.3169229194234106
class 1 weight: 17

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 284)                378288    
_________________________________________________________________
dense (Dense)                (100, 13)                 3705      
_________________________________________________________________
dense_1 (Dense)              (100, 13)                 182       
_________________________________________________________________
dense_2 (Dense)              (100, 13)                 182       
_________________________________________________________________
dense_3 (Dense)              (100, 13)                 182       
_______________________________________________

Validation fraction incorrect: 1.0

learning rate: 2.8e-03
past history: 25
LSTM units: 91
hidden layers: 5
hidden units: 95
hidden l2 lambda: 1.8e-02
class 0 weight: 0.48264028704212036
class 1 weight: 12

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 91)                 50960     
_________________________________________________________________
dense (Dense)                (100, 95)                 8740      
_________________________________________________________________
dense_1 (Dense)              (100, 95)                 9120      
_________________________________________________________________
dense_2 (Dense)              (100, 95)                 9120      
_________________________________________________________________
dense_3 (Dense)              (100, 95)                 9120      
_______________________________________________

Validation fraction incorrect: 0.42

learning rate: 1.9e-03
past history: 2
LSTM units: 422
hidden layers: 5
hidden units: 201
hidden l2 lambda: 6.0e-02
class 0 weight: 0.7545447962707789
class 1 weight: 13

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 422)                795048    
_________________________________________________________________
dense (Dense)                (100, 201)                85023     
_________________________________________________________________
dense_1 (Dense)              (100, 201)                40602     
_________________________________________________________________
dense_2 (Dense)              (100, 201)                40602     
_________________________________________________________________
dense_3 (Dense)              (100, 201)                40602     
______________________________________________


Validation fraction incorrect: 0.51

learning rate: 1.9e-03
past history: 16
LSTM units: 481
hidden layers: 9
hidden units: 375
hidden l2 lambda: 4.2e-03
class 0 weight: 0.6280760490974635
class 1 weight: 20

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 481)                1019720   
_________________________________________________________________
dense (Dense)                (100, 375)                180750    
_________________________________________________________________
dense_1 (Dense)              (100, 375)                141000    
_________________________________________________________________
dense_2 (Dense)              (100, 375)                141000    
_________________________________________________________________
dense_3 (Dense)              (100, 375)                141000    
____________________________________________

Validation fraction incorrect: 0.47

learning rate: 2.7e-03
past history: 9
LSTM units: 152
hidden layers: 2
hidden units: 13
hidden l2 lambda: 1.9e-03
class 0 weight: 0.45539336635801286
class 1 weight: 13

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 152)                122208    
_________________________________________________________________
dense (Dense)                (100, 13)                 1989      
_________________________________________________________________
dense_1 (Dense)              (100, 13)                 182       
_________________________________________________________________
dense_2 (Dense)              (100, 1)                  14        
Total params: 124,393
Trainable params: 124,393
Non-trainable params: 0
_________________________________________________________________

Train on 6300 samples, validate on 2300

Validation fraction incorrect: 0.53

learning rate: 1.1e-05
past history: 7
LSTM units: 357
hidden layers: 8
hidden units: 305
hidden l2 lambda: 6.0e-02
class 0 weight: 0.6859693229517501
class 1 weight: 19

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 357)                579768    
_________________________________________________________________
dense (Dense)                (100, 305)                109190    
_________________________________________________________________
dense_1 (Dense)              (100, 305)                93330     
_________________________________________________________________
dense_2 (Dense)              (100, 305)                93330     
_________________________________________________________________
dense_3 (Dense)              (100, 305)                93330     
______________________________________________

Validation fraction incorrect: 1.0

learning rate: 1.4e-04
past history: 22
LSTM units: 5
hidden layers: 5
hidden units: 500
hidden l2 lambda: 1.0e-04
class 0 weight: 1.0
class 1 weight: 13

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 5)                  1080      
_________________________________________________________________
dense (Dense)                (100, 500)                3000      
_________________________________________________________________
dense_1 (Dense)              (100, 500)                250500    
_________________________________________________________________
dense_2 (Dense)              (100, 500)                250500    
_________________________________________________________________
dense_3 (Dense)              (100, 500)                250500    
_______________________________________________________________

Train on 6300 samples, validate on 2300 samples
Epoch 1/15
Validation fraction incorrect: 0.59

learning rate: 2.5e-03
past history: 30
LSTM units: 5
hidden layers: 10
hidden units: 500
hidden l2 lambda: 1.0e-01
class 0 weight: 0.9360990253698379
class 1 weight: 10

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 5)                  1080      
_________________________________________________________________
dense (Dense)                (100, 500)                3000      
_________________________________________________________________
dense_1 (Dense)              (100, 500)                250500    
_________________________________________________________________
dense_2 (Dense)              (100, 500)                250500    
_________________________________________________________________
dense_3 (Dense)              (100, 500)              


Validation fraction incorrect: 1.0

learning rate: 1.4e-03
past history: 30
LSTM units: 19
hidden layers: 9
hidden units: 193
hidden l2 lambda: 5.3e-03
class 0 weight: 0.6642638407270638
class 1 weight: 20

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 19)                 5168      
_________________________________________________________________
dense (Dense)                (100, 193)                3860      
_________________________________________________________________
dense_1 (Dense)              (100, 193)                37442     
_________________________________________________________________
dense_2 (Dense)              (100, 193)                37442     
_________________________________________________________________
dense_3 (Dense)              (100, 193)                37442     
______________________________________________

Validation fraction incorrect: 0.43

learning rate: 1.4e-03
past history: 30
LSTM units: 187
hidden layers: 8
hidden units: 84
hidden l2 lambda: 5.0e-03
class 0 weight: 0.25068914944815024
class 1 weight: 10

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 187)                176528    
_________________________________________________________________
dense (Dense)                (100, 84)                 15792     
_________________________________________________________________
dense_1 (Dense)              (100, 84)                 7140      
_________________________________________________________________
dense_2 (Dense)              (100, 84)                 7140      
_________________________________________________________________
dense_3 (Dense)              (100, 84)                 7140      
_____________________________________________

Validation fraction incorrect: 0.47

learning rate: 2.1e-03
past history: 30
LSTM units: 214
hidden layers: 1
hidden units: 124
hidden l2 lambda: 3.5e-02
class 0 weight: 0.438445332342449
class 1 weight: 15

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 214)                225128    
_________________________________________________________________
dense (Dense)                (100, 124)                26660     
_________________________________________________________________
dense_1 (Dense)              (100, 1)                  125       
Total params: 251,913
Trainable params: 251,913
Non-trainable params: 0
_________________________________________________________________

Train on 6300 samples, validate on 2300 samples
Epoch 1/15


Validation fraction incorrect: 0.45

learning rate: 5.9e-04
past history: 30
LSTM units: 460
hidden layers: 3
hidden units: 349
hidden l2 lambda: 4.7e-02
class 0 weight: 0.8557537729747847
class 1 weight: 20

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (100, 460)                936560    
_________________________________________________________________
dense (Dense)                (100, 349)                160889    
_________________________________________________________________
dense_1 (Dense)              (100, 349)                122150    
_________________________________________________________________
dense_2 (Dense)              (100, 349)                122150    
_________________________________________________________________
dense_3 (Dense)              (100, 1)                  350       
Total params: 1,342,099
Trainable params: 1,3

In [None]:
dim_names = [
    'learning_rate',
    'past_history',
    'lstm_units',
    'hidden_layers',
    'hidden_units',
    #'lstm_l2_lambda',
    'hidden_l2_lambda',
    'class_0_weight',
    'class_1_weight'
]

In [None]:
fig, ax = plot_objective(result=search_result, dimension_names=dim_names)

**Try a longer training run with the winning hyperparameters** Some of these, we will manually tweak based on the above graph.

In [None]:
space = search_result.space
winning_hyperparams = space.point_to_dict(search_result.x)
winning_hyperparams

In [None]:
# learning_rate = 3.4e-02
# past_history = 27
# lstm_units = 193
# hidden_layers = 1
# hidden_units = 87
# #lstm_l2_lambda = 1.1e-03
# hidden_l2_lambda = 3.6e-03
# class_0_weight = 0.1
# class_1_weight = 10

# learning rate: 3.4e-03
# past history: 27
# LSTM units: 193
# hidden layers: 1
# hidden units: 87
# hidden l2 lambda: 3.6e-03
# class 0 weight: 0.1
# class 1 weight: 10

learning_rate = winning_hyperparams['learning_rate']
past_history = winning_hyperparams['past_history']
lstm_units = winning_hyperparams['lstm_units']
hidden_layers = winning_hyperparams['hidden_layers']
hidden_units = winning_hyperparams['hidden_units']
#lstm_l2_lambda = winning_hyperparams['lstm_l2_lambda']
hidden_l2_lambda = winning_hyperparams['hidden_l2_lambda']
class_0_weight = winning_hyperparams['class_0_weight']
class_1_weight = winning_hyperparams['class_1_weight']

future_target = 1
step = 1

initial_bias = -1.4
output_bias = tf.keras.initializers.Constant(initial_bias)
    
class_weight = {0: class_0_weight, 1: class_1_weight}

EPOCHS = 15
BATCH_SIZE = 100
STEPS_PER_EPOCH = (len(training_data) * 0.5) // BATCH_SIZE
VALIDATION_STEPS = (len(validation_data) * 0.5) // BATCH_SIZE

In [None]:
x_train, y_train = multivariate_data(
    training_data, 
    training_data[:, 1], 
    0,
    None,
    past_history,
    future_target, 
    step
)

start_index = (x_train.shape[0] - (x_train.shape[0] % BATCH_SIZE))
end_index = x_train.shape[0]
x_train = np.delete(x_train, range(start_index, end_index), axis=0)
y_train = np.delete(y_train, range(start_index, end_index), axis=0)

x_validation, y_validation = multivariate_data(
    validation_data, 
    validation_data[:, 1], 
    0,
    None,
    past_history,
    future_target, 
    step
)

start_index = (x_validation.shape[0] - (x_validation.shape[0] % BATCH_SIZE))
end_index = x_validation.shape[0]
x_validation = np.delete(x_validation, range(start_index, end_index), axis=0)
y_validation = np.delete(y_validation, range(start_index, end_index), axis=0)

input_dim = x_train.shape[-2:]

input_shape = (BATCH_SIZE, input_dim[0], input_dim[1])

In [None]:
model = tf.keras.models.Sequential()

model.add(tf.keras.layers.LSTM(
    lstm_units,
    batch_input_shape=input_shape,
#         bias_initializer=keras.initializers.VarianceScaling(
#             scale=1.0,
#             mode='fan_in', 
#             distribution='normal', 
#             seed=None
#         ),
#         kernel_regularizer=keras.regularizers.l2(lstm_l2_lambda),
#         activation = 'relu',
     stateful = True
))

for i in range(hidden_layers):
    model.add(keras.layers.Dense(
        hidden_units,
        bias_initializer=keras.initializers.VarianceScaling(
            scale=1.0,
            mode='fan_in', 
            distribution='normal', 
            seed=None
        ),
        kernel_regularizer=keras.regularizers.l2(hidden_l2_lambda),
        activation = 'relu'
    ))

model.add(tf.keras.layers.Dense(
    1,
    activation = 'softmax',
    bias_initializer = output_bias)
)

model.compile(
    optimizer=tf.keras.optimizers.Adam(lr = learning_rate), 
    loss=keras.losses.BinaryCrossentropy(),
    metrics=metrics
)

In [None]:
history = model.fit(
    x_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    steps_per_epoch=STEPS_PER_EPOCH,
    callbacks = [early_stopping],
    validation_data=(x_validation, y_validation),
    validation_steps=VALIDATION_STEPS,
    class_weight=class_weight,
    workers=8
)

In [None]:
mpl.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

plot_metrics(history)

In [None]:
predictions = model.predict(x_train)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

plt.subplots(2,2,figsize=(12,8))

plt.subplot(2, 2, 1)

plt.plot(
    range(len(y_train)), 
    y_train,
    color = "darkred",
    label ='True ignitions'
)
plt.plot(
    range(len(y_train)), 
    predictions,
    color = "darkgray",
    label ='predicted ignitions'
)

plt.xlabel('Day')
plt.ylabel('Ignition')
plt.title('Predicted vs. actual ignition')
plt.legend()
plt.xlim(155,176)

plt.subplot(2, 2, 2)

plt.plot(
    range(len(y_train)), 
    y_train,
    color = "darkred",
    label ='True ignitions'
)
plt.plot(
    range(len(y_train)), 
    predictions,
    color = "darkgray",
    label ='predicted ignitions'
)

plt.xlabel('Day')
plt.ylabel('Ignition')
plt.title('Predicted vs. actual ignition')
plt.legend()
plt.xlim(175,195)

plt.subplot(2, 2, 3)

plt.plot(
    range(len(y_train)), 
    y_train,
    color = "darkred",
    label ='True ignitions'
)
plt.plot(
    range(len(y_train)), 
    predictions,
    color = "darkgray",
    label ='predicted ignitions'
)

plt.xlabel('Day')
plt.ylabel('Ignition')
plt.title('Predicted vs. actual ignition')
plt.legend()
plt.xlim(195,215)

plt.subplot(2, 2, 4)

plt.plot(
    range(len(y_train)), 
    y_train,
    color = "darkred",
    label ='True ignitions'
)
plt.plot(
    range(len(y_train)), 
    predictions,
    color = "darkgray",
    label ='predicted ignitions'
)

plt.xlabel('Day')
plt.ylabel('Ignition')
plt.title('Predicted vs. actual ignition')
plt.legend()
#plt.xlim(195,215)

plt.tight_layout()
plt.show()