In [32]:
# univariate multi-step lstm
import pandas as pd
import numpy as np
import scipy.stats as st
import kerastuner as kt
from pandas import read_csv
from kerastuner.tuners import Hyperband
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import (LSTM, Dense, Flatten)
from keras.utils.np_utils import to_categorical

In [2]:
df = read_csv("predicted_temperature_and_test_values.csv")

In [31]:
df

Unnamed: 0,pred_temperature,temperature,humidity,label
0,23.947466,24.833333,14.416667,False
1,24.20264,24.661017,14.915254,False
2,23.895992,24.583333,14.966667,False
3,23.915253,24.589286,17.446429,False
4,23.957087,23.666667,19.4,True
5,23.699652,22.016667,20.883333,False
6,23.566374,22.583333,20.05,False
7,23.349136,22.762712,18.847458,False
8,23.766033,23.15,17.766667,False
9,23.383087,24.157895,17.298246,False


In [4]:
def calculate_confidence(data):
    diff_actual_pred = data.temperature - data.pred_temperature
    confidence_interval= st.t.interval(0.99, diff_actual_pred.shape[0]-1, loc=diff_actual_pred.mean(), scale=st.sem(diff_actual_pred.values.tolist()))
    return confidence_interval

In [5]:
interval = calculate_confidence(df)
print(interval, (interval[1] - interval[0])/2)

(-0.3925278235895418, 0.3670887003061874) 0.3798082619478646


In [6]:
def to_labeled(data):
    diff_actual_pred = data.temperature - data.pred_temperature
    interval = calculate_confidence(data)
    data['label'] = ((diff_actual_pred>interval[0]) & (diff_actual_pred<interval[1]))

In [7]:
# split a univariate dataset into train/test sets
def split_dataset(data):
    split_index = int((len(data)*0.8))
    train, test = data[:split_index], data[split_index:]
    return train, test

# train the model
def build_model(train):
    train_x = train[["temperature", "humidity"]].values.reshape((-1, 1, 2))
    train_y = to_categorical(train.label.values, num_classes=2)
    # define parameters
    verbose, epochs, batch_size = 1, 10, 5
    n_features, n_outputs = train_x.shape[2], train_y.shape[1]
    n_timesteps = 1
    print(n_outputs, n_timesteps, n_features)
    # define model
    model = Sequential()
    model.add(LSTM(200, activation='relu', input_shape=(n_timesteps, n_features)))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs))
    model.compile(loss='mse', optimizer='adam')
    # fit network
    model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model

In [49]:
# train the model
def build_model(hp):
    # define hyperparameters parameters
    num_layers = hp.Int('num_layers', 1, 7, step=2)
    activation = hp.Choice("activation", ["sigmoid", "relu", "tanh"])
    hiddent_units = hp.Choice('hiddent_units', [50, 100, 150, 200])

    n_features, n_outputs = 2, 2
    n_timesteps = 1

    # define model
    model = Sequential()
    # we want sigmoid in the first layer so that we convert the data between 0 and 1
    model.add(LSTM(hiddent_units, activation='sigmoid', input_shape=(n_timesteps, n_features), return_sequences=num_layers > 1))
    for i in range(num_layers - 2):
        model.add(LSTM(hiddent_units, activation=activation, return_sequences=True, dropout=hp.Float("dropout", 0.0 , 0.8 , 0.2)))
    if num_layers > 1:
        model.add(LSTM(hiddent_units, activation=activation))
    model.add(Dense(n_outputs))
    model.compile(loss='mse', metrics=['accuracy'], optimizer='adam')
    return model

In [50]:
train, test = split_dataset(df)

train_x = train[["temperature", "humidity"]].values.reshape((-1, 1, 2))
train_y = to_categorical(train.label.values, num_classes=2)
val_x = test[["temperature", "humidity"]].values.reshape((-1, 1, 2))
val_y = to_categorical(test.label.values, num_classes=2)
print(train_x, train_y)
print(val_x, val_y)

[[[24.83333333 14.41666667]]

 [[24.66101695 14.91525424]]

 [[24.58333333 14.96666667]]

 [[24.58928571 17.44642857]]

 [[23.66666667 19.4       ]]

 [[22.01666667 20.88333333]]

 [[22.58333333 20.05      ]]

 [[22.76271186 18.84745763]]

 [[23.15       17.76666667]]

 [[24.15789474 17.29824561]]

 [[22.06896552 20.20689655]]

 [[21.52542373 20.06779661]]

 [[21.01694915 19.23728814]]

 [[21.         19.05      ]]

 [[21.63793103 17.62068966]]

 [[24.         14.56666667]]

 [[24.1        14.03333333]]

 [[24.14035088 14.01754386]]

 [[24.06779661 14.05084746]]

 [[24.33333333 14.        ]]

 [[24.48333333 13.6       ]]

 [[24.28813559 13.61016949]]

 [[24.44067797 13.45762712]]

 [[24.78947368 13.01754386]]

 [[24.48275862 13.01724138]]

 [[24.3559322  13.01694915]]

 [[23.24561404 14.92982456]]

 [[23.93333333 14.2       ]]

 [[24.         14.48275862]]

 [[24.         15.33333333]]

 [[24.         16.3       ]]

 [[23.23728814 17.77966102]]

 [[22.73333333 19.61666667]]

 [[23.0338

# When training with hyperparameters change the epoch size to 50 and the max_trails to min 72000
class MyTuner(kt.Tuner):
    
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def run_trial(self, trial, *atgs, **kwargs):
        hp = trial.hyperparameters
        batch_size = hp.Choice("batch_size", [0, 32, 64, 128, 256, 512])
        epochs = hp.Choice("epochs", [15])
        model = self.hypermodel.build(trial.hyperparameters)
        model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=False)
        score, accuracy = model.evaluate(val_x, val_y)
        self.oracle.update_trial(trial.trial_id, {'score': score, 'accuracy': accuracy})

tuner = MyTuner(
    oracle=kt.oracles.BayesianOptimization(
          objective=kt.Objective('accuracy', 'min'),
          max_trials=20),
    hypermodel=build_model,
    directory='fault_classification_evaluation',
    project_name='lstm', overwrite=True)
tuner.search_space_summary()

In [66]:
tuner = Hyperband(build_model,
                     objective='val_accuracy',
                     max_epochs=5,
                     factor=3,
                     directory='fault_classification_evaluation',
                     project_name='lstm',
                     overwrite=True)

In [67]:
tuner.search(train_x, train_y,
             epochs=5,
             validation_split=0.2)

Trial 10 Complete [00h 00m 22s]
val_accuracy: 0.6666666865348816

Best val_accuracy So Far: 0.6666666865348816
Total elapsed time: 00h 02m 27s
INFO:tensorflow:Oracle triggered exit


In [None]:
to_labeled(df)
train, test = split_dataset(df)
build_model(train).predict(test[["temperature", "humidity"]].values.reshape((-1, 1, 2)))