In [1]:
import numpy as np 
import pandas as pd 
import csv
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style("whitegrid")
from datetime import datetime, date
from os import path
import os
import sys

import keras
from keras.callbacks import EarlyStopping

In [2]:
# # load data, oversampled, 700 time units
# X_train_temporal = np.load('feature_temporal_cat_oversample_standardscalar_X_train.npy')[:,:700,:]
# X_train_cat = np.load('feature_temporal_cat_oversample_X_train.npy')[:,700:,1]

# X_test_temporal = np.load('features_temporal_test_norm_standardscalar.npy')
# X_test_cat = np.load('X_test_cat_features.npy')

# y_train = np.load('feature_temporal_cat_oversample_standardscalar_y_train.npy')
# y_test = np.load('y_test.npy')

In [3]:
# # load data, oversampled, 35 time units
X_train_temporal = np.load('feature_temporal_cat_agg_oversample_standardscalar_X_train.npy')[:,:35,:]
X_train_cat = np.load('feature_temporal_cat_agg_oversample_standardscalar_X_train.npy')[:,35:,1]

X_test_temporal = np.load('features_temporal_test_norm_agg_standardscalar.npy')
X_test_cat = np.load('X_test_cat_features.npy')

y_train = np.load('feature_temporal_cat_agg_oversample_standardscalar_y_train.npy')
y_test = np.load('y_test.npy')

In [4]:
print(X_train_temporal.shape)
print(X_test_temporal.shape)
print(X_train_cat.shape)
print(X_test_cat.shape)

(350, 35, 7)
(30, 35, 7)
(350, 68)
(30, 68)


In [5]:
from keras.models import Sequential, load_model

from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.layers import Input
from keras.models import Model
from keras import regularizers
import keras

In [6]:
main_input = Input(shape=(X_train_temporal.shape[1], X_train_temporal.shape[2]), name='main_input')
lstm_out = LSTM(50, dropout=0.1, recurrent_dropout=0.1)(main_input)

auxiliary_input = Input(shape=(X_train_cat.shape[1],), name='aux_input')
# aux_1 = Dense(30, activation='relu')(auxiliary_input)
# aux_1= Dropout(0.1)(aux_1)
x = keras.layers.concatenate([lstm_out, auxiliary_input])

# stack a deep densely-connected network on top
x = Dense(8, activation='relu')(x)
x= Dropout(0.1)(x)
# x = Dense(3, activation='relu')(x)
# x= Dropout(0.1)(x)


# add the main logistic regression layer
main_output = Dense(1, activation='sigmoid', name='main_output')(x)
#This defines a model with two inputs:
model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output])

In [7]:
# Model output shape
model.output_shape
# Model summary
model.summary()
# Model config
model.get_config()
# List all weight tensors 
model.get_weights()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         [(None, 35, 7)]      0                                            
__________________________________________________________________________________________________
lstm (LSTM)                     (None, 50)           11600       main_input[0][0]                 
__________________________________________________________________________________________________
aux_input (InputLayer)          [(None, 68)]         0                                            
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 118)          0           lstm[0][0]                       
                                                                 aux_input[0][0]              

[array([[ 0.07638404,  0.04154107,  0.06486528, ...,  0.06169215,
         -0.16041398,  0.0311897 ],
        [-0.08862188, -0.1313782 , -0.07824925, ..., -0.12464054,
         -0.06388788, -0.16331913],
        [ 0.121438  , -0.08952495,  0.00792842, ..., -0.04797573,
          0.06731211,  0.04915234],
        ...,
        [-0.07030415, -0.00492679, -0.05372164, ..., -0.07734483,
          0.07170068,  0.1626527 ],
        [ 0.0258173 ,  0.13877878,  0.14571011, ..., -0.01803985,
          0.12262031, -0.04218626],
        [-0.02379616,  0.13987708,  0.04139754, ...,  0.06552634,
         -0.15832825, -0.00155692]], dtype=float32),
 array([[-0.0912993 , -0.00522477, -0.0505947 , ..., -0.08093317,
          0.00284258,  0.03007905],
        [ 0.14688835, -0.009341  ,  0.1375206 , ...,  0.07899708,
         -0.0011789 ,  0.04956714],
        [-0.02023003,  0.02591759, -0.04390585, ...,  0.0511161 ,
          0.00305995,  0.06004294],
        ...,
        [ 0.02202007,  0.0068798 , -0.0

In [8]:
import keras.backend as K
from keras.callbacks import ReduceLROnPlateau,ModelCheckpoint
from keras.optimizers import adam_v2
def mean_pred(y_true, y_pred):
    return K.mean(y_pred)


opt = adam_v2.Adam(lr=0.0001)

model.compile(optimizer=opt,
              loss={'main_output': 'binary_crossentropy'},metrics=['accuracy',mean_pred])

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=5, min_lr=0.000001, verbose=1)
checkpointer = ModelCheckpoint(filepath='lstm.hdf5', verbose=1, save_best_only=True)

history=model.fit({'main_input': X_train_temporal, 'aux_input': X_train_cat},
          {'main_output': y_train},
          epochs=60, batch_size=128, callbacks=[reduce_lr, checkpointer],validation_split=0.1)



Epoch 1/60

Epoch 00001: val_loss improved from inf to 0.90998, saving model to lstm.hdf5
Epoch 2/60

Epoch 00002: val_loss improved from 0.90998 to 0.90822, saving model to lstm.hdf5
Epoch 3/60

Epoch 00003: val_loss improved from 0.90822 to 0.90642, saving model to lstm.hdf5
Epoch 4/60

Epoch 00004: val_loss improved from 0.90642 to 0.90394, saving model to lstm.hdf5
Epoch 5/60

Epoch 00005: val_loss improved from 0.90394 to 0.90186, saving model to lstm.hdf5
Epoch 6/60

Epoch 00006: val_loss improved from 0.90186 to 0.89964, saving model to lstm.hdf5
Epoch 7/60

Epoch 00007: val_loss improved from 0.89964 to 0.89735, saving model to lstm.hdf5
Epoch 8/60

Epoch 00008: val_loss improved from 0.89735 to 0.89539, saving model to lstm.hdf5
Epoch 9/60

Epoch 00009: val_loss improved from 0.89539 to 0.89313, saving model to lstm.hdf5
Epoch 10/60

Epoch 00010: val_loss improved from 0.89313 to 0.89111, saving model to lstm.hdf5
Epoch 11/60

Epoch 00011: val_loss improved from 0.89111 to 0.8


Epoch 00032: val_loss improved from 0.84727 to 0.84612, saving model to lstm.hdf5
Epoch 33/60

Epoch 00033: val_loss improved from 0.84612 to 0.84521, saving model to lstm.hdf5
Epoch 34/60

Epoch 00034: val_loss improved from 0.84521 to 0.84414, saving model to lstm.hdf5
Epoch 35/60

Epoch 00035: val_loss improved from 0.84414 to 0.84316, saving model to lstm.hdf5
Epoch 36/60

Epoch 00036: val_loss improved from 0.84316 to 0.84253, saving model to lstm.hdf5
Epoch 37/60

Epoch 00037: val_loss improved from 0.84253 to 0.84162, saving model to lstm.hdf5
Epoch 38/60

Epoch 00038: val_loss improved from 0.84162 to 0.84063, saving model to lstm.hdf5
Epoch 39/60

Epoch 00039: val_loss improved from 0.84063 to 0.83931, saving model to lstm.hdf5
Epoch 40/60

Epoch 00040: val_loss improved from 0.83931 to 0.83805, saving model to lstm.hdf5
Epoch 41/60

Epoch 00041: val_loss improved from 0.83805 to 0.83703, saving model to lstm.hdf5
Epoch 42/60

Epoch 00042: val_loss improved from 0.83703 to 0.

In [9]:
model.load_weights('lstm.hdf5')

In [10]:
y_score = model.predict({'main_input': X_test_temporal, 'aux_input': X_test_cat})

In [11]:
from sklearn.metrics import precision_score, average_precision_score,roc_auc_score

In [12]:
roc_auc_score(y_test, y_score)

0.6639999999999999

In [13]:
average_precision_score(y_test, y_score)

0.42777777777777776

In [14]:
from sklearn.metrics import classification_report
print(classification_report(y_test, (y_score>0.5).astype(int)))

              precision    recall  f1-score   support

           0       0.87      0.80      0.83        25
           1       0.29      0.40      0.33         5

    accuracy                           0.73        30
   macro avg       0.58      0.60      0.58        30
weighted avg       0.77      0.73      0.75        30

