# Importing the Libraries

In [1]:
from imblearn.over_sampling import SMOTE
import numpy as np
import pandas as pd
from collections import Counter
from skmultilearn.model_selection.measures import get_combination_wise_output_matrix 
from tensorflow.keras import callbacks
# Importing the Keras libraries and packages
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Activation
from tensorflow.keras.optimizers import Adam

# Loading the dataset

In [2]:
def load_dataset(data_dir=r'D:\Dataset\Combined Datasets'):
    """Load the pre-split train / test / validation CSV files.

    Parameters
    ----------
    data_dir : str, optional
        Directory holding the six ``*_reshaped_multi.csv`` files. Defaults to
        the original hard-coded Windows location (now a raw string, so the
        backslashes are no longer parsed as escape sequences).

    Returns
    -------
    tuple
        ``(X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled,
        X_val_scaled, y_val_scaled)``. Each ``X_*`` array is reshaped to
        ``(rows, columns, 1)``; each ``y_*`` array is returned exactly as
        ``np.loadtxt`` produced it.
    """
    # Local import keeps this cell self-contained (the import cell is untouched).
    import os

    def _read(name, as_sequence=False):
        # Read one comma-delimited file from data_dir.
        path = os.path.join(data_dir, name)
        if not as_sequence:
            return np.loadtxt(path, delimiter=',')
        # ndmin=2 keeps single-row / single-column files two-dimensional, so
        # the shape[1] lookup below cannot fail with an IndexError.
        flat = np.loadtxt(path, delimiter=',', ndmin=2)
        # Append a trailing axis of length 1: (rows, columns) -> (rows, columns, 1).
        return np.reshape(flat, (flat.shape[0], flat.shape[1], 1))

    X_train_scaled = _read('X_train_reshaped_multi.csv', as_sequence=True)
    X_test_scaled = _read('X_test_reshaped_multi.csv', as_sequence=True)
    y_train_scaled = _read('y_train_reshaped_multi.csv')
    y_test_scaled = _read('y_test_reshaped_multi.csv')
    X_val_scaled = _read('X_val_reshaped_multi.csv', as_sequence=True)
    y_val_scaled = _read('y_val_reshaped_multi.csv')
    return X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled, X_val_scaled, y_val_scaled

# Resample the dataset with SMOTE

In [3]:
def smote_resample(X_train_scaled, y_train_scaled, sample_count, n_samples,
                   minority_classes=(2, 3, 12, 13), random_state=None):
    """Oversample selected classes of the training set with SMOTE.

    Only the classes listed in ``minority_classes`` are resampled, each to
    ``sample_count`` samples. The returned per-class counts are derived from
    that same strategy, so they describe the returned data.

    Parameters
    ----------
    X_train_scaled : ndarray
        Training features; flattened to ``(n_samples, -1)`` before resampling.
    y_train_scaled : ndarray
        Training labels handed to ``SMOTE.fit_resample`` unchanged.
        NOTE(review): presumably one-hot encoded, with the dict keys of the
        sampling strategy referring to column indices - confirm.
    sample_count : int
        Target number of samples for every class in ``minority_classes``.
    n_samples : list of int
        Per-class sample counts before resampling, indexed by class id.
        The list is copied; the caller's object is left untouched.
    minority_classes : iterable of int, optional
        Class ids to oversample. Defaults to ``(2, 3, 12, 13)``.
    random_state : int or None, optional
        Seed forwarded to ``SMOTE`` for reproducible resampling.

    Returns
    -------
    tuple
        ``(X_train_smote, y_train_smote, labels_smote_dict, n_samples_smote)``
        where ``labels_smote_dict`` maps class id -> count after resampling
        and ``n_samples_smote`` is the same information as a list.
    """
    minority_classes = tuple(minority_classes)
    strategy = {label: sample_count for label in minority_classes}
    oversample = SMOTE(sampling_strategy=strategy, random_state=random_state)

    # SMOTE works on 2-D feature matrices, so drop any trailing axes.
    X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], -1)

    # A single, targeted fit. Re-fitting a default SMOTE() afterwards would
    # replace this result with data that no longer matches the counts below.
    X_train_smote, y_train_smote = oversample.fit_resample(X_train_reshaped, y_train_scaled)

    # Work on a copy so the caller's pre-resampling statistics stay intact.
    n_samples_smote = list(n_samples)
    for label in minority_classes:
        n_samples_smote[label] = sample_count
    labels_smote_dict = dict(enumerate(n_samples_smote))

    return X_train_smote, y_train_smote, labels_smote_dict, n_samples_smote

# Calculate Dataset Statistics

In [4]:
def count_stats(y_train_scaled):
    """Count how many samples belong to each class.

    Parameters
    ----------
    y_train_scaled : array-like, 2-D
        Label matrix with one column per class; an entry equal to 1 marks
        membership of that row in that column's class.

    Returns
    -------
    list of int
        One count per label column, in column order. Classes without any
        sample are reported as 0, so position ``i`` always refers to class
        ``i`` (omitting empty classes would shift every later count).

    Raises
    ------
    ValueError
        If ``y_train_scaled`` is not two-dimensional.
    """
    labels = np.asarray(y_train_scaled)
    if labels.ndim != 2:
        raise ValueError(
            "count_stats expects a 2-D label matrix, got %d dimension(s)" % labels.ndim
        )
    # Column index of every entry equal to 1, i.e. the class id of each hit.
    class_ids = np.where(labels == 1)[1].tolist()
    counts = Counter(class_ids)
    # Counter returns 0 for absent keys, keeping the list aligned with class ids.
    return [counts[class_id] for class_id in range(labels.shape[1])]

# Calculate Class Weights

In [5]:
def calc_weights(labels_smote_dict, mu=10):
    """Derive a log-scaled weight for every class from its sample count.

    Parameters
    ----------
    labels_smote_dict : dict
        Maps class id -> number of samples. Must contain key ``0``.
    mu : number, optional
        Multiplier applied inside the logarithm (default 10).

    Returns
    -------
    dict
        Maps class id -> ``log(mu * total / count * 14)``, except class ``0``
        which gets ``log(mu * total / count * 2)``.
    """
    n_total = sum(labels_smote_dict.values())

    # NOTE(review): the expression evaluates left to right, i.e.
    # (mu * total / count) * 14 - the factor multiplies, it does not divide.
    # Confirm this is intended rather than mu * total / (count * 14).
    weights = {
        label: np.log(mu * n_total / count * 14)
        for label, count in labels_smote_dict.items()
    }

    # Class 0 uses a factor of 2 instead of 14, replacing the value computed above.
    weights[0] = np.log(mu * n_total / labels_smote_dict[0] * 2)
    return weights

# Creating the LSTM model for multi-class classification on the SMOTE-resampled data

In [6]:
def create_model(X_train_smote, n_classes=15, units=76, dropout_rate=0.2, learning_rate=0.0002):
    """Build and compile the stacked-LSTM classifier.

    The network is four LSTM layers, each followed by Dropout, then a Dense
    layer with a softmax activation. It is compiled with Adam and categorical
    cross-entropy, and its summary is printed.

    Parameters
    ----------
    X_train_smote : ndarray
        Training features; only ``shape[1]`` is read, to set the input
        sequence length (each step has a single feature).
    n_classes : int, optional
        Width of the output layer (default 15).
    units : int, optional
        Number of units in every LSTM layer (default 76).
    dropout_rate : float, optional
        Dropout rate applied after every LSTM layer (default 0.2).
    learning_rate : float, optional
        Adam learning rate (default 0.0002).

    Returns
    -------
    tensorflow.keras.models.Sequential
        The compiled model.
    """
    model = Sequential()

    # First LSTM layer declares the input shape: (timesteps, 1 feature).
    model.add(LSTM(units=units, return_sequences=True,
                   input_shape=(X_train_smote.shape[1], 1)))
    model.add(Dropout(dropout_rate))

    # Second and third LSTM layers keep returning full sequences so the next
    # recurrent layer receives one vector per timestep.
    for _ in range(2):
        model.add(LSTM(units=units, return_sequences=True))
        model.add(Dropout(dropout_rate))

    # Fourth LSTM layer returns only its final output.
    model.add(LSTM(units=units))
    model.add(Dropout(dropout_rate))

    # Output layer: one unit per class, normalised with softmax.
    model.add(Dense(units=n_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    opt = Adam(learning_rate=learning_rate)

    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

# Training the model with class weights

In [7]:
def fit_model(model, X_train_smote, y_train_smote, X_val_scaled, y_val_scaled, dict_class_weights,
              batch_size=1024, epochs=40, patience=5):
    """Train ``model`` with class weights and early stopping on validation loss.

    Parameters
    ----------
    model : object exposing a Keras-style ``fit`` method
        The compiled model to train.
    X_train_smote : ndarray
        Training features; reshaped here to ``(shape[0], shape[1], 1)``.
    y_train_smote : ndarray
        Training targets, passed to ``fit`` unchanged.
    X_val_scaled, y_val_scaled : ndarray
        Validation data, passed to ``fit`` unchanged.
    dict_class_weights : dict
        Maps class id -> weight; forwarded as ``class_weight``.
    batch_size : int, optional
        Mini-batch size (default 1024).
    epochs : int, optional
        Maximum number of epochs (default 40).
    patience : int, optional
        Epochs without ``val_loss`` improvement before stopping (default 5).

    Returns
    -------
    The object returned by ``model.fit`` (the training history).
    """
    # Add the trailing feature axis under a new name, leaving the argument as passed.
    X_train_seq = np.reshape(X_train_smote, (X_train_smote.shape[0], X_train_smote.shape[1], 1))

    earlystopping = callbacks.EarlyStopping(
        monitor="val_loss",
        mode="min",
        patience=patience,
        restore_best_weights=True,
    )

    hist = model.fit(
        X_train_seq,
        y_train_smote,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_val_scaled, y_val_scaled),
        # `callbacks` is documented as a list of callback instances.
        callbacks=[earlystopping],
        class_weight=dict_class_weights,
    )
    return hist

In [None]:
if __name__ == "__main__":
    # Load the pre-split data. The test arrays are loaded but not used in this cell.
    (X_train_scaled, X_test_scaled,
     y_train_scaled, y_test_scaled,
     X_val_scaled, y_val_scaled) = load_dataset()

    # Per-class sample counts of the training labels, as a list and as an
    # index -> count mapping.
    n_samples = count_stats(y_train_scaled)
    labels_dict = dict(enumerate(n_samples))

    # Oversampling target: the largest count among all entries except index 0.
    sample_count = max(n_samples[1:])

    # Oversample the training set with SMOTE and collect the updated counts.
    X_train_smote, y_train_smote, labels_smote_dict, n_samples_smote = smote_resample(
        X_train_scaled, y_train_scaled, sample_count, n_samples
    )

    # Class weights are derived from the post-resampling counts.
    dict_class_weights = calc_weights(labels_smote_dict)

    # Build the network, then train it with validation-based early stopping.
    model = create_model(X_train_smote)
    hist = fit_model(
        model,
        X_train_smote,
        y_train_smote,
        X_val_scaled,
        y_val_scaled,
        dict_class_weights,
    )