In [10]:
#import tensorflow.compat.v1 as tf
#tf.enable_eager_execution(tf.ConfigProto(log_device_placement=False))
#tf.test.gpu_device_name()

import numpy as np
from sklearn.model_selection import KFold
from itertools import product
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Masking
from sklearn.model_selection import KFold
# import early stopping
from tensorflow.keras.callbacks import EarlyStopping
import pickle
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.utils import class_weight

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
def specificity_score(y_true, y_pred):

    # Convert probabilities to binary predictions
    y_pred_bin = np.argmax(y_pred, axis=1)
    y_true_bin = np.argmax(y_true, axis=1)

    tn = np.sum((y_true_bin == 0) & (y_pred_bin == 0))
    fp = np.sum((y_true_bin == 0) & (y_pred_bin != 0))
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    return specificity

In [24]:
def train_and_evaluate(dataset_names, create_model_fn, class_weights):

    outer_cv = KFold(n_splits=3, shuffle=True, random_state=42)

    # Find best dataset to train and test model on
    #dataset_names = ['X_smoothed_mean_norm']

    for dataset_name in dataset_names:

         # Load in dataset from pickle
        with open(f'drive/MyDrive/Pickles/{dataset_name}.pickle', 'rb') as handle:
            dataset = pickle.load(handle)

        dataset = np.array(dataset)

        print(f'Working on dataset: {dataset_name}')

        accuracies = []
        sensitivities = []
        false_positive_rates = []
        specificities = []
        precisions = []

        fold = 1

        for train_index, test_index in outer_cv.split(dataset):

            # Print current progress
            print(f'Working on fold: {fold}')
            fold += 1

            X_train, X_test = dataset[train_index], dataset[test_index]
            Y_train, Y_test = abnormal_encoded[train_index], abnormal_encoded[test_index]

            X_train = X_train.astype('float32')
            Y_train = Y_train.astype('float32')
            X_test = X_test.astype('float32')
            Y_test = Y_test.astype('float32')

            model = create_model_fn(X_train.shape[1:])
            early_stopping = EarlyStopping(monitor='val_loss', patience=3)

            if class_weights:

                Y_train_classes = np.argmax(Y_train, axis=1)

                # Compute class weights
                cw = class_weight.compute_class_weight('balanced',
                                                    classes=np.unique(Y_train_classes),
                                                    y=Y_train_classes)

                class_weights_dict = dict(enumerate(cw))

                #Fit the model
                print('Fitting model')
                model.fit(X_train, Y_train, epochs=5, batch_size=4, validation_split=0.2, callbacks=[early_stopping], verbose=1, class_weight=class_weights_dict)

            else:
                #Fit the model
                print('Fitting model')
                model.fit(X_train, Y_train, epochs=5, batch_size=4, validation_split=0.2, callbacks=[early_stopping], verbose=1)

            # Predict the test set
            print('Predicting test set')
            Y_pred = model.predict(X_test)

            Y_pred_classes = np.argmax(Y_pred, axis=1)
            Y_test_classes = np.argmax(Y_test, axis=1)

            # Calulate accuracy, sensitivity, false positive rate, specificity and precision
            accuracies.append(accuracy_score(Y_test_classes, Y_pred_classes))
            sensitivities.append(recall_score(Y_test_classes, Y_pred_classes, average='macro'))
            false_positive_rates.append(1 - specificity_score(Y_test, Y_pred))
            specificities.append(specificity_score(Y_test, Y_pred))
            precisions.append(precision_score(Y_test_classes, Y_pred_classes, average='macro'))



        avg_accuracy = np.mean(accuracies)
        avg_sensitivity = np.mean(sensitivities)
        avg_false_positive_rate = np.mean(false_positive_rates)
        avg_specificity = np.mean(specificities)
        avg_precision = np.mean(precisions)

        std_accuracy = np.std(accuracies)
        std_sensitivity = np.std(sensitivities)
        std_false_positive_rate = np.std(false_positive_rates)
        std_specificity = np.std(specificities)
        std_precision = np.std(precisions)

        dataset_results[dataset_name]['Accuracy'] = (avg_accuracy, std_accuracy)
        dataset_results[dataset_name]['Sensitivity'] = (avg_sensitivity, std_sensitivity)
        dataset_results[dataset_name]['False Positive Rate'] = (avg_false_positive_rate, std_false_positive_rate)
        dataset_results[dataset_name]['Specificity'] = (avg_specificity, std_specificity)
        dataset_results[dataset_name]['Precision'] = (avg_precision, std_precision)

        for dataset_name, results in dataset_results.items():
            print(f'Dataset: {dataset_name}')
            for metric, (avg, std) in results.items():
                print(f'{metric}: {avg} +/- {std}')
            print('\n')


        # Delete dataset to free up memory
        del dataset
        del Y_pred
    return dataset_results

In [25]:
# Model functions

def create_cnn_model(shape, filters=32, kernel_size=3, dropout_rate=0.5):
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=shape))  # Adjust the input_shape to match your dataset
    model.add(Conv1D(filters, kernel_size, activation='relu', input_shape=(19301, 16)))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))  # Reduced the number of neurons in the dense layer
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_lstm_model(shape, lstm_units=32, dropout_rate=0.5, output_classes=3):
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=shape))
    model.add(LSTM(lstm_units, return_sequences=False))  # 'return_sequences=False' because we only need the last output
    model.add(Dropout(dropout_rate))
    model.add(Dense(lstm_units, activation='relu'))
    model.add(Dense(output_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_gru_model(shape, gru_units=32, dropout_rate=0.5, output_classes=3):
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=shape))
    model.add(GRU(gru_units, return_sequences=False))  # return_sequences=False because we only need the last output
    model.add(Dropout(dropout_rate))
    model.add(Dense(gru_units, activation='relu'))
    model.add(Dense(output_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [8]:
 # Load in dataset from pickle
with open('drive/MyDrive/Pickles/abnormal_encoded.pickle', 'rb') as handle:
    abnormal_encoded = pickle.load(handle)

In [27]:
dataset_results = {'X_smoothed_mean_norm_month': {}, 'X_smoothed_median_norm_month': {}, 'X_smoothed_mean_norm': {}, 'X_smoothed_median_norm': {}}

cnn_dataset_result_cw = train_and_evaluate(['X_smoothed_mean_norm', 'X_smoothed_mean_norm_month','X_smoothed_median_norm', 'X_smoothed_median_norm_month'], create_cnn_model, True)



Working on dataset: X_smoothed_mean_norm
Working on fold: 1
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Predicting test set
Working on fold: 2
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Predicting test set
Working on fold: 3
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Dataset: X_smoothed_mean_norm_month


Dataset: X_smoothed_median_norm_month


Dataset: X_smoothed_mean_norm
Accuracy: 0.6024924295364547 +/- 0.022698545586030232
Sensitivity: 0.5954031005114596 +/- 0.026547838177203346
False Positive Rate: 0.35944121238238885 +/- 0.05336288150401101
Specificity: 0.6405587876176112 +/- 0.05336288150401101
Precision: 0.5906391501219087 +/- 0.03294091467480022


Dataset: X_smoothed_median_norm


Working on dataset: X_smoothed_mean_norm_month
Working on fold: 1
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 2
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predict

In [28]:
cnn_dataset_result_ncw = train_and_evaluate(['X_smoothed_mean_norm', 'X_smoothed_mean_norm_month','X_smoothed_median_norm', 'X_smoothed_median_norm_month'], create_cnn_model, False)


Working on dataset: X_smoothed_mean_norm
Working on fold: 1
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Predicting test set
Working on fold: 2
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 3
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Dataset: X_smoothed_mean_norm_month
Accuracy: 0.5284183554623806 +/- 0.12529679807062674
Sensitivity: 0.6054122863720387 +/- 0.05393824439757144
False Positive Rate: 0.5861164978812038 +/- 0.2138975519880295
Specificity: 0.41388350211879626 +/- 0.2138975519880295
Precision: 0.6159892094489617 +/- 0.03942193675566511


Dataset: X_smoothed_median_norm_month
Accuracy: 0.6087817377125554 +/- 0.024707284922990343
Sensitivity: 0.5496908750004725 +/- 0.022459985618095736
False Positive Rate: 0.21487825899590607 +/- 0.025739180275967184
Specificity: 0.7851217410040939 +/- 0.025739180275967184
Precision: 0.5611061738147746 +/- 0.029737200420561147


Dataset: X_

  _warn_prf(average, modifier, msg_start, len(result))


Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 3
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Predicting test set
Dataset: X_smoothed_mean_norm_month
Accuracy: 0.5893314698346145 +/- 0.08317934416473478
Sensitivity: 0.5943452144690534 +/- 0.02854351557406235
False Positive Rate: 0.3501759678230267 +/- 0.3309075558924957
Specificity: 0.6498240321769734 +/- 0.3309075558924957
Precision: 0.6681504292646508 +/- 0.09588601144841005


Dataset: X_smoothed_median_norm_month
Accuracy: 0.47262986256696954 +/- 0.1003897780199082
Sensitivity: 0.55306686777275 +/- 0.07686810635873144
False Positive Rate: 0.516088486676722 +/- 0.3735401705280771
Specificity: 0.48391151332327803 +/- 0.37354017052807703
Precision: 0.49363015497767265 +/- 0.19062817030587245


Dataset: X_smoothed_mean_norm
Accuracy: 0.5159562077801071 +/- 0.1165314296316961
Sensitivity: 0.5244137871072856 +/- 0.05672229597543082
False Positive Rate: 0.3939524527759821 +/- 0

In [None]:
lstm_dataset_results_cw = train_and_evaluate(['X_smoothed_mean_norm', 'X_smoothed_mean_norm_month','X_smoothed_median_norm', 'X_smoothed_median_norm_month'], create_lstm_model, True)


In [None]:
lstm_dataset_results_ncw = train_and_evaluate(['X_smoothed_mean_norm', 'X_smoothed_mean_norm_month','X_smoothed_median_norm', 'X_smoothed_median_norm_month'], create_lstm_model, False)


In [None]:
gru_dataset_results_cw = train_and_evaluate(['X_smoothed_mean_norm', 'X_smoothed_mean_norm_month','X_smoothed_median_norm', 'X_smoothed_median_norm_month'], create_gru_model, True)


In [None]:
gru_dataset_results_ncw = train_and_evaluate(['X_smoothed_mean_norm', 'X_smoothed_mean_norm_month','X_smoothed_median_norm', 'X_smoothed_median_norm_month'], create_gru_model, False)
