In [1]:
# Code to allow GPU access
import tensorflow.compat.v1 as tf 
tf.enable_eager_execution(tf.ConfigProto(log_device_placement=False)) 
tf.test.gpu_device_name()

import numpy as np
from sklearn.model_selection import KFold
from itertools import product
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Masking
from sklearn.model_selection import KFold
# import early stopping
from tensorflow.keras.callbacks import EarlyStopping
import pickle
from sklearn.metrics import accuracy_score, recall_score, precision_score

In [6]:

# Parameters to be recorded, along with standard deviation:
# Accuracy 
# Sensitivity(Recall) 
# False Positive Rate 
# Specificity 
# Precision

def create_model(filters=32, kernel_size=3, dropout_rate=0.5):    
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(19301, 16)))  # Adjust the input_shape to match your dataset
    model.add(Conv1D(filters, kernel_size, activation='relu', input_shape=(19301, 16)))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))  # Reduced the number of neurons in the dense layer
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def specificity_score(y_true, y_pred):
    
    # Convert probabilities to binary predictions
    y_pred_bin = np.argmax(y_pred, axis=1)
    y_true_bin = np.argmax(y_true, axis=1)
    
    tn = np.sum((y_true_bin == 0) & (y_pred_bin == 0))
    fp = np.sum((y_true_bin == 0) & (y_pred_bin != 0))
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    return specificity

# Create a nested dictionary to store the results
dataset_results = {'X_smoothed_mean_norm': {}, 'X_smoothed_median_norm': {}}

 # Load in dataset from pickle
with open('Pickles/abnormal_encoded.pickle', 'rb') as handle:
    abnormal_encoded = pickle.load(handle)

def train_and_evaluate():

    outer_cv = KFold(n_splits=5, shuffle=True, random_state=42)
    
    # Find best dataset to train and test model on
    dataset_names = ['X_smoothed_mean_norm', 'X_smoothed_median_norm']   

    for dataset_name in dataset_names:

         # Load in dataset from pickle
        with open(f'Pickles/{dataset_name}.pickle', 'rb') as handle:
            dataset = pickle.load(handle)
        
        dataset = np.array(dataset)

        print(f'Working on dataset: {dataset_name}')

        accuracies = []
        sensitivities = []
        false_positive_rates = []
        specificities = []
        precisions = []

        fold = 1

        for train_index, test_index in outer_cv.split(dataset):

            # Print current progress
            print(f'Working on fold: {fold}')
            fold += 1
            
            X_train, X_test = dataset[train_index], dataset[test_index]
            Y_train, Y_test = abnormal_encoded[train_index], abnormal_encoded[test_index]
            
            model = create_model()
            early_stopping = EarlyStopping(monitor='val_loss', patience=5)
            
            #Fit the model
            print('Fitting model')
            model.fit(X_train, Y_train, epochs=5, batch_size=4, validation_split=0.2, callbacks=[early_stopping], verbose=1)

            # Predict the test set
            print('Predicting test set')
            Y_pred = model.predict(X_test)

            Y_pred_classes = np.argmax(Y_pred, axis=1)
            Y_test_classes = np.argmax(Y_test, axis=1)
            
            # Calulate accuracy, sensitivity, false positive rate, specificity and precision
            accuracies.append(accuracy_score(Y_test_classes, Y_pred_classes))
            sensitivities.append(recall_score(Y_test_classes, Y_pred_classes))
            false_positive_rates.append(1 - specificity_score(Y_test, Y_pred))
            specificities.append(specificity_score(Y_test, Y_pred))
            precisions.append(precision_score(Y_test_classes, Y_pred_classes))



        avg_accuracy = np.mean(accuracies)
        avg_sensitivity = np.mean(sensitivities)
        avg_false_positive_rate = np.mean(false_positive_rates)
        avg_specificity = np.mean(specificities)
        avg_precision = np.mean(precisions)

        std_accuracy = np.std(accuracies)
        std_sensitivity = np.std(sensitivities)
        std_false_positive_rate = np.std(false_positive_rates)
        std_specificity = np.std(specificities)
        std_precision = np.std(precisions)

        dataset_results[dataset_name]['Accuracy'] = (avg_accuracy, std_accuracy)
        dataset_results[dataset_name]['Sensitivity'] = (avg_sensitivity, std_sensitivity)
        dataset_results[dataset_name]['False Positive Rate'] = (avg_false_positive_rate, std_false_positive_rate)
        dataset_results[dataset_name]['Specificity'] = (avg_specificity, std_specificity)
        dataset_results[dataset_name]['Precision'] = (avg_precision, std_precision)

        # Delete dataset to free up memory
        del dataset
        del Y_pred

train_and_evaluate()
print(dataset_results)     


Working on dataset: X_smoothed_mean_norm
Working on fold: 1
Fitting model
Train on 102 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 2
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 3
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 4
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set


  _warn_prf(average, modifier, msg_start, len(result))


Working on fold: 5
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on dataset: X_smoothed_median_norm
Working on fold: 1
Fitting model
Train on 102 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 2
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 3
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 4
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 5
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
{'X_smoothed_mean_norm': {'Accuracy': (0.646780303030303, 0.099733488

In [7]:
# print dataset results in a nice way:
for dataset_name, results in dataset_results.items():
    print(f'Dataset: {dataset_name}')
    for metric, (avg, std) in results.items():
        print(f'{metric}: {avg} +/- {std}')
    print('\n')

Dataset: X_smoothed_mean_norm
Accuracy: 0.646780303030303 +/- 0.09973348898243625
Sensitivity: 0.25665445665445663 +/- 0.20892954310299036
False Positive Rate: 0.10536689549961861 +/- 0.08887055485462426
Specificity: 0.8946331045003815 +/- 0.08887055485462426
Precision: 0.4442424242424242 +/- 0.23422184236678187


Dataset: X_smoothed_median_norm
Accuracy: 0.6096590909090909 +/- 0.08332343260744607
Sensitivity: 0.2796703296703297 +/- 0.20223134745596413
False Positive Rate: 0.15133790999237223 +/- 0.10233053478672907
Specificity: 0.8486620900076278 +/- 0.10233053478672907
Precision: 0.47555555555555556 +/- 0.08703624901140507




In [12]:

# Parameters to be recorded, along with standard deviation:
# Accuracy 
# Sensitivity(Recall) 
# False Positive Rate 
# Specificity 
# Precision

def create_model(filters=32, kernel_size=3, dropout_rate=0.5):    
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(19301, 17)))  # Adjust the input_shape to match your dataset
    model.add(Conv1D(filters, kernel_size, activation='relu', input_shape=(19301, 17)))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))  # Reduced the number of neurons in the dense layer
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def specificity_score(y_true, y_pred):
    
    # Convert probabilities to binary predictions
    y_pred_bin = np.argmax(y_pred, axis=1)
    y_true_bin = np.argmax(y_true, axis=1)
    
    tn = np.sum((y_true_bin == 0) & (y_pred_bin == 0))
    fp = np.sum((y_true_bin == 0) & (y_pred_bin != 0))
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    return specificity

# Create a nested dictionary to store the results
dataset_results = {'X_smoothed_mean_norm_month': {}, 'X_smoothed_median_norm_month': {}}

 # Load in dataset from pickle
with open('Pickles/abnormal_encoded.pickle', 'rb') as handle:
    abnormal_encoded = pickle.load(handle)

def train_and_evaluate():

    outer_cv = KFold(n_splits=5, shuffle=True, random_state=42)
    
    # Find best dataset to train and test model on
    dataset_names = ['X_smoothed_mean_norm_month', 'X_smoothed_median_norm_month']   

    for dataset_name in dataset_names:

         # Load in dataset from pickle
        with open(f'Pickles/{dataset_name}.pickle', 'rb') as handle:
            dataset = pickle.load(handle)
        
        dataset = np.array(dataset)

        print(f'Working on dataset: {dataset_name}')

        accuracies = []
        sensitivities = []
        false_positive_rates = []
        specificities = []
        precisions = []

        fold = 1

        for train_index, test_index in outer_cv.split(dataset):

            # Print current progress
            print(f'Working on fold: {fold}')
            fold += 1
            
            X_train, X_test = dataset[train_index], dataset[test_index]
            Y_train, Y_test = abnormal_encoded[train_index], abnormal_encoded[test_index]
            
            model = create_model()
            early_stopping = EarlyStopping(monitor='val_loss', patience=5)
            
            #Fit the model
            print('Fitting model')
            model.fit(X_train, Y_train, epochs=5, batch_size=4, validation_split=0.2, callbacks=[early_stopping], verbose=1)

            # Predict the test set
            print('Predicting test set')
            Y_pred = model.predict(X_test)

            Y_pred_classes = np.argmax(Y_pred, axis=1)
            Y_test_classes = np.argmax(Y_test, axis=1)
            
            # Calulate accuracy, sensitivity, false positive rate, specificity and precision
            accuracies.append(accuracy_score(Y_test_classes, Y_pred_classes))
            sensitivities.append(recall_score(Y_test_classes, Y_pred_classes,  average='macro'))
            false_positive_rates.append(1 - specificity_score(Y_test, Y_pred))
            specificities.append(specificity_score(Y_test, Y_pred))
            precisions.append(precision_score(Y_test_classes, Y_pred_classes,  average='macro'))



        avg_accuracy = np.mean(accuracies)
        avg_sensitivity = np.mean(sensitivities)
        avg_false_positive_rate = np.mean(false_positive_rates)
        avg_specificity = np.mean(specificities)
        avg_precision = np.mean(precisions)

        std_accuracy = np.std(accuracies)
        std_sensitivity = np.std(sensitivities)
        std_false_positive_rate = np.std(false_positive_rates)
        std_specificity = np.std(specificities)
        std_precision = np.std(precisions)

        dataset_results[dataset_name]['Accuracy'] = (avg_accuracy, std_accuracy)
        dataset_results[dataset_name]['Sensitivity'] = (avg_sensitivity, std_sensitivity)
        dataset_results[dataset_name]['False Positive Rate'] = (avg_false_positive_rate, std_false_positive_rate)
        dataset_results[dataset_name]['Specificity'] = (avg_specificity, std_specificity)
        dataset_results[dataset_name]['Precision'] = (avg_precision, std_precision)

        # Delete dataset to free up memory
        del dataset
        del Y_pred

train_and_evaluate()
print(dataset_results)     


Working on dataset: X_smoothed_mean_norm_month
Working on fold: 1
Fitting model
Train on 102 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 2
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 3
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 4
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 5
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on dataset: X_smoothed_median_norm_month
Working on fold: 1
Fitting model
Train on 102 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold:

In [13]:
# print dataset results in a nice way:
for dataset_name, results in dataset_results.items():
    print(f'Dataset: {dataset_name}')
    for metric, (avg, std) in results.items():
        print(f'{metric}: {avg} +/- {std}')
    print('\n')

Dataset: X_smoothed_mean_norm_month
Accuracy: 0.696780303030303 +/- 0.09978095269929742
Sensitivity: 0.6442111633599048 +/- 0.07147403266931529
False Positive Rate: 0.1610282227307399 +/- 0.08355097822644932
Specificity: 0.8389717772692601 +/- 0.08355097822644932
Precision: 0.6796386528957743 +/- 0.06602168250829586


Dataset: X_smoothed_median_norm_month
Accuracy: 0.602840909090909 +/- 0.07257174421116831
Sensitivity: 0.5478065649072514 +/- 0.028490875610738826
False Positive Rate: 0.26574218154080853 +/- 0.1448530873374119
Specificity: 0.7342578184591915 +/- 0.1448530873374119
Precision: 0.5717232183021658 +/- 0.05136835587495588




In [19]:

# Parameters to be recorded, along with standard deviation:
# Accuracy 
# Sensitivity(Recall) 
# False Positive Rate 
# Specificity 
# Precision

from sklearn.utils import class_weight


def create_model(filters=32, kernel_size=3, dropout_rate=0.5):    
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(19301, 17)))  # Adjust the input_shape to match your dataset
    model.add(Conv1D(filters, kernel_size, activation='relu', input_shape=(19301, 17)))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))  # Reduced the number of neurons in the dense layer
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def specificity_score(y_true, y_pred):
    
    # Convert probabilities to binary predictions
    y_pred_bin = np.argmax(y_pred, axis=1)
    y_true_bin = np.argmax(y_true, axis=1)
    
    tn = np.sum((y_true_bin == 0) & (y_pred_bin == 0))
    fp = np.sum((y_true_bin == 0) & (y_pred_bin != 0))
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    return specificity

# Create a nested dictionary to store the results
dataset_results = {'X_smoothed_mean_norm_month': {}, 'X_smoothed_median_norm_month': {}}

 # Load in dataset from pickle
with open('Pickles/abnormal_encoded.pickle', 'rb') as handle:
    abnormal_encoded = pickle.load(handle)

def train_and_evaluate():

    outer_cv = KFold(n_splits=5, shuffle=True, random_state=42)
    
    # Find best dataset to train and test model on
    dataset_names = ['X_smoothed_mean_norm_month', 'X_smoothed_median_norm_month']   

    for dataset_name in dataset_names:

         # Load in dataset from pickle
        with open(f'Pickles/{dataset_name}.pickle', 'rb') as handle:
            dataset = pickle.load(handle)
        
        dataset = np.array(dataset)

        print(f'Working on dataset: {dataset_name}')

        accuracies = []
        sensitivities = []
        false_positive_rates = []
        specificities = []
        precisions = []

        fold = 1

        for train_index, test_index in outer_cv.split(dataset):

            # Print current progress
            print(f'Working on fold: {fold}')
            fold += 1
            
            X_train, X_test = dataset[train_index], dataset[test_index]
            Y_train, Y_test = abnormal_encoded[train_index], abnormal_encoded[test_index]

            Y_train_classes = np.argmax(Y_train, axis=1)

            # Compute class weights
            class_weights = class_weight.compute_class_weight('balanced',
                                                  classes=np.unique(Y_train_classes),
                                                  y=Y_train_classes)

            class_weights_dict = dict(enumerate(class_weights))
            
            model = create_model()
            early_stopping = EarlyStopping(monitor='val_loss', patience=5)


            
            #Fit the model
            print('Fitting model')
            model.fit(X_train, Y_train, epochs=5, batch_size=4, validation_split=0.2, callbacks=[early_stopping], verbose=1, class_weight=class_weights_dict)

            # Predict the test set
            print('Predicting test set')
            Y_pred = model.predict(X_test)

            Y_pred_classes = np.argmax(Y_pred, axis=1)
            Y_test_classes = np.argmax(Y_test, axis=1)
            
            # Calulate accuracy, sensitivity, false positive rate, specificity and precision
            accuracies.append(accuracy_score(Y_test_classes, Y_pred_classes))
            sensitivities.append(recall_score(Y_test_classes, Y_pred_classes,  average='macro'))
            false_positive_rates.append(1 - specificity_score(Y_test, Y_pred))
            specificities.append(specificity_score(Y_test, Y_pred))
            precisions.append(precision_score(Y_test_classes, Y_pred_classes,  average='macro'))



        avg_accuracy = np.mean(accuracies)
        avg_sensitivity = np.mean(sensitivities)
        avg_false_positive_rate = np.mean(false_positive_rates)
        avg_specificity = np.mean(specificities)
        avg_precision = np.mean(precisions)

        std_accuracy = np.std(accuracies)
        std_sensitivity = np.std(sensitivities)
        std_false_positive_rate = np.std(false_positive_rates)
        std_specificity = np.std(specificities)
        std_precision = np.std(precisions)

        dataset_results[dataset_name]['Accuracy'] = (avg_accuracy, std_accuracy)
        dataset_results[dataset_name]['Sensitivity'] = (avg_sensitivity, std_sensitivity)
        dataset_results[dataset_name]['False Positive Rate'] = (avg_false_positive_rate, std_false_positive_rate)
        dataset_results[dataset_name]['Specificity'] = (avg_specificity, std_specificity)
        dataset_results[dataset_name]['Precision'] = (avg_precision, std_precision)

        # Delete dataset to free up memory
        del dataset
        del Y_pred

train_and_evaluate()

# print dataset results in a nice way:
for dataset_name, results in dataset_results.items():
    print(f'Dataset: {dataset_name}')
    for metric, (avg, std) in results.items():
        print(f'{metric}: {avg} +/- {std}')
    print('\n')


Working on dataset: X_smoothed_mean_norm_month
Working on fold: 1
Fitting model
Train on 102 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 2
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 3
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 4
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold: 5
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on dataset: X_smoothed_median_norm_month
Working on fold: 1
Fitting model
Train on 102 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set
Working on fold:

In [22]:
%pip install --upgrade tensorflow numpy


Collecting tensorflowNote: you may need to restart the kernel to use updated packages.


ERROR: tensorflow-directml 1.15.8 has requirement numpy<1.19.0,>=1.16.0, but you'll have numpy 1.21.6 which is incompatible.
ERROR: tensorflow-directml 1.15.8 has requirement tensorboard<1.16.0,>=1.15.0, but you'll have tensorboard 2.11.2 which is incompatible.
ERROR: tensorflow-directml 1.15.8 has requirement tensorflow-estimator==1.15.1, but you'll have tensorflow-estimator 2.11.0 which is incompatible.
You should consider upgrading via the 'c:\Users\User\AppData\Local\Programs\Python\Python37\python.exe -m pip install --upgrade pip' command.



  Using cached tensorflow-2.11.0-cp37-cp37m-win_amd64.whl (1.9 kB)
Requirement already up-to-date: numpy in c:\users\user\appdata\local\programs\python\python37\lib\site-packages (1.21.6)
Collecting tensorflow-intel==2.11.0; platform_system == "Windows"
  Using cached tensorflow_intel-2.11.0-cp37-cp37m-win_amd64.whl (266.3 MB)
Collecting tensorflow-estimator<2.12,>=2.11.0
  Using cached tensorflow_estimator-2.11.0-py2.py3-none-any.whl (439 kB)
Collecting tensorflow-io-gcs-filesystem>=0.23.1; platform_machine != "arm64" or platform_system != "Darwin"
  Using cached tensorflow_io_gcs_filesystem-0.31.0-cp37-cp37m-win_amd64.whl (1.5 MB)
Collecting tensorboard<2.12,>=2.11
  Using cached tensorboard-2.11.2-py3-none-any.whl (6.0 MB)
Collecting libclang>=13.0.0
  Using cached libclang-16.0.6-py2.py3-none-win_amd64.whl (24.4 MB)
Collecting flatbuffers>=2.0
  Using cached flatbuffers-23.5.26-py2.py3-none-any.whl (26 kB)
Installing collected packages: tensorflow-estimator, tensorflow-io-gcs-file

In [12]:

# Parameters to be recorded, along with standard deviation:
# Accuracy 
# Sensitivity(Recall) 
# False Positive Rate 
# Specificity 
# Precision

import logging

# Suppress TensorFlow warnings
tf.get_logger().setLevel('ERROR')

# Alternatively, to suppress all warnings including those not from TensorFlow
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# If you also want to suppress warnings from other libraries, you can use
import warnings
warnings.filterwarnings('ignore')


tf.autograph.set_verbosity(0)

def create_model(filters=32, kernel_size=3, dropout_rate=0.5):    
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(19301, 16)))  # Adjust the input_shape to match your dataset
    model.add(Conv1D(filters, kernel_size, activation='relu', input_shape=(19301, 16)))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))  # Reduced the number of neurons in the dense layer
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Masking

def create_lstm_model(input_shape=(19301, 16), lstm_units=32, dropout_rate=0.5, output_classes=3):
    model = Sequential()
    # Masking layer to ignore the padded values
    model.add(Masking(mask_value=0., input_shape=input_shape))
    
    # LSTM layer
    model.add(LSTM(lstm_units, return_sequences=False))  # 'return_sequences=False' because we only need the last output
    
    # Dropout for regularization
    model.add(Dropout(dropout_rate))
    
    # A Dense layer for further processing
    model.add(Dense(lstm_units, activation='relu'))
    
    # The output layer with softmax activation for classification
    model.add(Dense(output_classes, activation='softmax'))
    
    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model

@tf.autograph.experimental.do_not_convert
def create_simple_lstm_model():
    model = Sequential()
    model.add(LSTM(32, input_shape=(19301, 16)))  # Adjust input_shape as needed
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model


def specificity_score(y_true, y_pred):
    
    # Convert probabilities to binary predictions
    y_pred_bin = np.argmax(y_pred, axis=1)
    y_true_bin = np.argmax(y_true, axis=1)
    
    tn = np.sum((y_true_bin == 0) & (y_pred_bin == 0))
    fp = np.sum((y_true_bin == 0) & (y_pred_bin != 0))
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    return specificity

# Create a nested dictionary to store the results
dataset_results = {'X_smoothed_mean_norm': {}, 'X_smoothed_median_norm': {}}

 # Load in dataset from pickle
with open('Pickles/abnormal_encoded.pickle', 'rb') as handle:
    abnormal_encoded = pickle.load(handle)

@tf.autograph.experimental.do_not_convert
def train_and_evaluate():

    outer_cv = KFold(n_splits=3, shuffle=True, random_state=42)
    
    # Find best dataset to train and test model on
    dataset_names = ['X_smoothed_mean_norm']   

    for dataset_name in dataset_names:

         # Load in dataset from pickle
        with open(f'Pickles/{dataset_name}.pickle', 'rb') as handle:
            dataset = pickle.load(handle)
        
        dataset = np.array(dataset)

        print(f'Working on dataset: {dataset_name}')

        accuracies = []
        sensitivities = []
        false_positive_rates = []
        specificities = []
        precisions = []

        fold = 1

        for train_index, test_index in outer_cv.split(dataset):

            # Print current progress
            print(f'Working on fold: {fold}')
            fold += 1
            
            X_train, X_test = dataset[train_index], dataset[test_index]
            Y_train, Y_test = abnormal_encoded[train_index], abnormal_encoded[test_index]

            X_train = X_train.astype('float32')
            Y_train = Y_train.astype('float32')
            X_test = X_test.astype('float32')
            Y_test = Y_test.astype('float32')

            
            model = create_simple_lstm_model()
            early_stopping = EarlyStopping(monitor='val_loss', patience=5)
            
            #Fit the model
            print('Fitting model')
            model.fit(X_train, Y_train, epochs=5, batch_size=4, validation_split=0.2, callbacks=[early_stopping], verbose=1)

            # Predict the test set
            print('Predicting test set')
            Y_pred = model.predict(X_test)

            Y_pred_classes = np.argmax(Y_pred, axis=1)
            Y_test_classes = np.argmax(Y_test, axis=1)
            
            # Calulate accuracy, sensitivity, false positive rate, specificity and precision
            accuracies.append(accuracy_score(Y_test_classes, Y_pred_classes))
            sensitivities.append(recall_score(Y_test_classes, Y_pred_classes, average='macro'))
            false_positive_rates.append(1 - specificity_score(Y_test, Y_pred, average='macro'))
            specificities.append(specificity_score(Y_test, Y_pred))
            precisions.append(precision_score(Y_test_classes, Y_pred_classes, average='macro'))



        avg_accuracy = np.mean(accuracies)
        avg_sensitivity = np.mean(sensitivities)
        avg_false_positive_rate = np.mean(false_positive_rates)
        avg_specificity = np.mean(specificities)
        avg_precision = np.mean(precisions)

        std_accuracy = np.std(accuracies)
        std_sensitivity = np.std(sensitivities)
        std_false_positive_rate = np.std(false_positive_rates)
        std_specificity = np.std(specificities)
        std_precision = np.std(precisions)

        dataset_results[dataset_name]['Accuracy'] = (avg_accuracy, std_accuracy)
        dataset_results[dataset_name]['Sensitivity'] = (avg_sensitivity, std_sensitivity)
        dataset_results[dataset_name]['False Positive Rate'] = (avg_false_positive_rate, std_false_positive_rate)
        dataset_results[dataset_name]['Specificity'] = (avg_specificity, std_specificity)
        dataset_results[dataset_name]['Precision'] = (avg_precision, std_precision)

        # Delete dataset to free up memory
        del dataset
        del Y_pred

train_and_evaluate()
# print dataset results in a nice way:
for dataset_name, results in dataset_results.items():
    print(f'Dataset: {dataset_name}')
    for metric, (avg, std) in results.items():
        print(f'{metric}: {avg} +/- {std}')
    print('\n')
    


Working on dataset: X_smoothed_mean_norm
Working on fold: 1
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set


TypeError: specificity_score() got an unexpected keyword argument 'average'

In [13]:
def train_and_evaluate(dataset_names, create_model_fn):

    outer_cv = KFold(n_splits=3, shuffle=True, random_state=42)
    
    # Find best dataset to train and test model on
    #dataset_names = ['X_smoothed_mean_norm']   

    for dataset_name in dataset_names:

         # Load in dataset from pickle
        with open(f'Pickles/{dataset_name}.pickle', 'rb') as handle:
            dataset = pickle.load(handle)
        
        dataset = np.array(dataset)

        print(f'Working on dataset: {dataset_name}')

        accuracies = []
        sensitivities = []
        false_positive_rates = []
        specificities = []
        precisions = []

        fold = 1

        for train_index, test_index in outer_cv.split(dataset):

            # Print current progress
            print(f'Working on fold: {fold}')
            fold += 1
            
            X_train, X_test = dataset[train_index], dataset[test_index]
            Y_train, Y_test = abnormal_encoded[train_index], abnormal_encoded[test_index]

            X_train = X_train.astype('float32')
            Y_train = Y_train.astype('float32')
            X_test = X_test.astype('float32')
            Y_test = Y_test.astype('float32')

            
            model = create_model_fn()
            early_stopping = EarlyStopping(monitor='val_loss', patience=5)
            
            #Fit the model
            print('Fitting model')
            model.fit(X_train, Y_train, epochs=5, batch_size=4, validation_split=0.2, callbacks=[early_stopping], verbose=1)

            # Predict the test set
            print('Predicting test set')
            Y_pred = model.predict(X_test)

            Y_pred_classes = np.argmax(Y_pred, axis=1)
            Y_test_classes = np.argmax(Y_test, axis=1)
            
            # Calulate accuracy, sensitivity, false positive rate, specificity and precision
            accuracies.append(accuracy_score(Y_test_classes, Y_pred_classes))
            sensitivities.append(recall_score(Y_test_classes, Y_pred_classes, average='macro'))
            false_positive_rates.append(1 - specificity_score(Y_test, Y_pred, average='macro'))
            specificities.append(specificity_score(Y_test, Y_pred))
            precisions.append(precision_score(Y_test_classes, Y_pred_classes, average='macro'))



        avg_accuracy = np.mean(accuracies)
        avg_sensitivity = np.mean(sensitivities)
        avg_false_positive_rate = np.mean(false_positive_rates)
        avg_specificity = np.mean(specificities)
        avg_precision = np.mean(precisions)

        std_accuracy = np.std(accuracies)
        std_sensitivity = np.std(sensitivities)
        std_false_positive_rate = np.std(false_positive_rates)
        std_specificity = np.std(specificities)
        std_precision = np.std(precisions)

        dataset_results[dataset_name]['Accuracy'] = (avg_accuracy, std_accuracy)
        dataset_results[dataset_name]['Sensitivity'] = (avg_sensitivity, std_sensitivity)
        dataset_results[dataset_name]['False Positive Rate'] = (avg_false_positive_rate, std_false_positive_rate)
        dataset_results[dataset_name]['Specificity'] = (avg_specificity, std_specificity)
        dataset_results[dataset_name]['Precision'] = (avg_precision, std_precision)

        # Delete dataset to free up memory
        del dataset
        del Y_pred


In [14]:
train_and_evaluate(['X_smoothed_mean_norm'] , create_lstm_model)

Working on dataset: X_smoothed_mean_norm
Working on fold: 1
Fitting model
Epoch 1/5


KeyboardInterrupt: 