In [1]:
# Code to allow GPU access
import tensorflow.compat.v1 as tf 
tf.enable_eager_execution(tf.ConfigProto(log_device_placement=False)) 
tf.test.gpu_device_name()

import numpy as np
from sklearn.model_selection import KFold
from itertools import product
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, Masking
from sklearn.model_selection import KFold
# import early stopping
from tensorflow.keras.callbacks import EarlyStopping
import pickle
from sklearn.metrics import accuracy_score, recall_score, precision_score

In [2]:

# Parameters to be recorded, along with standard deviation:
# Accuracy 
# Sensitivity(Recall) 
# False Positive Rate 
# Specificity 
# Precision

def create_model(filters=32, kernel_size=3, dropout_rate=0.5):    
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(184217, 17)))  # Adjust the input_shape to match your dataset
    model.add(Conv1D(filters, kernel_size, activation='relu', input_shape=(184217, 17)))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))  # Reduced the number of neurons in the dense layer
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def specificity_score(y_true, y_pred):
    
    # Convert probabilities to binary predictions
    y_pred_bin = np.argmax(y_pred, axis=1)
    y_true_bin = np.argmax(y_true, axis=1)
    
    tn = np.sum((y_true_bin == 0) & (y_pred_bin == 0))
    fp = np.sum((y_true_bin == 0) & (y_pred_bin != 0))
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    return specificity

# Create a nested dictionary to store the results
dataset_results = {'X_measurements': {}, 'X_normalised': {}}

 # Load in dataset from pickle
with open('Pickles/abnormal_encoded.pickle', 'rb') as handle:
    abnormal_encoded = pickle.load(handle)

def train_and_evaluate():

    outer_cv = KFold(n_splits=5, shuffle=True, random_state=42)
    
    # Find best dataset to train and test model on
    dataset_names = ['X_measurements', 'X_normalised']   

    for dataset_name in dataset_names:

         # Load in dataset from pickle
        with open(f'Pickles/{dataset_name}.pickle', 'rb') as handle:
            dataset = pickle.load(handle)

        print(f'Working on dataset: {dataset_name}')

        accuracies = []
        sensitivities = []
        false_positive_rates = []
        specificities = []
        precisions = []

        fold = 1

        for train_index, test_index in outer_cv.split(dataset):

            # Print current progress
            print(f'Working on fold: {fold}')
            fold += 1
            
            X_train, X_test = dataset[train_index], dataset[test_index]
            Y_train, Y_test = abnormal_encoded[train_index], abnormal_encoded[test_index]
            
            model = create_model()
            early_stopping = EarlyStopping(monitor='val_loss', patience=5)
            
            #Fit the model
            print('Fitting model')
            model.fit(X_train, Y_train, epochs=5, batch_size=4, validation_split=0.2, callbacks=[early_stopping], verbose=1)

            # Predict the test set
            print('Predicting test set')
            Y_pred = model.predict(X_test)

            Y_pred_classes = np.argmax(Y_pred, axis=1)
            Y_test_classes = np.argmax(Y_test, axis=1)
            
            # Calulate accuracy, sensitivity, false positive rate, specificity and precision
            accuracies.append(accuracy_score(Y_test_classes, Y_pred_classes))
            sensitivities.append(recall_score(Y_test_classes, Y_pred_classes))
            false_positive_rates.append(1 - specificity_score(Y_test, Y_pred))
            specificities.append(specificity_score(Y_test, Y_pred))
            precisions.append(precision_score(Y_test_classes, Y_pred_classes))



        avg_accuracy = np.mean(accuracies)
        avg_sensitivity = np.mean(sensitivities)
        avg_false_positive_rate = np.mean(false_positive_rates)
        avg_specificity = np.mean(specificities)
        avg_precision = np.mean(precisions)

        std_accuracy = np.std(accuracies)
        std_sensitivity = np.std(sensitivities)
        std_false_positive_rate = np.std(false_positive_rates)
        std_specificity = np.std(specificities)
        std_precision = np.std(precisions)

        dataset_results[dataset_name]['Accuracy'] = (avg_accuracy, std_accuracy)
        dataset_results[dataset_name]['Sensitivity'] = (avg_sensitivity, std_sensitivity)
        dataset_results[dataset_name]['False Positive Rate'] = (avg_false_positive_rate, std_false_positive_rate)
        dataset_results[dataset_name]['Specificity'] = (avg_specificity, std_specificity)
        dataset_results[dataset_name]['Precision'] = (avg_precision, std_precision)

        # Delete dataset to free up memory
        del dataset
        del Y_pred

train_and_evaluate()
print(dataset_results)     


Working on dataset: X_measurements
Working on fold: 1
Fitting model
Train on 102 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set


  _warn_prf(average, modifier, msg_start, len(result))


Working on fold: 2
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set


  _warn_prf(average, modifier, msg_start, len(result))


Working on fold: 3
Fitting model
Train on 103 samples, validate on 26 samples
Epoch 1/5


In [2]:
import tensorflow as tf

def create_model(filters=32, kernel_size=3, dropout_rate=0.5):    
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=(184217, 17)))  # Adjust the input_shape to match your dataset
    model.add(Conv1D(filters, kernel_size, activation='relu', input_shape=(184217, 17)))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(32, activation='relu'))  # Reduced the number of neurons in the dense layer
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def specificity_score(y_true, y_pred):
    
    # Convert probabilities to binary predictions
    y_pred_bin = np.argmax(y_pred, axis=1)
    y_true_bin = np.argmax(y_true, axis=1)
    
    tn = np.sum((y_true_bin == 0) & (y_pred_bin == 0))
    fp = np.sum((y_true_bin == 0) & (y_pred_bin != 0))
    specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
    return specificity

def load_dataset(features_path, labels_path, batch_size=4, validation_split=0.2):
    # Load features and labels from separate pickle files
    with open(features_path, 'rb') as f:
        features = pickle.load(f)
    with open(labels_path, 'rb') as f:
        labels = pickle.load(f)

    # Ensure features and labels are numpy arrays
    features = np.array(features)
    labels = np.array(labels)

    # Create a tf.data.Dataset from features and labels
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))

    # Shuffle the dataset (if not already shuffled) and split into training and validation
    dataset = dataset.shuffle(buffer_size=len(features))
    train_size = int((1 - validation_split) * len(features))
    train_dataset = dataset.take(train_size).batch(batch_size)
    val_dataset = dataset.skip(train_size).batch(batch_size)

    return train_dataset, val_dataset

# Create a nested dictionary to store the results
dataset_results = {'X_measurements': {}, 'X_normalised': {}}

    
from sklearn.model_selection import train_test_split

def train_and_evaluate():
    outer_cv = KFold(n_splits=5, shuffle=True, random_state=42)

    # Find the best dataset to train and test model on
    dataset_names = ['X_measurements', 'X_normalised']

    for dataset_name in dataset_names:
        print(f'Working on dataset: {dataset_name}')

        accuracies = []
        sensitivities = []
        false_positive_rates = []
        specificities = []
        precisions = []

        fold = 1

        # Assuming dataset is already split into features and labels and saved in pickle files
        # Modify to load your specific dataset format as needed
        train_dataset, val_dataset = load_dataset(f'Pickles/{dataset_name}.pickle', 'Pickles/abnormal_encoded.pickle')

        print(f'Working on fold: {fold}')
        fold += 1

        model = create_model()
        early_stopping = EarlyStopping(monitor='val_loss', patience=5)

        # Fit the model
        print('Fitting model')
        model.fit(train_dataset, epochs=5, validation_data=val_dataset, callbacks=[early_stopping], verbose=1)

        # Predict the test set
        print('Predicting test set')
        # Note: Adjust this part if your validation dataset requires different handling
        for X_test, Y_test in val_dataset.unbatch().batch(1):
            Y_pred = model.predict(X_test)
            Y_pred_classes = np.argmax(Y_pred, axis=1)
            Y_test_classes = np.argmax(Y_test, axis=1)

            # Calculate accuracy, sensitivity, false positive rate, specificity, and precision
            accuracies.append(accuracy_score(Y_test_classes, Y_pred_classes))
            sensitivities.append(recall_score(Y_test_classes, Y_pred_classes))
            false_positive_rates.append(1 - specificity_score(Y_test, Y_pred))
            specificities.append(specificity_score(Y_test, Y_pred))
            precisions.append(precision_score(Y_test_classes, Y_pred_classes))

        # Aggregate and print results as before
        # (Your existing code for calculating averages and storing results)
        avg_accuracy = np.mean(accuracies)
        avg_sensitivity = np.mean(sensitivities)
        avg_false_positive_rate = np.mean(false_positive_rates)
        avg_specificity = np.mean(specificities)
        avg_precision = np.mean(precisions)

        std_accuracy = np.std(accuracies)
        std_sensitivity = np.std(sensitivities)
        std_false_positive_rate = np.std(false_positive_rates)
        std_specificity = np.std(specificities)
        std_precision = np.std(precisions)

        dataset_results[dataset_name]['Accuracy'] = (avg_accuracy, std_accuracy)
        dataset_results[dataset_name]['Sensitivity'] = (avg_sensitivity, std_sensitivity)
        dataset_results[dataset_name]['False Positive Rate'] = (avg_false_positive_rate, std_false_positive_rate)
        dataset_results[dataset_name]['Specificity'] = (avg_specificity, std_specificity)
        dataset_results[dataset_name]['Precision'] = (avg_precision, std_precision)

train_and_evaluate()
print(dataset_results)  


Working on dataset: X_measurements
Working on fold: 1
Fitting model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicting test set


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Working on dataset: X_normalised


KeyboardInterrupt: 