In [1]:
# Code to allow GPU access
# The TF1 compatibility API is used because ConfigProto / enable_eager_execution live there.
import tensorflow.compat.v1 as tf 
# Switch on eager execution; log_device_placement=False is passed so device placement is not logged.
tf.enable_eager_execution(tf.ConfigProto(log_device_placement=False)) 
# Kept as the last expression so the notebook displays the detected GPU device name
# (the recorded run shows '/device:DML:0').
tf.test.gpu_device_name()

'/device:DML:0'

In [2]:
import numpy as np
from sklearn.model_selection import KFold
from itertools import product
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.model_selection import KFold
# import early stopping
from tensorflow.keras.callbacks import EarlyStopping
import pickle


In [3]:
# open accelData pickle
# NOTE(review): pickle.load can execute arbitrary code while deserialising, so only load
# pickle files that were produced by this project.
with open('Pickles/accelData.pickle', 'rb') as handle:
    # accelData is indexed by string keys further down (e.g. 'Measurements', 'Abnormal').
    accelData = pickle.load(handle)

In [4]:
# Show which fields accelData provides before building feature and label arrays from them.
print(accelData.keys())

dict_keys(['Measurements', 'Month', 'Code', 'Abnormal', 'AIMS', 'Optimality', 'Normalised', 'Segmented Normalised', 'Segmented'])


In [5]:
# Build the four candidate feature arrays from accelData, in this order:
#   'Measurements', 'Normalised'            -> unsegmented data
#   'Segmented Normalised', 'Segmented'     -> segmented data
# Every entry is converted to a NumPy array.
X_measurements, X_normalised, X_segmented_normalised, X_segmented = (
    np.array(accelData[field])
    for field in ('Measurements', 'Normalised', 'Segmented Normalised', 'Segmented')
)

# Print one shape per line: the two unsegmented arrays first, then the two segmented ones.
# A generator expression is used instead of a for-loop so that no leftover loop variable
# keeps a reference to one of these large arrays.
print(
    *(features.shape for features in
      (X_measurements, X_normalised, X_segmented_normalised, X_segmented)),
    sep='\n',
)



(161, 184217, 17)
(161, 184217, 17)
(161, 1842, 100, 17)
(161, 1842, 100, 17)


In [6]:
# Label arrays for multitask prediction: the model should be able to predict
# optimality, abnormal and AIMS.
optimality, abnormal, AIMS = (
    np.array(accelData[field]) for field in ('Optimality', 'Abnormal', 'AIMS')
)

# One shape per line, in the order optimality, abnormal, AIMS.
print(optimality.shape, abnormal.shape, AIMS.shape, sep='\n')

from tensorflow.compat.v1.keras.utils import to_categorical

# One-hot encode the abnormal labels into three classes.
abnormal_encoded = to_categorical(abnormal, num_classes=3)

(161,)
(161,)
(161,)


In [7]:
# Persist every array to its own pickle file so that later cells can load only the
# dataset they need, then drop the in-memory copies to reduce memory usage.
def _dump_pickle(obj, path):
    """Serialise obj to the file at path using the highest available pickle protocol."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)


# Feature arrays
_dump_pickle(X_measurements, 'Pickles/X_measurements.pickle')
_dump_pickle(X_normalised, 'Pickles/X_normalised.pickle')
_dump_pickle(X_segmented_normalised, 'Pickles/X_segmented_normalised.pickle')
_dump_pickle(X_segmented, 'Pickles/X_segmented.pickle')

# Label arrays
_dump_pickle(optimality, 'Pickles/optimality.pickle')
_dump_pickle(abnormal, 'Pickles/abnormal.pickle')
_dump_pickle(AIMS, 'Pickles/AIMS.pickle')

# Delete variables to free up memory
del X_measurements, X_normalised, X_segmented_normalised, X_segmented
del optimality, abnormal, AIMS
del accelData


In [3]:
def create_model(filters=32, kernel_size=3, dropout_rate=0.5,
                 input_shape=(184217, 17), num_classes=3):
    """Build and compile a small 1D convolutional classifier.

    Layer stack: Conv1D -> MaxPooling1D(2) -> Dropout -> Flatten -> Dense(50, relu)
    -> Dropout -> Dense(num_classes, softmax). The model is compiled with the Adam
    optimiser, categorical cross-entropy loss and accuracy as the tracked metric, so
    the targets must be one-hot encoded.

    Parameters
    ----------
    filters : int
        Number of convolution filters in the Conv1D layer.
    kernel_size : int
        Width of the convolution kernel.
    dropout_rate : float
        Dropout rate applied after pooling and after the hidden dense layer.
    input_shape : tuple
        Shape of one sample, excluding the batch dimension. The default (184217, 17)
        matches the shape printed for the unsegmented feature arrays in this notebook.
    num_classes : int
        Number of units in the softmax output layer. The default of 3 matches the
        three-class one-hot encoding of the abnormal labels.

    Returns
    -------
    A compiled Keras Sequential model.
    """
    model = Sequential([
        Conv1D(filters, kernel_size, activation='relu', input_shape=tuple(input_shape)),
        MaxPooling1D(2),
        Dropout(dropout_rate),
        Flatten(),
        # Deliberately small hidden layer: the flattened convolution output is very wide.
        Dense(50, activation='relu'),
        Dropout(dropout_rate),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [3]:
from tensorflow.compat.v1.keras.utils import to_categorical

# Load the abnormal labels that an earlier cell saved to pickle
with open('Pickles/abnormal.pickle', 'rb') as handle:
    abnormal = pickle.load(handle)

# One-hot encode the abnormal labels into 3 classes (these are the training targets)
abnormal_encoded = to_categorical(abnormal, num_classes=3)

# Save abnormal_encoded so the training cell can load it from disk
with open('Pickles/abnormal_encoded.pickle', 'wb') as handle:
    pickle.dump(abnormal_encoded, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
# Metrics recorded for every dataset, each together with its standard deviation:
#   Accuracy, Sensitivity (Recall), False Positive Rate, Specificity, Precision

# Nested results store: one (initially empty) dict per candidate dataset
dataset_results = {name: {} for name in ('X_measurements', 'X_normalised')}

# Load the one-hot encoded labels from pickle
with open('Pickles/abnormal_encoded.pickle', 'rb') as handle:
    abnormal_encoded = pickle.load(handle)

def _macro_classification_metrics(y_true, y_pred, num_classes):
    """Compute accuracy and macro-averaged one-vs-rest metrics from integer class labels.

    Parameters
    ----------
    y_true, y_pred : array-like of int
        True and predicted class indices, each in the range [0, num_classes).
    num_classes : int
        Total number of classes; fixes the confusion-matrix size even when a class
        does not occur in the given fold.

    Returns
    -------
    dict
        Keys 'Accuracy', 'Sensitivity', 'False Positive Rate', 'Specificity' and
        'Precision', each mapped to a float. Per-class ratios whose denominator is
        zero (e.g. a class absent from the fold) count as 0.0 in the macro average.
    """
    y_true = np.asarray(y_true, dtype=np.int64).ravel()
    y_pred = np.asarray(y_pred, dtype=np.int64).ravel()

    # confusion[i, j] counts the samples whose true class is i and predicted class is j.
    confusion = np.zeros((num_classes, num_classes), dtype=np.int64)
    np.add.at(confusion, (y_true, y_pred), 1)

    total = confusion.sum()
    true_pos = np.diag(confusion).astype(float)
    false_neg = confusion.sum(axis=1) - true_pos
    false_pos = confusion.sum(axis=0) - true_pos
    true_neg = total - true_pos - false_neg - false_pos

    def safe_ratio(numerator, denominator):
        # Element-wise division that yields 0.0 wherever the denominator is zero.
        ratio = np.zeros_like(numerator, dtype=float)
        np.divide(numerator, denominator, out=ratio, where=denominator > 0)
        return ratio

    return {
        'Accuracy': float(true_pos.sum() / total) if total else 0.0,
        'Sensitivity': float(safe_ratio(true_pos, true_pos + false_neg).mean()),
        'False Positive Rate': float(safe_ratio(false_pos, false_pos + true_neg).mean()),
        'Specificity': float(safe_ratio(true_neg, true_neg + false_pos).mean()),
        'Precision': float(safe_ratio(true_pos, true_pos + false_pos).mean()),
    }


def train_and_evaluate():
    """Cross-validate the CNN on each candidate dataset and record the metrics.

    For 'X_measurements' and 'X_normalised' the dataset is loaded from its pickle and
    evaluated with a shuffled 5-fold split (random_state=42). In every fold a fresh
    model from create_model() is trained for up to 10 epochs (batch size 32, 20% of the
    training fold held out for validation, early stopping on val_loss with patience 5)
    and then used to predict the test fold.

    The softmax probabilities and the one-hot targets are both converted to class
    indices with argmax before scoring; metrics are macro-averaged over the classes.

    Results are written into the module-level dataset_results dict as
    dataset_results[dataset_name][metric] = (mean over folds, std over folds).
    Nothing is returned.
    """
    outer_cv = KFold(n_splits=5, shuffle=True, random_state=42)

    # Find best dataset to train and test model on
    dataset_names = ['X_measurements', 'X_normalised']

    # The number of classes is the width of the one-hot encoded targets.
    num_classes = abnormal_encoded.shape[1]
    metric_names = ('Accuracy', 'Sensitivity', 'False Positive Rate', 'Specificity', 'Precision')

    for dataset_name in dataset_names:

        # Load in dataset from pickle
        with open(f'Pickles/{dataset_name}.pickle', 'rb') as handle:
            dataset = pickle.load(handle)

        print(f'Working on dataset: {dataset_name}')

        # One list of per-fold scores for every metric
        fold_scores = {name: [] for name in metric_names}

        for fold, (train_index, test_index) in enumerate(outer_cv.split(dataset), start=1):

            # Print current progress
            print(f'Working on fold: {fold}')

            X_train, X_test = dataset[train_index], dataset[test_index]
            Y_train, Y_test = abnormal_encoded[train_index], abnormal_encoded[test_index]

            model = create_model()
            early_stopping = EarlyStopping(monitor='val_loss', patience=5)

            # Fit the model
            print('Fitting model')
            model.fit(X_train, Y_train, epochs=10, batch_size=32, validation_split=0.2,
                      callbacks=[early_stopping], verbose=1)

            # Predict the test set (per-class probabilities from the softmax layer)
            print('Predicting test set')
            Y_prob = model.predict(X_test)

            # Calculate accuracy, sensitivity, false positive rate, specificity and precision.
            # Both arrays are reduced to class indices first: the targets are one-hot and the
            # predictions are probabilities, so they cannot be compared directly.
            scores = _macro_classification_metrics(
                np.argmax(Y_test, axis=1), np.argmax(Y_prob, axis=1), num_classes
            )
            for name in metric_names:
                fold_scores[name].append(scores[name])

            # Release the fold's model and data before the next fold, then clear keras backend
            del model, X_train, X_test
            tf.keras.backend.clear_session()

        # Store (mean, standard deviation) over the folds for every metric
        results = dataset_results.setdefault(dataset_name, {})
        for name in metric_names:
            results[name] = (np.mean(fold_scores[name]), np.std(fold_scores[name]))

        # Delete dataset to free up memory
        del dataset

train_and_evaluate()
print(dataset_results)

        


Working on dataset: X_measurements
Working on fold: 1
Fitting model
Train on 102 samples, validate on 26 samples
Epoch 1/10


: 