In [None]:
# Perform body part based prediction: Based on 
# Ho, E.S.L., McCay, K.D., Sakkos, D., Woo, W.L., Marcroft, C., Dulson, P., Embleton, N.D., 2021. 
# Towards Explainable Abnormal Infant Movements Identification: A Body-part Based Prediction and Visualisation Framework, 
# in: 2021 IEEE EMBS International Conference on Biomedical and Health Informatics (BHI). 
# Presented at the 2021 IEEE EMBS International Conference on Biomedical and Health Informatics (BHI), 
# pp. 1â€“4. https://doi.org/10.1109/BHI50953.2021.9508603



In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pickle

In [2]:
# Load in data from pickles
with open('drive/MyDrive/Pickles/X_smoothed_mean_norm.pickle', 'rb') as handle:
    X_smoothed_mean_norm = pickle.load(handle)

In [5]:
print(X_smoothed_mean_norm[0])
print(type(X_smoothed_mean_norm))

         AccXRA    AccYRA    AccZRA    AccXLA    AccYLA    AccZLA    AccXRW  \
0      0.910473  0.024397 -5.176289 -0.559843  3.420900 -4.765396 -3.138499   
1      1.036153  0.005995 -5.023745 -0.558799  3.408857 -4.766926 -2.831541   
2      1.032857  0.194585 -4.841748 -0.560481  3.408845 -4.766155 -2.921978   
3      1.018236  0.196418 -5.382666 -0.560568  3.408325 -4.763335 -2.849972   
4      1.119276  1.241573 -4.833676 -0.561957  3.405383 -4.770709 -2.745618   
...         ...       ...       ...       ...       ...       ...       ...   
19296  0.730245 -0.130767 -0.530644 -0.721722  0.051651 -0.540139 -0.433730   
19297  0.730245 -0.130767 -0.530644 -0.721722  0.051651 -0.540139 -0.433730   
19298  0.730245 -0.130767 -0.530644 -0.721722  0.051651 -0.540139 -0.433730   
19299  0.730245 -0.130767 -0.530644 -0.721722  0.051651 -0.540139 -0.433730   
19300  0.730245 -0.130767 -0.530644 -0.721722  0.051651 -0.540139 -0.433730   

         AccYRW    AccZRW    AccXLW    AccYLW    Ac

In [15]:
# RA will be a df with the cols AccXRA, AccYRA, AccZRA, AccSumRA extracted from X_smoothed_mean_norm
RA = [pd.DataFrame(trial, columns=['AccXRA', 'AccYRA', 'AccZRA', 'AccSumRA']) for trial in X_smoothed_mean_norm]
RW = [pd.DataFrame(trial, columns=['AccXRW', 'AccYRW', 'AccZRW', 'AccSumRW']) for trial in X_smoothed_mean_norm]
LA = [pd.DataFrame(trial, columns=['AccXLA', 'AccYLA', 'AccZLA', 'AccSumLA']) for trial in X_smoothed_mean_norm]
LW = [pd.DataFrame(trial, columns=['AccXLW', 'AccYLW', 'AccZLW', 'AccSumLW']) for trial in X_smoothed_mean_norm]

# Save the dataframes to pickle
with open('drive/MyDrive/Pickles/RA.pickle', 'wb') as handle:
    pickle.dump(RA, handle)
with open('drive/MyDrive/Pickles/RW.pickle', 'wb') as handle:
    pickle.dump(RW, handle)
with open('drive/MyDrive/Pickles/LA.pickle', 'wb') as handle:
    pickle.dump(LA, handle)
with open('drive/MyDrive/Pickles/LW.pickle', 'wb') as handle:
    pickle.dump(LW, handle)

In [10]:
# Load in labels
with open('drive/MyDrive/Pickles/abnormal_encoded.pickle', 'rb') as handle:
    abnormal_encoded = pickle.load(handle)

In [13]:
print(abnormal_encoded[0])

[0. 1. 0.]


In [21]:
@tf.autograph.experimental.do_not_convert
def create_cnn_model(shape, filters=32, kernel_size=3, dropout_rate=0.5):
    model = Sequential()
    model.add(Masking(mask_value=0., input_shape=shape))  # Adjust the input_shape to match your dataset
    model.add(Conv1D(filters, kernel_size, activation='relu', input_shape=(19301, 16)))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)) )  # Reduced the number of neurons in the dense layer
    model.add(Dense(3, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [22]:
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, recall_score, precision_score, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import pickle
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Masking
from keras.regularizers import l2
import tensorflow as tf

tf.autograph.set_verbosity(0)

@tf.autograph.experimental.do_not_convert
def train_and_evaluate(dataset_names, create_model_fn, class_weights):
    outer_cv = KFold(n_splits=5, shuffle=True, random_state=42)
    fold_results = []

    for fold, (train_index, test_index) in enumerate(outer_cv.split(np.arange(len(abnormal_encoded)))):
        print(f'\nStarting fold {fold + 1}/{outer_cv.n_splits}')
        
        fold_predictions = []
        Y_test_fold = None

        for dataset_name in dataset_names:
            print(f'\nTraining on dataset: {dataset_name}')

            with open(f'drive/MyDrive/Pickles/{dataset_name}.pickle', 'rb') as handle:
                dataset = pickle.load(handle)
            dataset = np.array(dataset)

            X_train, X_test = dataset[train_index], dataset[test_index]
            Y_train, Y_test = abnormal_encoded[train_index], abnormal_encoded[test_index]
            X_train, Y_train, X_test, Y_test = map(lambda x: x.astype('float32'), [X_train, Y_train, X_test, Y_test])

            if Y_test_fold is None:
                Y_test_fold = Y_test

            model = create_model_fn(X_train.shape[1:])
            early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0)
            class_weights_dict = compute_class_weights(Y_train) if class_weights else None

            print('Starting training...')
            history = model.fit(X_train, Y_train, epochs=15, batch_size=16, validation_split=0.2, callbacks=[early_stopping], verbose=0, class_weight=class_weights_dict)

            print('Training completed.')
            print('Evaluating model on test set...')
            Y_pred = model.predict(X_test)
            fold_predictions.append(Y_pred)

        avg_predictions = np.mean(fold_predictions, axis=0)
        Y_pred_classes = np.argmax(avg_predictions, axis=1)
        Y_test_classes = np.argmax(Y_test_fold, axis=1)

        accuracy = accuracy_score(Y_test_classes, Y_pred_classes)
        sensitivity = recall_score(Y_test_classes, Y_pred_classes, average='macro')
        precision = precision_score(Y_test_classes, Y_pred_classes, average='macro')

        fold_result = {
            'Accuracy': accuracy,
            'Sensitivity': sensitivity,
            'Precision': precision
        }
        fold_results.append(fold_result)

        print(f'\nFold {fold + 1} results:')
        print(f'Accuracy: {accuracy:.4f}, Sensitivity: {sensitivity:.4f}, Precision: {precision:.4f}')

    # Aggregate and print final results
    final_results = aggregate_results(fold_results)
    print('\nFinal aggregated results across all folds:')
    for metric, value in final_results.items():
        print(f'{metric}: Mean = {value["mean"]:.4f}, Std = {value["std"]:.4f}')

def compute_class_weights(Y_train):
    Y_train_classes = np.argmax(Y_train, axis=1)
    cw = compute_class_weight('balanced', classes=np.unique(Y_train_classes), y=Y_train_classes)
    return dict(enumerate(cw))

def aggregate_results(all_results):
    # Calculate mean and standard deviation for each metric
    aggregated = {}
    for key in all_results[0].keys():
        values = [result[key] for result in all_results]
        aggregated[key] = {
            'mean': np.mean(values),
            'std': np.std(values)
        }
    return aggregated

def print_aggregated_results(aggregated_results):
    for metric, stats in aggregated_results.items():
        print(f"{metric}: {stats['mean']} +/- {stats['std']}")

# Example usage
# This assumes that `abnormal_encoded`, `create_model_fn`, and the dataset loading are properly defined
dataset_names = ['RA', 'RW', 'LA', 'LW']  # Example dataset names

results = train_and_evaluate(dataset_names, create_cnn_model, False)





Starting fold 1/5

Training on dataset: RA
Starting training...
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Training completed.
Evaluating model on test set...
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'

Training on dataset: RW
Starting training...
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object 

In [12]:
# classify each body part and averag the results to classifiy the whole trial
# RA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

X_train, X_test, y_train, y_test = train_test_split(np.array(RA), np.array(abnormal_encoded), test_size=0.2, random_state=0)

# Create a random forest classifier
RA_clf = RandomForestClassifier(n_estimators=100, random_state=0)

# Train the classifier
RA_clf.fit(X_train, y_train)

# Print the name and gini importance of each feature
for feature in zip(['AccXRA', 'AccYRA', 'AccZRA', 'AccSumRA'], RA_clf.feature_importances_):
    print(feature)

# Predict the labels of the test data: y_pred
y_pred_RA = RA_clf.predict(X_test)

# Print the confusion matrix
from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred_RA))

# Print the accuracy
from sklearn.metrics import accuracy_score

print(accuracy_score(y_test, y_pred_RA))

# RW
X_train, X_test, y_train, y_test = train_test_split(RW, abnormal_encoded, test_size=0.2, random_state=0)

# Create a random forest classifier
RW_clf = RandomForestClassifier(n_estimators=100, random_state=0)

# Train the classifier
RW_clf.fit(X_train, y_train)

# Print the name and gini importance of each feature
for feature in zip(['AccXRW', 'AccYRW', 'AccZRW', 'AccSumRW'], RW_clf.feature_importances_):
    print(feature)

# Predict the labels of the test data: y_pred
y_pred_RW = RW_clf.predict(X_test)

# Print the confusion matrix
print(confusion_matrix(y_test, y_pred_RW))

# Print the accuracy
print(accuracy_score(y_test, y_pred_RW))

# LA
X_train, X_test, y_train, y_test = train_test_split(LA, abnormal_encoded, test_size=0.2, random_state=0)

# Create a random forest classifier
LA_clf = RandomForestClassifier(n_estimators=100, random_state=0)

# Train the classifier
LA_clf.fit(X_train, y_train)

# Print the name and gini importance of each feature
for feature in zip(['AccXLA', 'AccYLA', 'AccZLA', 'AccSumLA'], LA_clf.feature_importances_):
    print(feature)

# Predict the labels of the test data: y_pred
y_pred_LA = LA_clf.predict(X_test)

# Print the confusion matrix
print(confusion_matrix(y_test, y_pred_LA))

# Print the accuracy
print(accuracy_score(y_test, y_pred_LA))

# LW
X_train, X_test, y_train, y_test = train_test_split(LW, abnormal_encoded, test_size=0.2, random_state=0)

# Create a random forest classifier
LW_clf = RandomForestClassifier(n_estimators=100, random_state=0)

# Train the classifier
LW_clf.fit(X_train, y_train)

# Print the name and gini importance of each feature
for feature in zip(['AccXLW', 'AccYLW', 'AccZLW', 'AccSumLW'], LW_clf.feature_importances_):
    print(feature)

# Predict the labels of the test data: y_pred
y_pred_LW = LW_clf.predict(X_test)

# Print the confusion matrix
print(confusion_matrix(y_test, y_pred_LW))

# Print the accuracy
print(accuracy_score(y_test, y_pred_LW))

# Average the results of y_pred_RA, y_pred_RW, y_pred_LA, y_pred_LW to get the final prediction
y_pred = (y_pred_RA + y_pred_RW + y_pred_LA + y_pred_LW) / 4
print(y_pred)

# Print the confusion matrix
print(confusion_matrix(y_test, y_pred))

# Print the accuracy
print(accuracy_score(y_test, y_pred))

ValueError: Found array with dim 3. Estimator expected <= 2.