# Deep learning model validation 

This notebook can be used to validate a deep learning model. The data and the model are loaded into memory at the top; the cells that follow define and run the validation functions.

In [1]:
import sys, os, fnmatch, csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

sys.path.insert(0, os.path.dirname(os.getcwd()))

from config import PATH_RAW_DATA, PATH_DATA_PROCESSED_DL, PATH_MODELS

# 1. Preparing data, model, and helper functions

## Configuration variables

In [2]:
# Number of epochs that are averaged into one input sample
N_AVERAGE = 40
# File name of the trained model; joined with PATH_MODELS when the model is loaded
MODEL_NAME = 'Fully_connected_regressor_01.hdf5'

## Load data

In [3]:
from sklearn.model_selection import train_test_split

# Step 1: Get all the files in the output folder
file_names = os.listdir(PATH_DATA_PROCESSED_DL)

# Step 2: Get the full paths of the files (without extensions)
files = [os.path.splitext(os.path.join(PATH_DATA_PROCESSED_DL, file_name))[0]
         for file_name in fnmatch.filter(file_names, "*.zarr")]

if not files:
    # Fail with a clear message instead of the opaque "No objects to concatenate" from pd.concat
    raise FileNotFoundError(f"No '*.zarr' files found in {PATH_DATA_PROCESSED_DL}")

# Step 3: Load all the metadata (one CSV next to every processed recording)
frames = [pd.read_csv(feature_file.replace("processed_raw_", "processed_metadata_") + ".csv")
          for feature_file in files]

df_metadata = pd.concat(frames)

# Step 4: Add missing age information based on the age group the subject is in
# (age_group is used as months; 30 days per month, 12 months per year).
# The result is assigned back to the column: calling fillna(..., inplace=True) on
# df[col] is chained assignment, which newer pandas versions warn about and which
# does not update the frame once Copy-on-Write is enabled.
df_metadata['age_months'] = df_metadata['age_months'].fillna(df_metadata['age_group'])
df_metadata['age_days'] = df_metadata['age_days'].fillna(df_metadata['age_group'] * 30)
df_metadata['age_years'] = df_metadata['age_years'].fillna(df_metadata['age_group'] / 12)

# Step 5: List all the unique subject IDs
# NOTE(review): the order is whatever set() yields and is deliberately left untouched,
# because the train/val/test split is computed from this list.
subject_ids = list(set(df_metadata["code"].tolist()))

In [4]:
from sklearn.model_selection import train_test_split

# Split on subject IDs: 70% of the subjects go to training,
# the remaining 30% are halved into a test and a validation set.
# random_state is fixed so that repeated runs on the same subject list give the same split.
IDs_train, IDs_temp = train_test_split(subject_ids, test_size=0.3, random_state=42)
IDs_test, IDs_val = train_test_split(IDs_temp, test_size=0.5, random_state=42)

In [5]:
from dataset_generator import DataGenerator

# Keyword arguments that are the same for every generator in this notebook
shared_generator_args = dict(BASE_PATH=PATH_DATA_PROCESSED_DL,
                             metadata=df_metadata,
                             n_average=N_AVERAGE,
                             batch_size=10,
                             n_timepoints=501,
                             n_channels=30,
                             shuffle=True)

# Training subjects, 30 iterations per epoch
train_generator = DataGenerator(list_IDs=IDs_train,
                                iter_per_epoch=30,
                                **shared_generator_args)

# Same as train_generator, with gaussian_noise=0.01 passed in addition
train_generator_noise = DataGenerator(list_IDs=IDs_train,
                                      iter_per_epoch=30,
                                      gaussian_noise=0.01,
                                      **shared_generator_args)

# Validation subjects, 100 iterations per epoch
val_generator = DataGenerator(list_IDs=IDs_val,
                              iter_per_epoch=100,
                              **shared_generator_args)

# Test subjects, 100 iterations per epoch
test_generator = DataGenerator(list_IDs=IDs_test,
                               iter_per_epoch=100,
                               **shared_generator_args)

In [6]:
# Show the first rows of the concatenated metadata as a quick check
df_metadata.head()

Unnamed: 0,code,cnt_path,cnt_file,age_group,age_days,age_months,age_years
0,23,/Volumes/Seagate Expansion Drive/ePodium/Data/...,023_35_mc_mmn36,35,1052.0,35.066667,2.922222
0,337,/Volumes/Seagate Expansion Drive/ePodium/Data/...,337_23_jc_mmn_36_wk,23,692.0,23.066667,1.922222
0,456,/Volumes/Seagate Expansion Drive/ePodium/Data/...,456_23_md_mmn36_wk,23,691.0,23.033333,1.919444
0,328,/Volumes/Seagate Expansion Drive/ePodium/Data/...,328_23_jc_mmn36_wk,23,699.0,23.3,1.941667
0,314,/Volumes/Seagate Expansion Drive/ePodium/Data/...,314_29_mmn_36_wk,29,877.0,29.233333,2.436111


## Load model

In [7]:
import tensorflow as tf

# Load the saved model named in the configuration cell from the models folder
model_path = os.path.join(PATH_MODELS, MODEL_NAME)
loaded_model = tf.keras.models.load_model(model_path)

In [9]:
# Iteration counter of the optimizer stored with the loaded model
# (presumably the number of training steps taken so far; confirm against the Keras docs)
loaded_model.optimizer.iterations

<tf.Variable 'iter:0' shape=() dtype=int64, numpy=529386>

## Helper functions for validation

In [18]:
def evaluate_model(model):
    """Call ``model.evaluate`` on the train, validation and test generators, in that order.

    Uses the notebook-level ``train_generator``, ``val_generator`` and
    ``test_generator``. The values returned by ``model.evaluate`` are not
    collected; nothing is returned.
    """
    for split_generator in (train_generator, val_generator, test_generator):
        model.evaluate(split_generator)
    
def print_few_predictions(model):
    """Sanity check: print one "<label> -> <prediction>" line per sample of item 0 of test_generator."""
    batch_inputs, batch_labels = test_generator[0]
    batch_predictions = model.predict(batch_inputs)

    for sample_idx, label in enumerate(batch_labels):
        print(f"{label} -> {batch_predictions[sample_idx]}")

Definition of error stability (Vandenbosch et al., 2018): 

_"Stability was assessed as the correlation between the prediction errors (estimated minus actual age) of subjects at baseline with their own prediction error at follow-up."_

In [43]:
import zarr
from scipy.stats import pearsonr

def create_averaged_epoch(data_signal, n_average=None):

    """
    Average a random selection of epochs from one recording into a single epoch.

    If the recording holds at least ``n_average`` epochs, ``n_average`` of them are
    drawn at random without replacement (using the global numpy random state) and
    averaged. Otherwise all available epochs are averaged.

    Args:
    --------
    data_signal: array-like
        Epoch data of one recording; the first axis indexes the epochs. Must support
        ``len()`` and ``.oindex[...]`` indexing, like the zarr arrays opened in this
        notebook.
    n_average: int, optional
        Number of epochs to average. Defaults to the notebook-level N_AVERAGE.

    Returns:
    --------
    numpy array
        The averaged epoch with a leading axis of length 1, i.e. shape
        (1,) + shape of one epoch, as float64.
    """

    if n_average is None:
        n_average = N_AVERAGE

    num_epochs = len(data_signal)

    if num_epochs >= n_average:
        select = np.random.choice(num_epochs, n_average, replace=False)
        signal_averaged = np.mean(data_signal.oindex[select, :, :], axis=0)
    else:
        # Not enough epochs for a random draw: fall back to averaging everything
        signal_averaged = np.mean(data_signal.oindex[:, :, :], axis=0)

    # Add the leading sample axis directly instead of concatenating onto a
    # hard-coded (0, 30, 501) buffer, so any channel/timepoint count is accepted.
    # float64 is kept because the previous concatenation onto a float64 buffer produced it.
    return np.expand_dims(signal_averaged, axis=0).astype(np.float64, copy=False)

def _pearson_or_nan(x, y):
    """Pearson correlation of x and y, or NaN when it is undefined (fewer than two distinct values in x or y)."""
    if len(set(x)) < 2 or len(set(y)) < 2:
        return float('nan')
    corr, _ = pearsonr(x, y)
    return corr


def error_stability(model, IDs_test, N_per_file):
    """Correlate prediction error with age, per subject and over all subjects.

    For every recording of every subject, ``N_per_file`` averaged epochs are drawn
    with ``create_averaged_epoch`` and the model's prediction is compared with the
    recording's ``age_months``. The Pearson correlation between age and error
    (prediction minus actual age) is printed per subject and for all subjects
    combined.

    NOTE(review): this correlates the error with age. It is not the correlation
    between a subject's baseline error and follow-up error that the quoted
    definition from Vandenbosch et al. (2018) describes; confirm which measure is
    intended.

    Args:
    --------
    model:
        Object with a ``predict`` method that accepts an array of shape
        (N_per_file, timepoints, channels) and yields one value per sample.
    IDs_test: iterable
        Subject IDs, matched against the 'code' column of df_metadata.
    N_per_file: int
        Number of averaged epochs to draw from each recording (at least 1).

    Returns:
    --------
    dict
        Maps every subject ID to its age/error Pearson correlation. The value is
        NaN when the correlation is undefined, e.g. when all recordings of the
        subject have the same age.

    Raises:
    --------
    ValueError
        If N_per_file is smaller than 1.
    """
    if N_per_file < 1:
        raise ValueError("N_per_file must be at least 1")

    combined_errors = []
    combined_ages = []
    subject_correlations = {}

    # Step 1: Iterate over subjects
    for ID in IDs_test:

        # Step 2: Find all files of a subject
        df_subject = df_metadata[df_metadata['code'] == ID]

        subject_errors = []
        subject_ages = []

        for _, metadata_file in df_subject.iterrows():

            # Open the recording once per file instead of once per drawn sample
            filename = os.path.join(PATH_DATA_PROCESSED_DL, 'processed_raw_' + metadata_file['cnt_file'] + '.zarr')
            data_signal = zarr.open(filename, mode='r')

            # Step 3: Calculate age prediction for N (averaged) epochs of each file
            averaged_epochs = [create_averaged_epoch(data_signal) for _ in range(N_per_file)]
            X_data = np.swapaxes(np.concatenate(averaged_epochs, axis=0), 1, 2)
            ages = np.array([metadata_file['age_months']] * N_per_file)

            predictions = model.predict(X_data)
            errors = predictions.flatten() - ages

            subject_errors.extend(errors)
            subject_ages.extend(ages)

        combined_errors.extend(subject_errors)
        combined_ages.extend(subject_ages)

        # Step 4: Within one subject, correlate age with prediction error.
        # A subject whose recordings all share one age has no defined correlation (NaN).
        corr = _pearson_or_nan(subject_ages, subject_errors)
        subject_correlations[ID] = corr
        print(f"Subject {ID} Pearsons correlation: {corr:.3f}")

    # Step 5: Correlate age with prediction error over all subjects together
    corr = _pearson_or_nan(combined_ages, combined_errors)
    print(f"Pearsons correlation combined: {corr:.3f}")

    return subject_correlations

Subject 712 Pearsons correlation: -0.223
Subject 420 Pearsons correlation: -0.655




Subject 758 Pearsons correlation: nan
Subject 28 Pearsons correlation: -0.689
Subject 732 Pearsons correlation: -0.578
Subject 613 Pearsons correlation: -0.435
Subject 164 Pearsons correlation: -0.556
Subject 709 Pearsons correlation: -0.724
Subject 121 Pearsons correlation: -0.453
Subject 711 Pearsons correlation: -0.816
Subject 329 Pearsons correlation: -0.493
Subject 169 Pearsons correlation: -0.657
Subject 474 Pearsons correlation: -0.812
Subject 154 Pearsons correlation: -0.430
Subject 428 Pearsons correlation: -0.704
Subject 159 Pearsons correlation: -0.546
Subject 472 Pearsons correlation: -0.706
Subject 632 Pearsons correlation: -0.856
Subject 451 Pearsons correlation: -0.473
Subject 426 Pearsons correlation: -0.767
Subject 158 Pearsons correlation: -0.300
Subject 122 Pearsons correlation: -0.911
Subject 496 Pearsons correlation: -0.483
Subject 485 Pearsons correlation: -0.635
Subject 425 Pearsons correlation: -0.701
Subject 149 Pearsons correlation: -0.684
Subject 317 Pearsons

# 2. Model validation

In [1]:
# Needs the generator, model and helper-function cells above to have been run in this kernel
evaluate_model(loaded_model)

NameError: name 'evaluate_model' is not defined

In [None]:
# Sanity check: print label -> prediction pairs for one item of the test generator
print_few_predictions(loaded_model)

In [None]:
# Draw 30 averaged epochs from every recording of every test subject
error_stability(loaded_model, IDs_test, 30)