In [3]:

%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['CUDA_VISIBLE_DEVICES']='1' #Set the GPU you wish to use here

import tensorflow as tf
gpus = tf.config.experimental.list_physical_devices('GPU')
display(gpus)
tf.config.experimental.set_memory_growth(gpus[0], enable=True)
tf.get_logger().setLevel('INFO')

from tensorflow import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping
from src.models.sleepstagecnn import *
from sklearn.utils import class_weight

# logging.getLogger('tensorflow').setLevel(logging.FATAL)

from tensorflow.keras import layers
from tensorflow.python.keras import backend as K

from sklearn.metrics import confusion_matrix, classification_report
from skopt.utils import use_named_args


import matplotlib.pyplot as plt
import tqdm 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from termcolor import colored
from sklearn.model_selection import GroupKFold
from sklearn.utils import shuffle

from src.datasets.sleep_staging_dataset import SleepStagingDataset
import utils.paths as paths
from utils.consts import *
from utils.sleep_extract import *
from utils.utils import *

def time_series_subsequences(ts, window, hop=1):
    window = int(window)
    hop = int(hop)
    assert len(ts.shape) == 1
    shape = (int(int(ts.size - window) / hop + 1), window)
    strides = ts.strides[0] * hop, ts.strides[0]
    return np.lib.stride_tricks.as_strided(ts, shape=shape, strides=strides)

def signal_quality(patient_id):
    quality_dir = data_dir / 'quality' / 'ppg' / f'mesa-sleep-{patient_id}-Pleth-Quality.xlsx'
    quality = pd.read_excel(quality_dir)
    return quality
    
def ppg_data(patient_id):
    ppg = np.load(data_dir / 'npys' / f'mesa-sleep-{patient_id}-Pleth.npy')
    return ppg

def sleep_data(patient_id):
    sleep = sleep_extract_30s_epochs(data_dir / 'annotations-events-nsrr' / f'mesa-sleep-{patient_id}-nsrr.xml')
    return sleep

def intersection(lst1, lst2): 
    return list(set(lst1) & set(lst2)) 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


[]

IndexError: list index out of range

In [2]:
!pip install tensorflow-gpu



In [2]:
def load_data(these_patient_IDs, min_quality):
    
    ppg = {}
    sleep = {}
    quality = {}
    quality_mean = {}
    could_not_load = []
    
    # Load and downsample data and fetch the patient quality
    for patient in tqdm.tqdm(these_patient_IDs):
        try:
            quality[patient] = signal_quality(patient).quality.to_numpy()
            quality_mean[patient] = quality[patient][10:int(len(quality[patient])*0.95)].mean()
            if quality_mean[patient] < min_quality:
                raise ValueError('Does not meet quality requirements')
            ppg[patient] = filter_and_downsample(ppg_data(patient), in_fs=fs_PPG, out_fs=new_fs_PPG)
            sleep[patient] = sleep_data(patient)
        except: 
            could_not_load.append(patient)
            try: del ppg[patient]
            except: pass
            try: del sleep[patient]
            except: pass
            try: del quality[patient]
            except: pass
            try: del quality_mean[patient]
            except: pass

    if len(could_not_load) > 0:
        print('Could not load: ', len(could_not_load), 'of ', len(these_patient_IDs), "patients.", flush=True)
    these_patient_IDs = [patient for patient in these_patient_IDs if patient not in could_not_load]
    
    return ppg, sleep, quality, quality_mean

def subsequences(ppg, sleep, quality):
    ppg_subs = {}
    sleep_subs = {}
    quality_subs = {}

    for patient in tqdm.tqdm(ppg.keys()):    
        quality_subs[patient] = time_series_subsequences(quality[patient], dT * fs_sleep * windows, hop = dT * fs_sleep * hops)
        sleep_subs[patient] = time_series_subsequences(sleep[patient], dT * fs_sleep * windows, hop = dT * fs_sleep * hops)[0:quality_subs[patient].shape[0], :]
        ppg_subs[patient] = time_series_subsequences(ppg[patient], dT * new_fs_PPG * windows, hop = dT * new_fs_PPG * hops)[0:quality_subs[patient].shape[0], :]
        
        
    return ppg_subs, sleep_subs, quality_subs


def all_patients_subs_dataframe(ppg_subs, sleep_subs, quality_subs):

    all_patient_subs = pd.DataFrame()
    for patient in ppg_subs.keys():
        this_patient_subs = pd.DataFrame({'patient': [patient]*sleep_subs[patient].shape[0], 
                                          'sleep': sleep_subs[patient][:,2], 
                                          'quality': quality_subs[patient].min(axis=1), 
                                          'epoch': np.arange(0, sleep_subs[patient].shape[0])})
        all_patient_subs = pd.concat([all_patient_subs, this_patient_subs])
    return all_patient_subs

In [None]:
#1) Load all data into memory
#2) Create subsequences of the chosen duration/windows
#3) Train Validate split on the patient_subs dataframe
#4) Train a model on the data.....save all the result for the split

name = 'Sleep_Alternative'

# Quicktest
QUICK_TEST_RATIO = 0.01 #1/3 #Set to 1 for the full dataset
if QUICK_TEST_RATIO < 1: print(colored(f'Running in Quick Test Mode with {QUICK_TEST_RATIO*100}% of data', 'yellow'), flush=True)

# Paths are set in utils.paths
data_dir = paths.data_dir
saved_dataset_dir = paths.saved_dataset_dir

# Patient IDs
patient_IDs = np.array([file[11:15] for file in os.listdir(data_dir / 'npys')])
patient_IDs = np.unique(np.array([f"{patient_ID}".zfill(4) for patient_ID in patient_IDs]))

train_patient_IDs, test_patient_IDs = train_test_split(patient_IDs, test_size=0.15, random_state=321)
train_patient_IDs, validation_patient_IDs = train_test_split(train_patient_IDs, test_size=0.2, random_state=222)

train_patient_IDs = train_patient_IDs[0:int(len(train_patient_IDs)*QUICK_TEST_RATIO)]
validation_patient_IDs = validation_patient_IDs[0:int(len(validation_patient_IDs)*QUICK_TEST_RATIO)]
test_patient_IDs = test_patient_IDs[0:int(len(test_patient_IDs)*QUICK_TEST_RATIO)]

train_ppg, train_sleep, train_quality, train_quality_mean = load_data(train_patient_IDs, min_quality = 0.5)
validation_ppg, validation_sleep, validation_quality, validation_quality_mean = load_data(validation_patient_IDs, min_quality=0.6)
test_ppg, test_sleep, test_quality, test_quality_mean = load_data(test_patient_IDs, min_quality=0.6)

train_ppg_subs, train_sleep_subs, train_quality_subs = subsequences(train_ppg, train_sleep, train_quality)
validation_ppg_subs, validation_sleep_subs, validation_quality_subs = subsequences(validation_ppg, validation_sleep, validation_quality)
test_ppg_subs, test_sleep_subs, test_quality_subs = subsequences(test_ppg, test_sleep, test_quality)

train_dataframe = all_patients_subs_dataframe(train_ppg_subs, train_sleep_subs, train_quality_subs)
validation_dataframe = all_patients_subs_dataframe(validation_ppg_subs, validation_sleep_subs, validation_quality_subs)
test_dataframe = all_patients_subs_dataframe(test_ppg_subs, test_sleep_subs, test_quality_subs)

train_dataframe = train_dataframe[train_dataframe.sleep < 9]
validation_dataframe = validation_dataframe[validation_dataframe.sleep < 9]
test_dataframe = test_dataframe[test_dataframe.sleep < 9]

train_dataframe = train_dataframe[train_dataframe.quality > 0.6]
validation_dataframe = validation_dataframe[validation_dataframe.quality > 0.6]
test_dataframe = test_dataframe[test_dataframe.quality > 0.6]

train_dataframe = train_dataframe.replace({'sleep': sleep_encoding})
validation_dataframe = validation_dataframe.replace({'sleep': sleep_encoding})
test_dataframe = test_dataframe.replace({'sleep': sleep_encoding})

[33mRunning in Quick Test Mode with 50.0% of data[0m


  3%|▎         | 20/698 [00:09<05:34,  2.03it/s]

In [4]:
import random

class_weights = class_weight.compute_class_weight('balanced', classes=sorted(np.unique(train_dataframe.sleep)), 
                                                  y=train_dataframe.sleep)
class_weights = {i : class_weights[i]/min(class_weights) for i in range(len(class_weights))}

train_dataframe_balanced = []
for key, value in class_weights.items():
    this_dataframe = train_dataframe[train_dataframe.sleep == key]
    k = int(len(this_dataframe)*(value - 1))
    new_df = this_dataframe.sample(n=k, replace=True)
    train_dataframe_balanced.append(new_df)
    
train_dataframe_balanced = pd.concat(train_dataframe_balanced)
train_dataframe_balanced = pd.concat([train_dataframe, train_dataframe_balanced])

train_dataframe=train_dataframe_balanced

train_dataframe = shuffle(train_dataframe)

display(train_dataframe.head())

Unnamed: 0,patient,sleep,quality,epoch
382,2896,1.0,0.666667,382
633,344,1.0,0.766667,633
183,1906,2.0,0.754717,183
957,2175,3.0,0.740741,957
963,2887,2.0,0.779661,963


In [5]:
print("------Data Summary------")
print(f'Train Patients: {len(train_ppg.keys())}')
print(f'Train Samples: {train_dataframe.shape[0]} have shape of {list(train_ppg_subs.values())[0].shape[1]}')
print(f'Train Sleep Stage Breakdown:\n{train_dataframe["sleep"].value_counts()}')

print(f'Validation Patients: {len(validation_ppg.keys())}')
print(f'Validation Samples: {validation_dataframe.shape[0]} have shape of {list(validation_ppg_subs.values())[0].shape[1]}')
print(f'Validation Sleep Stage Breakdown:\n{validation_dataframe["sleep"].value_counts()}')

print(f'Test Patients: {len(test_ppg.keys())}')
print(f'Test Samples: {test_dataframe.shape}')
print(f'Test Sleep Stage Breakdown:\n{test_dataframe["sleep"].value_counts()}')

------Data Summary------
Train Patients: 558
Train Samples: 716406 have shape of 2400
Train Sleep Stage Breakdown:
1.0    179102
0.0    179102
3.0    179101
2.0    179101
Name: sleep, dtype: int64
Validation Patients: 114
Validation Samples: 72596 have shape of 2400
Validation Sleep Stage Breakdown:
1.0    41613
0.0    13310
3.0     9743
2.0     7930
Name: sleep, dtype: int64
Test Patients: 81
Test Samples: (51104, 4)
Test Sleep Stage Breakdown:
1.0    29724
0.0     9019
3.0     6971
2.0     5390
Name: sleep, dtype: int64


In [6]:
import numpy as np


class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, ppgs, sleep, dataframe, sleep_stage_mappings=None, batch_size=32, shuffle=True):

        self.batch_size = batch_size
        self.dataframe = dataframe
        self.count = dataframe.shape[0]
        
        self.ppgs = ppgs
        self.sleep= sleep
        self.dim = None
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        batches =  int(np.ceil(self.count / self.batch_size))
#         print(f'Samples: {self.count}. Batch Size: {self.batch_size}. Num_Batches: {batches})
        return batches
              
    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        
        start = index*self.batch_size
        end = (index+1)*self.batch_size
        if end > self.count:
            end = self.count
            
        indexes = self.indexes[start:end]

        # Generate data
        X, y = self.__data_generation(indexes)

        return X, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(self.count)
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        
        # Initialization
        X = np.empty((len(indexes), 2400))             # 4800 for 32 Hz
        y = np.empty(len(indexes), dtype=int)

        # Generate data
        for i, indx in enumerate(indexes):
            
            patient = self.dataframe.iloc[indx].patient
            data_index = self.dataframe.iloc[indx].epoch
            sleep = self.dataframe.iloc[indx].sleep
            
            # Store sample
            X[i] = self.ppgs[patient][data_index,:]

            # Store class
            y[i] = sleep.reshape(1,1) #self.sleep[patient][data_index,:][2].reshape(1, 1)
                
        
        return X.reshape((X.shape[0], X.shape[1], 1)),  y.reshape((y.shape[0], 1))       


In [7]:
import traceback
import time
@use_named_args(dimensions=dimensions)
def run_fold(dim_single_fold, dim_epochs, dim_dropout, dim_learning_rate, dim_batch_size, dim_kernel_size, dim_channels):
    t = time.time()
    try:
        print(f'Single Fold : {dim_single_fold}', f"dim_epochs={dim_epochs}", 
              f"dim_dropout={dim_dropout}",       f"dim_learning_rate={dim_learning_rate}",
              f"dim_batch_size={dim_batch_size}", f"dim_kernel_size={dim_kernel_size}", 
              f"dim_channels={dim_channels}")

        # Delete the Keras model with these hyper-parameters from memory.
        try: 
            K.clear_session()
            tf.compat.v1.reset_default_graph()
        except:
            pass

        dg_train = DataGenerator(train_ppg_subs, train_sleep_subs, train_dataframe, sleep_encoding, batch_size=np.power(2, dim_batch_size), shuffle=True)
        dg_validate = DataGenerator(validation_ppg_subs, validation_sleep_subs, validation_dataframe, sleep_encoding, batch_size=np.power(2, dim_batch_size), shuffle=False)

        model = SimpleSleepStageCNN(in_size= dT*windows*new_fs_PPG, 
                              out_size=SLEEP_STAGES, 
                              learning_rate=dim_learning_rate, 
                              dropout=dim_dropout, 
                              kernel_size=dim_kernel_size, 
                              channels=dim_channels)

        print(model.summary())
        print('Training model...')
        train_validate_results = model.fit(dg_train,
                                           validation_data=dg_validate,
                                            use_multiprocessing=True,
                                            workers = 8,
                                            epochs = dim_epochs)


        print('Calculating validation predictions...')
        val_preds = model.predict(dg_validate, verbose=1, workers=8, use_multiprocessing=True)
        val_true =  validation_dataframe.sleep

        # Evaluation metrics
        confusion = confusion_matrix(np.argmax(val_preds, axis=1), val_true)
        confusion_norm = confusion / confusion.sum(axis=1).reshape(-1,1)
        confusion_diag = np.diag(confusion_norm)
        evaluation_metric = (np.median(confusion_diag) + 2 * np.min(confusion_diag))/3
        
        if np.isnan(evaluation_metric):
            evaluation_metric = 0

        print("Fold run time: ", time.time() - t)
        print("Confusion Matrix:")
        print(confusion)
        print("Normalized Confusion Matrix:")
        print(confusion_norm)
        print("Evaluation Metric: diagonal median")
        print(evaluation_metric)

#         return -evaluation_metric, model
        return -evaluation_metric

    except:
        print("Could not run model with these params...")
        traceback.print_exc()
#         return 9, None
        return 0

In [8]:
# import logging
# logging.getLogger('tensorflow').disabled = True


# dim_single_fold = 'true'
# dim_epochs = 5 
# dim_dropout = 0.4
# dim_learning_rate = 0.01
# dim_batch_size = 8
# dim_kernel_size = 5
# dim_channels = 6
# results = run_fold([dim_single_fold, dim_epochs,dim_dropout, dim_learning_rate, dim_batch_size, dim_kernel_size, dim_channels])

In [None]:
import logging
logging.getLogger('tensorflow').disabled = True


from skopt import gbrt_minimize, gp_minimize
from utils.consts import *
import time
print(dimensions)
# try:
t = time.time()
gp_result = gp_minimize(func=run_fold,
                        dimensions=dimensions,
                        n_calls=24,
                        noise= 0.01,
                        n_jobs=-1,
                        kappa = 5,
                        x0=default_parameters)  # change n_calls to how much you want.
print("----------time elapsed----------")
print(t - time.time())
# except ValueError:
#     print("Oops!  That was no valid number.  Try again...")
    

display(gp_result)

[Categorical(categories=('true',), prior=None), Integer(low=5, high=15, prior='uniform', transform='identity'), Real(low=0.0001, high=0.5, prior='uniform', transform='identity'), Real(low=0.0001, high=0.1, prior='uniform', transform='identity'), Integer(low=7, high=9, prior='uniform', transform='identity'), Integer(low=3, high=13, prior='uniform', transform='identity'), Integer(low=4, high=7, prior='uniform', transform='identity')]
Single Fold : true dim_epochs=5 dim_dropout=0.25 dim_learning_rate=0.001 dim_batch_size=7 dim_kernel_size=8 dim_channels=5
I am here
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 800, 32)           352       
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 800, 32)           8224      
_________________________________________________________________
max_pooling1d (

In [None]:
from skopt.plots import plot_objective
from utils.consts import *
import time


print(gp_result.x)
df = pd.DataFrame(gp_result.x_iters, columns=dimensions_names)
df["result"] = gp_result.func_vals
print(np.argmin(gp_result.func_vals))

df.to_pickle('results/simple0_res' + str(time.time()) + '.pkl')
df.style.apply(lambda x: ['background: lightgreen' if x.name == np.argmin(gp_result.func_vals)
                              else '' for i in x], 
                   axis=1)


In [None]:
import traceback


def train_and_eval(dim_single_fold, dim_epochs,dim_dropout,dim_optimizer, dim_learning_rate, dim_activation,dim_batch_size, dim_kernel_size, dim_pool_size):
    try:
        print(f'Single Fold : {dim_single_fold}', 
                  f"{dim_epochs=}", 
                  f"{dim_dropout=}",
                  f"{dim_learning_rate=}",
                  f"{dim_batch_size=}", 
                  f"{dim_kernel_size=}", 
                  f"{dim_channels=}", 


        class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(train_dataframe.sleep), 
                                                          y=train_dataframe.sleep)
        class_weights = {i : class_weights[i] for i in range(len(class_weights))}
        print("Weights: ",  class_weights)
        history = []
        confusion = []
        preds = []
        trues = []

        
        
        # Delete the Keras model with these hyper-parameters from memory.
        try: 
            del model
            K.clear_session()
            tf.compat.v1.reset_default_graph()
        except:
            pass

        assert(len(intersection(train_dataframe.patient.unique(), test_dataframe.patient.unique())))==0
        dg_train = DataGenerator(train_ppg_subs, train_sleep_subs, train_dataframe, sleep_encoding, batch_size=np.power(2, dim_batch_size), shuffle=True)
        dg_test = DataGenerator(test_ppg_subs, test_sleep_subs, test_dataframe, sleep_encoding, batch_size=np.power(2, dim_batch_size), shuffle=False)

        #TODO Remember we may need to apply class weights to the test set predictions!!!
        model = SleepStageCNN(in_size= dT*windows*new_fs_PPG, 
                              out_size=SLEEP_STAGES, 
                              learning_rate=dim_learning_rate, 
                              dropout=dim_dropout, 
                              kernel_size=dim_kernel_size, 
                              channels=dim_channels)

        print(model.summary())
        print('Training model...')
        train_test_results = model.fit(dg_train,
                                            validation_data=dg_test,
                                            class_weight=class_weights,
                                            use_multiprocessing=True,
                                            workers = 8,
                                            epochs = dim_epochs)


        print('Calculating test predictions...')
        test_preds = model.predict(dg_test, verbose=1, workers=8, use_multiprocessing=True)
        test_true =  test_dataframe.sleep

        preds.append(test_preds)
        trues.append(test_true)
        history.append(train_test_results)      

        # Evaluation metrics
        confusion = []
        for i in range(1):
            confusion.append(confusion_matrix(np.argmax(preds[i], axis=1), trues[i]))

        confusion = np.array(confusion).sum(axis=0)
        confusion_norm = confusion / confusion.sum(axis=1).reshape(-1,1)
        confusion_diag = np.diag(confusion_norm)
        evaluation_metric = np.median(confusion_diag) + 2 * np.min(confusion_diag)
        
        if np.isnan(evaluation_metric):
            evaluation_metric = 0

        print("Test Confusion Matrix:")
        print(confusion)
        print("Test Normalized Confusion Matrix:")
        print(confusion_norm)
        print("Test Evaluation Metric: diagonal median")
        print(evaluation_metric)

        return -evaluation_metric

    except:
        print("Could not run model with these params...")
        traceback.print_exc()
#         return 9, None
        return 0

In [None]:

test_results = train_and_eval(*gp_result.x)
# test_results = train_and_eval(dim_single_fold = 'true',dim_epochs = 20 ,dim_dropout = 0.25,dim_optimizer = 'adam' ,dim_learning_rate = 0.001,dim_activation = 'relu',dim_batch_size = 5,dim_kernel_size = 3,dim_pool_size = 3)

In [None]:
# patient = list(train_ppg.keys())[5]
# t_sleep = np.arange(0, len(train_sleep[patient]),1)
# t_ppg = np.arange(0, len(train_ppg[patient])/(32*30), 1/(32*30))
# t_quality = np.arange(0, len(train_quality[patient]), 1)          

# fig, ax = plt.subplots(figsize=(10,5))
# # # plt.plot(t_ppg, train_ppg[patient])
# # plt.plot(t_sleep, train_sleep[patient])
# # plt.plot(t_quality, train_quality[patient])
# train_sleep[patient][train_sleep[patient] > 4] = 0

# color = 'tab:red'
# ax.plot(t_sleep, train_sleep[patient], label='Sleep Stage')
# ax.plot(t_quality, train_quality[patient], label='Quality')
# ax.set_ylabel('Value', color=color)
# ax.set_xlabel('Time [N Epoch of 30s]', color=color)
# ax.legend(prop={'size': 10})


# fig, ax1 = plt.subplots(figsize=(20,20))

# color = 'tab:red'
# ax1.set_xlabel('time (s)')
# ax1.set_ylabel('Sleep Stage', color=color)
# ax1.plot(t_sleep, train_sleep[patient], color=color)
# ax1.tick_params(axis='y', labelcolor=color)

# ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

# color = 'tab:blue'
# ax2.set_ylabel('Signal Quality', color=color)  # we already handled the x-label with ax1
# ax2.plot(t_quality, train_quality[patient], color=color)
# ax2.tick_params(axis='y', labelcolor=color)

# fig.tight_layout()  # otherwise the right y-label is slightly clipped
# plt.show()