In [None]:

# Deep Sense

In [None]:
import re
from os import listdir
import numpy as np
import tensorflow as tf
import pandas as pd
import datetime
from os.path import isfile, join
import random
import keras

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, GRUCell, Conv3D, TimeDistributed, Conv1D, Bidirectional, Layer
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, BatchNormalization, GRU
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Reshape, Concatenate
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import Callback
import tensorflow.keras.backend as K
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
import math
import os
from scipy import stats
from sklearn.preprocessing import OneHotEncoder
import csv


# Import the custom data generator. The script is looked up from /kaggle/usr/lib,
# so the working directory is switched there just for the import statement.
os.chdir("/kaggle/usr/lib") 
from datagenerator_multi_output import DataGenerator
# Switch back so that relative paths used later (checkpoints, id lists, saved
# model) resolve under /kaggle/working/.
os.chdir("/kaggle/working/")


In [None]:
# Enable mixed precision for every Keras layer created after this cell, using the
# experimental policy API.
# NOTE(review): newer TensorFlow releases expose this as
# tf.keras.mixed_precision.set_global_policy — confirm against the installed version.
policy = tf.keras.mixed_precision.experimental.Policy('mixed_float16')
tf.keras.mixed_precision.experimental.set_policy(policy) 

# Echo the dtypes the policy resolves to.
print('Compute dtype: %s' % policy.compute_dtype)
print('Variable dtype: %s' % policy.variable_dtype)


In [None]:
#### Set the seed for random operations.
# This makes our experiments reproducible.
SEED = 1234
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Window length and per-signal sampling rates. Their products are the number of
# samples per window handed to deepSense_TD (presumably seconds and Hz — TODO confirm).
EPOCH_LENGTH = 30

SAMPLE_RATE = 250
SAMPLE_RATE_AIRFLOW = 10
SAMPLE_RATE_BODYPOSITION = 10


# Input locations of the pre-processed signals and labels.
ecg_path = '/kaggle/input/shhs-processed/ecgs/shhs2/'
airflow_path = '/kaggle/input/shhs-processed/airflows/shhs2/'
hypnogram_path = '/kaggle/input/shhs-processed/hypnogram/shhs2/'
bp_path = '/kaggle/input/shhs-processed/body_position_encoded/shhs2/'
# NOTE: the name is misspelled ("heatlhy"); it is kept because other cells
# reference it under this spelling.
heatlhy_path = '/kaggle/input/shhs-processed/'


# The hypnogram head emits OUT_DIM + 1 classes.
OUT_DIM = 4  # len(idDict)

DATASET_OLD = 'df_all_checkpoint_4.csv'
DATASET_NEW = 'analysis_all.csv'

BATCH_SIZE = 4

# Number of recordings drawn for the test split and, separately, for validation.
TEST_SIZE = 32

TIME_STEPS = SAMPLE_RATE * EPOCH_LENGTH
STEP = 1
STAGES = 4


# Per-class weights for the hypnogram output (one entry per class, OUT_DIM + 1 in
# total). An earlier dict-valued `class_weight` that was immediately overwritten
# by this list has been removed: it was dead code carrying different numbers.
class_weight = [0.5,0.5,0.5,1.6,1.2]

# Weights handed to the data generator for the status output.
class_weights_status=[1.5,0.7]

# Relative contribution of each output's loss to the total loss.
lossWeights = {'hypno_output': 1.8, 'status_output': 1.0}


# Filters per convolution before / after the three branches are merged.
FILTER_BEFORE = 48
FILTER_AFTER = 48

load = False # load pre-trained model
tb = False # tensorboard
cp = True # checkpoint
early_stop = False # early stopping

In [None]:
def _td_conv_bn(x, filters, kernel_size, strides):
    """Apply a time-distributed Conv2D (ReLU) followed by time-distributed batch norm."""
    x = TimeDistributed(Conv2D(filters=filters, kernel_size=kernel_size,
                               activation='relu', strides=strides))(x)
    return TimeDistributed(BatchNormalization())(x)


def _td_conv_stack(x, filters, layer_specs):
    """Chain `_td_conv_bn` blocks; `layer_specs` is a sequence of (kernel_size, strides)."""
    for kernel_size, strides in layer_specs:
        x = _td_conv_bn(x, filters, kernel_size, strides)
    return x


def _channels_to_width(x):
    """Reshape a 5-D conv output so that its last (channel) axis becomes axis 3.

    The target shape is (-1, x.shape[2], x.shape[4], 1), batch axis excluded.
    Assumes axis 3 of `x` has already collapsed to size 1 so that -1 resolves to
    the number of time steps — TODO confirm for other input widths.
    """
    return Reshape((-1, x.shape[2], x.shape[4], 1))(x)


def _gru_head(x, units, return_sequences):
    """GRU (ReLU, L2 kernel regulariser) followed by dropout and batch norm."""
    x = GRU(units, activation='relu', return_sequences=return_sequences,
            kernel_regularizer=l2(0.01))(x)
    x = Dropout(rate=0.4)(x)
    return BatchNormalization()(x)


def deepSense_TD(shape_2,shape_3,shape_7,shape_8,shape_9,shape_10):
    """Build the three-branch, two-output DeepSense graph.

    Each signal gets its own stack of time-distributed convolutions, the three
    branch outputs are concatenated and convolved jointly, and two separate GRU
    heads produce the outputs. Layers are created in the same order as in the
    original hand-unrolled version, so Keras auto-generated layer names are
    unchanged. The shape debug prints of the earlier version were dropped.

    Args:
        shape_2, shape_3: the two per-step spatial dimensions of the ECG input.
        shape_7, shape_8: the two per-step spatial dimensions of the airflow input.
        shape_9, shape_10: the two per-step spatial dimensions of the body-position input.

    Returns:
        A pair ``(inputs, outputs)``: ``inputs`` is ``[input_ecg, input_air,
        input_bp]`` and ``outputs`` is ``[hypno_output, status_output]`` where
        ``hypno_output`` is a per-time-step softmax over ``OUT_DIM + 1`` classes
        and ``status_output`` is a single sigmoid unit. Both output layers are
        forced to float32.
    """
    # INDIVIDUAL CONVOLUTION LAYERS

    # ECG conv net
    input_ecg = Input(shape=[None, shape_2, shape_3, 1])
    ecg_features = _td_conv_stack(input_ecg, FILTER_BEFORE, [
        ([32, 1], (3, 1)),
        ([32, 1], (2, 1)),
        ([32, 1], (2, 1)),
        ([32, 1], (2, 1)),
        ([16, 1], (2, 1)),
        ([4, 1], (1, 1)),
        ([4, 1], (1, 1)),
        ([4, 1], (1, 1)),
    ])
    flat_ecg = _channels_to_width(ecg_features)

    # Airflow conv net
    input_air = Input(shape=[None, shape_7, shape_8, 1])
    air_features = _td_conv_stack(input_air, FILTER_BEFORE, [
        ([8, 1], (2, 1)),
        ([8, 1], (1, 1)),
        ([6, 1], (1, 1)),
        ([4, 1], (1, 1)),
        ([4, 1], (1, 1)),
        ([4, 1], (1, 1)),
    ])
    flat_air = _channels_to_width(air_features)

    # Body-position conv net; the first kernel spans 4 columns of the input.
    input_bp = Input(shape=[None, shape_9, shape_10, 1])
    bp_features = _td_conv_stack(input_bp, FILTER_BEFORE, [
        ([32, 4], (2, 1)),
        ([4, 1], (1, 1)),
        ([4, 1], (1, 1)),
        ([4, 1], (1, 1)),
    ])
    flat_bp = _channels_to_width(bp_features)

    # CONCATENATION OF THE 3 NETS along the axis that now holds the branch
    # channels; the remaining axes of the three branches must therefore match.
    merge = Concatenate(axis=-2)([flat_ecg, flat_air, flat_bp])

    # MERGE CONVOLUTION LAYERS
    # The first kernel is FILTER_BEFORE * 3 wide, i.e. it covers the concatenated
    # channels of all three branches in one step.
    merged_features = _td_conv_stack(merge, FILTER_AFTER, [
        ([1, FILTER_BEFORE * 3], (1, 1)),
        ([4, 1], (2, 1)),
        ([8, 1], (2, 1)),
        ([8, 1], (1, 1)),
        ([8, 1], (1, 1)),
    ])
    flat8 = TimeDistributed(Flatten())(merged_features)

    # RECURRENT LAYERS FOR HYPNOGRAM (one prediction per time step)
    hypno_features = _gru_head(flat8, 256, return_sequences=True)
    hypno_features = _gru_head(hypno_features, 128, return_sequences=True)
    dense_hypno = TimeDistributed(Dense(OUT_DIM + 1, activation='softmax',dtype=tf.float32),name = 'hypno_output')(hypno_features)

    # RECURRENT LAYERS FOR HEALTHY / UNHEALTHY (one prediction per sequence:
    # the second GRU returns only its last state)
    status_features = _gru_head(flat8, 256, return_sequences=True)
    status_features = _gru_head(status_features, 128, return_sequences=False)
    dense_healthy = Dense(1, activation='sigmoid',name = 'status_output',dtype=tf.float32)(status_features)

    return [input_ecg,input_air,input_bp], [dense_hypno, dense_healthy]

In [None]:
#https://towardsdatascience.com/implementing-macro-f1-score-in-keras-what-not-to-do-e9f1aa04029d

#F1 Metrics
def f1(y_true, y_pred):
    """Batch-wise F1 score built from Keras backend ops.

    Products and inputs are clipped to [0, 1] and rounded before being summed,
    and ``K.epsilon()`` is added to every denominator to avoid division by zero.
    """
    eps = K.epsilon()

    def _rounded_total(values):
        # Sum of the values after clipping to [0, 1] and rounding.
        return K.sum(K.round(K.clip(values, 0, 1)))

    hits = _rounded_total(y_true * y_pred)
    actual = _rounded_total(y_true)
    predicted = _rounded_total(y_pred)

    precision = hits / (predicted + eps)
    recall = hits / (actual + eps)
    return 2*(precision*recall)/(precision+recall+eps)

In [None]:

random.seed(SEED)

if not load:
    onlyfiles = [f for f in listdir(ecg_path) if isfile(join(ecg_path, f))]

    # Recording ids are the file names without the ".npz" extension. Files with
    # any other extension are skipped instead of crashing the id extraction.
    id = sorted(f[:-len('.npz')] for f in onlyfiles if f.endswith('.npz'))

    print(len(id))

    np.random.seed(SEED)
    id_test = np.random.choice(id, size=TEST_SIZE,replace=False)

    # Use sorted(...) rather than list(set(...)): the iteration order of a set of
    # strings changes between interpreter runs, so sampling from an unsorted list
    # would make the validation split differ from run to run despite the seed.
    id = sorted(set(id) - set(id_test))

    id_validation = np.random.choice(id, size=TEST_SIZE,replace=False)

    # What is left after removing both hold-out sets is the training set.
    id = sorted(set(id) - set(id_validation))

    print(len(id))

In [None]:
# Learning rate handed to the optimizer.
lr = 1e-4




In [None]:
#custom loss 
from sklearn.metrics import hamming_loss


def hamming(y_true, y_pred):
    """Return the Hamming loss between `y_true` and `y_pred`.

    Delegates to `hamming_loss`, imported from sklearn.metrics just above. The
    previous body called `hamming_loss_fn`, a name that is neither defined nor
    imported in the visible notebook, so every call raised NameError.

    NOTE(review): scikit-learn metrics work on concrete label arrays; presumably
    this is meant for offline evaluation rather than as a compiled Keras loss —
    confirm the intended use.
    """
    return hamming_loss(y_true, y_pred)
    

### TPU

In [None]:
# Disabled TPU initialisation, kept as a bare string literal so none of it runs.
"""
# detect and init the TPU
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)

# instantiate a distribution strategy
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)
"""

In [None]:
# Disabled TPU model build, kept as a bare string literal so none of it runs.
# NOTE(review): the text is stale — it calls deepSense_TD with four arguments
# although the function takes six, and builds the generators without bp_path.
"""
BATCH_SIZE = 2*tpu_strategy.num_replicas_in_sync
with tpu_strategy.scope():
    input, output = deepSense_TD(SAMPLE_RATE*EPOCH_LENGTH,1,SAMPLE_RATE_AIRFLOW*EPOCH_LENGTH,1)
    model = Model(inputs=input,outputs=output)
    optimizer = keras.optimizers.Adam(learning_rate=lr)
    # Compile Model
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    
training_generator = DataGenerator(id,  ecg_path=ecg_path, airflow_path=airflow_path,hypnogram_path=hypnogram_path, batch_size=BATCH_SIZE)
validation_generator = DataGenerator(id_validation,  ecg_path=ecg_path, airflow_path=airflow_path,hypnogram_path=hypnogram_path, batch_size=BATCH_SIZE)
  """  
    


### GPU

In [None]:
# Build the graph. The arguments are, in order, the per-window sample count and
# width of the ECG, airflow and body-position inputs (the last one is 4 wide).
# NOTE: `input` shadows the Python builtin of the same name from here on.
input, output = deepSense_TD(SAMPLE_RATE*EPOCH_LENGTH,1,SAMPLE_RATE_AIRFLOW*EPOCH_LENGTH,1,SAMPLE_RATE_BODYPOSITION*EPOCH_LENGTH,4)
model = Model(inputs=input,outputs=output)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
#optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer, loss_scale='dynamic')


# Compile Model
# One loss and one metric per named output; lossWeights sets how much each loss
# contributes to the total.
# NOTE(review): sample_weight_mode='temporal' is given as a single string for both
# outputs although status_output comes from a GRU with return_sequences=False —
# confirm it matches the sample weights the generator yields.
model.compile(optimizer=optimizer, loss={
                  'hypno_output': 'sparse_categorical_crossentropy', 
                  'status_output': 'binary_crossentropy'},
              loss_weights=lossWeights,
              metrics={
                  'hypno_output': 'sparse_categorical_accuracy', 
                  'status_output': 'binary_accuracy'},
              sample_weight_mode='temporal')
    


In [None]:
# Load a pre-trained model together with the hold-out id lists saved next to it.
load_from = "/kaggle/input/model-deepsense/multi_1x144/"

if load:

    model = load_model(load_from + 'cp_42.h5')

    onlyfiles = [f for f in listdir(ecg_path) if isfile(join(ecg_path, f))]

    # Recording ids are the file names without the ".npz" extension. Files with
    # any other extension are skipped instead of crashing the id extraction.
    id = sorted(f[:-len('.npz')] for f in onlyfiles if f.endswith('.npz'))

    print(len(id))

    # ndmin=1 keeps the result one-dimensional even when a file holds a single
    # id; a 0-d array cannot be turned into a set below.
    id_test = np.loadtxt(load_from + 'id_test.txt',dtype='str',ndmin=1)
    id_validation = np.loadtxt(load_from + 'id_validation.txt',dtype='str',ndmin=1)

    print(id_validation)

    # Training ids are everything outside both hold-out sets. They are sorted
    # because the iteration order of a set of strings varies between runs.
    id = sorted(set(id) - set(id_test) - set(id_validation))

    print(len(id))

Save the IDs of the test and validation samples

In [None]:
# Write each hold-out id list to its own text file (one id per line) and echo it.
for ids_file, ids in (('id_validation.txt', id_validation), ('id_test.txt', id_test)):
    with open(ids_file, 'w') as f:
        f.writelines("%s\n" % item for item in ids)
    print(ids)

In [None]:
# Keyword arguments shared by the training and validation generators; only the
# list of recording ids differs between the two.
_generator_kwargs = dict(
    ecg_path=ecg_path,
    airflow_path=airflow_path,
    bp_path=bp_path,
    hypnogram_path=hypnogram_path,
    healthy_path=heatlhy_path,
    batch_size=BATCH_SIZE,
    class_weights=class_weight,
    class_weights_status=class_weights_status,
    weights=True,
)

training_generator = DataGenerator(id, **_generator_kwargs)
validation_generator = DataGenerator(id_validation, **_generator_kwargs)

model.summary()

In [None]:


# Keep only real Layer instances in the model's private layer list before
# plotting.
# NOTE(review): this relies on the private attribute `model._layers`, presumably
# as a workaround for plot_model failing on non-Layer entries — confirm it is
# still needed (and still exists) in the installed TensorFlow version.
model._layers = [
    layer for layer in model._layers if isinstance(layer, Layer)
]

# Render the architecture, with tensor shapes, to a PNG in the working directory.
tf.keras.utils.plot_model(model, 'multi_input_and_output_model.png', show_shapes=True)



In [None]:
# Assemble the training callbacks according to the cp / tb / early_stop flags.
callbacks = []
tf.random.set_seed(SEED)

# Model checkpoint: one full-model .h5 file per epoch.
if cp:
    ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath='cp_{epoch:02d}.h5',
                                                       save_weights_only=False)  # False to save the model directly
    callbacks.append(ckpt_callback)

# TensorBoard logging. The callback is taken from tf.keras, like the model and
# every other callback here, rather than from the standalone `keras` package.
if tb:
    tensorboard_callback = tf.keras.callbacks.TensorBoard("logs", profile_batch=0)
    callbacks.append(tensorboard_callback)

# Early stopping on the validation accuracy of the hypnogram output. The model
# has two named outputs, so Keras logs accuracies as
# 'val_<output name>_<metric name>'; a plain 'val_accuracy' is never produced and
# monitoring it would leave early stopping inactive.
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_hypno_output_sparse_categorical_accuracy',
        mode='max', patience=5, restore_best_weights=True)
    callbacks.append(es_callback)

In [None]:


# Train on the training generator and validate on the validation generator after
# each epoch; `history` is read by the plotting cells below.
history = model.fit(training_generator, 
                    validation_data=validation_generator, 
                    epochs=4,
                    use_multiprocessing=True,
                    #max_queue_size = 1,
                    #class_weight=class_weight,
                    callbacks=callbacks)

In [None]:
import matplotlib.pyplot as plt
# list all data in history
print(history.history.keys())

# summarize history for accuracy
# The model has two named outputs, so Keras records one accuracy series per
# output (named '<output name>_<metric name>') and no plain 'accuracy' key;
# indexing history.history['accuracy'] would raise KeyError. Plot whichever
# accuracy series were actually recorded, each next to its validation series.
accuracy_keys = [key for key in history.history
                 if key.endswith('accuracy') and not key.startswith('val_')]

for key in accuracy_keys:
    fig, ax = plt.subplots()
    ax.plot(history.history[key], label='train')
    if 'val_' + key in history.history:
        ax.plot(history.history['val_' + key], label='validation')
    ax.set_title(key, color='w')
    ax.set_ylabel('accuracy', color='w')
    ax.set_xlabel('epoch', color='w')
    ax.legend(loc='upper left')
    plt.show()


In [None]:
# summarize history for loss
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
# 'val_loss' comes from validation_generator, so it is labelled "validation";
# the separate test split (id_test) is not involved here.
ax.plot(history.history['val_loss'], label='validation')
ax.set_title('model loss', color='w')
ax.set_ylabel('loss', color='w')
ax.set_xlabel('epoch', color='w')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Saving the model
from datetime import date

today = date.today()

# Put the date (YYYY-MM-DD) into the file name. `today` used to be computed but
# never used, which left the name as 'my_model_.h5' with a dangling underscore.
model.save('my_model_' + today.isoformat() + '.h5')

In [None]:
# Print the TensorFlow version seen by a fresh python3 process.
!python3 -c 'import tensorflow as tf; print(tf.__version__)'

In [None]:
# The model has two outputs, so predict() returns a list of two arrays: the
# per-time-step class probabilities and the per-sequence status probability.
# Taking argmax over that list directly does not work; unpack it first.
hypno_prob, status_prob = model.predict(validation_generator,steps=1)
print(hypno_prob.shape, status_prob.shape)

# Most likely class for every time step of every sequence in the batch.
prediction = np.argmax(hypno_prob,axis=-1)
print(prediction)
first = prediction[0]
print(first[1000:1050])

In [None]:
dg = DataGenerator(['204565','201048', '200145', '204285'],  ecg_path=ecg_path, airflow_path=airflow_path, bp_path=bp_path, hypnogram_path=hypnogram_path, batch_size=BATCH_SIZE,shuffle=False)

# The model has two outputs, so predict() returns a list of two arrays; only the
# per-time-step class probabilities are needed here.
hypno_prob, _status_prob = model.predict(dg)
prediction = np.argmax(hypno_prob,axis=-1)
first = prediction[0]
print(first[500:550])

# Number of time steps predicted as each of the OUT_DIM + 1 classes.
for stage in range(OUT_DIM + 1):
    print(np.count_nonzero(first==stage))

In [None]:
# Ground-truth stages of recording 204565, for comparison with the predictions.
# Only the 'Stage' column is loaded; the former rename of a 'Sleep' column was a
# silent no-op (that column is never read) and has been removed.
hypnogram = pd.read_csv(hypnogram_path + '204565' + '.csv', usecols=['Stage'])

hypnogram_reshaped = np.array(hypnogram).reshape(-1, 1)

print(hypnogram_reshaped[500:550])

# Number of time steps labelled with each of the OUT_DIM + 1 classes.
for stage in range(OUT_DIM + 1):
    print(np.count_nonzero(hypnogram_reshaped==stage))

In [None]:
# NOTE(review): this evaluates 20 batches of the *training* generator; the
# commented-out line suggests a held-out generator built from id_test was
# intended — confirm before reporting these numbers.
#test_generator = DataGenerator(id_test,  ecg_path=ecg_path, airflow_path=airflow_path, bp_path=bp_path, hypnogram_path=hypnogram_path, batch_size=BATCH_SIZE)
model.evaluate(training_generator,steps=20)

In [None]:
!pip install tf-nightly

In [None]:
# Print the TensorFlow version seen by a fresh python3 process (after the install above).
!python3 -c 'import tensorflow as tf; print(tf.__version__)'

In [None]:
# Show a link to id_test.txt (path relative to the working directory) as the cell output.
from IPython.display import FileLink
FileLink(r'id_test' +'.txt')