# combine tabular data with seq of clinical and therapy readcodes

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout, Bidirectional, Input, concatenate, Reshape, Activation, Flatten, Add, BatchNormalization, Multiply, LeakyReLU
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from tensorflow.keras.metrics import AUC, SensitivityAtSpecificity
from tensorflow.keras.optimizers import Adam, Adagrad, RMSprop, Adamax, SGD, Adadelta
from tensorflow.keras.initializers import Constant
from tensorflow.keras.regularizers import L1L2, L1, L2
from livelossplot import PlotLossesKeras
#internal validation
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, f1_score, balanced_accuracy_score, matthews_corrcoef, auc, average_precision_score, roc_auc_score, balanced_accuracy_score, roc_curve, accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler

import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import pickle

# fix random seed for reproducibility
tf.random.set_seed(1234)



2023-12-10 00:44:09.717910: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-10 00:44:09.785488: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-10 00:44:09.785545: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-10 00:44:09.785578: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-10 00:44:09.799694: I tensorflow/core/platform/cpu_feature_g

# Load tabular data

In [2]:
# Data loader
# trainingData = pd.read_csv('../FinalData/trainingDataEncoded_08102023.csv')
# validationData = pd.read_csv('../FinalData/validationDataEncoded_08102023.csv')
# evaluationData = pd.read_csv('../FinalData/evaluationDataEncoded_08102023.csv')
# evaluationDataWales = pd.read_csv('../FinalData/evaluationDataWalesEncoded_08102023.csv')
# evaluationDataScotland = pd.read_csv('../FinalData/evaluationDataScotlandEncoded_08102023.csv')

trainingData, validationData, internalEvaluationData, evaluationData, evaluationDataWales, evaluationDataScotland = pickle.load(open('../FinalData/dataset_scaled_2vs1_09122023.sav', 'rb'))

trainingData = trainingData[(trainingData.age >=8) & (trainingData.age <=80)]
validationData = validationData[(validationData.age >=8) & (validationData.age <=80)]
internalEvaluationData = internalEvaluationData[(internalEvaluationData.age >=8) & (internalEvaluationData.age <=80)]
evaluationData = evaluationData[(evaluationData.age >=8) & (evaluationData.age <=80)]
evaluationDataWales = evaluationDataWales[(evaluationDataWales.age >=8) & (evaluationDataWales.age <=80)]
evaluationDataScotland = evaluationDataScotland[(evaluationDataScotland.age >=8) & (evaluationDataScotland.age <=80)]


trainingData = trainingData.rename({'3MonthsOutcome': '3months', '6MonthsOutcome': '6months','9MonthsOutcome': '9months','12MonthsOutcome': '12months',}, axis=1)
validationData = validationData.rename({'3MonthsOutcome': '3months', '6MonthsOutcome': '6months','9MonthsOutcome': '9months','12MonthsOutcome': '12months',}, axis=1)
internalEvaluationData = internalEvaluationData.rename({'3MonthsOutcome': '3months', '6MonthsOutcome': '6months','9MonthsOutcome': '9months','12MonthsOutcome': '12months',}, axis=1)
evaluationData = evaluationData.rename({'3MonthsOutcome': '3months', '6MonthsOutcome': '6months','9MonthsOutcome': '9months','12MonthsOutcome': '12months',}, axis=1)
evaluationDataWales = evaluationDataWales.rename({'3MonthsOutcome': '3months', '6MonthsOutcome': '6months','9MonthsOutcome': '9months','12MonthsOutcome': '12months',}, axis=1)
evaluationDataScotland = evaluationDataScotland.rename({'3MonthsOutcome': '3months', '6MonthsOutcome': '6months','9MonthsOutcome': '9months','12MonthsOutcome': '12months',}, axis=1)

# trainingData = trainingData.rename({'outcome_3months': '3months', 'outcome_combined_6months': '6months','outcome_combined_12months': '12months','outcome_combined_24months': '24months',}, axis=1)
# validationData = validationData.rename({'outcome_3months': '3months', 'outcome_combined_6months': '6months','outcome_combined_12months': '12months','outcome_combined_24months': '24months',}, axis=1)
# internalEvaluationData = internalEvaluationData.rename({'outcome_3months': '3months', 'outcome_combined_6months': '6months','outcome_combined_12months': '12months','outcome_combined_24months': '24months',}, axis=1)
# evaluationData = evaluationData.rename({'outcome_3months': '3months', 'outcome_combined_6months': '6months','outcome_combined_12months': '12months','outcome_combined_24months': '24months',}, axis=1)
# evaluationDataWales = evaluationDataWales.rename({'outcome_3months': '3months', 'outcome_combined_6months': '6months','outcome_combined_12months': '12months','outcome_combined_24months': '24months',}, axis=1)
# evaluationDataScotland = evaluationDataScotland.rename({'outcome_3months': '3months', 'outcome_combined_6months': '6months','outcome_combined_12months': '12months','outcome_combined_24months': '24months',}, axis=1)

In [3]:
#Define feature candidates

features_columns = trainingData.columns.to_list()
exclude_columns = ['patid', 'practice_id', #identifier
                   'BMI', #use the categorical instead
                   'ethnicity', #use ethnic_group instead
                   'Spacer',  #all zero
                   
                   'outcome_3months', 'outcome_6months', 'outcome_9months', 'outcome_12months', 'outcome_15months', 'outcome_18months', 
                   'outcome_21months', 'outcome_24months', 'outcome_combined_6months', 'outcome_combined_9months', 'outcome_combined_12months', 
                   'outcome_combined_15months', 'outcome_combined_18months', 'outcome_combined_24months', '3months', '6months', '9months', '12months', '24months', #outcomes variable
                   
                   'postcode_district', 'County', 'LocalAuthority', 'OutputAreaClassification', #location related variables, use IMD decile only
                   
                   'cat_age', 'cat_average_daily_dose_ICS', 'cat_prescribed_daily_dose_ICS', 'cat_ICS_medication_possesion_ratio', 'cat_numOCS', 'cat_numOCSEvents', 
                   'cat_numOCSwithLRTI', 'cat_numAcuteRespEvents', 'cat_numAntibioticsEvents', 'cat_numAntibioticswithLRTI', 'cat_numAsthmaAttacks', 'cat_numHospEvents', 
                   'cat_numPCS', 'cat_numPCSAsthma', #use continous vars instead
                   
                   'count_rhinitis', 'count_cardiovascular', 'count_heartfailure',
                   'count_psoriasis', 'count_anaphylaxis', 'count_diabetes', 'count_ihd',
                   'count_anxiety', 'count_eczema', 'count_nasalpolyps',
                   'count_paracetamol', 'count_nsaids', 'count_betablocker', #use binary ones
                   
                   'paracetamol', 'nsaids', 'betablocker', #no data in evaluation
                   
                   'numOCSEvents', #duplicate with numOCS
                   
                   'month_12', 'month_4', 'month_5', 'month_10', 'month_1', 'month_6', 'month_3', 
                   'month_11', 'month_8', 'month_9', 'month_7', 'month_2', #month of attacks
                   
                   # 'system_EMIS', 'system_SystemOne', 'system_Vision', #primary care system used
                  ]
exclude_columns = exclude_columns + [x for x in features_columns if '_count' in x] #filter out commorbid count variables
features_columns = [x for x in features_columns if x not in exclude_columns]
print('Features size: ', len(features_columns))
print(features_columns)

Features size:  67
['sex', 'age', 'CharlsonScore', 'BTS_step', 'average_daily_dose_ICS', 'prescribed_daily_dose_ICS', 'ICS_medication_possesion_ratio', 'numOCS', 'PriorEducation', 'numPCS', 'numPCSAsthma', 'numAntibioticsEvents', 'numAntibioticswithLRTI', 'numOCSwithLRTI', 'numAsthmaAttacks', 'numAcuteRespEvents', 'numHospEvents', 'rhinitis', 'cardiovascular', 'heartfailure', 'psoriasis', 'anaphylaxis', 'diabetes', 'ihd', 'anxiety', 'eczema', 'nasalpolyps', 'ethnic_group_Asian - ethnic group', 'ethnic_group_Black - ethnic group', 'ethnic_group_Mixed ethnic census group', 'ethnic_group_Other ethnic group', 'ethnic_group_White - ethnic group', 'ethnic_group_not_recorded', 'smokingStatus_Active Smoker', 'smokingStatus_Former Smoker', 'smokingStatus_Non Smoker', 'DeviceType_BAI', 'DeviceType_DPI', 'DeviceType_NEB', 'DeviceType_pMDI', 'DeviceType_unknown', 'cat_BMI_normal', 'cat_BMI_not recorded', 'cat_BMI_obese', 'cat_BMI_overweight', 'cat_BMI_underweight', 'imd_decile_0', 'imd_decile_1', 

# load sequence data

In [4]:
clinical = pd.read_feather('../SeqModel/all_data_clinical.feather')
therapy = pd.read_feather('../SeqModel/all_data_therapy.feather')
seqCols = ['patid',
       'read_code_seq_padded_end_idx_clin',
       'month_padded_idx_end_clin',
       'read_code_seq_padded_end_idx_ther',
       'month_padded_idx_end_ther']

In [5]:
sequence_data = clinical.merge(therapy[['patid', 'read_code_seq_padded_idx', 'read_code_seq_padded_end_idx',
       'month_padded_idx', 'month_padded_idx_end']], on='patid', suffixes=['_clin', '_ther'], how='inner')

In [6]:
trainingData = trainingData.merge(sequence_data[seqCols], on='patid', how='inner')
validationData = validationData.merge(sequence_data[seqCols], on='patid', how='inner')
internalEvaluationData = internalEvaluationData.merge(sequence_data[seqCols], on='patid', how='inner')
evaluationData = evaluationData.merge(sequence_data[seqCols], on='patid', how='inner')
evaluationDataWales = evaluationDataWales.merge(sequence_data[seqCols], on='patid', how='inner')
evaluationDataScotland = evaluationDataScotland.merge(sequence_data[seqCols], on='patid', how='inner')

In [7]:
print(trainingData.shape)
print(validationData.shape)
print(internalEvaluationData.shape)
print(evaluationData.shape)
print(evaluationDataWales.shape)
print(evaluationDataScotland.shape)

(127450, 131)
(39675, 131)
(31980, 131)
(8044, 131)
(5359, 131)
(2685, 131)


In [29]:
Xt_train = np.array(trainingData[features_columns].values)
Xt_val = np.array(validationData[features_columns].values)
Xt_internaleval = np.array(internalEvaluationData[features_columns].values)
Xt_eval = np.array(evaluationData[features_columns].values)
Xt_eval_Wales = np.array(evaluationDataWales[features_columns].values)
Xt_eval_Scotland = np.array(evaluationDataScotland[features_columns].values)

#scalling tabular data
scaler = StandardScaler().fit(Xt_train)
Xt_train = scaler.transform(Xt_train)
Xt_val = scaler.transform(Xt_val)
Xt_internaleval = scaler.transform(Xt_internaleval)
Xt_eval = scaler.transform(Xt_eval)
Xt_eval_Wales = scaler.transform(Xt_eval_Wales)
Xt_eval_Scotland = scaler.transform(Xt_eval_Scotland)

Xclin_train = np.array(trainingData['read_code_seq_padded_end_idx_clin'].values)
Xclin_val = np.array(validationData['read_code_seq_padded_end_idx_clin'].values)
Xclin_internaleval = np.array(internalEvaluationData['read_code_seq_padded_end_idx_clin'].values)
Xclin_eval = np.array(evaluationData['read_code_seq_padded_end_idx_clin'].values)
Xclin_eval_Wales = np.array(evaluationDataWales['read_code_seq_padded_end_idx_clin'].values)
Xclin_eval_Scotland = np.array(evaluationDataScotland['read_code_seq_padded_end_idx_clin'].values)
Xclin_train = np.array([x for x in Xclin_train])
Xclin_val = np.array([x for x in Xclin_val])
Xclin_internaleval = np.array([x for x in Xclin_internaleval])
Xclin_eval = np.array([x for x in Xclin_eval])
Xclin_eval_Wales = np.array([x for x in Xclin_eval_Wales])
Xclin_eval_Scotland = np.array([x for x in Xclin_eval_Scotland])

Xther_train = np.array(trainingData['read_code_seq_padded_end_idx_ther'].values)
Xther_val = np.array(validationData['read_code_seq_padded_end_idx_ther'].values)
Xther_internaleval = np.array(internalEvaluationData['read_code_seq_padded_end_idx_ther'].values)
Xther_eval = np.array(evaluationData['read_code_seq_padded_end_idx_ther'].values)
Xther_eval_Wales = np.array(evaluationDataWales['read_code_seq_padded_end_idx_ther'].values)
Xther_eval_Scotland = np.array(evaluationDataScotland['read_code_seq_padded_end_idx_ther'].values)
Xther_train = np.array([x for x in Xther_train])
Xther_val = np.array([x for x in Xther_val])
Xther_internaleval = np.array([x for x in Xther_internaleval])
Xther_eval = np.array([x for x in Xther_eval])
Xther_eval_Wales = np.array([x for x in Xther_eval_Wales])
Xther_eval_Scotland = np.array([x for x in Xther_eval_Scotland])



print(Xt_train.shape)
print(Xt_internaleval.shape)
print(Xt_val.shape)
print(Xt_eval.shape)
print(Xt_eval_Wales.shape)
print(Xt_eval_Scotland.shape)

# target_outcomes = ['3months', '6months', '12months', '24months'] 


(127450, 67)
(31980, 67)
(39675, 67)
(8044, 67)
(5359, 67)
(2685, 67)


In [30]:
#vocab
code2idx_clin = pickle.load(open('../SeqModel/all_vocab_clinical.sav', 'rb'))
code2idx_ther = pickle.load(open('../SeqModel/all_vocab_therapy.sav', 'rb'))
month2idx = pickle.load(open('../SeqModel/all_vocab_month.sav', 'rb'))
vocab_size_clinical = len(code2idx_clin)+1
vocab_size_therapy = len(code2idx_ther)+1
month_size = len(month2idx)+1
print(vocab_size_clinical)
print(vocab_size_therapy)
print(month_size)

51644
10709
13


In [32]:
target_outcome = '12months'
max_codes_clin = Xclin_train.shape[1]
max_codes_ther = Xther_train.shape[1]
tab_feature_size = Xt_train.shape[1]

y_train = trainingData[target_outcome].values
y_val = validationData[target_outcome].values
y_internaleval = internalEvaluationData[target_outcome].values
y_eval = evaluationData[target_outcome].values
y_eval_Wales = evaluationDataWales[target_outcome].values
y_eval_Scotland = evaluationDataScotland[target_outcome].values

pos_weight = sum(x == 0 for x in y_train)/sum(x == 1 for x in y_train)
class_weight = {0:1, 1:pos_weight}
print(class_weight)

{0: 1, 1: 5.418068284822238}


In [33]:
def hierarchical():
    #tabular dara - demography   
    inputs1 = Input(shape=tab_feature_size)
    nn = Dense(32, activation='relu', kernel_initializer='glorot_uniform', kernel_regularizer=L1L2(l1=0.0, l2=0.1))(inputs1)
    nn = Dropout(0.5)(nn)
    
    #clinical embedding for lstm
    inputs2 = Input(shape=max_codes_clin)
    embedding_clin = Embedding(vocab_size_clinical, int(np.cbrt(vocab_size_clinical)), input_length=max_codes_clin)(inputs2)
    lstmClinical = Bidirectional(LSTM(units=16, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.1)))(embedding_clin)
    
    
    ###Layer 1 - merge tab and lstm clin
    nn = Reshape((1, 32))(nn)
    add = concatenate([nn, lstmClinical], axis=1)
    
    
    #therapy embedding for lstm
    inputs3 = Input(shape=max_codes_ther)
    embedding_ther = Embedding(vocab_size_therapy, int(np.cbrt(vocab_size_therapy)), input_length=max_codes_ther)(inputs3)
    lstmTherapy = Bidirectional(LSTM(units=16, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.1)))(embedding_ther)
    
    ###Layer 2 - merge add (tab+clin) and lstm ther
    add = concatenate([add, lstmTherapy], axis=1)
    
    ###layer 3 - LSTM to the final product
    lstm = Bidirectional(LSTM(units=8, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.1)))(add)
    lstm = Dropout(0.5)(lstm)
    
    
    ###layer 4 - FCN before classification layer
    final = Dense(units=8, activation=LeakyReLU())(lstm)
    final = Flatten()(final)
    finalr = Dropout(0.5)(final)
    
    ###layer 5 - classification layer
    output = Dense(1, activation='sigmoid')(final)
    
    opt = RMSprop(learning_rate=1e-3, clipvalue=.5)
    metrics = [
        AUC(num_thresholds=1000, name='auc', curve='ROC'),
        AUC(num_thresholds=1000, name='auprc', curve='PR'),
        tf.keras.metrics.Precision(name='prec'),
        tf.keras.metrics.Recall(name='rec'),
        tf.keras.metrics.TrueNegatives(name='TN'),
        tf.keras.metrics.TruePositives(name='TP'),
        tf.keras.metrics.PrecisionAtRecall(0.8)
    ]
    
    loss = tf.keras.losses.BinaryCrossentropy()
    
    model = Model(inputs=[inputs1, inputs2, inputs3], outputs=output)
    model.compile(
        loss='binary_crossentropy', 
        optimizer=opt, 
        metrics=metrics)
    return model

In [34]:
# from sklearn.utils import class_weight
# sklearn_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
# class_weight = dict(enumerate(sklearn_weights))

#Hyperparameter
lr = 1e-5
clipvalue = 0.2
epoch = 1000
batch_size = 256
embedding_vector_length = 50
month_embedding_vector_length = 5
# embedding_vector_length = int(np.sqrt(vocab_size))
# embedding_vector_length = int(np.cbrt(vocab_size))
print(embedding_vector_length)

50


In [35]:
#visualise model
model = hierarchical()
# model = earlyFussion()
print(model.summary())



2023-12-10 01:14:47.669300: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-10 01:14:47.780879: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-10 01:14:47.780931: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-10 01:14:47.784578: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-10 01:14:47.784633: I tensorflow/compile

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 67)]                 0         []                            
                                                                                                  
 dense (Dense)               (None, 32)                   2176      ['input_1[0][0]']             
                                                                                                  
 input_2 (InputLayer)        [(None, 62)]                 0         []                            
                                                                                                  
 dropout (Dropout)           (None, 32)                   0         ['dense[0][0]']               
                                                                                              

In [37]:
%%time
# TF_GPU_ALLOCATOR=cuda_malloc_async
#training
with tf.device('/CPU:0'):
    earlyStopping = EarlyStopping(monitor='val_auc', patience=50, verbose=0, mode='max', restore_best_weights=True)
    mcp_save = ModelCheckpoint('../SeqModel/seqModel_therapy_tabSeq.mdl_wts.hdf5', save_best_only=True, monitor='val_auc', mode='min')
    history = model.fit([Xt_train, Xclin_train[:,:max_codes_clin], Xther_train[:,:max_codes_ther]], y_train, validation_data=([Xt_val, Xclin_val[:,:max_codes_clin], Xther_val[:,:max_codes_ther]], y_val), 
                            epochs=epoch, batch_size=128, 
                        class_weight = class_weight, 
                        callbacks = [earlyStopping, mcp_save])

Epoch 1/1000


2023-12-10 01:15:51.838199: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at xla_ops.cc:562 : INVALID_ARGUMENT: Trying to access resource Resource-25-at-0x558aeba265c0 (defined @ /opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/base_layer_utils.py:137) located in device /job:localhost/replica:0/task:0/device:GPU:0 from device /job:localhost/replica:0/task:0/device:CPU:0
 Cf. https://www.tensorflow.org/xla/known_issues#tfvariable_on_a_different_device
2023-12-10 01:15:51.838288: W tensorflow/core/framework/op_kernel.cc:1839] OP_REQUIRES failed at xla_ops.cc:562 : INVALID_ARGUMENT: Trying to access resource Resource-24-at-0x558aecadb460 (defined @ /opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/base_layer_utils.py:137) located in device /job:localhost/replica:0/task:0/device:GPU:0 from device /job:localhost/replica:0/task:0/device:CPU:0
 Cf. https://www.tensorflow.org/xla/known_issues#tfvariable_on_a_different_device
2023-12-10

InvalidArgumentError: Graph execution error:

Detected at node RMSprop/StatefulPartitionedCall_25 defined at (most recent call last):
  File "/opt/conda/envs/rapids/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/opt/conda/envs/rapids/lib/python3.10/runpy.py", line 86, in _run_code

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 725, in start

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/opt/conda/envs/rapids/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/opt/conda/envs/rapids/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/opt/conda/envs/rapids/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 502, in process_one

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 729, in execute_request

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 540, in run_cell

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2914, in run_cell

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2960, in _run_cell

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3185, in run_cell_async

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3377, in run_ast_nodes

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3457, in run_code

  File "/tmp/ipykernel_361001/1375305670.py", line 1, in <module>

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2419, in run_cell_magic

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/decorator.py", line 232, in fun

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/magic.py", line 187, in <lambda>

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/magics/execution.py", line 1335, in time

  File "<timed exec>", line 6, in <module>

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/training.py", line 1783, in fit

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/training.py", line 1377, in train_function

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/training.py", line 1360, in step_function

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/training.py", line 1349, in run_step

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/training.py", line 1130, in train_step

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 544, in minimize

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1223, in apply_gradients

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 652, in apply_gradients

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1253, in _internal_apply_gradients

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1345, in _distributed_apply_gradients_fn

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1340, in apply_grad_to_update_var

Detected at node RMSprop/StatefulPartitionedCall_25 defined at (most recent call last):
  File "/opt/conda/envs/rapids/lib/python3.10/runpy.py", line 196, in _run_module_as_main

  File "/opt/conda/envs/rapids/lib/python3.10/runpy.py", line 86, in _run_code

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 725, in start

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/opt/conda/envs/rapids/lib/python3.10/asyncio/base_events.py", line 603, in run_forever

  File "/opt/conda/envs/rapids/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once

  File "/opt/conda/envs/rapids/lib/python3.10/asyncio/events.py", line 80, in _run

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 502, in process_one

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 729, in execute_request

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 429, in do_execute

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 540, in run_cell

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2914, in run_cell

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2960, in _run_cell

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3185, in run_cell_async

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3377, in run_ast_nodes

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3457, in run_code

  File "/tmp/ipykernel_361001/1375305670.py", line 1, in <module>

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 2419, in run_cell_magic

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/decorator.py", line 232, in fun

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/magic.py", line 187, in <lambda>

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/IPython/core/magics/execution.py", line 1335, in time

  File "<timed exec>", line 6, in <module>

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/training.py", line 1783, in fit

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/training.py", line 1377, in train_function

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/training.py", line 1360, in step_function

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/training.py", line 1349, in run_step

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/training.py", line 1130, in train_step

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 544, in minimize

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1223, in apply_gradients

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 652, in apply_gradients

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1253, in _internal_apply_gradients

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1345, in _distributed_apply_gradients_fn

  File "/opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/optimizers/optimizer.py", line 1340, in apply_grad_to_update_var

2 root error(s) found.
  (0) INVALID_ARGUMENT:  Trying to access resource Resource-25-at-0x558aeba265c0 (defined @ /opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/base_layer_utils.py:137) located in device /job:localhost/replica:0/task:0/device:GPU:0 from device /job:localhost/replica:0/task:0/device:CPU:0
 Cf. https://www.tensorflow.org/xla/known_issues#tfvariable_on_a_different_device
	 [[{{node RMSprop/StatefulPartitionedCall_25}}]]
	 [[Cumsum_4/_74]]
  (1) INVALID_ARGUMENT:  Trying to access resource Resource-25-at-0x558aeba265c0 (defined @ /opt/conda/envs/rapids/lib/python3.10/site-packages/keras/src/engine/base_layer_utils.py:137) located in device /job:localhost/replica:0/task:0/device:GPU:0 from device /job:localhost/replica:0/task:0/device:CPU:0
 Cf. https://www.tensorflow.org/xla/known_issues#tfvariable_on_a_different_device
	 [[{{node RMSprop/StatefulPartitionedCall_25}}]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_17959]

In [None]:
# list all data in history
print(history.history.keys())
# summarize history for accuracy
plt.plot(history.history['auc'])
plt.plot(history.history['val_auc'])
plt.title('model AUC')
plt.ylabel('AUC')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
# plt.ylim(0.3, 1)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

plt.plot(history.history['auprc'])
plt.plot(history.history['val_auprc'])
plt.title('model auprc')
# plt.ylim(0.3, 1)
plt.ylabel('auprc')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
with tf.device('/GPU:0'):
    model.evaluate([Xt_eval, Xs_clin_eval[:,:max_codes_clin], Xs_ther_eval[:,:max_codes_ther]], y_eval)
    model.evaluate([Xt_test, Xs_clin_test[:,:max_codes_clin], Xs_ther_test[:,:max_codes_ther]], y_test)
    model.evaluate([Xt_testWales, Xs_clin_testWales[:,:max_codes_clin], Xs_ther_testWales[:,:max_codes_ther]], y_testWales)
    model.evaluate([Xt_testScotland, Xs_clin_testScotland[:,:max_codes_clin], Xs_ther_testScotland[:,:max_codes_ther]], y_testScotland)

In [None]:
model.predict([Xt_test, Xs_clin_test[:,:max_codes_clin], Xs_ther_test[:,:max_codes_ther]])

In [None]:
def summariseResultWithThreshold (Xt_test, Xs_clin_test, Xs_ther_test, testY, model):
    preds = model.predict([Xt_test, Xs_clin_test, Xs_ther_test])
    # tn, fp, fn, tp = confusion_matrix(testY, preds).ravel()
    # specificity = tn / (tn+fp)
    # sensitivity = tp / (tp+fn)
    # ppv = 100*tp/(tp+fp)
    # npv = 100*tn/(fn+tn)
    # acc = accuracy_score(testY, preds)
    # f1score = f1_score(testY, preds, average = 'binary')
    # balanceacc = balanced_accuracy_score(testY, preds)
    fpr, tpr, thresholds = roc_curve(testY, preds, pos_label=1)
    # aucscore = auc(fpr, tpr)
    aucscore = roc_auc_score(testY, preds)
    auprc = average_precision_score(testY, preds)
    # plot_confusion_matrix(model, testX, testY, cmap='viridis')  
    return np.round(aucscore,4), np.round(auprc,4)

In [None]:
print(summariseResultWithThreshold(Xt_eval, Xs_clin_eval[:,:max_codes_clin], Xs_ther_eval[:,:max_codes_ther], y_eval, model))
print(summariseResultWithThreshold(Xt_test, Xs_clin_test[:,:max_codes_clin], Xs_ther_test[:,:max_codes_ther], y_test, model))
print(summariseResultWithThreshold(Xt_testWales, Xs_clin_testWales[:,:max_codes_clin], Xs_ther_testWales[:,:max_codes_ther], y_testWales, model))
print(summariseResultWithThreshold(Xt_testScotland, Xs_clin_testScotland[:,:max_codes_clin], Xs_ther_testScotland[:,:max_codes_ther], y_testScotland, model))

In [None]:
# %%time

# # create the model
# embedding_vector_length = 50
# earlyStopping = EarlyStopping(monitor='val_auc', patience=10, verbose=0, mode='max', restore_best_weights=True)
# mcp_save = ModelCheckpoint('../SeqModel/seqModel_therapy.mdl_wts.hdf5', save_best_only=True, monitor='val_auc', mode='min')

# with tf.device('/GPU:0'):
#     model = Sequential()
#     model.add(Embedding(vocab_size, embedding_vector_length, input_length=max_codes))
#     model.add(LSTM(128, return_sequences=True, kernel_regularizer=L1L2(l1=0.02, l2=0.03)))
#     model.add(Dropout(0.5))
#     model.add(LSTM(64,  kernel_regularizer=L1L2(l1=0.02, l2=0.03)))
#     model.add(Dropout(0.5))
#     model.add(Dense(32, activation=LeakyReLU(alpha=.3), kernel_regularizer=L1L2(l1=0.02, l2=0.03)))
#     model.add(Dense(1, activation='sigmoid'))
#     opt = Adadelta(learning_rate=5e-3, clipvalue=0.3)
#     metrics = [
#         AUC(num_thresholds=3, name='auc'),
#     ]
#     model.compile(loss='binary_crossentropy', optimizer=opt, metrics=metrics, )
#     print(model.summary())
#     history = model.fit(Xs_train, y_train, validation_data=(Xs_val, y_val), epochs=30, batch_size=128, class_weight = class_weight, callbacks = [earlyStopping, mcp_save])

In [None]:
def earlyFussion():
       
    inputs1 = Input(shape=tab_feature_size)
    inputs2 = Input(shape=max_codes)
    inputs3 = Input(shape=max_codes)
    
    
    #clinical embedding for lstm
    embedding = Embedding(vocab_size, 50, input_length=max_codes)(inputs2)
    
    #month embedding for lstm
    embedding_month = Embedding(month_size, 7, input_length=max_codes)(inputs3)
    
    nn = Dense(32, activation='relu', kernel_initializer='glorot_uniform', kernel_regularizer=L1L2(l1=0.0, l2=0.1))(inputs1)
    nn = Dropout(0.5)(nn)
    lstmClinical = Bidirectional(LSTM(units=16, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.1)))(embedding)
    lstmMonth = Bidirectional(LSTM(units=16, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.1)))(embedding_month)
    # lstm = Add()([lstmClinical, lstmMonth])
    lstm = lstmClinical
    
    # nn = Reshape((1, 32))(nn)
    # add = concatenate([nn, lstm], axis=1)
    nn = Dense(16, activation='relu', kernel_initializer='glorot_uniform', kernel_regularizer=L1L2(l1=0.0, l2=0.1))(nn)
    nn = Dropout(0.5)(nn)
    lstm = Bidirectional(LSTM(units=8, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.1)))(lstm)
    lstm = Dropout(0.5)(lstm)
    
    # # nn = Reshape((301, 64))(nn)
    # add = concatenate([nn, lstm], axis=1)
    nn = Dense(16, activation='relu', kernel_initializer='glorot_uniform', kernel_regularizer=L1L2(l1=0.0, l2=0.1))(nn)
    nn = Dropout(0.5)(nn)
    lstm = Bidirectional(LSTM(units=8, return_sequences=True, kernel_regularizer=L1L2(l1=0.0, l2=0.1)))(lstm)
    lstm = Dropout(0.5)(lstm)
    
    nn = Reshape((1, 16))(nn)
    model_tot = concatenate([nn, lstm], axis=1)
    # model_tot = BatchNormalization()(model_tot)

    model_tot = Dense(units=8, activation=LeakyReLU())(model_tot)
    nn = Dropout(0.5)(nn)
    
    model_tot = Flatten()(model_tot)
    output = Dense(1, activation='sigmoid')(model_tot)
    
    opt = RMSprop(learning_rate=1e-4, clipvalue=.5)
    metrics = [
        AUC(num_thresholds=3, name='auc', curve='ROC'),
        AUC(num_thresholds=3, name='auprc', curve='PR'),
        tf.keras.metrics.Precision(name='prec'),
        tf.keras.metrics.Recall(name='rec'),
        tf.keras.metrics.TrueNegatives(name='TN'),
        tf.keras.metrics.TruePositives(name='TP'),
        tf.keras.metrics.PrecisionAtRecall(0.8)
    ]
    
    loss = tf.keras.losses.BinaryCrossentropy()
    
    model = Model(inputs=[inputs1, inputs2, inputs3], outputs=output)
    model.compile(
        loss='binary_crossentropy', 
        optimizer=opt, 
        metrics=metrics)
    return model

In [None]:
with tf.device('/GPU:0'):
    model.evaluate([Xt_eval, Xs_eval[:,:max_codes], Xm_eval[:,:max_codes]], y_eval)
    model.evaluate([Xt_test, Xs_test[:,:max_codes], Xm_test[:,:max_codes]], y_test)
    # model.evaluate(X_testWales, y_testWales)
    # model.evaluate(X_testScotland, y_testScotland)

In [None]:
#Model evaluation function
def summariseResult (testY, preds):
    tn, fp, fn, tp = confusion_matrix(testY, preds).ravel()
    specificity = tn / (tn+fp)
    sensitivity = tp / (tp+fn)
    ppv = 100*tp/(tp+fp)
    npv = 100*tn/(fn+tn)
    acc = accuracy_score(testY, preds)
    f1score = f1_score(testY, preds, average = 'binary')
    balanceacc = balanced_accuracy_score(testY, preds)
    fpr, tpr, thresholds = roc_curve(testY, preds, pos_label=1)
    aucscore = auc(fpr, tpr)
    # aucscore = roc_auc_score(testY, preds)
    auprc = average_precision_score(testY, preds)
    # plot_confusion_matrix(model, testX, testY, cmap='viridis')  
    return np.round(acc,4), np.round(specificity,4), np.round(sensitivity,4), np.round(aucscore,4), np.round(auprc,4), np.round(balanceacc,4), np.round(f1score,4), np.round(ppv,4), np.round(npv,4)

data_test_Xs = [X_eval, X_test, X_testWales, X_testScotland]
data_test_ys = [y_eval, y_test, y_testWales, y_testScotland]
for data_test_X, data_test_y in zip(data_test_Xs, data_test_ys):
    with tf.device('/CPU:0'):
        preds = model.predict(data_test_X)
    preds = [0 if pred <0.5 else 1 for pred in preds]
    print(summariseResult(data_test_y, np.squeeze(preds)))

In [None]:
model.save('../SeqModel/model_therapy.h5')

In [None]:
# from keras.models import load_model
# a = load_model('../SeqModel/my_model.h5')