# LSTM Model using Clinical + Therapy sequence

In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout, Bidirectional, Input, Reshape, BatchNormalization, Flatten, concatenate, Add, Multiply, LeakyReLU
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.metrics import AUC, SensitivityAtSpecificity
from tensorflow.keras.optimizers import Adam, Adagrad, RMSprop, Adamax, SGD
from tensorflow.keras.initializers import Constant
from tensorflow.keras.regularizers import L1L2, L1, L2
from livelossplot import PlotLossesKeras

# from tensorflow.keras.backend.tensorflow_backend import set_session
from tensorflow.python.keras import backend as K

from sgt import SGT

#internal validation
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, f1_score, balanced_accuracy_score, matthews_corrcoef, auc, average_precision_score, roc_auc_score, balanced_accuracy_score, roc_curve, accuracy_score

import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import pickle

# fix random seed for reproducibility
tf.random.set_seed(1234)

target_outcome = '12months'
max_codes = 100

2023-12-08 00:25:15.646566: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-08 00:25:15.681867: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-08 00:25:15.681906: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-08 00:25:15.681932: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-08 00:25:15.691453: I tensorflow/core/platform/cpu_feature_g

In [2]:
sets =  pickle.load(open('../SeqModel/sets_1year.sav', 'rb'))
sets_eval = pickle.load(open('../SeqModel/sets_eval_1year.sav', 'rb'))
code2idx = pickle.load(open('../SeqModel/all_vocab_1year.sav', 'rb'))
month2idx = pickle.load(open('../SeqModel/all_vocab_month.sav', 'rb'))
vocab_size = len(code2idx)+1
month_size = len(month2idx)+1
print(vocab_size)
print(month_size)

58558
13


In [3]:
Xt_train, Xt_val, Xt_eval, Xs_train, Xs_val, Xs_eval, Xm_train, Xm_val, Xm_eval, y_train, y_val, y_eval = sets
Xt_test, Xt_testWales, Xt_testScotland, Xs_test, Xs_testWales, Xs_testScotland, Xm_test, Xm_testWales, Xm_testScotland, y_test, y_testWales, y_testScotland = sets_eval

In [4]:
print('Train: ', Xs_train.shape)
print('Val: ', Xs_val.shape[0])
print('Eval (internal validation): ', Xs_eval.shape[0])
print('Test: ', Xs_test.shape[0])
print('Test - Wales: ', Xs_testWales.shape[0])
print('Test - Scotland: ', Xs_testScotland.shape[0])

Train:  (110167, 194)
Val:  59019
Eval (internal validation):  27542
Test:  7870
Test - Wales:  5013
Test - Scotland:  2857


In [5]:
model = Sequential()
model.add(Embedding(vocab_size, 64, input_length=100))

2023-12-08 00:25:19.183600: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-08 00:25:19.199226: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-08 00:25:19.199260: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-08 00:25:19.203112: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:880] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-08 00:25:19.203187: I tensorflow/compile

In [6]:
model.compile('rmsprop', 'mse')
output_array = model.predict(Xs_val[:,:100])



In [10]:
output_array.shape

(59019, 100, 64)

In [9]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(output_array)
X = pca.transform(output_array)
print(np.sum(pca.explained_variance_ratio_))

ValueError: Found array with dim 3. PCA expected <= 2.

In [None]:
import matplotlib.pyplot as plt
plt.scatter(pca_subset.PC1.values,pca_subset.PC2.values, c=pca_subset.label_num.values)
plt.show()


In [None]:
Xs_train = Xs_train.tolist()

In [None]:
Xs_train

In [None]:
sgt = SGT(kappa = 10, lengthsensitive = True)
embedding = sgt.fit_transform(corpus=Xs_train)