<a href="https://colab.research.google.com/github/davidsolow/med-abbrev-mystery/blob/base-bert/MIMIC_mortality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
import re
import textwrap

from sklearn.model_selection import StratifiedShuffleSplit

import tensorflow as tf
from tensorflow import keras
from transformers import BertTokenizer, TFBertModel

In [9]:
!pip install transformers



In [2]:
transformers = __import__('transformers')
print("Transformers version:", transformers.__version__)

Transformers version: 4.42.4


In [4]:
!pip install transformers==4.37.2



In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [20]:
total_abbreviations = pd.read_csv("drive/MyDrive/266Project/medal_mimic_subset/total_abbreviations.csv")

In [None]:
total_abbreviations.head()

Unnamed: 0,abbreviation
0,SFG
1,CTP
2,HSES
3,MMPIs
4,ATP7B


In [21]:
mimic_train = pd.read_csv("drive/MyDrive/266Project/medal_mimic_subset/diagnoses/train.csv")
mimic_validation = pd.read_csv("drive/MyDrive/266Project/medal_mimic_subset/diagnoses/valid.csv")
mimic_test = pd.read_csv("drive/MyDrive/266Project/medal_mimic_subset/diagnoses/test.csv")

In [None]:
mimic_train.head()

Unnamed: 0,HADM_ID,SUBJECT_ID,TEXT,HOSPITAL_EXPIRE_FLAG,ICD9_ID
0,146431,31916,"Respiratory failure , acute ( not ARDS / Doc...",1.0,5070;51881;55220;1970;1987;1983;5849;5119;V667...
1,116532,29487,Chief Complaint : \n 24 Hour Events : \n EKG...,1.0,03843;5185;78552;42823;486;2762;5990;5849;7070...
2,116532,29487,No significant events overnight\n Renal fail...,1.0,03843;5185;78552;42823;486;2762;5990;5849;7070...
3,111458,31820,"Sepsis , Severe ( with organ dysfunction ) \...",1.0,1970;5849;1578;5119;5990;0388;5582;2869;51881;...
4,116532,29487,"Chief Complaint : urosepsis , erspiratory fa...",1.0,03843;5185;78552;42823;486;2762;5990;5849;7070...


In [None]:
len(mimic_train)

61079

In [59]:
model_checkpoint = 'bert-base-cased'
bert_tokenizer = BertTokenizer.from_pretrained(model_checkpoint)
bert_model = TFBertModel.from_pretrained(model_checkpoint)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [22]:
def make_lower(text):
  return text.lower()

total_abbreviations['abbreviation'] = total_abbreviations['abbreviation'].apply(make_lower)

In [23]:
abbreviations = set(total_abbreviations.abbreviation)

def has_any_abbreviation(text):
    words = set(text.lower().split())
    found = words.intersection(abbreviations)
    return bool(found)

mimic_train_subset = mimic_train.loc[mimic_train.TEXT.apply(has_any_abbreviation)]
mimic_validation_subset = mimic_validation.loc[mimic_validation.TEXT.apply(has_any_abbreviation)]
mimic_test_subset = mimic_test.loc[mimic_test.TEXT.apply(has_any_abbreviation)]

In [24]:
print(f"Train: {len(mimic_train_subset)} out of {len(mimic_train)}")
print(f"Validation: {len(mimic_validation_subset)} out of {len(mimic_validation)}")
print(f"Test: {len(mimic_test_subset)} out of {len(mimic_test)}")

Train: 60682 out of 61079
Validation: 9198 out of 9258
Test: 14191 out of 14287


In [25]:
mimic_no_abbreviation = mimic_train.loc[~mimic_train.TEXT.apply(has_any_abbreviation)]
mimic_no_abbreviation.head()

KeyboardInterrupt: 

In [None]:
np.average(mimic_train_subset.HOSPITAL_EXPIRE_FLAG)

0.6145150126891006

In [None]:
mimic_train_subset.SUBJECT_ID.nunique()

5483

In [6]:
MAX_LENGTH = 512

In [26]:
MAX_LENGTH = 512

def head_and_tail(tokens, total_size=MAX_LENGTH):
    """Build an array of size <total_size> using the head and tail ends of the input array"""
    half = int(total_size / 2)
    if len(tokens) <= total_size:
        return tokens
    head = tokens[:half]
    tail = tokens[-half:]
    return np.concatenate([head, tail])


def tokenize(texts):
    """
    Tokenize an array of text inputs for bert, and take the head and tail of
    each set of bert inputs. Returns a tuple of three arrays: input_ids, token_type_ids,
    and attention_mask.
    """
    input_ids_list = []
    token_type_ids_list = []
    attention_mask_list = []

    for text in texts:
        bert_output = bert_tokenizer(
            text, padding="max_length", return_tensors="tf", max_length=MAX_LENGTH
        )
        input_ids = bert_output["input_ids"][0]
        token_type_ids = bert_output["token_type_ids"][0]
        attention_mask = bert_output["attention_mask"][0]

        input_ids_list.append(head_and_tail(input_ids))
        token_type_ids_list.append(head_and_tail(token_type_ids))
        attention_mask_list.append(head_and_tail(attention_mask))

    return np.array(input_ids_list), np.array(token_type_ids_list), np.array(attention_mask_list)


# tester = np.array([1,2,3,4,5,6,7,8,9,0])
# print(head_and_tail(tester, 4))
examples = mimic_train.TEXT[0:3].tolist()
input_ids, token_type_ids, attention_mask = tokenize(examples)

print(len(input_ids[2]))
print(examples[2])
print(bert_tokenizer.decode(input_ids[2]))




512
No significant events overnight
   Renal failure ,  Chronic  ( Chronic renal failure ,  CRF ,  Chronic kidney
   disease ) 
   Assessment : 
   U / O remains extremely low  ~  10 cc / hr ,  total body overloaded w /  4 + 
   pitting edema x all 4 extremities
   Action : 
   All meds renally dosed ,  no fluid boluses overnight
   Response : 
   Plan : 
   Cont to trend changes in BUN / CR ,  renally dose all meds ,  nephrology may
   need to re evaluate if urine output does not improve . 
   Pain control  ( acute pain ,  chronic pain ) 
   Assessment : 
   Sedated on fent / midaz grimaces during turns / repositioning
   Action : 
   Fent boluses prior to turning ,  lido patch off  @  00 : 00
   Response : 
   Continues to experience pain
   Plan : 
   Continue w /  current pain / sedation regimen ,  ortho consult to evaluate
   for septic L hip . 

[CLS] No significant events overnight Renal failure, Chronic ( Chronic renal failure, CRF, Chronic kidney disease ) Assessment : U / O r

In [27]:
#tokenizing subsets

mimic_train_subset_inputs = tokenize(mimic_train_subset.TEXT.tolist())
mimic_validation_subset_inputs = tokenize(mimic_validation_subset.TEXT.tolist())
mimic_test_subset_inputs = tokenize(mimic_test_subset.TEXT.tolist())

In [28]:
mimic_train_subset_labels = mimic_train_subset.HOSPITAL_EXPIRE_FLAG.to_numpy(np.int32)
mimic_validation_subset_labels = mimic_validation_subset.HOSPITAL_EXPIRE_FLAG.to_numpy(np.int32)
mimic_test_subset_labels = mimic_test_subset.HOSPITAL_EXPIRE_FLAG.to_numpy(np.int32)

In [34]:
np.save('drive/MyDrive/266Project/mimic_train_subset_inputs.npy', mimic_train_subset_inputs)
np.save('drive/MyDrive/266Project/mimic_validation_subset_inputs.npy', mimic_validation_subset_inputs)
np.save('drive/MyDrive/266Project/mimic_test_subset_inputs.npy', mimic_test_subset_inputs)

# Save labels
np.save('drive/MyDrive/266Project/mimic_train_subset_labels.npy', mimic_train_subset_labels)
np.save('drive/MyDrive/266Project/mimic_validation_subset_labels.npy', mimic_validation_subset_labels)
np.save('drive/MyDrive/266Project/mimic_test_subset_labels.npy', mimic_test_subset_labels)

In [18]:
# Load tokenized inputs
mimic_train_subset_inputs = np.load('drive/MyDrive/266Project/mimic_train_subset_inputs.npy')
mimic_validation_subset_inputs = np.load('drive/MyDrive/266Project/mimic_validation_subset_inputs.npy')
mimic_test_subset_inputs = np.load('drive/MyDrive/266Project/mimic_test_subset_inputs.npy')

# Load labels
mimic_train_subset_labels = np.load('drive/MyDrive/266Project/mimic_train_subset_labels.npy')
mimic_validation_subset_labels = np.load('drive/MyDrive/266Project/mimic_validation_subset_labels.npy')
mimic_test_subset_labels = np.load('drive/MyDrive/266Project/mimic_test_subset_labels.npy')


In [None]:
mimic_train = None
mimic_validation = None
mimic_test = None
mimic_train_subset = None
mimic_validation_subset = None
mimic_test_subset = None
total_abbreviations = None

In [9]:
def create_bert_base_model(
    bert_base_model,
    max_sequence_length=MAX_LENGTH,
    hidden_size=100,
    dropout=0.3,
    learning_rate=0.00005
):
    """
    Build a simple classification model with BERT. Use the pooler Token output for classification purposes.
    """
    bert_base_model.trainable = True

    input_ids = tf.keras.layers.Input(shape=(MAX_LENGTH,), dtype=tf.int32, name='input_ids_layer')
    token_type_ids = tf.keras.layers.Input(shape=(MAX_LENGTH,), dtype=tf.int32, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(MAX_LENGTH,), dtype=tf.int32, name='attention_mask_layer')

    bert_inputs = {'input_ids': input_ids,
                   'token_type_ids': token_type_ids,
                   'attention_mask': attention_mask}

    bert_out = bert_base_model(bert_inputs)
    pooler_token = bert_out[1]

    hidden = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer')(pooler_token)
    hidden = tf.keras.layers.Dropout(dropout)(hidden)


    classification = tf.keras.layers.Dense(1, activation='sigmoid',name='classification_layer')(hidden)

    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=[classification])

    classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                                 loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                                 metrics='accuracy')

    return classification_model

In [28]:
print(TFBertModel.__version__)

AttributeError: type object 'TFBertModel' has no attribute '__version__'

In [60]:
#bert_model.load_weights('drive/MyDrive/266Project/bertbaseweights.h5')
baseline_bert_model = create_bert_base_model(bert_model)

In [None]:
# # Step 1: Load the saved weights into a separate BERT model
# bert_model_for_weights = TFBertModel.from_pretrained(model_checkpoint)

# # Assuming saved_weights_path is the path to the saved weights file
# saved_weights_path = 'drive/MyDrive/266Project/base_bert/20240725_base_bert_ft_weights.hdf5'

# # Load weights into the BERT model
# bert_model_for_weights.load_weights(saved_weights_path)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

ValueError: Layer count mismatch when loading weights from file. Model expected 1 layers, found 2 saved layers.

In [None]:
# for layer in bert_model.layers:
#     layer.set_weights(bert_model_for_weights.get_layer(layer.name).get_weights())

ValueError: Layer count mismatch when loading weights from file. Model expected 1 layers, found 2 saved layers.

In [None]:
#baseline_bert_model = create_bert_base_model(bert_model)

In [34]:
from keras.callbacks import ModelCheckpoint

In [35]:
checkpoint = ModelCheckpoint(filepath='drive/MyDrive/266Project/basebert_mimic_baseline_checkpoints/fine-tuned_mortality_new/model_{epoch:02d}.weights.h5',
                             save_weights_only=True,
                             save_best_only=False,
                             monitor='val_loss',
                             mode='auto',
                             save_freq='epoch',
                             verbose=1)

In [29]:
print("Shape of mimic_train_subset_inputs:", mimic_train_subset_inputs.shape)
print("Shape of mimic_train_subset_labels:", mimic_train_subset_labels.shape)
print("Shape of mimic_validation_subset_inputs:", mimic_validation_subset_inputs.shape)
print("Shape of mimic_validation_subset_labels:", mimic_validation_subset_labels.shape)

AttributeError: 'tuple' object has no attribute 'shape'

In [61]:
baseline_history = baseline_bert_model.fit(
    mimic_train_subset_inputs,
    mimic_train_subset_labels,
    epochs=2,
    batch_size=32,
    validation_data=(mimic_validation_subset_inputs, mimic_validation_subset_labels),
    shuffle = True
)

Epoch 1/2
Epoch 2/2


In [48]:
checkpoint_path = 'drive/MyDrive/266Project/basebert_mimic_baseline_checkpoints/model2_02.hdf5'
baseline_bert_model.load_weights(checkpoint_path)

In [None]:
loss, accuracy = baseline_bert_model.evaluate(mimic_test_subset_inputs, mimic_test_subset_labels, verbose=1)
print(f'Test loss: {loss}')
print(f'Test accuracy: {accuracy}')

Test loss: 0.5356011986732483
Test accuracy: 0.7889507412910461


In [38]:
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report, confusion_matrix

In [62]:
#calculate F1, precision, recall
y_pred = baseline_bert_model.predict(mimic_test_subset_inputs)




In [None]:
y_pred.shape

(14191, 1)

In [None]:
mimic_test_subset_labels.shape

(14191,)

In [63]:
y_pred_classes = (y_pred > 0.5).astype(int)
y_true = mimic_test_subset_labels

#classification report
report = classification_report(y_true, y_pred_classes)
print(report)

              precision    recall  f1-score   support

           0       0.61      0.87      0.71      4626
           1       0.92      0.73      0.81      9565

    accuracy                           0.77     14191
   macro avg       0.76      0.80      0.76     14191
weighted avg       0.82      0.77      0.78     14191



In [None]:
print(set(y_true.flatten()))

{0, 1}


In [None]:
print(set(y_pred_classes.flatten()))

{0}
