### 1. Set Up

In [2]:
import numpy as np
import pandas as pd

data_path = '/content/drive/MyDrive/ehr_project/data/'

In [3]:
# Read the training and test note tables from the shared data folder.
training_csv = data_path + 'clinical_notes_training.csv'
test_csv = data_path + 'clinical_notes_test.csv'

clinical_notes_training = pd.read_csv(training_csv)
clinical_notes_test = pd.read_csv(test_csv)

In [4]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/d5/43/cfe4ee779bbd6a678ac6a97c5a5cdeb03c35f9eaebbb9720b036680f9a2d/transformers-4.6.1-py3-none-any.whl (2.2MB)
[K     |████████████████████████████████| 2.3MB 15.1MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/d4/e2/df3543e8ffdab68f5acc73f613de9c2b155ac47f162e725dcac87c521c11/tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3MB)
[K     |████████████████████████████████| 3.3MB 49.6MB/s 
Collecting huggingface-hub==0.0.8
  Downloading https://files.pythonhosted.org/packages/a1/88/7b1e45720ecf59c6c6737ff332f41c955963090a18e72acbcbeac6b25e86/huggingface_hub-0.0.8-py3-none-any.whl
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |██████

In [5]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import TrainingArguments, Trainer

In [6]:
# Pull the preprocessed note text out of each split as plain Python lists;
# these lists are what the tokenizer is called on further down.
train_texts, test_texts = (
    frame['note_processed'].to_list()
    for frame in (clinical_notes_training, clinical_notes_test)
)

In [7]:
import torch

class EHRDataset(torch.utils.data.Dataset):
    """Map-style dataset pairing tokenizer encodings with one label per note.

    Args:
        encodings: Mapping from feature name to a per-example sequence
            (anything exposing ``.items()`` whose values can be indexed by
            an integer position).
        labels: Per-example labels. May be a list, an array or a pandas
            Series; a Series is always read *by position*, so a non-default
            index (e.g. after filtering or shuffling the frame) is handled.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def _label_at(self, idx):
        """Return the label stored at integer position ``idx``."""
        # `series[idx]` is a label-based lookup: on a Series whose index is not
        # 0..n-1 it raises KeyError or returns a different row's label.
        # `.iloc` is positional, which is what a Dataset index means.
        positional = getattr(self.labels, 'iloc', None)
        if positional is not None:
            return positional[idx]
        return self.labels[idx]

    def __getitem__(self, idx):
        """Return one example as a dict of tensors, including a ``labels`` entry."""
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self._label_at(idx))
        return item

    def __len__(self):
        """Number of examples, taken from the label container."""
        return len(self.labels)

In [8]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Metric callback for binary classification, passed to the Trainer below.
def compute_metrics(pred):
    """Score one evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (true labels) and ``predictions``
            (per-class scores; the predicted class is the argmax over the
            last axis).

    Returns:
        dict with ``accuracy``, ``f1``, ``precision`` and ``recall``.
    """
    y_true = pred.label_ids
    y_hat = pred.predictions.argmax(-1)
    # Returned tuple is (precision, recall, f1, support); support is unused.
    prf = precision_recall_fscore_support(y_true, y_hat, average="binary")
    return {
        "accuracy": accuracy_score(y_true, y_hat),
        "f1": prf[2],
        "precision": prf[0],
        "recall": prf[1],
    }

In [9]:
import random
import torch

def set_seed(seed_val = 42):
    """Seed every random number generator this notebook relies on.

    Args:
        seed_val: Integer seed applied to Python's ``random``, NumPy's legacy
            global generator and PyTorch (CPU and CUDA). Defaults to 42.
    """
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    # Cover every visible GPU rather than only the current device; this call
    # is silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed_val)

### 2. Dyspnea

Results: `BERT` has better performance

* `BERT`: 91.13% accuracy; 91.39% F1-Score 
* `DistilBERT`: 89.42% accuracy; 89.90% F1-Score

#### BERT for Dyspnea

In [27]:
# Seed before the model is built so the newly initialised classification head is reproducible.
set_seed()

model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Two output logits for the binary label: `compute_metrics` takes an argmax over the last
# axis, which is always 0 when there is only a single logit. Every other model in this
# notebook is loaded with the library default head size, so this keeps the sections comparable.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=694.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355863.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=597257159.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at allenai/longformer-base-4096 were not used when initializing LongformerForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LongformerForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of LongformerForSequenceClassification were not initialized from the model checkpoint at allenai/longformer-base-4096 and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.weigh

In [None]:
# Tokenize both splits with the same settings (truncation and padding enabled).
tokenizer_kwargs = dict(truncation=True, padding=True)
train_encodings = tokenizer(train_texts, **tokenizer_kwargs)
test_encodings = tokenizer(test_texts, **tokenizer_kwargs)

In [None]:
# Target for this section: the `dyspnea` column of each split.
train_labels, test_labels = (
    clinical_notes_training['dyspnea'],
    clinical_notes_test['dyspnea'],
)

In [None]:
# List the column names of the training table.
clinical_notes_training.columns

Index(['report_no', 'note_processed', 'with_labels', 'dyspnea', 'chest.pain',
       'fatique', 'nausea', 'cough'],
      dtype='object')

In [None]:
# Wrap encodings and labels so examples can be fetched by integer index.
train_dataset, test_dataset = (
    EHRDataset(train_encodings, train_labels),
    EHRDataset(test_encodings, test_labels),
)

In [None]:
# Fine-tuning configuration: 5 epochs, batch size 16, metrics computed once per epoch.
training_args = TrainingArguments(
    output_dir='results',
    logging_dir='logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
)

# The test split is used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3946,0.383267,0.860068,0.859107,0.85034,0.868056
2,0.3705,0.405936,0.8157,0.837349,0.739362,0.965278
3,0.2791,0.344621,0.853242,0.86262,0.798817,0.9375
4,0.2107,0.350492,0.894198,0.898361,0.850932,0.951389


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3946,0.383267,0.860068,0.859107,0.85034,0.868056
2,0.3705,0.405936,0.8157,0.837349,0.739362,0.965278
3,0.2791,0.344621,0.853242,0.86262,0.798817,0.9375
4,0.2107,0.350492,0.894198,0.898361,0.850932,0.951389
5,0.1548,0.396313,0.911263,0.913907,0.873418,0.958333


TrainOutput(global_step=2510, training_loss=0.3379901373291396, metrics={'train_runtime': 2430.8113, 'train_samples_per_second': 1.033, 'total_flos': 53477132396544.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 1852006400, 'init_mem_gpu_alloc_delta': 439072256, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': 18214912, 'train_mem_gpu_alloc_delta': 1320807936, 'train_mem_cpu_peaked_delta': 0, 'train_mem_gpu_peaked_delta': 13113112064})

In [None]:
# Keep the tokenizer inside the model's own folder, matching the
# `<symptom>_bert/tokenizer` layout used for the other saved models in this notebook
# (previously the tokenizer went to an unrelated `dyspnea_model/` directory).
trainer.save_model(data_path + 'dyspnea_bert')
tokenizer.save_pretrained(data_path + 'dyspnea_bert/tokenizer')

#### DistilBERT for Dyspnea

In [9]:
# TRAINING EPOCHS (total) = 11
# NOTE(review): this cell requests 5 epochs and its logged run shows 5 — confirm the figure above.

# Seed all RNGs before the model is built: the classification head is newly initialised on
# load, so without this the run is not reproducible. Matches the chest-pain sections.
set_seed()

model_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Tokenize both splits with truncation and padding enabled.
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

# Target for this section: the `dyspnea` column.
train_labels = clinical_notes_training['dyspnea']
test_labels = clinical_notes_test['dyspnea']

train_dataset = EHRDataset(train_encodings, train_labels)
test_dataset = EHRDataset(test_encodings, test_labels)

# Same hyper-parameters as the BERT run so the two models are comparable.
training_args = TrainingArguments(
    output_dir='results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='logs',
    logging_steps=10,
)

# The test split is used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

trainer.train()

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=442.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=267967963.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.weight', 'pre_classi

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4011,0.475237,0.771331,0.798799,0.703704,0.923611
2,0.3553,0.367918,0.87372,0.872852,0.863946,0.881944
3,0.2459,0.338173,0.849829,0.856209,0.808642,0.909722
4,0.1749,0.359801,0.883959,0.888889,0.839506,0.944444
5,0.136,0.401175,0.894198,0.899023,0.846626,0.958333


TrainOutput(global_step=2510, training_loss=0.32294234217875506, metrics={'train_runtime': 1262.7893, 'train_samples_per_second': 1.988, 'total_flos': 32704040724480.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 1989648384, 'init_mem_gpu_alloc_delta': 268953088, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': 112488448, 'train_mem_gpu_alloc_delta': 804109824, 'train_mem_cpu_peaked_delta': 0, 'train_mem_gpu_peaked_delta': 6657193472})

### 3. Chest Pain

* `BERT`: 88.40% (accuracy) and 83.33% (F1-Score)
* `DistilBERT`: 87.03% (accuracy) and 81.37% (F1-Score)

#### BERT for Chest Pain

In [13]:
# Reproducible start: seed everything before the model is created.
set_seed()

# --- model + tokenizer ---
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# --- data: `chest.pain` labels paired with the tokenized notes ---
train_labels = clinical_notes_training['chest.pain']
test_labels = clinical_notes_test['chest.pain']

train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

train_dataset = EHRDataset(train_encodings, train_labels)
test_dataset = EHRDataset(test_encodings, test_labels)

# --- training: 5 epochs, batch size 16, metrics once per epoch ---
training_args = TrainingArguments(
    output_dir='results',
    logging_dir='logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=570.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2284,0.397097,0.866894,0.782123,0.777778,0.786517
2,0.227,0.459394,0.866894,0.779661,0.784091,0.775281
3,0.303,0.417522,0.880546,0.80663,0.793478,0.820225
4,0.2557,0.443991,0.849829,0.786408,0.692308,0.910112
5,0.0889,0.453386,0.866894,0.815166,0.704918,0.966292


TrainOutput(global_step=2510, training_loss=0.31049361314431606, metrics={'train_runtime': 1451.8869, 'train_samples_per_second': 1.729, 'total_flos': 53477132396544.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 94208, 'init_mem_gpu_alloc_delta': 438547968, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': -1145217024, 'train_mem_gpu_alloc_delta': 1320021504, 'train_mem_cpu_peaked_delta': 1145217024, 'train_mem_gpu_peaked_delta': 13021885952})

In [16]:
set_seed()

# Continue training the model already held in `model` for 3 more epochs,
# with a fresh Trainer and otherwise unchanged hyper-parameters.
training_args = TrainingArguments(
    output_dir='results',
    logging_dir='logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0373,0.515799,0.914676,0.860335,0.855556,0.865169
2,0.0577,0.640938,0.877133,0.828571,0.719008,0.977528
3,0.042,0.445271,0.904437,0.858586,0.779817,0.955056


TrainOutput(global_step=1506, training_loss=0.09575860288518556, metrics={'train_runtime': 864.6491, 'train_samples_per_second': 1.742, 'total_flos': 31951745771520.0, 'epoch': 3.0, 'init_mem_cpu_alloc_delta': 0, 'init_mem_gpu_alloc_delta': 0, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': -622956544, 'train_mem_gpu_alloc_delta': 876843008, 'train_mem_cpu_peaked_delta': 622956544, 'train_mem_gpu_peaked_delta': 12997374976})

In [15]:
# Save the model held by the current trainer, then its tokenizer into a
# `tokenizer` sub-folder of the same directory.
trainer.save_model(data_path + 'chest.pain_bert')
tokenizer.save_pretrained(data_path + 'chest.pain_bert/tokenizer')

# Also write the stock `bert-base-uncased` config next to the tokenizer files.
# NOTE(review): presumably so the tokenizer folder can be loaded on its own through
# the Auto* classes — confirm this is still needed.
from transformers import AutoConfig
config = AutoConfig.from_pretrained('bert-base-uncased')
config.save_pretrained(data_path + 'chest.pain_bert/tokenizer')

#### DistilBERT for Chest Pain

In [10]:
# TRAINING EPOCHS (total) = 5

# Reproducible start: seed everything before the model is created.
set_seed()

# --- model + tokenizer ---
model_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# --- data: `chest.pain` labels paired with the tokenized notes ---
train_labels = clinical_notes_training['chest.pain']
test_labels = clinical_notes_test['chest.pain']

train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

train_dataset = EHRDataset(train_encodings, train_labels)
test_dataset = EHRDataset(test_encodings, test_labels)

# --- training: 5 epochs, batch size 16, metrics once per epoch ---
training_args = TrainingArguments(
    output_dir='results',
    logging_dir='logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=442.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=267967963.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'pre_classifier.weight', 'classi

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2795,0.471503,0.843003,0.757895,0.712871,0.808989
2,0.2352,0.47448,0.822526,0.747573,0.65812,0.865169
3,0.2471,0.460182,0.764505,0.708861,0.567568,0.94382
4,0.2493,0.410079,0.870307,0.813725,0.721739,0.932584
5,0.1082,0.542575,0.8157,0.754545,0.633588,0.932584


TrainOutput(global_step=2510, training_loss=0.3053745087399426, metrics={'train_runtime': 738.7402, 'train_samples_per_second': 3.398, 'total_flos': 32704040724480.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 2645106688, 'init_mem_gpu_alloc_delta': 268953088, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': 21483520, 'train_mem_gpu_alloc_delta': 804109824, 'train_mem_cpu_peaked_delta': 0, 'train_mem_gpu_peaked_delta': 6566033408})

### 4. Fatigue

* `BERT`: 92.83% (accuracy) and 84.21% (F1-Score)
* `DistilBERT`: 91.81% (accuracy) and 82.09% (F1-Score)

#### DistilBERT for Fatigue

In [None]:
# Seed all RNGs before the model is built: the classification head is newly initialised on
# load, so without this the run is not reproducible. Matches the chest-pain sections.
set_seed()

model_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Tokenize both splits with truncation and padding enabled.
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

# Target for this section: the `fatique` column (spelling as in the data files).
train_labels = clinical_notes_training['fatique']
test_labels = clinical_notes_test['fatique']

train_dataset = EHRDataset(train_encodings, train_labels)
test_dataset = EHRDataset(test_encodings, test_labels)

# 5 epochs, batch size 16, metrics computed once per epoch.
training_args = TrainingArguments(
    output_dir='results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='logs',
    logging_steps=10,
)

# The test split is used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

trainer.train()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.bias', 'pre_classi

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2278,0.31555,0.901024,0.752137,0.862745,0.666667
2,0.1675,0.312307,0.911263,0.779661,0.884615,0.69697
3,0.1785,0.342227,0.911263,0.779661,0.884615,0.69697
4,0.0911,0.319806,0.924915,0.822581,0.87931,0.772727
5,0.1119,0.344683,0.918089,0.820896,0.808824,0.833333


TrainOutput(global_step=2510, training_loss=0.22322324207163902, metrics={'train_runtime': 1651.9933, 'train_samples_per_second': 1.519, 'total_flos': 32704040724480.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 0, 'init_mem_gpu_alloc_delta': 268428800, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': 266240, 'train_mem_gpu_alloc_delta': 805944832, 'train_mem_cpu_peaked_delta': 0, 'train_mem_gpu_peaked_delta': 6692648448})

#### BERT for Fatigue

In [None]:
# Seed all RNGs before the model is built: the classification head is newly initialised on
# load, so without this the run is not reproducible. Matches the chest-pain sections.
set_seed()

model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Tokenize both splits with truncation and padding enabled.
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

# Target for this section: the `fatique` column (spelling as in the data files).
train_labels = clinical_notes_training['fatique']
test_labels = clinical_notes_test['fatique']

train_dataset = EHRDataset(train_encodings, train_labels)
test_dataset = EHRDataset(test_encodings, test_labels)

# 5 epochs, batch size 16, metrics computed once per epoch.
training_args = TrainingArguments(
    output_dir='results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='logs',
    logging_steps=10,
)

# The test split is used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

trainer.train()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2276,0.40628,0.883959,0.685185,0.880952,0.560606
2,0.1688,0.305184,0.90785,0.765217,0.897959,0.666667
3,0.2166,0.361887,0.90785,0.765217,0.897959,0.666667
4,0.1089,0.301871,0.928328,0.829268,0.894737,0.772727
5,0.1129,0.311178,0.928328,0.842105,0.835821,0.848485


TrainOutput(global_step=2510, training_loss=0.23737905886899902, metrics={'train_runtime': 2314.1104, 'train_samples_per_second': 1.085, 'total_flos': 53477132396544.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 0, 'init_mem_gpu_alloc_delta': 438547968, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': -622661632, 'train_mem_gpu_alloc_delta': 1319235072, 'train_mem_cpu_peaked_delta': 622669824, 'train_mem_gpu_peaked_delta': 13021853184})

In [None]:
# Save the model held by the current trainer, then its tokenizer into a
# `tokenizer` sub-folder of the same directory.
trainer.save_model(data_path + 'fatique_bert')
tokenizer.save_pretrained(data_path + 'fatique_bert/tokenizer')

('/content/drive/MyDrive/ehr_project/data/fatique_bert/tokenizer/tokenizer_config.json',
 '/content/drive/MyDrive/ehr_project/data/fatique_bert/tokenizer/special_tokens_map.json',
 '/content/drive/MyDrive/ehr_project/data/fatique_bert/tokenizer/vocab.txt',
 '/content/drive/MyDrive/ehr_project/data/fatique_bert/tokenizer/added_tokens.json',
 '/content/drive/MyDrive/ehr_project/data/fatique_bert/tokenizer/tokenizer.json')

In [None]:
# Continue training the model already held in `model` for 3 more epochs,
# with a fresh Trainer and otherwise unchanged hyper-parameters.
training_args = TrainingArguments(
    output_dir='results',
    logging_dir='logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2511,0.306361,0.928328,0.829268,0.894737,0.772727
2,0.0624,0.286254,0.945392,0.876923,0.890625,0.863636
3,0.0686,0.341851,0.928328,0.842105,0.835821,0.848485


TrainOutput(global_step=1506, training_loss=0.1313821997933812, metrics={'train_runtime': 1385.1031, 'train_samples_per_second': 1.087, 'total_flos': 31951745771520.0, 'epoch': 3.0, 'init_mem_cpu_alloc_delta': 0, 'init_mem_gpu_alloc_delta': 0, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': -19685376, 'train_mem_gpu_alloc_delta': 876056576, 'train_mem_cpu_peaked_delta': 19685376, 'train_mem_gpu_peaked_delta': 13021853696})

### 5. Nausea

* `BERT`: 92.15% (accuracy) and 83.69% (F1-Score)
* `DistilBERT`: 89.76% (accuracy) and 78.87% (F1-Score)

#### DistilBERT for Nausea

In [None]:
# Seed all RNGs before the model is built: the classification head is newly initialised on
# load, so without this the run is not reproducible. Matches the BERT run for this symptom.
set_seed()

model_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Tokenize both splits with truncation and padding enabled.
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

# Target for this section: the `nausea` column.
train_labels = clinical_notes_training['nausea']
test_labels = clinical_notes_test['nausea']

train_dataset = EHRDataset(train_encodings, train_labels)
test_dataset = EHRDataset(test_encodings, test_labels)

# 5 epochs, batch size 16, metrics computed once per epoch.
training_args = TrainingArguments(
    output_dir='results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='logs',
    logging_steps=10,
)

# The test split is used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

trainer.train()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'classifier.bias', 'pre_classi

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2945,0.403526,0.870307,0.693548,0.693548,0.693548
2,0.3037,0.403278,0.87372,0.694215,0.711864,0.677419
3,0.2359,0.460142,0.87372,0.721805,0.676056,0.774194
4,0.2318,0.454305,0.890785,0.764706,0.702703,0.83871
5,0.1342,0.482845,0.897611,0.788732,0.7,0.903226


TrainOutput(global_step=2510, training_loss=0.23766299094336915, metrics={'train_runtime': 1692.8863, 'train_samples_per_second': 1.483, 'total_flos': 32704040724480.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 106496, 'init_mem_gpu_alloc_delta': 267820544, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': -10977280, 'train_mem_gpu_alloc_delta': 806553088, 'train_mem_cpu_peaked_delta': 10977280, 'train_mem_gpu_peaked_delta': 6566131200})

#### BERT for Nausea

In [None]:
# Reproducible start: seed everything before the model is created.
set_seed()

# --- model + tokenizer ---
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# --- data: `nausea` labels paired with the tokenized notes ---
train_labels = clinical_notes_training['nausea']
test_labels = clinical_notes_test['nausea']

train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

train_dataset = EHRDataset(train_encodings, train_labels)
test_dataset = EHRDataset(test_encodings, test_labels)

# --- training: 5 epochs, batch size 16, metrics once per epoch ---
training_args = TrainingArguments(
    output_dir='results',
    logging_dir='logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.4414,0.466393,0.836177,0.647059,0.594595,0.709677
2,0.3329,0.351466,0.887372,0.713043,0.773585,0.66129
3,0.2443,0.399592,0.866894,0.723404,0.64557,0.822581
4,0.2287,0.474503,0.901024,0.788321,0.72,0.870968
5,0.1625,0.486843,0.90785,0.80292,0.733333,0.887097


TrainOutput(global_step=2510, training_loss=0.25843757558688224, metrics={'train_runtime': 2318.7968, 'train_samples_per_second': 1.082, 'total_flos': 53477132396544.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 0, 'init_mem_gpu_alloc_delta': 438547968, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': -913801216, 'train_mem_gpu_alloc_delta': 1320021504, 'train_mem_cpu_peaked_delta': 913801216, 'train_mem_gpu_peaked_delta': 13138277888})

In [None]:
# Write the nausea BERT model held by `trainer` under `data_path`, and the tokenizer
# files into a `tokenizer/` subfolder of the same directory.
trainer.save_model(f'{data_path}nausea_bert')
tokenizer.save_pretrained(f'{data_path}nausea_bert/tokenizer')

('/content/drive/MyDrive/ehr_project/data/nausea_bert/tokenizer/tokenizer_config.json',
 '/content/drive/MyDrive/ehr_project/data/nausea_bert/tokenizer/special_tokens_map.json',
 '/content/drive/MyDrive/ehr_project/data/nausea_bert/tokenizer/vocab.txt',
 '/content/drive/MyDrive/ehr_project/data/nausea_bert/tokenizer/added_tokens.json',
 '/content/drive/MyDrive/ehr_project/data/nausea_bert/tokenizer/tokenizer.json')

In [None]:
# Additional fine-tuning pass for nausea: `model` is not reloaded in this cell, so the
# new Trainer wraps whatever model object is currently in the kernel (presumably the
# BERT model trained in the previous cell -- depends on execution order).
set_seed()

training_args = TrainingArguments(
    output_dir='results',
    logging_dir='logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
)

# The test split is again used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0999,0.987509,0.761092,0.631579,0.46875,0.967742
2,0.2244,0.487005,0.914676,0.822695,0.734177,0.935484
3,0.06,0.657931,0.901024,0.802721,0.694118,0.951613
4,0.0006,0.604886,0.90785,0.813793,0.710843,0.951613
5,0.0008,0.576838,0.921502,0.836879,0.746835,0.951613


TrainOutput(global_step=2510, training_loss=0.05542004200684619, metrics={'train_runtime': 2303.977, 'train_samples_per_second': 1.089, 'total_flos': 53477132396544.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 0, 'init_mem_gpu_alloc_delta': 0, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': -354004992, 'train_mem_gpu_alloc_delta': 876843008, 'train_mem_cpu_peaked_delta': 355655680, 'train_mem_gpu_peaked_delta': 13022640128})

In [None]:
# Save the model held by the current `trainer` to the `nausea_bert` directory
# (replaces any model files already written there).
trainer.save_model(f'{data_path}nausea_bert')

### 6. Cough

* `BERT`: 87.37% (accuracy) and 71.76% (F1-Score)
* `DistilBERT`: 90.10% (accuracy) and 79.72% (F1-Score)

#### DistilBERT for Cough

In [20]:
# Fine-tune `distilbert-base-uncased` as a sequence classifier on the `cough` label column.
# Seed before the model is created, as the BERT training cells in this notebook do;
# `set_seed` is a notebook helper defined outside this section.
set_seed()

model_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# truncation=True clips over-long notes; padding=True pads every note to the longest
# sequence in the same tokenizer call (i.e. per split).
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

# Use plain Python lists so that EHRDataset's `self.labels[idx]` is positional, exactly
# like the encodings. On a pandas Series `[idx]` is a label-based lookup, which only
# lines up with the encodings while the DataFrame keeps its default 0..n-1 index.
train_labels = clinical_notes_training['cough'].to_list()
test_labels = clinical_notes_test['cough'].to_list()

train_dataset = EHRDataset(train_encodings, train_labels)
test_dataset = EHRDataset(test_encodings, test_labels)

training_args = TrainingArguments(
    output_dir='results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",  # run evaluation at the end of every epoch
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='logs',
    logging_steps=10,
)

# NOTE: the test split is used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

trainer.train()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_projector.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'pre_classifier.weight', 'classi

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2752,0.432564,0.846416,0.676259,0.602564,0.770492
2,0.1569,0.557173,0.866894,0.706767,0.652778,0.770492
3,0.0823,0.632586,0.866894,0.706767,0.652778,0.770492
4,0.0964,0.61933,0.883959,0.760563,0.666667,0.885246
5,0.0762,0.654169,0.890785,0.774648,0.679012,0.901639


TrainOutput(global_step=2510, training_loss=0.1915985918614969, metrics={'train_runtime': 739.3233, 'train_samples_per_second': 3.395, 'total_flos': 32704040724480.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 102400, 'init_mem_gpu_alloc_delta': 268428800, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': 0, 'train_mem_gpu_alloc_delta': 806469120, 'train_mem_cpu_peaked_delta': 0, 'train_mem_gpu_peaked_delta': 6566033408})

In [21]:
from transformers import AutoConfig

# Write the cough DistilBERT model held by `trainer`, and the tokenizer files into a
# `tokenizer/` subfolder of the same directory.
trainer.save_model(f'{data_path}cough_distilbert')
tokenizer.save_pretrained(f'{data_path}cough_distilbert/tokenizer')

# Also drop the stock `distilbert-base-uncased` config (not the fine-tuned model's own
# config) next to the tokenizer files -- presumably so the tokenizer folder can be
# loaded on its own via the Auto* classes; TODO confirm.
config = AutoConfig.from_pretrained('distilbert-base-uncased')
config.save_pretrained(f'{data_path}cough_distilbert/tokenizer')

In [22]:
# Additional 3-epoch fine-tuning pass for cough: `model` is not reloaded here, so the
# new Trainer wraps whatever model object is currently in the kernel (presumably the
# DistilBERT model trained above -- depends on execution order).
set_seed()

training_args = TrainingArguments(
    output_dir='results',
    logging_dir='logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
)

# The test split is used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0948,0.67734,0.901024,0.797203,0.695122,0.934426
2,0.0831,0.772559,0.901024,0.805369,0.681818,0.983607
3,0.001,0.815024,0.90785,0.813793,0.702381,0.967213


TrainOutput(global_step=1506, training_loss=0.07642684745321553, metrics={'train_runtime': 439.3124, 'train_samples_per_second': 3.428, 'total_flos': 19540150118400.0, 'epoch': 3.0, 'init_mem_cpu_alloc_delta': 0, 'init_mem_gpu_alloc_delta': 0, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': -840978432, 'train_mem_gpu_alloc_delta': 535764992, 'train_mem_cpu_peaked_delta': 842395648, 'train_mem_gpu_peaked_delta': 6566082048})

In [23]:
# Save the model held by the current `trainer` to the `cough_distilbert` directory
# (replaces any model files already written there).
trainer.save_model(f'{data_path}cough_distilbert')

In [24]:
# Another 3-epoch pass over the cough data with a freshly constructed Trainer around
# the `model` object currently in the kernel (this cell does not reload it).
set_seed()

training_args = TrainingArguments(
    output_dir='results',
    num_train_epochs=3,
    evaluation_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    weight_decay=0.01,
    warmup_steps=500,
    logging_dir='logs',
    logging_steps=10,
)

# Evaluation at the end of each epoch runs on the test split.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.0006,0.714141,0.897611,0.791667,0.686747,0.934426
2,0.0932,0.646374,0.911263,0.816901,0.716049,0.95082
3,0.0001,0.799501,0.914676,0.825175,0.719512,0.967213


TrainOutput(global_step=1506, training_loss=0.03563395050729366, metrics={'train_runtime': 438.4096, 'train_samples_per_second': 3.435, 'total_flos': 19540150118400.0, 'epoch': 3.0, 'init_mem_cpu_alloc_delta': 0, 'init_mem_gpu_alloc_delta': 0, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': 2220032, 'train_mem_gpu_alloc_delta': 535764992, 'train_mem_cpu_peaked_delta': 4096, 'train_mem_gpu_peaked_delta': 6556599296})

In [25]:
# Write the model from the most recently built `trainer` into `cough_distilbert`,
# replacing any model files saved there earlier.
trainer.save_model(f'{data_path}cough_distilbert')

#### BERT for Cough

In [19]:
# Fine-tune `bert-base-uncased` as a sequence classifier on the `cough` label column.
# `set_seed` is a notebook helper defined outside this section; it is called before the
# model is instantiated.
set_seed()

model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# truncation=True clips over-long notes; padding=True pads every note to the longest
# sequence in the same tokenizer call (i.e. per split).
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
test_encodings = tokenizer(test_texts, truncation=True, padding=True)

# Use plain Python lists so that EHRDataset's `self.labels[idx]` is positional, exactly
# like the encodings. On a pandas Series `[idx]` is a label-based lookup, which only
# lines up with the encodings while the DataFrame keeps its default 0..n-1 index.
train_labels = clinical_notes_training['cough'].to_list()
test_labels = clinical_notes_test['cough'].to_list()

train_dataset = EHRDataset(train_encodings, train_labels)
test_dataset = EHRDataset(test_encodings, test_labels)

training_args = TrainingArguments(
    output_dir='results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",  # run evaluation at the end of every epoch
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='logs',
    logging_steps=10,
)

# NOTE: the test split is used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

trainer.train()

KeyboardInterrupt: ignored

In [None]:
from transformers import AutoConfig

# Stock `bert-base-uncased` config (not the fine-tuned model's own config); it is
# written next to the tokenizer files below -- presumably so the tokenizer folder can
# be loaded on its own via the Auto* classes; TODO confirm.
config = AutoConfig.from_pretrained('bert-base-uncased')

# Model goes to `cough_bert/`; tokenizer files and the config go to `cough_bert/tokenizer/`.
trainer.save_model(f'{data_path}cough_bert')
tokenizer.save_pretrained(f'{data_path}cough_bert/tokenizer')
config.save_pretrained(f'{data_path}cough_bert/tokenizer')

In [None]:
# Additional 5-epoch fine-tuning pass for cough: `model` is not reloaded here, so the
# new Trainer wraps whatever model object is currently in the kernel (presumably the
# BERT cough model from the cell above -- depends on execution order).
set_seed()

training_args = TrainingArguments(
    output_dir='results',
    logging_dir='logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=5e-5,
    warmup_steps=500,
    weight_decay=0.01,
)

# The test split is used as the per-epoch evaluation set.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2628,0.491684,0.870307,0.712121,0.661972,0.770492
2,0.2006,0.51699,0.853242,0.686131,0.618421,0.770492
3,0.1425,0.475866,0.883959,0.734375,0.701493,0.770492
4,0.2056,0.478996,0.880546,0.728682,0.691176,0.770492
5,0.2901,0.525755,0.87372,0.717557,0.671429,0.770492


TrainOutput(global_step=2510, training_loss=0.22573610306736008, metrics={'train_runtime': 2315.5038, 'train_samples_per_second': 1.084, 'total_flos': 53477132396544.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 0, 'init_mem_gpu_alloc_delta': 0, 'init_mem_cpu_peaked_delta': 0, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': -450637824, 'train_mem_gpu_alloc_delta': 876056576, 'train_mem_cpu_peaked_delta': 450637824, 'train_mem_gpu_peaked_delta': 13021885952})