# CXC Datathon - Intact Data Science Challenge
#### Predict Medical Specialty from Medical Notes

This document is for choosing parameters for BERT model, which supports the final submission file Code_final_submission.ipynb

In [1]:
import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [2]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m68.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.12.1 tokenizers-0.13.2 transformers-4.26.1


In [3]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import torch

import string
import re
import spacy

from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
#from pywsd.utils import lemmatize_sentence

import itertools
from tqdm.auto import tqdm
tqdm.pandas()
from multiprocessing import Pool

from sklearn.model_selection import train_test_split, KFold

from keras.preprocessing.text import Tokenizer
import os
import sys

from wordcloud import WordCloud
from wordcloud import STOPWORDS

from sklearn.feature_extraction.text import TfidfVectorizer
from xgboost import XGBClassifier
from sklearn.model_selection import cross_validate, cross_val_predict
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score, recall_score, precision_score, accuracy_score, f1_score
from sklearn.metrics import classification_report, accuracy_score, balanced_accuracy_score

initial settings

In [4]:
n_epoch = 10
lr_rate = 1e-5
batch = 16
eps = 1e-8

read data

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
path = "/content/drive/MyDrive/CXC DS Competition/1. Intact (main)"# where the data located
train_df = pd.read_csv(os.path.join(path, f"new_train.csv"), index_col=0)
test_df = pd.read_csv(os.path.join(path, f"new_test.csv"), index_col=0)

print("Train size", len(train_df))
print("Test size", len(test_df))
train_df.head(n=3)

Train size 3969
Test size 997


Unnamed: 0,medical_specialty,transcription,labels
0,Emergency Room Reports,"REASON FOR THE VISIT:, Very high PT/INR.,HIST...",0
1,Surgery,"PREOPERATIVE DIAGNOSIS:, Acetabular fracture ...",1
2,Surgery,"NAME OF PROCEDURE,1. Selective coronary angio...",1


# data preprocessing

In [7]:
# lowercase
def lower(df, cols):
    for col in tqdm(cols):
        df[col] = df[col].progress_apply(lambda x: x.lower())
    return df

# number removing
def num_remove(df, cols):
    for col in tqdm(cols):
        df[col] = df[col].progress_apply(lambda x: re.sub(r'\d+', '', x))
    return df

# white spaces removal
def space_remove(df,cols):
    for col in tqdm(cols):
        df[col] = df[col].progress_apply(lambda x: x.strip()) # remove front and end space
        df[col] = df[col].str.replace('\s+', ' ', regex=True) # remove double space
    return df

def to_sentence(df,cols):
    # join words to a sentence
    for col in tqdm(cols):
        df[col] = df[col].progress_apply(lambda x: ' '.join(x))
    return df

# punctuation removal
def punc_remove(df,cols):
    for col in tqdm(cols):
        df[col] = df[col].progress_apply(lambda x: x.translate(str.maketrans('', '', string.punctuation)))
    return df

# stemming
def stemming(df,cols):
    porter_stemmer  = PorterStemmer()
    for col in tqdm(cols):
        df[col] = df[col].progress_apply(lambda x: [porter_stemmer.stem(w) for w in x.split()])
    df = to_sentence(df,cols)
    return df

# convert short words and urls
def decontracted(phrase):
    # specific
    phrase = re.sub(r"won't", "will not", phrase)
    phrase = re.sub(r"can\'t", "can not", phrase)

    # general
    urlPattern = r"((http://)|(https://)|( www\.)|(com)|(net)|(org))"
    phrase = re.sub(urlPattern,'',phrase)
    phrase = re.sub(r"n\'t", " not", phrase)
    phrase = re.sub(r"\'re", " are", phrase)
    phrase = re.sub(r"\'s", " is", phrase)
    phrase = re.sub(r"\'d", " would", phrase)
    phrase = re.sub(r"\'ll", " will", phrase)
    phrase = re.sub(r"\'t", " not", phrase)
    phrase = re.sub(r"\'ve", " have", phrase)
    phrase = re.sub(r"\'m", " am", phrase)
    return phrase

# convert cases such as he's, I'm, ...
def short_word_converter(df,cols):
    nlp = spacy.load("en_core_web_sm")
    for col in tqdm(cols):
        df[col] = df[col].progress_apply(lambda x: decontracted(x))
    return df

# stop word removal
def stop_remove(df,cols):
    stops = set(stopwords.words('english'))
    for col in tqdm(cols):
        df[col] = df[col].progress_apply(lambda x: ' '.join([word for word in x.split() if word not in stops]))
    #df = to_sentence(df,cols)
    return df

# run all preprocessing functions
def preprocess(df,cols):
    df[cols] = lower(df,cols)[cols]
    df[cols] = num_remove(df,cols)[cols]
    #df[cols] = space_remove(df,cols)[cols]
    df[cols] = short_word_converter(df,cols)[cols] 
    #df[cols] = stemming(df,cols)[cols] 
    df[cols] = punc_remove(df,cols)[cols]
    df[cols] = stop_remove(df,cols)[cols]
    return df

In [8]:
train_df['transcription'][0]

'REASON FOR THE VISIT:,  Very high PT/INR.,HISTORY: , The patient is an 81-year-old lady whom I met last month when she came in with pneumonia and CHF.  She was noticed to be in atrial fibrillation, which is a chronic problem for her.  She did not want to have Coumadin started because she said that she has had it before and the INR has had been very difficult to regulate to the point that it was dangerous, but I convinced her to restart the Coumadin again.  I gave her the Coumadin as an outpatient and then the INR was found to be 12.  So, I told her to come to the emergency room to get vitamin K to reverse the anticoagulation.,PAST MEDICAL HISTORY:,1.  Congestive heart failure.,2.  Renal insufficiency.,3.  Coronary artery disease.,4.  Atrial fibrillation.,5.  COPD.,6.  Recent pneumonia.,7.  Bladder cancer.,8.  History of ruptured colon.,9.  Myocardial infarction.,10.  Hernia repair.,11.  Colon resection.,12.  Carpal tunnel repair.,13.  Knee surgery.,MEDICATIONS:,1.  Coumadin.,2.  Simva

In [9]:
train_df = preprocess(train_df,['transcription'])

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3969 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3969 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3969 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3969 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/3969 [00:00<?, ?it/s]

In [10]:
train_df['transcription'][0]

'reason visit high ptinrhistory patient yearold lady met last month came pneumonia chf noticed atrial fibrillation chronic problem want coumadin started said inr difficult regulate point dangerous convinced restart coumadin gave coumadin outpatient inr found told e emergency room get vitamin k reverse anticoagulationpast medical history congestive heart failure renal insufficiency coronary artery disease atrial fibrillation copd recent pneumonia bladder cancer history ruptured colon myocardial infarction hernia repair colon resection carpal tunnel repair knee surgerymedications coumadin simvastatin nitrofurantoin celebrex digoxin levothyroxine vicodin triamterene hydrochlorothiazide carvedilolsocial history smoke drinkphysical examinationgeneral lady distressvital signs blood pressure pulse respirations temperature heent head normalneck supplelungs clear auscultation percussionheart murmursabdomen softextremities lower extremities edemaassessment atrial fibrillation coagulopathy induce

In [11]:
# Tokenizing and keep only nouns
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.stem import WordNetLemmatizer

train_df['tokenized_sents'] = train_df['transcription'].apply(nltk.word_tokenize)
train_df['POSTags'] = train_df['tokenized_sents'].apply(pos_tag)
train_df['Nouns'] = train_df['POSTags'].apply(lambda x: [(t[0], t[1]) for t in x if t[1]=='NN' or t[1]=='NNP' or t[1]=='NNS' or t[1]=='NNPS'])

to_be_lemmatized = []

for nouns_per_medical_specialties in train_df['Nouns']:
  words2lemmatied = []

  for word in nouns_per_medical_specialties:
    words2lemmatied.append(word[0])
    
  to_be_lemmatized.append(words2lemmatied)

train_df['to_be_lemmatized'] = to_be_lemmatized

lmtzr = WordNetLemmatizer()

train_df['lemmatize'] = train_df['to_be_lemmatized'].apply(lambda lst:[lmtzr.lemmatize(word) for word in lst])

train_df['tokenized_sents'] = train_df['tokenized_sents'].astype('str')
train_df['tokenized_sents']

train_df['lemmatize_to_string']= train_df['lemmatize'].str.join(" ")
print(train_df['lemmatize_to_string'][0])

reason visit ptinrhistory patient month chf fibrillation chronic problem coumadin regulate point coumadin outpatient inr emergency room k reverse history heart failure insufficiency artery disease fibrillation copd pneumonia bladder cancer history colon infarction hernia repair colon resection tunnel repair surgerymedications celebrex digoxin levothyroxine vicodin triamterene history lady sign blood pressure respiration head normalneck supplelungs auscultation percussionheart murmursabdomen softextremities extremity fibrillation coagulopathy office repeat vitamin k mg ml dw home repeat ptinr week time use coumadin case age medication level anticoagulation prone fall problem use aspirin day anticoagulation risk stroke anticoagulation coumadin use coumadin time see outpatient


In [12]:
train_df.head()

Unnamed: 0,medical_specialty,transcription,labels,tokenized_sents,POSTags,Nouns,to_be_lemmatized,lemmatize,lemmatize_to_string
0,Emergency Room Reports,reason visit high ptinrhistory patient yearold...,0,"['reason', 'visit', 'high', 'ptinrhistory', 'p...","[(reason, NN), (visit, NN), (high, JJ), (ptinr...","[(reason, NN), (visit, NN), (ptinrhistory, NN)...","[reason, visit, ptinrhistory, patient, month, ...","[reason, visit, ptinrhistory, patient, month, ...",reason visit ptinrhistory patient month chf fi...
1,Surgery,preoperative diagnosis acetabular fracture lef...,1,"['preoperative', 'diagnosis', 'acetabular', 'f...","[(preoperative, JJ), (diagnosis, NN), (acetabu...","[(diagnosis, NN), (fracture, NN), (columntrans...","[diagnosis, fracture, columntransverse, poster...","[diagnosis, fracture, columntransverse, poster...",diagnosis fracture columntransverse posterior ...
2,Surgery,name procedure selective coronary angiography ...,1,"['name', 'procedure', 'selective', 'coronary',...","[(name, NN), (procedure, NN), (selective, JJ),...","[(name, NN), (procedure, NN), (angiography, NN...","[name, procedure, angiography, placement, x, x...","[name, procedure, angiography, placement, x, x...",name procedure angiography placement x x stent...
3,Radiology,referring diagnosis motor neuron diseasepertin...,2,"['referring', 'diagnosis', 'motor', 'neuron', ...","[(referring, VBG), (diagnosis, NN), (motor, NN...","[(diagnosis, NN), (motor, NN), (diseasepertine...","[diagnosis, motor, diseasepertinent, history, ...","[diagnosis, motor, diseasepertinent, history, ...",diagnosis motor diseasepertinent history exami...
4,Emergency Room Reports,chief plaint dental painhistory present illnes...,0,"['chief', 'plaint', 'dental', 'painhistory', '...","[(chief, JJ), (plaint, NN), (dental, JJ), (pai...","[(plaint, NN), (painhistory, NN), (illness, NN...","[plaint, painhistory, illness, yearold, female...","[plaint, painhistory, illness, yearold, female...",plaint painhistory illness yearold female stat...


in training set, split train and validation sets

In [13]:
X = train_df['lemmatize_to_string']
y = train_df["labels"]
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.30, random_state=100, stratify=y)

# Bert Model

In [14]:
from transformers import BertTokenizer
from torch.utils.data import TensorDataset

In [15]:
# other tokenize methods: https://huggingface.co/docs/transformers/main_classes/tokenizer#:~:text=%E2%80%9CFast%E2%80%9D%20implementation
#tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

In [16]:
%%time
encoder_train = tokenizer.batch_encode_plus(X_train,
                                           add_special_tokens = True,
                                           pad_to_max_length = True,
                                           return_attention_mask=True,
                                           max_length = 256,
                                           return_tensors = 'pt')



encoder_test = tokenizer.batch_encode_plus(X_val,
                                           add_special_tokens = True,
                                           pad_to_max_length = True,
                                           return_attention_mask=True,
                                           max_length = 256,
                                           return_tensors = 'pt')

input_ids_train = encoder_train['input_ids']
#List of indices specifying which tokens should be attended to by the model 
attention_masks_train = encoder_train["attention_mask"]
labels_train = torch.tensor(pd.DataFrame(y_train).values)
# df[df['data_type']=='train'].Label.value_counts()

input_ids_test = encoder_test['input_ids']
attention_masks_test = encoder_test["attention_mask"]
labels_test = torch.tensor(pd.DataFrame(y_val).values)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


CPU times: user 16 s, sys: 74.3 ms, total: 16 s
Wall time: 16 s


In [17]:
data_train = TensorDataset(input_ids_train,attention_masks_train,labels_train)
data_test = TensorDataset(input_ids_test,attention_masks_test,labels_test)

model performance

In [18]:
from sklearn.metrics import f1_score, accuracy_score, balanced_accuracy_score
import random

def f1_score_func(preds,labels):
    preds_flat = np.argmax(preds,axis=1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat,preds_flat,average = 'macro')


###############################################

def accuracy_per_class(preds,labels):
    label_dict_reverse = {v:k for k,v in dict_label.items()}
    
    preds_flat = np.argmax(preds,axis=1).flatten()
    labels_flat = labels.flatten()
    
    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat==label]
        y_true = labels_flat[labels_flat==label]
        print(f"Class:{label_dict_reverse}")
        print(f"Accuracy:{len(y_preds[y_preds==label])}/{len(y_true)}\n")
###############################################

def acc_score_func(preds,labels):
    preds_flat = np.argmax(preds,axis=1).flatten()
    labels_flat = labels.flatten()
    return accuracy_score(labels_flat,preds_flat)

def balance_acc_score_func(preds,labels):
    preds_flat = np.argmax(preds,axis=1).flatten()
    labels_flat = labels.flatten()
    return balanced_accuracy_score(labels_flat,preds_flat)

In [19]:
seed_val = 1234
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

#Batch size
Load data from encoded data

tuning batch size, higher requires gpu, but faster


In [20]:
from torch.utils.data import RandomSampler,SequentialSampler,DataLoader

dataloader_train = DataLoader(
    data_train,
    sampler= RandomSampler(data_train),
    batch_size = 32
    
)


dataloader_test = DataLoader(
    data_test,
    sampler= RandomSampler(data_test),
    batch_size = 32
    
)

In [21]:
from transformers import BertForSequenceClassification
model = BertForSequenceClassification.from_pretrained('bert-large-uncased',
                                     num_labels = y_train.nunique(),
                                     output_attentions = False,
                                     output_hidden_states =  False)

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint a

## Epoch and Optimizer

In [22]:
from transformers import AdamW,get_linear_schedule_with_warmup
optimizer = AdamW(model.parameters(),lr = lr_rate,eps = eps)

epochs  = n_epoch
scheduler = get_linear_schedule_with_warmup(
            optimizer,
    num_warmup_steps = 0,
   num_training_steps = len(dataloader_train)*epochs 
)



In [23]:
# GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

print(f"Loading:{device}")

Loading:cuda


Predict Function

In [24]:
def evaluate(dataloader_val):
    model.eval()
    
    loss_val_total = 0
    predictions,true_vals = [],[]
    
    for batch in tqdm(dataloader_val):
        batch = tuple(b.to(device) for b in batch)
        
        inputs = {'input_ids':  batch[0],
                  'attention_mask':batch[1],
                  'labels': batch[2]
                 }
        with torch.no_grad():
            outputs = model(**inputs)
            
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total +=loss.item()
        
        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)
        
        
    loss_val_avg = loss_val_total/len(dataloader_val)  
    
    predictions = np.concatenate(predictions,axis=0)
    true_vals = np.concatenate(true_vals,axis=0) 
    return loss_val_avg,predictions,true_vals

In [25]:
%%time
accuracy_lis = []
training_loss = []
for epoch in tqdm(range(1,epochs+1)):
    model.train()
    
    loss_train_total=0
    
    progress_bar = tqdm(dataloader_train,desc = "Epoch: {:1d}".format(epoch),leave = False,disable = False)
    
    
    for batch in progress_bar:
        model.zero_grad()
        
        batch = tuple(b.to(device) for b in batch)
        
        inputs = {
            "input_ids":batch[0],
            "attention_mask":batch[1],
            "labels":batch[2]
            
        }
        outputs = model(**inputs)
        
        loss = outputs[0]
#         logits = outputs[1]
        loss_train_total +=loss.item()
        loss.backward()
        
        torch.nn.utils.clip_grad_norm(model.parameters(),1.0)
        
        optimizer.step()
        scheduler.step()
        
        
        progress_bar.set_postfix({'training_loss':'{:.3f}'.format(loss.item()/len(batch))})
#     torch.save(model.state_dict(),f'/output/BERT_ft_epoch{epoch}.model')To save the model after each epoch
    
    tqdm.write('\nEpoch {}'.format(epoch))
    
    loss_train_avg = loss_train_total/len(dataloader_train)
    tqdm.write(f'Training Loss: {loss_train_avg}\f Train Accuracy')
    val_loss,predictions,true_vals = evaluate(dataloader_test)
    test_score = f1_score_func(predictions,true_vals)
    test_acc = acc_score_func(predictions,true_vals)
    test_balance_acc = balance_acc_score_func(predictions,true_vals)
    tqdm.write(f'Val Loss:{val_loss}\n Test F1 Macro Score:{test_score}\n Test Accuracy:{test_acc}\n Test Balanced Accuracy: {test_balance_acc}')
    accuracy_lis.append(test_acc)
    training_loss.append(loss_train_avg)

  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/87 [00:00<?, ?it/s]




Epoch 1
Training Loss: 3.275456962914302 Train Accuracy


  0%|          | 0/38 [00:00<?, ?it/s]

Val Loss:2.848067597339028
 Test F1 Macro Score:0.020149446538197162
 Test Accuracy:0.27287993282955497
 Test Balanced Accuracy: 0.03841463414634146


Epoch: 2:   0%|          | 0/87 [00:00<?, ?it/s]




Epoch 2
Training Loss: 2.6417690159260543 Train Accuracy


  0%|          | 0/38 [00:00<?, ?it/s]

Val Loss:2.4324890061428674
 Test F1 Macro Score:0.04765898566281956
 Test Accuracy:0.3534844668345928
 Test Balanced Accuracy: 0.0668529865642


Epoch: 3:   0%|          | 0/87 [00:00<?, ?it/s]




Epoch 3
Training Loss: 2.325994675186859 Train Accuracy


  0%|          | 0/38 [00:00<?, ?it/s]

Val Loss:2.266750803119258
 Test F1 Macro Score:0.06270686800669409
 Test Accuracy:0.34844668345927793
 Test Balanced Accuracy: 0.08002667087722887


Epoch: 4:   0%|          | 0/87 [00:00<?, ?it/s]




Epoch 4
Training Loss: 2.1110359457717545 Train Accuracy


  0%|          | 0/38 [00:00<?, ?it/s]

Val Loss:2.086880840753254
 Test F1 Macro Score:0.09471831425993199
 Test Accuracy:0.35432409739714527
 Test Balanced Accuracy: 0.10502100055775077


Epoch: 5:   0%|          | 0/87 [00:00<?, ?it/s]




Epoch 5
Training Loss: 1.9311196557406722 Train Accuracy


  0%|          | 0/38 [00:00<?, ?it/s]

Val Loss:2.0529079813706246
 Test F1 Macro Score:0.10954070295531437
 Test Accuracy:0.3350125944584383
 Test Balanced Accuracy: 0.12194853244289645


Epoch: 6:   0%|          | 0/87 [00:00<?, ?it/s]




Epoch 6
Training Loss: 1.8057534763182717 Train Accuracy


  0%|          | 0/38 [00:00<?, ?it/s]

Val Loss:1.9900527941553217
 Test F1 Macro Score:0.09956926447389548
 Test Accuracy:0.3249370277078086
 Test Balanced Accuracy: 0.10576190539868788


Epoch: 7:   0%|          | 0/87 [00:00<?, ?it/s]




Epoch 7
Training Loss: 1.714534737597937 Train Accuracy


  0%|          | 0/38 [00:00<?, ?it/s]

Val Loss:2.001675580677233
 Test F1 Macro Score:0.11046275571909944
 Test Accuracy:0.31066330814441645
 Test Balanced Accuracy: 0.1227278975640679


Epoch: 8:   0%|          | 0/87 [00:00<?, ?it/s]




Epoch 8
Training Loss: 1.6444865259630927 Train Accuracy


  0%|          | 0/38 [00:00<?, ?it/s]

Val Loss:1.961253843809429
 Test F1 Macro Score:0.10685335256934243
 Test Accuracy:0.3089840470193115
 Test Balanced Accuracy: 0.1179340381236742


Epoch: 9:   0%|          | 0/87 [00:00<?, ?it/s]




Epoch 9
Training Loss: 1.5926038985964897 Train Accuracy


  0%|          | 0/38 [00:00<?, ?it/s]

Val Loss:1.9786850904163562
 Test F1 Macro Score:0.10751015595316513
 Test Accuracy:0.2963895885810244
 Test Balanced Accuracy: 0.11901789397740595


Epoch: 10:   0%|          | 0/87 [00:00<?, ?it/s]




Epoch 10
Training Loss: 1.5538334435430066 Train Accuracy


  0%|          | 0/38 [00:00<?, ?it/s]

Val Loss:1.9647983061639887
 Test F1 Macro Score:0.10650270974250711
 Test Accuracy:0.2938706968933669
 Test Balanced Accuracy: 0.11770622389738428
CPU times: user 15min 9s, sys: 1min 54s, total: 17min 3s
Wall time: 17min 1s


In [26]:
# _, predictions, true_vals = evaluate(dataloader_test)
# accuracy_per_class(predictions, true_vals)
evaluate(dataloader_test)

  0%|          | 0/38 [00:00<?, ?it/s]

(1.9663188802568536,
 array([[-0.9761486 ,  5.3756557 , -0.30508137, ..., -0.33223504,
         -0.5028091 , -1.1234049 ],
        [-0.20517667,  3.7212384 , -0.5548176 , ...,  0.11097746,
         -0.668014  , -1.0517963 ],
        [ 1.2759637 , -1.7963592 , -1.14917   , ..., -1.089242  ,
         -0.09038322, -0.18342975],
        ...,
        [-1.1259121 ,  4.1373625 ,  1.0453359 , ..., -0.5291153 ,
         -0.7358605 , -0.97864985],
        [ 1.8435745 , -1.3176593 , -1.001508  , ..., -1.0726131 ,
         -0.2763333 , -0.6044919 ],
        [ 0.8665087 , -0.96671474, -0.07920175, ..., -1.1458219 ,
         -0.46937603, -0.6274615 ]], dtype=float32),
 array([[ 6],
        [ 1],
        [13],
        ...,
        [ 1],
        [16],
        [20]]))