In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm, trange
from transformers import AdamW
from torch import nn
from torch.nn import BCEWithLogitsLoss, BCELoss
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix, f1_score, accuracy_score, label_ranking_average_precision_score
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import ast

# Multi-task heads
In this experiment, we combine different classes and group them in different heads. We then experiment to check if we are getting any better results.

1. reading the data

In [2]:
def read_data(data_path,split_path):
    df = pd.read_csv(data_path)

    cols = df.columns
    label_cols = list(cols[6:])
    

    train_df = pd.read_csv(f"{split_path}/train.csv")
    # convert str to list
    train_df['labels'] = train_df.apply(lambda row: ast.literal_eval(row['labels']), axis=1)

    test_df = pd.read_csv(f"{split_path}/test.csv")
    test_df['labels'] = test_df.apply(lambda row: ast.literal_eval(row['labels']), axis=1)

    val_df = pd.read_csv(f"{split_path}/val.csv")
    val_df['labels'] = val_df.apply(lambda row: ast.literal_eval(row['labels']), axis=1)

    return train_df, test_df, val_df, label_cols

2. create a column for each head

In [3]:
# ------------attempt of including all heads in one column--------------------
labels = {'No Finding':0, 'Cardiomegaly':1,'Lung Opacity':2,'Edema':3,'Consolidation':4,
          'Pneumonia':5,'Atelectasis':6,'Pneumothorax':7,'Pleural Effusion':8,'Fracture':9,'SupportDevices':10}
def reorder(row,groups_index):
#     print(row)
    groups_values = []
    for group in groups_index:
        tmp =[]
        for index in group:
            if index==-1:
                tmp.append(-1)
            else:
                tmp.append(row[index])
        groups_values.append(tmp)
#     print(groups_values)
    return groups_values

def group_heads(groups,df): # first attempt for each label one head
    df['head_labels'] = df.apply(lambda row: reorder(row['labels'],groups), axis=1)
    return df
        
def padding_heads(heads_index): #as heads does not have the same size, we pad them with -1
    head_counts = [len(head) for head in heads_index]
    lenmax = max(head_counts)
#     print(lenmax)
    padded_heads = []
    for head in heads_index:
        pad_n = lenmax - len(head)
#         print(pad_n)
        arr = np.pad(head, (0,pad_n), 'constant', constant_values=-1)
        padded_heads.append(arr)
    return padded_heads


head_index1 = [0,1,2,6]  #{'No Finding', 'Cardiomegaly','Lung Opacity','Atelectasis'}
head_index2 = [3,4,5,7,9] #{'Edema','Consolidation','Pneumonia', 'Pneumothorax','Fracture'}
head_index3 = [8,10] #{'Pleural Effusion','SupportDevices',}
heads_index = [head_index1, head_index2, head_index3]

data_path = 'data/OpenI/OpenI_cheXpertLabels.csv'
split_path = 'data/OpenI/cheXpertLabels'
train_df, test_df, val_df,  label_cols = read_data(data_path, split_path)

padded_heads=padding_heads(heads_index)
train_df = group_heads(padded_heads, train_df)
train_df

Unnamed: 0,text,labels,head_labels
0,Heart size is normal and lungs are clear. No p...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
1,Cardiomediastinal silhouette is normal. Pulmon...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
2,Cardiomegaly. Left lung clear. Large right eff...,"[0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0]","[[0, 1, 0, 1, -1], [0, 0, 0, 0, 0], [1, 0, -1,..."
3,Normal cardiac size and contour unremarkable m...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
4,The cardiac and mediastinal contours are withi...,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]","[[0, 0, 1, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
...,...,...,...
1011,Frontal and lateral views of the chest with ov...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]","[[0, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 1, -1,..."
1012,Lungs are clear. Heart size normal. No pneumot...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
1013,Cardiomediastinal contour and pulmonary vascul...,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]","[[0, 0, 0, 0, -1], [0, 0, 0, 0, 1], [0, 0, -1,..."
1014,"The lungs are clear bilaterally. Specifically,...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."


In [4]:
# #------------attempt of one column per head-------------
# def get_head(labels, head_index):
#     head_values = []
#     for index in head_index:
#         head_values.append(labels[index])
#     return head_values
        
# def split_data_to_heads(heads_index,df):
#     i=1
#     for head_index in heads_index:
#         col_name = f"head{i}"
#         df[col_name]= df.apply(lambda row: get_head(row['labels'],head_index), axis=1)
#         i+=1
#     return df
    
# train_df = split_data_to_heads(heads_index, train_df)



In [5]:
class Head():
    def __init__(self,hparams):
        self.inputLayer = hparams['inputLayer']
        self.run()
    def run(self):
        pass

class HeadMultilabelCLS(Head):
    def __init__(self,hparams):
        self.labels = hparams['labels'] # batch labels, or all labels
        self.device = hparams['device']
        self.num_labels = hparams['num_labels'] # number of labels
        self.taskspecificLayer = nn.Linear(hparams['input_size'], hparams['num_labels']).to(self.device) #classifier
        self.loss = None
        self.logits = None
        self.pred_labels = None
        
        super().__init__(hparams)
        
    def run(self):
        self.logits = self.taskspecificLayer(self.inputLayer)
        self.pred_label = torch.sigmoid(self.logits)
        
        loss_func = BCEWithLogitsLoss() 
        self.loss = loss_func(self.logits.view(-1,self.num_labels),
                         self.labels.type_as(self.logits).view(-1,self.num_labels)) #convert labels to float for calculation
        # loss_func = BCELoss() 
        # loss = loss_func(torch.sigmoid(logits.view(-1,num_labels)),b_labels.type_as(logits).view(-1,num_labels)) #convert labels to float for calculation
        
        return self.loss  
    
    
# class HeadMulticlassCLS(Head):
#     def __init__(self,hparams):
#         super().__init__(hparams)

#     def run(self):
#         logits = self.taskspecificLayer(self.inputLayer)

#         loss_fct = CrossEntropyLoss()
#         loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
    
    
# class HeadBinaryCLS(Head):
#     def __init__(self,hparams):
#         super().__init__(hparams)

#     def run(self):
#         pass
    
    
# class HeadAbstractiveSumm(Head):
#     def __init__(self,hparams):
#         super().__init__(hparams)

#     def run(self):
#         pass
    
# class HeadRegression(Head):
#     def __init__(self,hparams):
#         super().__init__(hparams)
        
#     def run(self):
#         loss_fct = MSELoss()
#         loss = loss_fct(logits.view(-1), labels.view(-1))
    
    
# class HeadSTSclinical(Head):
#     def __init__(self,hparams):
#         super().__init__(hparams)

#     def run(self):
#         pass
    
    

In [6]:
from transformers import BertPreTrainedModel, BertModel

class BertCLS(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
#         self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        
#         self.head_count = config.head_count # ex:[2,3,4]
#         self.head_index = config.head_index # ex: [[2,0],[1,3,4],[5,6,7,8,]]
#         self.classifierheads = [nn.Linear(config.hidden_size, count) for count in self.head_count]

        self.init_weights()

# [docs]
#     @add_start_docstrings_to_callable(BERT_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
#     @add_code_sample_docstrings(tokenizer_class=_TOKENIZER_FOR_DOC, checkpoint="bert-base-uncased")
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
    ):

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        return pooled_output
    
#         #loop through the heads
#         logit_heads = []
#         for classifier in self.classifierheads:
#             logits = classifier(pooled_output)
#             logit_heads.append(logits)

#         #TODO: work here on this for loop
#         loss=0
#         for logit,head_labels in zip(logit_heads):
#             loss_func = BCEWithLogitsLoss() 
#             loss += loss_func(logits.view(-1,head_num_labels),b_labels.type_as(logits).view(-1,head_num_labels)) #convert labels to float for calculation

#         return loss  
#-------------example------------------
# from transformers import BertTokenizer, BertForSequenceClassification
# import torch
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# model = BertCLS.from_pretrained('bert-base-uncased')
# inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
# # labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
# outputs = model(**inputs)
# # loss, logits = outputs[:2]
# type(outputs)

3. fine tunning and prediction

In [7]:
def create_dataLoader(input, labels, batch_size):
    data = TensorDataset(input.input_ids, input.attention_mask, labels)
    sampler = SequentialSampler(data)
    dataloader = DataLoader(data, sampler=sampler, batch_size=batch_size)
    return dataloader

def calculate_f1_acc(pred_labels, true_labels, threshold = 0.50):
    pred_bools = [pl>threshold for pl in pred_labels]
    true_bools = [tl==1 for tl in true_labels]
    val_f1_accuracy = f1_score(true_bools,pred_bools,average='micro')*100
    val_flat_accuracy = accuracy_score(true_bools, pred_bools)*100

    print('Validation F1: ', val_f1_accuracy)
    print('Validation Accuracy: ', val_flat_accuracy)

def calculate_f1_acc_test(pred_labels, true_labels, test_label_cols, threshold = 0.50):
    pred_bools = [pl>threshold for pl in pred_labels]
    true_bools = [tl==1 for tl in true_labels]
    val_f1_accuracy = f1_score(true_bools,pred_bools,average='micro')*100
    val_flat_accuracy = accuracy_score(true_bools, pred_bools)*100 
    print("-----------test-----------")
    print("Threshold: 0.5")
    print('Test F1 Score: ', f1_score(true_bools, pred_bools,average='micro'))
    print('Test Accuracy: ', accuracy_score(true_bools, pred_bools))
    print('LRAP: ', label_ranking_average_precision_score(true_labels, pred_labels) ,'\n')
    clf_report = classification_report(true_bools,pred_bools,target_names=test_label_cols)
    # pickle.dump(clf_report, open('classification_report.txt','wb')) #save report
    print(clf_report)

    # Calculate Accuracy - maximize F1 accuracy by tuning threshold values. First with 'macro_thresholds' on the order of e^-1 then with 'micro_thresholds' on the order of e^-2
    print("-----Optimizing threshold value for micro F1 score-----")

    macro_thresholds = np.array(range(1,10))/10

    f1_results, flat_acc_results = [], []
    for th in macro_thresholds:
        pred_bools = [pl>th for pl in pred_labels]
        test_f1_accuracy = f1_score(true_bools,pred_bools,average='micro')
        test_flat_accuracy = accuracy_score(true_bools, pred_bools)
        f1_results.append(test_f1_accuracy)
        flat_acc_results.append(test_flat_accuracy)

    best_macro_th = macro_thresholds[np.argmax(f1_results)] #best macro threshold value

    micro_thresholds = (np.array(range(10))/100)+best_macro_th #calculating micro threshold values

    f1_results, flat_acc_results = [], []
    for th in micro_thresholds:
        pred_bools = [pl>th for pl in pred_labels]
        test_f1_accuracy = f1_score(true_bools,pred_bools,average='micro')
        test_flat_accuracy = accuracy_score(true_bools, pred_bools)
        f1_results.append(test_f1_accuracy)
        flat_acc_results.append(test_flat_accuracy)

    best_f1_idx = np.argmax(f1_results) #best threshold value

    # Printing and saving classification report
    print('Best Threshold: ', micro_thresholds[best_f1_idx])
    print('Test F1 Score: ', f1_results[best_f1_idx])
    print('Test Accuracy: ', flat_acc_results[best_f1_idx])
    print('LRAP: ', label_ranking_average_precision_score(true_labels, pred_labels) , '\n')

    best_pred_bools = [pl>micro_thresholds[best_f1_idx] for pl in pred_labels]
    clf_report_optimized = classification_report(true_bools,best_pred_bools, target_names=label_cols)
    # pickle.dump(clf_report_optimized, open('classification_report_optimized.txt','wb'))
    print(clf_report_optimized)

def multihead_cls(data_path, split_path, PreTrainedModel, epochs, batch_size, 
                  max_length ,ModelTokenizer, tokenizer_name, model_name, 
                  use_data_loader, heads_index, col_names):
    #--------prepare the dataset-----------
    train_df, test_df, val_df,  label_cols = read_data(data_path, split_path)
    num_labels = len(label_cols)
    
    padded_heads=padding_heads(heads_index)
    train_df = group_heads(padded_heads, train_df)
    test_df = group_heads(padded_heads, test_df)
    val_df = group_heads(padded_heads, val_df)
    
    # ----------tokenize---------------
    tokenizer = ModelTokenizer.from_pretrained(tokenizer_name)

    reports_train = train_df.text.to_list()
    reports_test = test_df.text.to_list()
    reports_val   = val_df.text.to_list()

    train = tokenizer(reports_train, padding='max_length', truncation=True, max_length=max_length, return_tensors="pt")
    test = tokenizer(reports_test, padding='max_length', truncation=True, max_length=max_length, return_tensors="pt")
    val = tokenizer(reports_val, padding='max_length', truncation=True, max_length=max_length, return_tensors="pt")
    
    #-----------dataloaders--------------
    #prepare labels for dataloader
    head_count = [len(group) for group in heads_index]
    nheads = len(head_count)
    
    train_labels = torch.from_numpy(np.array(train_df.head_labels.to_list()))
    test_labels = torch.from_numpy(np.array(test_df.head_labels.to_list()))
    val_labels = torch.from_numpy(np.array(val_df.head_labels.to_list()))

    
    if use_data_loader: # if the dataset is huge in size
        # Create an iterator of our data with torch DataLoader. This helps save on memory during training because, 
        # unlike a for loop, with an iterator the entire dataset does not need to be loaded into memory
        train_dataloader      = create_dataLoader(train, train_labels, batch_size)
        validation_dataloader = create_dataLoader(val, val_labels, batch_size)
        test_dataloader       = create_dataLoader(test, test_labels, batch_size)

    else: #TODO: if the dataset is small in size
        pass
    
    #-----------load model----------------
    # Load model, the pretrained model will include a single linear classification layer on top for classification. 
    model = PreTrainedModel.from_pretrained(model_name)
    model.cuda()
    optimizer = AdamW(model.parameters(),lr=2e-5)  # Default optimization
    
    #---------FineTune model-----------
#     device = "cpu"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # n_gpu = torch.cuda.device_count()
    
    # Store our loss and accuracy for plotting
    train_loss_set = []

    # trange is a tqdm wrapper around the normal python range
    for _ in trange(epochs, desc="Epoch"):

        #-------Training-------
        # Set our model to training mode (as opposed to evaluation mode)
        model.train()

        # Tracking variables
        tr_loss = 0 #running loss
        nb_tr_examples, nb_tr_steps = 0, 0

        # Train the data for one epoch
        for step, batch in enumerate(train_dataloader):
            # Add batch to GPU
            batch = tuple(t.to(device) for t in batch)
            
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch
#             print(b_labels)
#             print(b_labels.size())
#             print(b_labels[:,0,:])
            # Clear out the gradients (by default they accumulate)
            optimizer.zero_grad()

            # Forward pass for multiclass classification
            # outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
            # loss = outputs[0]
            # logits = outputs[1]

            # Forward pass for multilabel classification
#             outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
#             logits = outputs[0]
#             loss_func = BCEWithLogitsLoss() 
#             loss = loss_func(logits.view(-1,num_labels),b_labels.type_as(logits).view(-1,num_labels)) #convert labels to float for calculation
#             # loss_func = BCELoss() 
#             # loss = loss_func(torch.sigmoid(logits.view(-1,num_labels)),b_labels.type_as(logits).view(-1,num_labels)) #convert labels to float for calculation
#             train_loss_set.append(loss.item()) 
            
            #Forward pass for multihead
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)  
#             print(outputs[0].size())
            head_losses = []
            for i in range(0,nheads):
                
                #remove -1 paddings:
                labels = b_labels[:,i,:]
                labels = labels[:,0:head_count[i]]
                
                hparams={
                    'labels' : labels,
                    'num_labels' : len(heads_index[i]),
                    'input_size' : outputs[0].size()[0],
                    'inputLayer' : outputs,
                    'device'     : device,
                }
                head_losses.append(HeadMultilabelCLS(hparams))
            
            loss = 0
            for head in head_losses:
                loss += head.loss 
            
            # Backward pass
            loss.backward()
            
            # Update parameters and take a step using the computed gradient
            optimizer.step()
            # scheduler.step()
            
            # Update tracking variables
            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1

        print("Train loss: {}".format(tr_loss/nb_tr_steps))

        # ---------Validation--------

        # Put model in evaluation mode to evaluate loss on the validation set
        model.eval()

        # Variables to gather full output
        true_labels_each_head,pred_labels_each_head = [],[]
        true_labels_all_head,pred_labels_all_head = [],[]
        
        # Predict
        for i, batch in enumerate(validation_dataloader):
            batch = tuple(t.to(device) for t in batch)
            # Unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch
            with torch.no_grad():
                outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)  

                pred_label_heads = []
                true_label_heads = []
                for i in range(0,nheads):
                    #remove -1 paddings:
                    labels = b_labels[:,i,:]
                    labels = labels[:,0:head_count[i]]

                    hparams={
                        'labels' : labels,
                        'num_labels' : len(heads_index[i]),
                        'input_size' : outputs[0].size()[0],
                        'inputLayer' : outputs,
                        'device'     : device,
                    }
                    pred_label_heads.append(HeadMultilabelCLS(hparams).pred_label)
                    true_label_heads.append(labels)
                
                #store batch labels 
                pred_label_b = np.array(pred_label_heads)
                true_labels_b = np.array(true_label_heads)

                #store each head label seperatly
                true_labels_each_head.append(true_labels_b)
                pred_labels_each_head.append(pred_label_b)

                #store all head labels together
                true_labels_all_head.append(
                    torch.cat([true_head_label for true_head_label in true_labels_b],1)
                    .to('cpu').numpy())
                pred_labels_all_head.append(
                    torch.cat([pred_head_label for pred_head_label in pred_label_b],1)
                    .to('cpu').numpy())
        
        print("Results of all heads:")
        true_labels_all_head = np.concatenate([item for item in true_labels_all_head])
        pred_labels_all_head = np.concatenate([item for item in pred_labels_all_head])
#         print("true_labels_all_head", true_labels_all_head.shape)
#         print("pred_labels_all_head", pred_labels_all_head.shape)
        calculate_f1_acc(pred_labels_all_head, true_labels_all_head)
        
        true_labels_each_head = np.array(true_labels_each_head)
        pred_labels_each_head = np.array(pred_labels_each_head)
#         print("true_labels_each_head", true_labels_each_head.shape)
#         print("---true_labels_each_head", true_labels_each_head)
#         print("pred_labels_each_head", pred_labels_each_head.shape)
        
        print("Results of each head:")
        for i in range(0,nheads):
            print(f"Head_{i}")
            i_head_true_labels = true_labels_each_head[:,i]
            i_head_true_labels = torch.cat([item for item in i_head_true_labels],0).to('cpu').numpy()
            
            i_head_pred_labels = pred_labels_each_head[:,i]
            i_head_pred_labels = torch.cat([item for item in i_head_pred_labels],0).to('cpu').numpy()
            calculate_f1_acc(i_head_pred_labels, i_head_true_labels)

    # ---------test--------
    # Put model in evaluation mode to evaluate loss on the validation set
    model.eval()

    #track variables
    true_labels_each_head,pred_labels_each_head = [],[]
    true_labels_all_head,pred_labels_all_head = [],[]
#     Predict
    for i, batch in enumerate(test_dataloader):
        batch = tuple(t.to(device) for t in batch)
        # Unpack the inputs from our dataloader
        b_input_ids, b_input_mask, b_labels = batch
        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)  

            pred_label_heads = []
            true_label_heads = []
            for i in range(0,nheads):
                #remove -1 paddings:
                labels = b_labels[:,i,:]
                labels = labels[:,0:head_count[i]]

                hparams={
                    'labels' : labels,
                    'num_labels' : len(heads_index[i]),
                    'input_size' : outputs[0].size()[0],
                    'inputLayer' : outputs,
                    'device'     : device,
                }
                pred_label_heads.append(HeadMultilabelCLS(hparams).pred_label)
                true_label_heads.append(labels)

            #store batch labels 
            pred_label_b = np.array(pred_label_heads)
            true_labels_b = np.array(true_label_heads)

            #store each head label seperatly
            true_labels_each_head.append(true_labels_b)
            pred_labels_each_head.append(pred_label_b)

            #store all head labels together
            true_labels_all_head.append(
                torch.cat([true_head_label for true_head_label in true_labels_b],1)
                .to('cpu').numpy())
            pred_labels_all_head.append(
                torch.cat([pred_head_label for pred_head_label in pred_label_b],1)
                .to('cpu').numpy())

    print("########## Results of all heads:")
    true_labels_all_head = np.concatenate([item for item in true_labels_all_head])
    pred_labels_all_head = np.concatenate([item for item in pred_labels_all_head])
    calculate_f1_acc_test(pred_labels_all_head, true_labels_all_head, col_names)

    true_labels_each_head = np.array(true_labels_each_head)
    pred_labels_each_head = np.array(pred_labels_each_head)

    print("########### Results of each head:")
    for i in range(0,nheads):
        print(f"Head_{i}")
        i_head_true_labels = true_labels_each_head[:,i]
        i_head_true_labels = torch.cat([item for item in i_head_true_labels],0).to('cpu').numpy()

        i_head_pred_labels = pred_labels_each_head[:,i]
        i_head_pred_labels = torch.cat([item for item in i_head_pred_labels],0).to('cpu').numpy()
        calculate_f1_acc(i_head_pred_labels, i_head_true_labels)


In [8]:
#------------dataset details------------
data_path = 'data/OpenI/OpenI_cheXpertLabels.csv'
split_path = 'data/OpenI/cheXpertLabels'
use_data_loader = True
#--------------training details-----------
epochs = 3 # Number of training epochs (authors recommend between 2 and 4)
batch_size = 16
max_length = 128
#---------------saving results details----------
experiment_name = "bert"
#----------------bert---------------
from transformers import BertTokenizer, BertForSequenceClassification
model_name = 'bert-base-uncased'
tokenizer_name = "bert-base-uncased"

# # model_name = "bert-base-cased"
# # tokenizer_name = "bert-base-cased"

ModelTokenizer = BertTokenizer
PreTrainedModel = BertCLS #BertForMultiheadClassification

#----------------BioBERT-v1.0----------
# model_name = "monologg/biobert_v1.0_pubmed_pmc" # from hugginface model list
# model_name = "model_wieghts/biobert_v1.0_pubmed_pmc"
# tokenizer_name = "bert-base-cased"
# ModelTokenizer = BertTokenizer
# PreTrainedModel = BertForSequenceClassification

#----------------BioBERT-v1.1----------
# model_name = "model_wieghts/biobert_v1.1_pubmed"
# tokenizer_name = "bert-base-cased"
# ModelTokenizer = BertTokenizer
# PreTrainedModel = BertForSequenceClassification

#------------roberta-------------
# from transformers import RobertaTokenizer, RobertaForSequenceClassification
# model_name = 'roberta-base'
# tokenizer_name = 'roberta-base'
# ModelTokenizer = RobertaTokenizer
# PreTrainedModel = RobertaForSequenceClassification

#------------albert-------------
# from transformers import AlbertTokenizer, AlbertForSequenceClassification
# model_name = 'albert-base-v1'
# tokenizer_name = 'albert-base-v1'
# ModelTokenizer = AlbertTokenizer
# PreTrainedModel = AlbertForSequenceClassification

#medical sense
group1 = [0,1,2,6]  #{'No Finding', 'Cardiomegaly','Lung Opacity','Atelectasis'}
group2 = [3,4,5,7,9] #{'Edema','Consolidation','Pneumonia', 'Pneumothorax','Fracture'}
group3 = [8,10] #{'Pleural Effusion','SupportDevices',}
heads_index = [group1, group2, group3]
col_names = ['No Finding', 'Cardiomegaly', 'Lung Opacity', 'Edema', 'Consolidation', 'Pneumonia', 
             'Atelectasis', 'Pneumothorax', 'Pleural Effusion', 'Fracture', 'SupportDevices']
multihead_cls(data_path,split_path, PreTrainedModel, epochs, batch_size, max_length ,
               ModelTokenizer, tokenizer_name, model_name, use_data_loader, heads_index, col_names)


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertCLS: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertCLS from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertCLS from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Train loss: 2.11224571056664


Epoch:  33%|███▎      | 1/3 [00:14<00:28, 14.39s/it]

Results of all heads:
Validation F1:  18.497576736672052
Validation Accuracy:  0.0
Results of each head:
Head_0
Validation F1:  31.500465983224608
Validation Accuracy:  9.392265193370166
Head_1
Validation F1:  4.197271773347324
Validation Accuracy:  0.8287292817679558
Head_2
Validation F1:  17.777777777777775
Validation Accuracy:  26.243093922651934
Train loss: 2.0907000582665205


Epoch:  67%|██████▋   | 2/3 [00:28<00:14, 14.34s/it]

Results of all heads:
Validation F1:  18.98527004909984
Validation Accuracy:  1.1049723756906076
Results of each head:
Head_0
Validation F1:  35.37549407114624
Validation Accuracy:  8.83977900552486
Head_1
Validation F1:  4.75206611570248
Validation Accuracy:  9.116022099447514
Head_2
Validation F1:  12.931034482758621
Validation Accuracy:  19.88950276243094
Train loss: 2.1080818325281143


Epoch: 100%|██████████| 3/3 [00:42<00:00, 14.28s/it]

Results of all heads:
Validation F1:  20.60461416070008
Validation Accuracy:  0.0
Results of each head:
Head_0
Validation F1:  35.746201966041106
Validation Accuracy:  9.668508287292818
Head_1
Validation F1:  5.4
Validation Accuracy:  0.2762430939226519
Head_2
Validation F1:  16.20253164556962
Validation Accuracy:  32.04419889502763





########## Results of all heads:
-----------test-----------
Threshold: 0.5
Test F1 Score:  0.1995268138801262
Test Accuracy:  0.0
LRAP:  0.30350432470968197 

                  precision    recall  f1-score   support

      No Finding       0.33      0.55      0.41       130
    Cardiomegaly       0.22      0.55      0.31        83
    Lung Opacity       0.26      0.42      0.32        95
           Edema       0.21      0.44      0.28        75
   Consolidation       0.05      0.44      0.09        16
       Pneumonia       0.02      0.80      0.04         5
     Atelectasis       0.04      0.73      0.07        11
    Pneumothorax       0.01      0.25      0.01         4
Pleural Effusion       0.05      0.38      0.10        21
        Fracture       0.11      0.66      0.19        35
  SupportDevices       0.06      0.40      0.11        30

       micro avg       0.12      0.50      0.20       505
       macro avg       0.12      0.51      0.18       505
    weighted avg       0.22

# Grouping Algorithms

In [None]:
def mesh_grouping(observations):
    