In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
import torch
from tqdm import tqdm, trange
from transformers import AdamW
from torch import nn
from torch.nn import BCEWithLogitsLoss, BCELoss
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix, f1_score, accuracy_score, label_ranking_average_precision_score
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import ast

# Multi-task heads
In this experiment, we partition the label classes into groups and assign each group to its own classification head. We then evaluate whether this grouping improves the results.

1. Reading the data

In [3]:
def _load_split(csv_path):
    """Read one split CSV and turn its ``labels`` column from text into Python objects.

    Each cell of ``labels`` is parsed with ``ast.literal_eval``, which only accepts
    Python literals, so no code from the file is executed.
    """
    split_df = pd.read_csv(csv_path)
    # convert str to list
    split_df['labels'] = split_df['labels'].apply(ast.literal_eval)
    return split_df


def read_data(data_path,split_path):
    """Load the train/test/val splits and the list of label column names.

    Parameters
    ----------
    data_path : str
        CSV whose header provides the label names: every column from the
        seventh one onwards (``columns[6:]``) is treated as a label column.
    split_path : str
        Directory containing ``train.csv``, ``test.csv`` and ``val.csv``, each
        with a ``labels`` column stored as a literal (e.g. ``"[1, 0, 0]"``).

    Returns
    -------
    tuple
        ``(train_df, test_df, val_df, label_cols)`` - note the order: test
        comes before val.
    """
    # Only the header row is needed to recover the label names, so the data
    # rows of the (potentially large) CSV are not parsed.
    header_df = pd.read_csv(data_path, nrows=0)
    label_cols = list(header_df.columns[6:])

    train_df = _load_split(f"{split_path}/train.csv")
    test_df = _load_split(f"{split_path}/test.csv")
    val_df = _load_split(f"{split_path}/val.csv")

    return train_df, test_df, val_df, label_cols

2. Grouping the heads

In [4]:
# Mapping from finding name to integer index. The head groups defined further
# down in this cell (group1/group2/group3) refer to findings by these indices.
labels = {'No Finding':0, 'Cardiomegaly':1,'Lung Opacity':2,'Edema':3,'Consolidation':4,
          'Pneumonia':5,'Atelectasis':6,'Pneumothorax':7,'Pleural Effusion':8,'Fracture':9,'SupportDevices':10}
def reorder(row,groups_index):
    """Pick the entries of ``row`` selected by each group of indices.

    An index equal to -1 is a padding marker: it is copied through as -1
    instead of being used to index ``row``.

    Returns a list with one inner list per group, in the order of
    ``groups_index``.
    """
    return [
        [-1 if index == -1 else row[index] for index in group]
        for group in groups_index
    ]


def group_heads(groups,df): # first attempt for each label one head
    """Add a ``head_labels`` column with each row's ``labels`` regrouped per head.

    ``groups`` is the list of index groups handed to ``reorder``. ``df`` is
    modified in place and also returned.
    """
    def regroup(row_labels):
        return reorder(row_labels, groups)

    df['head_labels'] = df['labels'].apply(regroup)
    return df
        
def padding_heads(heads_index): #as heads does not have the same size, we pad them with -1
    """Right-pad every head with -1 up to the length of the longest head.

    Prints the target length first, then the number of padding entries added
    to each head. Returns a list of NumPy arrays, one per head.
    """
    lenmax = max(map(len, heads_index))
    print(lenmax)

    def pad_to_longest(head):
        pad_n = lenmax - len(head)
        print(pad_n)
        return np.pad(head, (0, pad_n), 'constant', constant_values=-1)

    return [pad_to_longest(head) for head in heads_index]

# Each group lists the label indices that share one classification head
# (the finding names for each index are given in the trailing comments).
group1 = [0,1,2,6]  #{'No Finding', 'Cardiomegaly','Lung Opacity','Atelectasis'}
group2 = [3,4,5,7,9] #{'Edema','Consolidation','Pneumonia', 'Pneumothorax','Fracture'}
group3 = [8,10] #{'Pleural Effusion','SupportDevices',}
heads_index = [group1, group2, group3]

# The groups have different sizes (4, 5 and 2), so pad them with -1 to a common length.
padded_heads=padding_heads(heads_index)

data_path = 'data/OpenI/OpenI_cheXpertLabels.csv'
split_path = 'data/OpenI/cheXpertLabels'
train_df, test_df, val_df,  label_cols = read_data(data_path, split_path)

# Adds the 'head_labels' column to train_df (group_heads modifies the frame in place),
# then previews the first rows to check the regrouping.
train_df = group_heads(padded_heads, train_df)
train_df.head(20)

5
1
0
3


Unnamed: 0,text,labels,head_labels
0,Heart size is normal and lungs are clear. No p...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
1,Cardiomediastinal silhouette is normal. Pulmon...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
2,Cardiomegaly. Left lung clear. Large right eff...,"[0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0]","[[0, 1, 0, 1, -1], [0, 0, 0, 0, 0], [1, 0, -1,..."
3,Normal cardiac size and contour unremarkable m...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
4,The cardiac and mediastinal contours are withi...,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]","[[0, 0, 1, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
5,The cardiomediastinal silhouette and pulmonary...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
6,Atrial septal occluder artifact. Rotated front...,"[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0]","[[0, 0, 1, 1, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
7,opacities in the left base may be compatible ...,"[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]","[[0, 0, 1, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
8,No focal consolidation. No pneumothorax. No pl...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."
9,The trachea is midline. Cardiomediastinal silh...,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[[1, 0, 0, 0, -1], [0, 0, 0, 0, 0], [0, 0, -1,..."


3. Fine-tuning and prediction

In [11]:
class Head():
    """Base class for a task-specific head placed on top of a shared input layer.

    Construction runs in three steps: the common attributes are initialised,
    ``setup(hparams)`` lets the subclass create its own attributes and layer,
    and finally ``run()`` is called and its return value stored in ``self.loss``.

    Subclass state must be created in ``setup`` (not after ``super().__init__``),
    because ``run()`` executes inside ``__init__`` and needs that state.

    Note: this is a plain Python class, not an ``nn.Module``, so any layer
    created here is not registered as a sub-module of anything.
    """
    def __init__(self,hparams):
        self.inputLayer = hparams.inputLayer
        self.taskspecificLayer = None
        self.loss = None
        # Give the subclass a chance to build its layer *before* run() uses it.
        self.setup(hparams)
        self.loss = self.run()

    def setup(self, hparams):
        """Hook for subclasses: create task-specific attributes from ``hparams``."""
        pass

    def run(self):
        """Hook for subclasses: compute and return the head's loss (``None`` here)."""
        pass


class HeadMultilabelCLS(Head):
    """Multi-label classification head: a linear classifier trained with BCE-with-logits.

    Reads from ``hparams``: ``inputLayer`` (input to the classifier), ``labels``
    (batch labels, or all labels), ``num_labels`` and ``hidden_size``.
    """
    def setup(self, hparams):
        self.labels = hparams.labels # batch labels, or all labels
        self.num_labels = hparams.num_labels # number of labels
        self.taskspecificLayer = nn.Linear(hparams.hidden_size, hparams.num_labels) #classifier

    def run(self):
        """Return the BCE-with-logits loss of the classifier output against ``self.labels``."""
        logits = self.taskspecificLayer(self.inputLayer)

        # BCEWithLogitsLoss applies the sigmoid itself, so raw logits are passed in.
        loss_func = BCEWithLogitsLoss()
        flat_logits = logits.view(-1, self.num_labels)
        # convert labels to the logits' dtype (float) for the loss calculation
        flat_labels = self.labels.type_as(logits).view(-1, self.num_labels)
        return loss_func(flat_logits, flat_labels)
    
    
# class HeadMulticlassCLS(Head):
#     def __init__(self,hparams):
#         super().__init__(hparams)

#     def run(self):
#         logits = self.taskspecificLayer(self.inputLayer)

#         loss_fct = CrossEntropyLoss()
#         loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
    
    
# class HeadBinaryCLS(Head):
#     def __init__(self,hparams):
#         super().__init__(hparams)

#     def run(self):
#         pass
    
    
# class HeadAbstractiveSumm(Head):
#     def __init__(self,hparams):
#         super().__init__(hparams)

#     def run(self):
#         pass
    
# class HeadRegression(Head):
#     def __init__(self,hparams):
#         super().__init__(hparams)
        
#     def run(self):
#         loss_fct = MSELoss()
#         loss = loss_fct(logits.view(-1), labels.view(-1))
    
    
# class HeadSTSclinical(Head):
#     def __init__(self,hparams):
#         super().__init__(hparams)

#     def run(self):
#         pass
    
    

In [14]:
class Ape:
    """Scratch class checking that ``__init__`` can call another method of the instance."""

    def __init__(self):
        print('ooook')
        self.say('hi')

    def say(self, s):
        """Print ``s``."""
        print(s)

def main():
    """Instantiate ``Ape``; its constructor prints the two demo messages."""
    Ape()

# Run the demo when this code is executed as the main module.
if __name__ == '__main__':
    main()

ooook
hi


In [6]:
from transformers import BertPreTrainedModel, BertModel

class BertCLS(BertPreTrainedModel):
    """BERT encoder that returns the pooled output after dropout.

    No classification layer is attached yet: ``forward`` returns the encoder's
    pooled output (``outputs[1]``) passed through dropout, not logits or a loss.
    """
    def __init__(self, config):
        super().__init__(config)
#         self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        # Planned (not yet enabled): one linear classifier per head.
#         self.head_count = config.head_count # ex:[2,3,4]
#         self.head_index = config.head_index # ex: [[2,0],[1,3,4],[5,6,7,8,]]
#         self.classifierheads = [nn.Linear(config.hidden_size, count) for count in self.head_count]

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
    ):
        """Encode the inputs and return the pooled output after dropout.

        All arguments except ``labels`` are forwarded unchanged to the wrapped
        ``BertModel``. ``labels`` is accepted but currently unused, since no
        loss is computed here.
        """
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )

        # Index 1 of the encoder output is the pooled representation.
        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        return pooled_output


# Other cells of this notebook refer to the model as BertForMultiheadClassification;
# keep that name bound so they do not raise NameError on a fresh kernel.
BertForMultiheadClassification = BertCLS
    
#         #loop through the heads
#         logit_heads = []
#         for classifier in self.classifierheads:
#             logits = classifier(pooled_output)
#             logit_heads.append(logits)

#         #TODO: work here on this for loop
#         loss=0
#         for logit,head_labels in zip(logit_heads):
#             loss_func = BCEWithLogitsLoss() 
#             loss += loss_func(logits.view(-1,head_num_labels),b_labels.type_as(logits).view(-1,head_num_labels)) #convert labels to float for calculation

#         return loss  
#-------------example------------------
# Smoke test: load pretrained weights and run a single sentence through the model.
from transformers import BertTokenizer, BertForSequenceClassification
import torch
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# NOTE(review): make sure BertForMultiheadClassification is bound before running this
# cell - the class in the cell above is declared under the name BertCLS.
model = BertForMultiheadClassification.from_pretrained('bert-base-uncased')
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
# NOTE(review): `labels` is not passed to the model below, and this assignment rebinds
# the name used earlier in the notebook for the label-name dict.
labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
outputs = model(**inputs)
# loss, logits = outputs[:2]


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMultiheadClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForMultiheadClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForMultiheadClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [7]:
# Inspect the value returned by the forward pass of the example above.
outputs

tensor([[-7.1946e-01, -2.1445e-01, -2.9576e-01,  3.6603e-01,  2.7968e-01,
          2.2184e-02,  5.7299e-01,  6.2331e-02,  5.9586e-02, -9.9965e-01,
          5.0146e-02,  4.4756e-01,  9.7612e-01,  3.3988e-02,  8.4494e-01,
         -3.6905e-01,  9.8649e-02, -3.7169e-01,  1.7371e-01,  1.1515e-01,
          4.4133e-01,  9.9525e-01,  3.7221e-01,  8.2881e-02,  2.1402e-01,
          6.8965e-01, -6.1042e-01,  8.7136e-01,  9.4158e-01,  5.7372e-01,
         -3.2187e-01,  8.6672e-03, -9.8611e-01, -2.0542e-02, -4.3756e-01,
         -9.8012e-01,  1.1142e-01, -6.7587e-01,  1.3499e-01,  3.1130e-01,
         -8.2997e-01,  1.9006e-01,  9.9896e-01, -3.1798e-01,  2.1517e-02,
         -1.6531e-01, -9.9943e-01,  1.0173e-01, -8.1811e-01,  3.3119e-02,
          3.6740e-01, -7.3230e-02, -1.4261e-01,  1.8907e-01,  2.6119e-01,
          4.1582e-01, -2.4427e-01, -5.9846e-02, -7.3492e-02, -3.4202e-01,
         -5.8001e-01,  2.8331e-01, -5.0513e-01, -8.1967e-01,  1.9813e-01,
          1.9108e-01,  3.7011e-02, -1.

In [9]:
# Check the shape of the returned tensor.
outputs.size()

torch.Size([1, 768])

In [5]:
def create_dataLoader(input, labels, batch_size):
    """Bundle tokenized inputs and labels into a DataLoader that iterates in dataset order.

    ``input`` must expose ``input_ids`` and ``attention_mask`` tensors. Each
    batch yields ``(input_ids, attention_mask, labels)``. A
    ``SequentialSampler`` is used, so examples are never shuffled.
    """
    dataset = TensorDataset(input.input_ids, input.attention_mask, labels)
    return DataLoader(
        dataset,
        sampler=SequentialSampler(dataset),
        batch_size=batch_size,
    )


def _collect_predictions(model, dataloader, device):
    """Run ``model`` over ``dataloader`` without gradients.

    Returns ``(true_labels, pred_labels)``: two flat lists with one NumPy row
    per example, holding the gold labels and the sigmoid of the model output.
    """
    true_labels, pred_labels = [], []
    for batch in dataloader:
        # Move the batch to the model's device and unpack it.
        b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            outs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
            # NOTE(review): assumes outs[0] holds the per-label logits of the batch -
            # confirm against the model's forward().
            b_probs = torch.sigmoid(outs[0])
        # Extending with a 2-D array appends one row per example.
        true_labels.extend(b_labels.to('cpu').numpy())
        pred_labels.extend(b_probs.to('cpu').numpy())
    return true_labels, pred_labels


def _sweep_thresholds(true_bools, pred_labels, thresholds):
    """Return the micro-F1 and accuracy obtained at each candidate threshold."""
    f1_results, flat_acc_results = [], []
    for th in thresholds:
        pred_bools = [pl>th for pl in pred_labels]
        f1_results.append(f1_score(true_bools,pred_bools,average='micro'))
        flat_acc_results.append(accuracy_score(true_bools, pred_bools))
    return f1_results, flat_acc_results


def multihead_cls(data_path, split_path, PreTrainedModel, epochs, batch_size, max_length ,ModelTokenizer, tokenizer_name, model_name, use_data_loader, heads_index):
    """Fine-tune a multi-head classifier and report validation and test metrics.

    Parameters
    ----------
    data_path, split_path : str
        Passed to ``read_data`` to load the splits and the label names.
    PreTrainedModel : class
        Model class to instantiate.
    epochs, batch_size, max_length : int
        Number of training epochs, batch size, and tokenizer max length
        (texts are padded/truncated to it).
    ModelTokenizer : class
        Tokenizer class; loaded with ``from_pretrained(tokenizer_name)``.
    tokenizer_name, model_name : str
        Identifiers of the tokenizer and of the model.
    use_data_loader : bool
        Must be true; the non-DataLoader path is not implemented.
    heads_index : list of list of int
        Label indices grouped per head.

    Prints the training loss and validation scores after each epoch, then the
    test report at threshold 0.5 and at the F1-maximising threshold.
    Returns ``None``.

    Raises
    ------
    NotImplementedError
        If ``use_data_loader`` is false.
    """
    if not use_data_loader:
        #TODO: if the dataset is small in size
        # Fail early and explicitly instead of hitting an undefined dataloader later.
        raise NotImplementedError("multihead_cls currently requires use_data_loader=True")

    #prepare the dataset
    train_df, test_df, val_df,  label_cols = read_data(data_path, split_path)

    head_count = [len(group) for group in heads_index]

    # ----------tokenize---------------
    tokenizer = ModelTokenizer.from_pretrained(tokenizer_name)

    def encode(reports):
        return tokenizer(reports, padding='max_length', truncation=True, max_length=max_length, return_tensors="pt")

    def label_tensor(split_df):
        return torch.from_numpy(np.array(split_df.labels.to_list()))

    #-----------dataloaders--------------
    # Iterating batch by batch avoids pushing the whole dataset through the model at once.
    train_dataloader = create_dataLoader(encode(train_df.text.to_list()), label_tensor(train_df), batch_size)
    validation_dataloader = create_dataLoader(encode(val_df.text.to_list()), label_tensor(val_df), batch_size)
    test_dataloader = create_dataLoader(encode(test_df.text.to_list()), label_tensor(test_df), batch_size)

    # Model and batches must live on the same device; fall back to CPU without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # NOTE(review): the constructor arguments below follow the original call; confirm
    # they match the signature of the class passed as PreTrainedModel.
    model = PreTrainedModel(model_name, head_counts=head_count, head_index=heads_index)
    model.to(device)
    optimizer = AdamW(model.parameters(),lr=2e-5)  # Default optimization

    #---------FineTune model-----------
    # Store our loss for plotting
    train_loss_set = []

    # trange is a tqdm wrapper around the normal python range
    for _ in trange(epochs, desc="Epoch"):

        #-------Training-------
        # Set our model to training mode (as opposed to evaluation mode)
        model.train()

        tr_loss = 0 #running loss
        nb_tr_steps = 0

        # Train the data for one epoch
        for batch in train_dataloader:
            b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)

            # Clear out the gradients (by default they accumulate)
            optimizer.zero_grad()

            #Forward pass for multioutput classification
            # NOTE(review): the model is expected to return a scalar loss here although
            # b_labels is not passed to it - confirm the model's forward() contract.
            loss = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
            train_loss_set.append(loss.item())

            # Backward pass
            loss.backward()

            # Update parameters and take a step using the computed gradient
            optimizer.step()

            tr_loss += loss.item()
            nb_tr_steps += 1

        # max(..., 1) keeps the report well-defined if the training loader is empty.
        print("Train loss: {}".format(tr_loss/max(nb_tr_steps, 1)))

        # ---------Validation--------
        # Put model in evaluation mode to evaluate on the validation set
        model.eval()
        true_labels, pred_labels = _collect_predictions(model, validation_dataloader, device)

        # Calculate Accuracy
        threshold = 0.50
        pred_bools = [pl>threshold for pl in pred_labels]
        true_bools = [tl==1 for tl in true_labels]
        val_f1_accuracy = f1_score(true_bools,pred_bools,average='micro')*100
        val_flat_accuracy = accuracy_score(true_bools, pred_bools)*100

        print('F1 Validation Accuracy: ', val_f1_accuracy)
        print('Validation Accuracy: ', val_flat_accuracy)

    # ---------test--------
    # Put model in evaluation mode to evaluate on the test set
    model.eval()
    true_labels, pred_labels = _collect_predictions(model, test_dataloader, device)

    # Converting binary label values to boolean values
    true_bools = [tl==1 for tl in true_labels]

    #We need to threshold our sigmoid function outputs which range from [0, 1]. Below I use 0.50 as a threshold.
    pred_bools = [pl>0.50 for pl in pred_labels] #boolean output after thresholding

    # Print classification report
    print("-----------test-----------")
    print("Threshold: 0.5")
    print('Test F1 Score: ', f1_score(true_bools, pred_bools,average='micro'))
    print('Test Accuracy: ', accuracy_score(true_bools, pred_bools))
    print('LRAP: ', label_ranking_average_precision_score(true_labels, pred_labels) ,'\n')
    clf_report = classification_report(true_bools,pred_bools,target_names=label_cols)
    # pickle.dump(clf_report, open('classification_report.txt','wb')) #save report
    print(clf_report)

    # Maximize micro F1 by tuning the threshold: first a coarse sweep in steps of 0.1
    # ('macro_thresholds'), then a fine sweep in steps of 0.01 ('micro_thresholds')
    # starting at the best coarse value.
    # NOTE(review): the threshold is selected on the test predictions themselves, so the
    # "optimized" scores are optimistic; consider tuning on the validation split.
    print("-----Optimizing threshold value for micro F1 score-----")

    macro_thresholds = np.array(range(1,10))/10
    f1_results, flat_acc_results = _sweep_thresholds(true_bools, pred_labels, macro_thresholds)
    best_macro_th = macro_thresholds[np.argmax(f1_results)] #best macro threshold value

    micro_thresholds = (np.array(range(10))/100)+best_macro_th #calculating micro threshold values
    f1_results, flat_acc_results = _sweep_thresholds(true_bools, pred_labels, micro_thresholds)
    best_f1_idx = np.argmax(f1_results) #best threshold value

    # Printing classification report
    print('Best Threshold: ', micro_thresholds[best_f1_idx])
    print('Test F1 Score: ', f1_results[best_f1_idx])
    print('Test Accuracy: ', flat_acc_results[best_f1_idx])
    print('LRAP: ', label_ranking_average_precision_score(true_labels, pred_labels) , '\n')

    best_pred_bools = [pl>micro_thresholds[best_f1_idx] for pl in pred_labels]
    clf_report_optimized = classification_report(true_bools,best_pred_bools, target_names=label_cols)
    # pickle.dump(clf_report_optimized, open('classification_report_optimized.txt','wb'))
    print(clf_report_optimized)


In [6]:
# Experiment configuration: data locations, training hyper-parameters, and the
# tokenizer/model pair handed to multihead_cls at the bottom of this cell.
data_path = 'data/OpenI/OpenI_cheXpertLabels.csv'
split_path = 'data/OpenI/cheXpertLabels'
use_data_loader = True

epochs = 3 # Number of training epochs (authors recommend between 2 and 4)
batch_size = 16
max_length = 128  # passed to the tokenizer as max_length (texts are padded/truncated to it)
#----------------bert---------------
from transformers import BertTokenizer, BertForSequenceClassification
model_name = 'bert-base-uncased'
tokenizer_name = "bert-base-uncased"

# # model_name = "bert-base-cased"
# # tokenizer_name = "bert-base-cased"

ModelTokenizer = BertTokenizer
# NOTE(review): BertForMultiheadClassification must already be bound when this cell runs.
PreTrainedModel = BertForMultiheadClassification

# The commented-out sections below are alternative tokenizer/model configurations.
#----------------BioBERT-v1.0----------
# model_name = "monologg/biobert_v1.0_pubmed_pmc" # from hugginface model list
# model_name = "model_wieghts/biobert_v1.0_pubmed_pmc"
# tokenizer_name = "bert-base-cased"
# ModelTokenizer = BertTokenizer
# PreTrainedModel = BertForSequenceClassification

#----------------BioBERT-v1.1----------
# model_name = "model_wieghts/biobert_v1.1_pubmed"
# tokenizer_name = "bert-base-cased"
# ModelTokenizer = BertTokenizer
# PreTrainedModel = BertForSequenceClassification

#------------roberta-------------
# from transformers import RobertaTokenizer, RobertaForSequenceClassification
# model_name = 'roberta-base'
# tokenizer_name = 'roberta-base'
# ModelTokenizer = RobertaTokenizer
# PreTrainedModel = RobertaForSequenceClassification

#------------albert-------------
# from transformers import AlbertTokenizer, AlbertForSequenceClassification
# model_name = 'albert-base-v1'
# tokenizer_name = 'albert-base-v1'
# ModelTokenizer = AlbertTokenizer
# PreTrainedModel = AlbertForSequenceClassification


# Label indices grouped per classification head (same grouping as defined earlier in
# the notebook; passed unpadded here).
group1 = [0,1,2,6]  #{'No Finding', 'Cardiomegaly','Lung Opacity','Atelectasis'}
group2 = [3,4,5,7,9] #{'Edema','Consolidation','Pneumonia', 'Pneumothorax','Fracture'}
group3 = [8,10] #{'Pleural Effusion','SupportDevices',}
heads_index = [group1, group2, group3]


# Launch fine-tuning and evaluation with the configuration above.
multihead_cls(data_path,split_path, PreTrainedModel, epochs, batch_size, max_length ,
               ModelTokenizer, tokenizer_name, model_name, use_data_loader, heads_index)


AttributeError: 'DataFrame' object has no attribute 'head_labels'

In [None]:
# Scratch check: the two rows have different lengths (3 and 2), i.e. the nested list is ragged.
# NOTE(review): torch.tensor is expected to reject ragged input - presumably the reason
# the heads are padded with -1 to a common length.
torch.tensor([[1,2,3],[1,2]])

In [None]:
# Scratch setup: two 1-D tensors of equal length collected in a plain Python list.
a = torch.tensor([1,2,3])
b = torch.tensor([4,5,6])

c = [a,b]

In [None]:
# Scratch check: try to build a single tensor from the list of tensors `c`.
# NOTE(review): torch.stack(c) is the usual way to combine equal-shaped tensors -
# confirm whether this call behaves as intended.
torch.tensor(c)

In [6]:
# Scratch check of np.pad: append three -1 values to the right of a 5-element list
# (pad widths are (before, after) = (0, 3)).
a = [1, 2, 3, 4, 5]
np.pad(a, (0,3), 'constant', constant_values=-1)

array([ 1,  2,  3,  4,  5, -1, -1, -1])