In [23]:
import os
import pickle
import numpy as np
import pandas as pd



import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import TensorDataset, DataLoader, Subset
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


import matplotlib.pyplot as plt
import gc

# Unique to this file
from transformers import AutoTokenizer, AutoModel
from transformers import BertForSequenceClassification
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup

from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score, accuracy_score


from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification

from train_test import get_device, compute_accuracy, test_ST


# Depending on your GPU you can either increase or decrease this value
batch_size = 16
total_epoch = 10
learning_rate = 1e-5
iter_num = 1

# Find out how many labels are in the dataset
with open('covid_dataset/5_class_map.pkl','rb') as f:
    labels = pickle.load(f)
labels_in_dst = len(labels)



model_map= {
    #"tinybert": "huawei-noah/TinyBERT_General_4L_312D",
    #"covidbert": "digitalepidemiologylab/covid-twitter-bert-v2",
    #"distilbert": "distilbert-base-uncased",
    #"bertweet": "vinai/bertweet-base",
    "bertweetcovid": "vinai/bertweet-covid19-base-uncased"
}
# train teacher model for self training
# the train_st.dst is TensorDataset that assign each tweet a weight 1, that's the only difference with train.dst

cwd = os.getcwd()

dst_path = 'preprocessed_data/{}.dst'
dst_path = os.path.join(cwd,'preprocessed_data/{}-{{}}.dst'.format("bertweetcovid"))

train = torch.load(dst_path.format('train_st'))
val = torch.load(dst_path.format('val'))
test = torch.load(dst_path.format('test'))
unlabeled = torch.load(dst_path.format('19k'))



In [24]:
input_ids, masks, _ = zip(*unlabeled)

input_ids = torch.stack(input_ids)
masks = torch.stack(masks)

df_19k = pd.read_csv('covid_dataset/splits/19k_bertweetcovid_probs.csv')

pred_probs = df_19k[['class0', 'class1', 'class2', 'class3', 'class4']].to_numpy()

paper_pred_prob = torch.tensor(pred_probs, dtype=torch.float32)

unlabeled_prob = TensorDataset(input_ids, masks, paper_pred_prob )
print(unlabeled_prob.__len__())

19591


In [25]:
unlabeled_prob

<torch.utils.data.dataset.TensorDataset at 0x29465ae30>

In [26]:
# cross entropy loss for two probability distribution
# https://discuss.pytorch.org/t/how-should-i-implement-cross-entropy-loss-with-continuous-target-outputs/10720/18
def cross_entropy(pred, soft_targets):
    logsoftmax = nn.LogSoftmax(dim=1)
    return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))

#assuming pred and soft_targets are both Variables with shape (batchsize, num_of_classes), 
#each row of pred is predicted logits and each row of soft_targets is a discrete distribution.


# class CrossEntropyLossForSoftTarget(nn.Module):
#     def __init__(self, T=20):
#         super(CrossEntropyLossForSoftTarget, self).__init__()
#         self.T = T
#         self.softmax = nn.Softmax(dim=-1)
#         self.logsoftmax = nn.LogSoftmax(dim=-1)
#     def forward(self, y_pred, y_gt):
#         y_pred_soft = y_pred.div(self.T)
#         y_gt_soft = y_gt.div(self.T)
#         return -(y_gt_soft)*self.logsoftmax(y_pred_soft).mean().mul(self.T*self.T)

In [66]:
# Do KD with unlabeled data on a given model student_path
def student_distill(student_path, train, unlabel, val, test, batch_size, total_epoch, labels_in_dst, learning_rate):
    global model_map

    trainloader = DataLoader(train, shuffle=True, batch_size=batch_size)
    valloader = DataLoader(val, shuffle=False, batch_size=batch_size)
    unlabel_loader = DataLoader(unlabel, shuffle=True, batch_size=batch_size)
    
    device = get_device()


    model = AutoModelForSequenceClassification.from_pretrained(model_map['bertweetcovid'], 
                                                          num_labels=labels_in_dst,
                                                          return_dict=True)
    
    checkpoint = torch.load(student_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    
    save_model_path = "distill_" + student_path
    model = model.to(device)
    gc.collect()

    optimizer = AdamW(model.parameters(), lr=learning_rate)

    sum_loss = []
    sum_val = []
    
    val_f1_average = []

    for epoch in range(0, total_epoch):
        print('Epoch:', epoch)
        train_loss, valid_loss = [], []
        model.train()
        for input_ids, attention_mask, labels in unlabel_loader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels = labels.to(device) # teacher predicted probabilities

            
            optimizer.zero_grad()
            output1 = model(input_ids, attention_mask=attention_mask)
            
            logits = output1.logits
            # loss on teacher and student, two probability distributions on unlabeled data 
#             loss_fct = CrossEntropyLossForSoftTarget() 
            loss = cross_entropy(logits, labels)
#             
            
            loss.backward()
            optimizer.step()
            train_loss.append(loss.item())
        sum_loss.append(sum(train_loss)/len(train))  
        print('Loss: {:.4f}'.format(sum_loss[epoch-1]))

#       evaluation part 
        model.eval()
        with torch.no_grad():
            predictions = []
            true_labels = []
            for input_ids, attention_mask, labels in valloader:
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)
                labels = labels.to(device)
                output = model(input_ids, attention_mask=attention_mask)
                predictions.append(output.logits.clone().detach())
                true_labels.append(labels.clone().detach())
            predictions = torch.cat(predictions)
            true_labels = torch.cat(true_labels)
            predictions = predictions.cpu()
            true_labels = true_labels.cpu()

            # val_f1 is weighted f1 
            acc, precision, recall, f1_macro, val_f1  = compute_accuracy(predictions, true_labels)
            print("validation performance at epoch: ", epoch, acc, precision, recall, f1_macro, val_f1)
            
            
            best_f1 = max(val_f1_average, default=-1)
            best_model_state = ''
            # Save the best model seen so far
            if val_f1 > best_f1:
                best_f1 = val_f1
                torch.save(model.state_dict(), save_model_path)
#                 torch.save({
#                             'epoch': epoch,
#                             'model_state_dict': model.state_dict(),
#                             'optimizer_state_dict': optimizer.state_dict(),
#                             'best_f1': best_f1
#                             }, save_model_path)
            
            val_f1_average.append(val_f1)
    
        # test
        model.eval()
        testloader = DataLoader(test, shuffle=False, batch_size=batch_size)

        with torch.no_grad():
            predictions = []
            true_labels = []
            pred_prob = []
            for input_ids, attention_mask, labels in testloader:
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)
                labels = labels.to(device)

                output = model(input_ids, attention_mask=attention_mask)
                logits = output.logits.clone().detach()

                predictions.append(logits)
                true_labels.append(labels.clone().detach())

                softmax = torch.nn.Softmax(dim=1)
                prob_batch = softmax(logits)
                prob_batch = prob_batch.cpu().numpy()
                pred_prob.append(prob_batch)

            predictions = torch.cat(predictions)
            true_labels = torch.cat(true_labels)
            predictions = predictions.cpu()
            true_labels = true_labels.cpu()

            flat_prob = np.concatenate(pred_prob, axis=0)

            pred_labels = np.argmax(flat_prob, axis=1).flatten()

            acc, precision, recall,f1_macro, f1_score  = compute_accuracy(predictions, true_labels)

            print("test model performance at epoch : ", epoch, acc, precision, recall,f1_macro, f1_score)


In [63]:
def test_ST2(checkpoint_path, test, labels_in_dst, batch_size):
    global model_map
    test_f1_average = []
    test_precision = []
    test_recall = []
    test_acc = []
    test_f1 = []
    
    device = get_device()  
    
    model = AutoModelForSequenceClassification.from_pretrained(model_map['bertweetcovid'], 
                                                          num_labels=labels_in_dst,
                                                          return_dict=True)
    model.load_state_dict(torch.load(checkpoint_path))


    model = model.to(device)
    model.eval()
    flat_prob = []
    pred_labels = []
    
    testloader = DataLoader(test, shuffle=False, batch_size=batch_size)
    
    with torch.no_grad():
            predictions = []
            true_labels = []
            pred_prob = []
            for input_ids, attention_mask, labels in testloader:
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)
                labels = labels.to(device)
                
                output = model(input_ids, attention_mask=attention_mask)
                logits = output.logits.clone().detach()
                
                predictions.append(logits)
                true_labels.append(labels.clone().detach())
                
                softmax = torch.nn.Softmax(dim=1)
                prob_batch = softmax(logits)
                prob_batch = prob_batch.cpu().numpy()
                pred_prob.append(prob_batch)
                
            predictions = torch.cat(predictions)
            true_labels = torch.cat(true_labels)
            predictions = predictions.cpu()
            true_labels = true_labels.cpu()
            
            flat_prob = np.concatenate(pred_prob, axis=0)
               
            pred_labels = np.argmax(flat_prob, axis=1).flatten()
            
            acc, precision, recall,f1_macro, f1_score  = compute_accuracy(predictions, true_labels)
            test_acc.append(acc)
            test_f1_average.append(f1_macro)
            test_f1.append(f1_score)
            test_precision.append(precision)
            test_recall.append(recall)
            print("test performance: ", acc, precision, recall,f1_macro, f1_score)
            
    return pred_labels, flat_prob

In [51]:
_, _ = test_ST("bertweetcovid_ST_1iter_500eachclass.pth", test, labels_in_dst, batch_size)

Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

test performance:  tensor(0.7846) 0.7422315972547893 0.7914124393783851 0.7609219783639178 0.7860240176079207


In [56]:
batch_size = 32
total_epoch = 3
learning_rate = 1e-5

student_distill("0bertweetcovid_ST_1iter_500each16batch10epochs.pth", train, unlabeled_prob, val, test, batch_size, total_epoch, labels_in_dst, learning_rate)

Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

Epoch: 0


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.2092
validation performance at epoch:  0 tensor(0.8431) 0.8039799433985481 0.8152110187259357 0.8037856581240295 0.8406837024965624
test model performance at epoch :  0 tensor(0.8008) 0.7847086983638473 0.7758315591449507 0.779795468947741 0.7999913382314555
Epoch: 1


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.2092
validation performance at epoch:  1 tensor(0.8407) 0.8149440523839772 0.8076860391831243 0.8048270818249647 0.8374674418482477
test model performance at epoch :  1 tensor(0.7805) 0.7530965391621129 0.754807300384843 0.7517758457517494 0.7788943940146954
Epoch: 2


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.1790
validation performance at epoch:  2 tensor(0.8358) 0.8062405840846208 0.7820968135938988 0.78795074976583 0.8322190832144352
test model performance at epoch :  2 tensor(0.7927) 0.7820490620490621 0.7696189498490464 0.7732231120720858 0.7922526287990183


In [53]:
# run 1: self training only
_, _ = test_ST("0bertweetcovid_ST_1iter_500each16batch10epochs.pth", test, labels_in_dst, 16)

Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

test performance:  tensor(0.7846) 0.7676145268233876 0.7851251877436876 0.771820959379522 0.7854781940879401


In [68]:
# run 2 ST
_, _ = test_ST("1bertweetcovid_ST_1iter_500each16batch10epochs.pth", test, labels_in_dst, 16)
batch_size = 32
total_epoch = 3
learning_rate = 1e-5
# run 2 distill
student_distill("1bertweetcovid_ST_1iter_500each16batch10epochs.pth", train, unlabeled_prob, val, test, batch_size, total_epoch, labels_in_dst, learning_rate)

# run 2: ST + KD
_, _ = test_ST2("distill_1bertweetcovid_ST_1iter_500each16batch10epochs.pth", test, labels_in_dst, 16)

Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

test performance:  tensor(0.8008) 0.7630293198181128 0.7706211597882099 0.766116890743152 0.8006840247836313


Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

Epoch: 0


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.2145
validation performance at epoch:  0 tensor(0.8407) 0.8086891792199138 0.8234869049839902 0.8082826520921758 0.8378682717151439
test model performance at epoch :  0 tensor(0.7602) 0.7275469435254778 0.7109646359163155 0.7144578803049798 0.7583184325553198
Epoch: 1


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.2145
validation performance at epoch:  1 tensor(0.8333) 0.79493973089836 0.8025029509083789 0.7921659122247448 0.8303014183235032
test model performance at epoch :  1 tensor(0.7805) 0.7657868352223192 0.7245501989956661 0.7374041867954911 0.778185721953838
Epoch: 2


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.1791
validation performance at epoch:  2 tensor(0.8284) 0.7910670453142362 0.8063343225029405 0.7911375015260743 0.8240810123721346
test model performance at epoch :  2 tensor(0.7886) 0.7766147695383078 0.7630546214025276 0.7678594570902263 0.786626093067619


Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

test performance:  tensor(0.7602) 0.7275469435254778 0.7109646359163155 0.7144578803049798 0.7583184325553198


In [70]:
# run 3-5 ST
for i in range(2,5):
    _, _ = test_ST(str(i)+"bertweetcovid_ST_1iter_500each16batch10epochs.pth", test, labels_in_dst, 16)
    batch_size = 32
    total_epoch = 3
    learning_rate = 1e-5
    # run 3-5 distill
    student_distill(str(i)+"bertweetcovid_ST_1iter_500each16batch10epochs.pth", train, unlabeled_prob, val, test, batch_size, total_epoch, labels_in_dst, learning_rate)

    # run 3-5: ST + KD
    _, _ = test_ST2("distill_"+str(i)+"bertweetcovid_ST_1iter_500each16batch10epochs.pth", test, labels_in_dst, 16)

Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

test performance:  tensor(0.7764) 0.7472347887516759 0.7388359132813804 0.7409038742252747 0.776251975906057


Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

Epoch: 0


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.2083
validation performance at epoch:  0 tensor(0.8407) 0.8078432639916926 0.8078036113185283 0.8020619607904237 0.8378468256517749
test model performance at epoch :  0 tensor(0.7967) 0.7754226136546374 0.7729744162878079 0.7694238479577825 0.796745769154633
Epoch: 1


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.2083
validation performance at epoch:  1 tensor(0.8456) 0.8185699416190746 0.8096846488459676 0.8087764856085575 0.8430456272153588
test model performance at epoch :  1 tensor(0.7764) 0.7488001129305477 0.7435479890565027 0.7441512279445416 0.7758187256041027
Epoch: 2


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.1790
validation performance at epoch:  2 tensor(0.8407) 0.8068555170549585 0.8067649408167705 0.8010481797037772 0.8377958363559035
test model performance at epoch :  2 tensor(0.7927) 0.7615806284724564 0.7503529328471805 0.7526431335955144 0.7910289509205497


Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

test performance:  tensor(0.7764) 0.7488001129305477 0.7435479890565027 0.7441512279445416 0.7758187256041027


Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

test performance:  tensor(0.7724) 0.7336275252525253 0.7754680570235103 0.7399784566493908 0.77370341833292


Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

Epoch: 0


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.2089
validation performance at epoch:  0 tensor(0.8358) 0.812198922045271 0.7928712243683095 0.794135000709719 0.8322086864394185
test model performance at epoch :  0 tensor(0.7967) 0.7878623188405798 0.7330995787461503 0.7528174510684164 0.7925521475484556
Epoch: 1


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.2089
validation performance at epoch:  1 tensor(0.8431) 0.8045084700699728 0.808224794831369 0.8016657308078589 0.8404016978987733
test model performance at epoch :  1 tensor(0.7967) 0.7865195693937637 0.7528721623521439 0.7651603251913428 0.794188860862681
Epoch: 2


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.1792
validation performance at epoch:  2 tensor(0.8309) 0.8037588692136074 0.7791771055647017 0.7845052798665794 0.826934190073491
test model performance at epoch :  2 tensor(0.7805) 0.7687024044929617 0.7275815578806835 0.7419954493078805 0.7772020192168154


Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

test performance:  tensor(0.7967) 0.7865195693937637 0.7528721623521439 0.7651603251913428 0.794188860862681


Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

test performance:  tensor(0.7520) 0.6960830527497194 0.7282235527932721 0.7048031067154543 0.7579093201168468


Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

Epoch: 0


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.2093
validation performance at epoch:  0 tensor(0.8456) 0.811502798715075 0.8117619898494832 0.8068450563155791 0.8430785898307819
test model performance at epoch :  0 tensor(0.7967) 0.7727102216897604 0.7767618069919037 0.772951203385986 0.7969136240610258
Epoch: 1


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.2093
validation performance at epoch:  1 tensor(0.8382) 0.80687232141505 0.7895042210013061 0.7925905236236371 0.8348260336575066
test model performance at epoch :  1 tensor(0.8089) 0.7950839215665477 0.7936551989198101 0.7895828437502731 0.8082757080579802
Epoch: 2


  return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))


Loss: 0.1812
validation performance at epoch:  2 tensor(0.8407) 0.8115216066965656 0.7928062717486124 0.795444987385633 0.8370870075604978
test model performance at epoch :  2 tensor(0.7846) 0.7573653765408226 0.736477094005856 0.7439202353710362 0.7828769697307957


Some weights of the model checkpoint at vinai/bertweet-covid19-base-uncased were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at vinai/bertweet-covid19-base-unc

test performance:  tensor(0.7967) 0.7727102216897604 0.7767618069919037 0.772951203385986 0.7969136240610258
