In [None]:
!pip install sentence-transformers transformers wandb

In [39]:
import os
import sys
import re
import pandas as pd
import numpy as np 
import torch
import random
import tarfile
from sklearn.model_selection import train_test_split
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler, random_split
import torch.nn.functional as F
from torch.optim import AdamW
from torch.nn import CosineSimilarity, MSELoss
from torch.nn.utils import clip_grad_norm_
from sklearn.metrics import f1_score

In [40]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [41]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"# available GPUs : {torch.cuda.device_count()}")
    print(f"GPU name : {torch.cuda.get_device_name()}")
else:
    device = torch.device("cpu")
print(device)

# available GPUs : 1
GPU name : Tesla P100-PCIE-16GB
cuda


In [42]:
cd /content/drive/MyDrive/NLP

/content/drive/MyDrive/NLP


In [6]:
df = pd.read_csv('/content/drive/MyDrive/NLP/df.csv')
test = pd.read_csv('/content/drive/MyDrive/NLP/test.csv')

In [7]:
df = df[['sentence1', 'sentence2', 'real-label', 'binary-label']]
test = test[['sentence1', 'sentence2', 'real-label', 'binary-label']]

In [8]:
train, val = train_test_split(df, test_size=0.1, shuffle=True)

In [9]:
!pip install transformers
!pip install sentence-transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [10]:
from transformers import AutoTokenizer, AutoModel, RobertaModel, RobertaTokenizer
from transformers import ElectraModel, ElectraTokenizer
from transformers import get_linear_schedule_with_warmup, get_constant_schedule

In [11]:
# seed
seed = 42
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

In [12]:
class CustomDataset(Dataset):
    def __init__(self, sentence1, sentence2, real_label):
        self.X1 = sentence1 #list str
        self.X2 = sentence2 #list str
        self.Y = real_label #list float

    def __len__(self):
        return len(self.X1)
    
    def __getitem__(self, index):
        return self.X1[index], self.X2[index], self.Y[index]

In [13]:
def generate_dataset(df, flag):
    sen_one = df['sentence1'].tolist()
    sen_two = df['sentence2'].tolist()
    lab = df['binary-label'].tolist()
    real_lab = df['real-label'].tolist()
    
    if flag:
        return CustomDataset(sen_one, sen_two, real_lab)
    else:
        return CustomDataset(sen_one, sen_two, lab)

In [14]:
train_dataset = generate_dataset(train, True)
val_dataset = generate_dataset(val, True)
test_dataset = generate_dataset(test, True)

In [15]:
tokenizer = AutoTokenizer.from_pretrained("klue/roberta-base")

In [16]:
def CustomCollateFn(batch):
    sen_one_list = []
    sen_two_list = []
    label_list = []


    for sen_one, sen_two, label in batch:
        sen_one_list.append(sen_one)
        sen_two_list.append(sen_two)
        label_list.append(label/5.0)
    
    tokenized_sen_one = tokenizer(sen_one_list, add_special_tokens=True, padding='max_length',
                                truncation=True, max_length=128, return_tensors='pt')
    tokenized_sen_two = tokenizer(sen_two_list, add_special_tokens=True, padding='max_length',
                                truncation=True, max_length=128, return_tensors='pt')

    label_list = torch.Tensor(label_list)


    return (tokenized_sen_one, tokenized_sen_two, label_list)

def CustomCollateFn_dev(batch):
    sen_one_list = []
    sen_two_list = []
    label_list = []


    for sen_one, sen_two, label in batch:
        sen_one_list.append(sen_one)
        sen_two_list.append(sen_two)
        label_list.append(label)

    tokenized_sen_one = tokenizer(sen_one_list, add_special_tokens=True, padding='max_length',
                                truncation=True, max_length=128, return_tensors='pt')
    tokenized_sen_two = tokenizer(sen_two_list, add_special_tokens=True, padding='max_length',
                                truncation=True, max_length=128, return_tensors='pt')

    label_list = torch.Tensor(label_list)

    return (tokenized_sen_one, tokenized_sen_two, label_list)    

In [17]:
def mean_pooling_fn(output, attention_mask):
    embedding = output.last_hidden_state # (batch len, longest sentence length, 1024)
    att_msk = attention_mask # (batch_len, 1024)
    mask = att_msk.unsqueeze(-1).expand(output.last_hidden_state.size()).float() # (batch len, longest sentence length, 1024)
    masked_embedding = output.last_hidden_state * mask # (batch_len, longest sen len, 1024)
    me_sum = torch.sum(masked_embedding, 1) # (batch_len, 1024)
    ms_sum = torch.clamp(mask.sum(1), min=1e-9) # (batch_len, 1024)
    mean_pool = me_sum/ms_sum # batch_len, 1024
    return mean_pool

In [18]:
def mean_pooling_fn(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

In [19]:
class CustomPooling(nn.Module):
    def __init__(self):
        super(CustomPooling, self).__init__()

        self.robert = AutoModel.from_pretrained("klue/roberta-base")


        self.cos_score = nn.Sequential(
            nn.Identity()
        )
    
    def forward(self, senone, sentwo):
        output_one = self.robert(input_ids=senone['input_ids'], attention_mask=senone['attention_mask'],
                             token_type_ids=senone['token_type_ids'])
        output_two = self.robert(input_ids=sentwo['input_ids'], attention_mask=sentwo['attention_mask'],
                             token_type_ids=sentwo['token_type_ids'])

        pooled_one = mean_pooling_fn(output_one, senone['attention_mask'])
        pooled_two = mean_pooling_fn(output_two, sentwo['attention_mask'])


        cos_sim = torch.cosine_similarity(pooled_one, pooled_two)
        logit = self.cos_score(cos_sim)

        return logit

In [20]:
def initializer(input_dataloader, epochs):

    model = CustomPooling()
    optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)
    print(f'total step: {len(input_dataloader) * epochs}')

    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps = round(len(input_dataloader)*0.1),
        num_training_steps = len(input_dataloader) * epochs,

    )

    return model, optimizer, scheduler

In [21]:
def initializer(input_dataloader, epochs):
    """
    설정에 맞춰서 wandb sweep 실행.
    """
    wandb.init(config=sweep_config)
    model = CustomPooling()   
    w_config = wandb.config   

    optimizer = AdamW(model.parameters(), lr = 1e-5, eps = 1e-8) 
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps = round(len(input_dataloader)*0.1),
        num_training_steps = len(input_dataloader) * epochs,

    )
    print(f'total step: {len(input_dataloader) * epochs}') 
    text_table = wandb.Table(columns=["epoch", "step", "text", 'true_label', 'pred_label'])
    wandb.log({f"error-text-{wandb.run.name}" : text_table})
    return model, optimizer, scheduler    

In [22]:
def save_checkpoint(path, model, optimizer, scheduler, epoch, loss):
    file_name = f'/content/drive/MyDrive/data/checkpoints/sts_hyper.ckpt.{epoch}'
    torch.save({
        'epoch':epoch,
        'model_state_dict':model.state_dict(),
        'optimizer_state_dict':optimizer.state_dict(),
        'scheduler_state_dict':scheduler.state_dict(),
        'loss':loss
    }, file_name)

    print(f'SAVING EPOCH {epoch} ...')

In [23]:
def train(model, loss_fct, scheduler, optimizer, train_dataloader, valid_dataloader, epochs):
    for epoch in range(epochs):
        print(f'****** STARTING TO TRAIN EPOCH #{epoch} ******')

        wandb.watch(model, log="all", log_freq = 10)
        total_loss = 0
        batch_loss = 0
        batch_count = 0

        model.train()
        model.to(device)

        for step, batch in enumerate(train_dataloader):
            batch_count += 1
            batch = tuple(items.to(device) for items in batch)

            (x_batch_one, x_batch_two, y_batch) = batch

            model.zero_grad()

            logit = model(x_batch_one, x_batch_two)
            loss = loss_fct(logit, y_batch)

            batch_loss += loss.item()
            total_loss += loss.item()

            loss.backward()
            clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()
            scheduler.step()

            if(step % 10 == 0 and step != 0):
                wandb.log({'train_loss': batch_loss / batch_count, 'train_lr': optimizer.param_groups[0]['lr']})
                print(f"Step : {step + 1}, train Loss : {batch_loss / batch_count:.4f}")                      
                # reset 
                batch_loss, batch_count = 0,0

        wandb.log({'total_train_loss': total_loss / (step + 1), 'total_train_lr': optimizer.param_groups[0]['lr'], "epoch" : (epoch + 1)})
  

        print(f"Epoch {epoch} Total Mean Loss : {total_loss/(step+1):.4f}")
        print(f"*****Epoch {epoch} Train Finish*****\n")
        save_checkpoint(".", model, optimizer, scheduler, epoch, total_loss/(step+1))
           
        if valid_dataloader is not None:
            print(f"*****Epoch {epoch} Valid Start*****")
            valid_loss, valid_pearson, valid_f1 = validate(model, loss_fct, valid_dataloader)
            print(f"Epoch {epoch} Valid Loss : {valid_loss} Valid Pearsonr : {valid_pearson} ValidF1 : {valid_f1}")
            print(f"*****Epoch {epoch} Valid Finish*****\n")
  

    print('** Train Completed! **')

In [24]:
!pip install audtorch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [25]:
from audtorch.metrics.functional import pearsonr
from sklearn.metrics import f1_score
from scipy import stats

In [26]:
def validate(model, loss_fct, valid_dataloader):

    model.eval()
    model.to(device)

    total_loss = 0
    total_acc = 0
    all_prediction = []
    all_reallabel = []

    for step, batch in enumerate(valid_dataloader):
        batch = tuple(items.to(device) for items in batch)

        (x_batch_one, x_batch_two, batch_y) = batch

        with torch.no_grad():
            logit = model(x_batch_one, x_batch_two)

        logit = logit*5
        loss = loss_fct(logit, batch_y)
        total_loss += loss.item()


        logit = logit[:].cpu()
        batch_y = batch_y.cpu()

        print(f'Step: {step},  Pearson: {pearsonr(logit, batch_y)}')

        all_prediction = all_prediction + logit.tolist()
        all_reallabel = all_reallabel + batch_y.tolist()

    #pearson

    pred = torch.Tensor(all_prediction) # x
    real = torch.Tensor(all_reallabel) # y
    
    pearson = pearsonr(pred, real)
    
    #loss
    total_loss = total_loss / (step+1)

    #f1
    fone = f1_process(pred, real)

    wandb.log({'total_valid_loss': total_loss, "total_f1_score ": fone, "total_pearsonr" : pearson})  
    print('total_valid_loss : ', total_loss, "total_f1_score : ",  fone,  "total_pearsonr :", pearson)  
    return total_loss, pearson, fone

In [27]:
def f1_process(pred, real):
    bin_real = []
    bin_pred = []

    for index in range(len(real)):
        if real[index] < 3:
            bin_real.append(0)
        else:
            bin_real.append(1)
    
        if pred[index] < 3:
            bin_pred.append(0)
        else:
            bin_pred.append(1)

    return f1_score(bin_real, bin_pred)

In [28]:
train_dataloader = DataLoader(
    train_dataset,
    batch_size = 8,
    sampler = RandomSampler(train_dataset),
    collate_fn = CustomCollateFn,
)
valid_dataloader = DataLoader(
    val_dataset,
    batch_size = 16,
    sampler = SequentialSampler(val_dataset),
    collate_fn = CustomCollateFn_dev,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size = 16,
    sampler = SequentialSampler(test_dataset),
    collate_fn = CustomCollateFn_dev,
) 

In [29]:
sweep_config = {
    
    "name" : "sts_v2",   
    "method": "bayes",
    "metric": {
        "name" : "total_pearsonr", 
        "goal" : "maximize"
                },
    
    "parameters": { 
        "epochs" : {
            "distribution" : "categorical",
            "values" : [4]},
        "learning_rate" : {
            "distribution" : "categorical",
            "values" : [1e-5, 3e-5, 5e-5]},                     
        "eps" : {
            "distribution" : "categorical",
            "values" : [1e-8]
        },
        "train_batch_size" : {
            "distribution" : "categorical",
            "values" : [8]
        },
        "valid_batch_size" : {
            "distribution" : "categorical",
            "values" : [16]
        },
        "warm_up_ratio" : {
            "distribution" : "categorical",
            "values" : [0, 0.1]  #[0, 0.1, 0.2]
        },
    },         
    "early_terminate" : {
        "type": "hyperband", # metric이 2번 이상 개선되지 않을 경우 조기 종료
        "min_iter" : 2,
        "eta" : 2
        }
}

In [30]:
def run_sweeep(config=None):
    """
    설정에 맞춰서 wandb sweep 실행.
    """
    model = CustomPooling() 
    wandb.init(config=config)
    w_config = wandb.config   
    
    optimizer = AdamW(model.parameters(), lr = w_config.learning_rate, eps =  w_config.eps) 
    num_training_steps = w_config.epochs * len(train_dataloader)
    scheduler = get_linear_schedule_with_warmup(optimizer=optimizer, num_warmup_steps= (num_training_steps * w_config.warm_up_ratio),
                                                num_training_steps = num_training_steps)
    loss_fct = MSELoss()
    train(model, loss_fct, scheduler, optimizer, train_dataloader, valid_dataloader, w_config.epochs)

In [31]:
!pip install wandb
!wandb login
import wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[34m[1mwandb[0m: Currently logged in as: [33mkdb[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [32]:
sweep_id = wandb.sweep(sweep_config, project = "sts_v2")
wandb.agent(sweep_id, run_sweeep, count = 5)

Create sweep with ID: sy73kc4x
Sweep URL: https://wandb.ai/kdb/sts_v2/sweeps/sy73kc4x


[34m[1mwandb[0m: Agent Starting Run: 6xqc4oyo with config:
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	eps: 1e-08
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	valid_batch_size: 16
[34m[1mwandb[0m: 	warm_up_ratio: 0
Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla

****** STARTING TO TRAIN EPOCH #0 ******
Step : 11, train Loss : 0.0964
Step : 21, train Loss : 0.0447
Step : 31, train Loss : 0.0431
Step : 41, train Loss : 0.0269
Step : 51, train Loss : 0.0303
Step : 61, train Loss : 0.0309
Step : 71, train Loss : 0.0394
Step : 81, train Loss : 0.0240
Step : 91, train Loss : 0.0437
Step : 101, train Loss : 0.0417
Step : 111, train Loss : 0.0309
Step : 121, train Loss : 0.0292
Step : 131, train Loss : 0.0307
Step : 141, train Loss : 0.0303
Step : 151, train Loss : 0.0283
Step : 161, train Loss : 0.0298
Step : 171, train Loss : 0.0327
Step : 181, train Loss : 0.0208
Step : 191, train Loss : 0.0275
Step : 201, train Loss : 0.0282
Step : 211, train Loss : 0.0235
Step : 221, train Loss : 0.0378
Step : 231, train Loss : 0.0301
Step : 241, train Loss : 0.0352
Step : 251, train Loss : 0.0275
Step : 261, train Loss : 0.0216
Step : 271, train Loss : 0.0271
Step : 281, train Loss : 0.0242
Step : 291, train Loss : 0.0327
Step : 301, train Loss : 0.0338
Step : 3

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▆█
total_f1_score,▁▆██
total_pearsonr,▁▃▆█
total_train_loss,█▃▂▁
total_train_lr,█▆▃▁
total_valid_loss,█▃▂▁
train_loss,▆█▆▅▄▅▅▅▅▄▂▃▃▂▃▂▃▂▂▃▂▂▂▂▁▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁
train_lr,███▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁

0,1
epoch,4.0
total_f1_score,0.91141
total_pearsonr,0.92591
total_train_loss,0.00352
total_train_lr,0.0
total_valid_loss,0.49367
train_loss,0.00295
train_lr,0.0


[34m[1mwandb[0m: Agent Starting Run: ydod97j4 with config:
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	eps: 1e-08
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	valid_batch_size: 16
[34m[1mwandb[0m: 	warm_up_ratio: 0
Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceCla

****** STARTING TO TRAIN EPOCH #0 ******
Step : 11, train Loss : 0.0964
Step : 21, train Loss : 0.0323
Step : 31, train Loss : 0.0396
Step : 41, train Loss : 0.0497
Step : 51, train Loss : 0.0319
Step : 61, train Loss : 0.0309
Step : 71, train Loss : 0.0228
Step : 81, train Loss : 0.0262
Step : 91, train Loss : 0.0314
Step : 101, train Loss : 0.0350
Step : 111, train Loss : 0.0390
Step : 121, train Loss : 0.0364
Step : 131, train Loss : 0.0193
Step : 141, train Loss : 0.0300
Step : 151, train Loss : 0.0240
Step : 161, train Loss : 0.0246
Step : 171, train Loss : 0.0241
Step : 181, train Loss : 0.0393
Step : 191, train Loss : 0.0287
Step : 201, train Loss : 0.0308
Step : 211, train Loss : 0.0288
Step : 221, train Loss : 0.0352
Step : 231, train Loss : 0.0388
Step : 241, train Loss : 0.0196
Step : 251, train Loss : 0.0340
Step : 261, train Loss : 0.0253
Step : 271, train Loss : 0.0372
Step : 281, train Loss : 0.0347
Step : 291, train Loss : 0.0290
Step : 301, train Loss : 0.0270
Step : 3

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▆█
total_f1_score,▁▃▆█
total_pearsonr,▁▃▆█
total_train_loss,█▄▂▁
total_train_lr,█▆▃▁
total_valid_loss,█▆▂▁
train_loss,▆▄▇▅█▇▄▆▇▅▃▄▃▃▃▄▃▃▃▃▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁
train_lr,███▇▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁

0,1
epoch,4.0
total_f1_score,0.90535
total_pearsonr,0.91647
total_train_loss,0.00431
total_train_lr,0.0
total_valid_loss,0.54973
train_loss,0.00416
train_lr,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hq2qnjn2 with config:
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	eps: 1e-08
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	valid_batch_size: 16
[34m[1mwandb[0m: 	warm_up_ratio: 0.1
Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly i

****** STARTING TO TRAIN EPOCH #0 ******
Step : 11, train Loss : 0.1946
Step : 21, train Loss : 0.2356
Step : 31, train Loss : 0.1915
Step : 41, train Loss : 0.2071
Step : 51, train Loss : 0.1777
Step : 61, train Loss : 0.1913
Step : 71, train Loss : 0.1877
Step : 81, train Loss : 0.1372
Step : 91, train Loss : 0.1491
Step : 101, train Loss : 0.1367
Step : 111, train Loss : 0.1016
Step : 121, train Loss : 0.0832
Step : 131, train Loss : 0.1073
Step : 141, train Loss : 0.0757
Step : 151, train Loss : 0.0866
Step : 161, train Loss : 0.0704
Step : 171, train Loss : 0.0477
Step : 181, train Loss : 0.0488
Step : 191, train Loss : 0.0493
Step : 201, train Loss : 0.0549
Step : 211, train Loss : 0.0525
Step : 221, train Loss : 0.0493
Step : 231, train Loss : 0.0422
Step : 241, train Loss : 0.0539
Step : 251, train Loss : 0.0453
Step : 261, train Loss : 0.0306
Step : 271, train Loss : 0.0330
Step : 281, train Loss : 0.0356
Step : 291, train Loss : 0.0322
Step : 301, train Loss : 0.0403
Step : 3

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▆█
total_f1_score,▁▄█▅
total_pearsonr,▁▄██
total_train_loss,█▃▂▁
total_train_lr,█▆▃▁
total_valid_loss,█▄▂▁
train_loss,█▄▂▂▂▂▂▂▂▂▁▂▁▁▂▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_lr,▂▃▅▇███▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁

0,1
epoch,4.0
total_f1_score,0.9102
total_pearsonr,0.92573
total_train_loss,0.00482
total_train_lr,0.0
total_valid_loss,0.48251
train_loss,0.00561
train_lr,0.0


[34m[1mwandb[0m: Agent Starting Run: n6ddh39r with config:
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	eps: 1e-08
[34m[1mwandb[0m: 	learning_rate: 5e-05
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	valid_batch_size: 16
[34m[1mwandb[0m: 	warm_up_ratio: 0.1
Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceC

****** STARTING TO TRAIN EPOCH #0 ******
Step : 11, train Loss : 0.2024
Step : 21, train Loss : 0.2338
Step : 31, train Loss : 0.2025
Step : 41, train Loss : 0.1643
Step : 51, train Loss : 0.1387
Step : 61, train Loss : 0.0733
Step : 71, train Loss : 0.0675
Step : 81, train Loss : 0.0560
Step : 91, train Loss : 0.0505
Step : 101, train Loss : 0.0579
Step : 111, train Loss : 0.0450
Step : 121, train Loss : 0.0414
Step : 131, train Loss : 0.0322
Step : 141, train Loss : 0.0306
Step : 151, train Loss : 0.0295
Step : 161, train Loss : 0.0308
Step : 171, train Loss : 0.0303
Step : 181, train Loss : 0.0355
Step : 191, train Loss : 0.0278
Step : 201, train Loss : 0.0291
Step : 211, train Loss : 0.0258
Step : 221, train Loss : 0.0266
Step : 231, train Loss : 0.0343
Step : 241, train Loss : 0.0233
Step : 251, train Loss : 0.0303
Step : 261, train Loss : 0.0265
Step : 271, train Loss : 0.0225
Step : 281, train Loss : 0.0356
Step : 291, train Loss : 0.0271
Step : 301, train Loss : 0.0249
Step : 3

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▆█
total_f1_score,▁█▇▅
total_pearsonr,▁▄▇█
total_train_loss,█▄▂▁
total_train_lr,█▆▃▁
total_valid_loss,█▄▂▁
train_loss,█▄▅▄▄▄▅▄▄▄▃▂▂▂▂▂▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▂▁▁▁▁▁▁
train_lr,▂▃▅▇███▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁

0,1
epoch,4.0
total_f1_score,0.89866
total_pearsonr,0.91885
total_train_loss,0.00459
total_train_lr,0.0
total_valid_loss,0.53917
train_loss,0.00585
train_lr,0.0


[34m[1mwandb[0m: Agent Starting Run: f2xjgp2n with config:
[34m[1mwandb[0m: 	epochs: 4
[34m[1mwandb[0m: 	eps: 1e-08
[34m[1mwandb[0m: 	learning_rate: 3e-05
[34m[1mwandb[0m: 	train_batch_size: 8
[34m[1mwandb[0m: 	valid_batch_size: 16
[34m[1mwandb[0m: 	warm_up_ratio: 0.1
Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceC

****** STARTING TO TRAIN EPOCH #0 ******
Step : 11, train Loss : 0.1763
Step : 21, train Loss : 0.1691
Step : 31, train Loss : 0.1963
Step : 41, train Loss : 0.1863
Step : 51, train Loss : 0.1754
Step : 61, train Loss : 0.1333
Step : 71, train Loss : 0.1065
Step : 81, train Loss : 0.0870
Step : 91, train Loss : 0.0627
Step : 101, train Loss : 0.0535
Step : 111, train Loss : 0.0541
Step : 121, train Loss : 0.0430
Step : 131, train Loss : 0.0406
Step : 141, train Loss : 0.0541
Step : 151, train Loss : 0.0435
Step : 161, train Loss : 0.0331
Step : 171, train Loss : 0.0294
Step : 181, train Loss : 0.0349
Step : 191, train Loss : 0.0415
Step : 201, train Loss : 0.0361
Step : 211, train Loss : 0.0316
Step : 221, train Loss : 0.0297
Step : 231, train Loss : 0.0228
Step : 241, train Loss : 0.0328
Step : 251, train Loss : 0.0363
Step : 261, train Loss : 0.0248
Step : 271, train Loss : 0.0280
Step : 281, train Loss : 0.0206
Step : 291, train Loss : 0.0375
Step : 301, train Loss : 0.0268
Step : 3

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▆█
total_f1_score,▁▆█▆
total_pearsonr,▁▅▆█
total_train_loss,█▃▂▁
total_train_lr,█▆▃▁
total_valid_loss,█▇▄▁
train_loss,█▃▃▂▃▂▂▃▃▄▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_lr,▂▃▅▇███▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁

0,1
epoch,4.0
total_f1_score,0.91314
total_pearsonr,0.92283
total_train_loss,0.00381
total_train_lr,0.0
total_valid_loss,0.51211
train_loss,0.00364
train_lr,0.0


In [36]:
def predict(model, test_dataloader):
    model.eval()
    model.to(device)

    all_prediction = []
    all_reallabel = []

    for step, batch in enumerate(test_dataloader):

        batch = tuple(items.to(device) for items in batch)

        (x_batch_one, x_batch_two, batch_y) = batch

        with torch.no_grad():
            logit = model(x_batch_one, x_batch_two)
        logit = logit*5
        logit = logit.cpu()
        batch_y = batch_y.cpu()

        all_prediction = all_prediction + logit.tolist()
        all_reallabel = all_reallabel + batch_y.tolist()

    pred = torch.Tensor(all_prediction) # x
    real = torch.Tensor(all_reallabel) # y
    
    pearson = pearsonr(pred, real) #stats.spearmanr(pred, real)

    #f1
    fone = f1_process(pred, real)

    return pearson, fone

In [46]:
ckpt1 = '/content/drive/MyDrive/data/checkpoints/sts_hyper.ckpt.0'
ckpt2 = '/content/drive/MyDrive/data/checkpoints/sts_hyper.ckpt.1'
ckpt3 = '/content/drive/MyDrive/data/checkpoints/sts_hyper.ckpt.2'
ckpt4 = '/content/drive/MyDrive/data/checkpoints/sts_hyper.ckpt.3'

In [48]:
all_checkpoints = [ckpt1, ckpt2, ckpt3, ckpt4]

for checkpoint in all_checkpoints:
    loaded_ckpt = torch.load(checkpoint)
    model, optimizer, scheduler = initializer(train_dataloader, 1)
    model.load_state_dict(loaded_ckpt['model_state_dict'])
    #model.load_state_dict(torch.load(checkpoint, map_location=device))
    pearson_score, fonescore = predict(model, test_dataloader)
    print(f'{checkpoint[44:]} pearsonr: {pearson_score}, f1_score: {fonescore}')

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for

total step: 1953
hyper.ckpt.0 pearsonr: tensor([0.8652]), f1_score: 0.8259109311740891


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for

total step: 1953
hyper.ckpt.1 pearsonr: tensor([0.8701]), f1_score: 0.841046277665996


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for

total step: 1953
hyper.ckpt.2 pearsonr: tensor([0.8768]), f1_score: 0.8336673346693387


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Some weights of the model checkpoint at klue/roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for

total step: 1953
hyper.ckpt.3 pearsonr: tensor([0.8804]), f1_score: 0.8384458077709611
