In [1]:
from collections import deque

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
import nltk
from torch.utils.data import Dataset
import pickle

from utils.utils import *
from utils.label_decoding import *
from utils.HierarchicalLoss import *

# SubTask 1

In [2]:
class DataSet(Dataset):
    def __init__(self, df, labels_at_level, features_file):
        super(DataSet, self).__init__()
        self.data_df = df
        self.labels_at_level = labels_at_level
        self.features_file = features_file
        self.features_dict = None
        with open(features_file, 'rb') as f:
            self.features_dict = pickle.load(f)
    
    def __len__(self):
        return len(self.data_df)
    
    def __getitem__(self, idx):
        id = self.data_df.iloc[idx]['id']
        text = self.data_df.iloc[idx]['cleaned_text']
        level_1_target = self.encode(self.data_df.iloc[idx]['Level 1'], 1)
        level_2_target = self.encode(self.data_df.iloc[idx]['Level 2'], 2)
        level_3_target = self.encode(self.data_df.iloc[idx]['Level 3'], 3)
        level_4_target = self.encode(self.data_df.iloc[idx]['Level 4'], 4)
        level_5_target = self.encode(self.data_df.iloc[idx]['Level 5'], 5)
            
        
        return {'id': id,
                'text': text, 
                'text_features': self.features_dict[id],
                'level_1_target': level_1_target, 
                'level_2_target': level_2_target, 
                'level_3_target': level_3_target, 
                'level_4_target': level_4_target, 
                'level_5_target': level_5_target }

    def encode(self, labels, level):
        level_ = f'Level {level}'
        
        target = torch.zeros(len(self.labels_at_level[level_])+1)
        
        for label in labels:
            label_idx = self.labels_at_level[level_][label]
            target[label_idx] = 1
        
        if len(labels) == 0:
            target[-1] = 1
        
        return target

In [3]:
class TestDataSet(Dataset):
    def __init__(self, df, features_file):
        super(TestDataSet, self).__init__()
        self.data_df = df
        self.features_file = features_file
        self.features_dict = None
        with open(features_file, 'rb') as f:
            self.features_dict = pickle.load(f)
    
    def __len__(self):
        return len(self.data_df)
    
    def __getitem__(self, idx):
        id = self.data_df.iloc[idx]['id']
        text = self.data_df.iloc[idx]['cleaned_text']
        
        return {'id': id,
                'text': text, 
                'text_features': self.features_dict[id] }

In [4]:
class Baseline(torch.nn.Module):
    def __init__(self):
        super(Baseline, self).__init__()
        
        # self.embeddings = BertModel.from_pretrained("bert-base-multilingual-cased")
        
        # for param in self.embeddings.parameters():
        #     param.requires_grad = False
        
        dropout_rate = 0.15
        
        self.linear_level1 = nn.Sequential(nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(dropout_rate),
                                           nn.Linear(1024, 512), nn.ReLU(), nn.Dropout(dropout_rate),
                                           nn.Linear(512, 128), nn.ReLU())
        
        self.linear_level2 = nn.Sequential(nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(dropout_rate),
                                           nn.Linear(1024, 512), nn.ReLU(), nn.Dropout(dropout_rate),
                                           nn.Linear(512, 128), nn.ReLU())
        self.linear_level3 = nn.Sequential(nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(dropout_rate),
                                           nn.Linear(1024, 512), nn.ReLU(), nn.Dropout(dropout_rate),
                                           nn.Linear(512, 128), nn.ReLU())
        self.linear_level4 = nn.Sequential(nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(dropout_rate),
                                           nn.Linear(1024, 512), nn.ReLU(), nn.Dropout(dropout_rate),
                                           nn.Linear(512, 128), nn.ReLU())
        self.linear_level5 = nn.Sequential(nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(dropout_rate),
                                           nn.Linear(1024, 512), nn.ReLU(), nn.Dropout(dropout_rate),
                                           nn.Linear(512, 128), nn.ReLU())

        self.sigmoid_reg1 = nn.Sequential(nn.Linear(128, 2))
        self.sigmoid_reg2 = nn.Sequential(nn.Linear(128*2, 4))
        self.sigmoid_reg3 = nn.Sequential(nn.Linear(128*3, 13), nn.Sigmoid())
        self.sigmoid_reg4 = nn.Sequential(nn.Linear(128*4, 13), nn.Sigmoid())
        self.sigmoid_reg5 = nn.Sequential(nn.Linear(128*5, 7), nn.Sigmoid())
        
        
    def forward(self, embeddings):
        # embeddings = self.embeddings(input_ids, attention_mask, token_type_ids)
        
        # embeddings = embeddings.last_hidden_state[:, 0, :]
        
        lvl1_rep = self.linear_level1(embeddings)
        lvl2_rep = self.linear_level2(embeddings)
        lvl3_rep = self.linear_level3(embeddings)
        lvl4_rep = self.linear_level4(embeddings)
        lvl5_rep = self.linear_level5(embeddings)
        
        lvl1_pred = self.sigmoid_reg1(lvl1_rep)
        lvl2_pred = self.sigmoid_reg2(torch.cat((lvl1_rep, lvl2_rep), dim=1))
        lvl3_pred = self.sigmoid_reg3(torch.cat((lvl1_rep, lvl2_rep, lvl3_rep), dim=1))
        lvl4_pred = self.sigmoid_reg4(torch.cat((lvl1_rep, lvl2_rep, lvl3_rep, lvl4_rep), dim=1))
        lvl5_pred = self.sigmoid_reg5(torch.cat((lvl1_rep, lvl2_rep, lvl3_rep, lvl4_rep, lvl5_rep), dim=1))
        
        return lvl1_pred, lvl2_pred, lvl3_pred, lvl4_pred, lvl5_pred

In [5]:
def evaluate_model(model, dataloader, pred_file_path, gold_file_path, 
                   evaluator_script_path, id2leaf_label, format=None,validation=False, HL=None, batchsize=None):
    model.eval()
    predictions = []
    
    # HL = HierarchicalLoss(id2label=id2label_1, hierarchical_labels=hierarchy_1, persuasion_techniques=persuasion_techniques_1, device=device)
    total_loss = 0
    
    
    with torch.no_grad():
        
        for batch in dataloader:
            if not isinstance(batch['id'], list):
                ids = batch['id'].detach().numpy().tolist()
            else:
                ids = batch['id']
        
            embeddings = batch['text_features']
            embeddings = embeddings.to(device)
            pred_1, pred_2, pred_3, pred_4, pred_5 = model(embeddings)
            
            if validation:
                y_1, y_2, y_3 = batch['level_1_target'], batch['level_2_target'], batch['level_3_target']
                y_4, y_5 = batch['level_4_target'], batch['level_5_target']
                
                y_1, y_2, y_3, y_4, y_5 = y_1.to(device), y_2.to(device), y_3.to(device), y_4.to(device), y_5.to(device)
                
                dloss = HL.calculate_dloss([pred_1, pred_2, pred_3, pred_4, pred_5], [y_1, y_2, y_3, y_4, y_5])
                lloss = HL.calculate_lloss([pred_1, pred_2, pred_3, pred_4, pred_5], [y_1, y_2, y_3, y_4, y_5])
                
                total_loss += (dloss + lloss).detach().cpu().item()
                
            pred_3 = (pred_3.cpu().detach().numpy() > 0.4).astype(int)
            pred_4 = (pred_4.cpu().detach().numpy() > 0.4).astype(int)
            pred_5 = (pred_5.cpu().detach().numpy() > 0.4).astype(int)
            
            predictions += get_labels(id2leaf_label, ids, pred_3, pred_4, pred_5, format)

        # Writing JSON data
        with open(pred_file_path, 'w') as f:
            json.dump(predictions, f, indent=4)
        
        if gold_file_path is None:
            return
        
        prec_h, rec_h, f1_h = evaluate_h(pred_file_path, gold_file_path)
        # print("f1_h={:.5f}\tprec_h={:.5f}\trec_h={:.5f}".format(f1_h, prec_h, rec_h))
        if validation:
            return prec_h, rec_h, f1_h, total_loss / (len(dataloader) * batchsize)
            
        # command = [
        #         "python3", evaluator_script_path,
        #         "--gold_file_path", gold_file_path,
        #         "--pred_file_path", pred_file_path
        # ]
        # 
        # result = subprocess.run(command, capture_output=True, text=True)
        # 
        # if result.returncode == 0:
        #     print("Output:\n", result.stdout)
        # else:
        #     print("Error:\n", result.stderr)
        #     
        
    

In [6]:
from torch.utils.data import DataLoader

train_json = './semeval2024_dev_release/subtask1/train.json'
validation_json = './semeval2024_dev_release/subtask1/validation.json'

train_data = process_json(train_json, techniques_to_level_1, hierarchy_1)
# val_data = 
validation_data = process_json(validation_json, techniques_to_level_1, hierarchy_1)


training_dataset = DataSet(train_data, indexed_persuasion_techniques_1, 
                           './TextFeatures/subtask1a/XLNet/train_text_features.pkl')
validation_dataset = DataSet(validation_data, indexed_persuasion_techniques_1, 
                             './TextFeatures/subtask1a/XLNet/validation_text_features.pkl')

In [7]:
device = get_device()

device = torch.device('cpu')

Using MPS


In [8]:
import wandb

# Initialize WandB and log in to your account
wandb.login()

sweep_config = {
    'method': 'bayes',  # Using Bayesian optimization
    'metric': {
        'name': 'val_loss',
        'goal': 'minimize'
    },
    'parameters': {
        'learning_rate': {
            'min': 1e-5,
            'max': 1e-3
        },
        'batch_size': {
            'values': [128, 256]
        },
        'optimizer': {
            'values': ['adam']
        },
        'beta1': {  # Relevant for Adam
            'min': 0.85,
            'max': 0.95
        },
        # 'momentum': {  # Relevant for SGD
        #     'min': 0.8,
        #     'max': 0.99
        # }
        'alpha': {
            'min': 0.5,
            'max': 1.0
        },
        'beta': {
            'min': 0.5,
            'max': 1.0
        },
        'threshold': {
            'min': 0.6,
            'max': 0.8
            }
    }
}

sweep_id = wandb.sweep(sweep_config, project="FineTuning-XLNet-250-epoch")
# sweep_id = '44uz6ydx'

[34m[1mwandb[0m: Currently logged in as: [33miqbal_shaik[0m ([33mphoenix_nlp[0m). Use [1m`wandb login --relogin`[0m to force relogin


Create sweep with ID: th5z4up4
Sweep URL: https://wandb.ai/phoenix_nlp/FineTuning-XLNet-250-epoch/sweeps/th5z4up4


In [9]:
models_dir = './models/subtask1a/XLNet-200/'
num_epochs = 250

In [10]:
from tqdm import tqdm
import json
import subprocess
from subtask_1_2a import *

def train():
    
    wandb.init()

    # Use WandB configurations
    config = wandb.config
    batch_size = config.batch_size
    learning_rate = config.learning_rate
    
    train_dataloader = DataLoader(training_dataset, batch_size=batch_size, shuffle=True)
    validation_dataloader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)
    
    model = Baseline()
    model.to(device)
    
    optimizer = None
    
    if config.optimizer == 'adam':
        optimizer = torch.optim.Adam(
            model.parameters(),
            lr=learning_rate,
            betas=(config.beta1, 0.999)
        )
    elif config.optimizer == 'sgd':
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=learning_rate,
            momentum=config.momentum
        )
        
    HL = HierarchicalLoss(id2label=id2label_1, hierarchical_labels=hierarchy_1,
                          persuasion_techniques=persuasion_techniques_1, device=device, 
                          alpha=config.alpha, beta=config.beta, threshold=config.threshold)
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for batch_idx, batch in enumerate(train_dataloader):
            
            y_1, y_2, y_3 = batch['level_1_target'], batch['level_2_target'], batch['level_3_target']
            y_4, y_5 = batch['level_4_target'], batch['level_5_target']
            
            embeddings = batch['text_features']
            embeddings = embeddings.to(device)
            y_1, y_2, y_3, y_4, y_5 = y_1.to(device), y_2.to(device), y_3.to(device), y_4.to(device), y_5.to(device)
            
            
            optimizer.zero_grad()
            pred_1, pred_2, pred_3, pred_4, pred_5 = model(embeddings)
            
            dloss = HL.calculate_dloss([pred_1, pred_2, pred_3, pred_4, pred_5], [y_1, y_2, y_3, y_4, y_5])
            lloss = HL.calculate_lloss([pred_1, pred_2, pred_3, pred_4, pred_5], [y_1, y_2, y_3, y_4, y_5])
    
            total_loss = lloss + dloss
            
            total_loss.backward()
            optimizer.step()
            
            running_loss += total_loss.detach().item()
            
            # if batch_idx % 20 == 19:
            #     print(f"[{epoch + 1}, {batch_idx + 1}] loss: {running_loss / 20:.3f}")
            #     running_loss = 0.0
        
        running_loss /= (len(train_dataloader) * batch_size)
        
        val_pred_file_path = './Predictions/val_predictions_subtask.json'
        val_gold_file_path = './semeval2024_dev_release/subtask1/validation.json'
        evaluator_script = './scorer-baseline/subtask_1_2a.py'
        
        prec_h, rec_h, f1_h, validation_loss = evaluate_model(model, validation_dataloader, val_pred_file_path, 
                                         val_gold_file_path, evaluator_script,id2leaf_label,
                                         validation=True, HL=HL, batchsize=batch_size)
        
        if epoch % 50 == 49:
            print(f'[{epoch+1}/{num_epochs}]')
            print("f1_h={:.5f}\tprec_h={:.5f}\trec_h={:.5f}".format(f1_h, prec_h, rec_h))
        
        # Log training metrics
        wandb.log({"epoch": epoch, "train_loss": running_loss})
        wandb.log({"val_loss": validation_loss})
        wandb.log({"h_precision": prec_h, "h_recall": rec_h, "h_f1-score":f1_h})
        
    
    torch.save(model.state_dict(), f"{models_dir}{wandb.run.name}.pth")
    wandb.join()

In [11]:
def main():
    wandb.agent(sweep_id, train, count=5)

if __name__ == "__main__":
    main()

[34m[1mwandb[0m: Agent Starting Run: fhi4vd63 with config:
[34m[1mwandb[0m: 	alpha: 0.5328816362719954
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta: 0.625107673229499
[34m[1mwandb[0m: 	beta1: 0.9319098101765652
[34m[1mwandb[0m: 	learning_rate: 0.0004098600267649725
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	threshold: 0.6029156395974248


[50/250]
f1_h=0.48550	prec_h=0.56349	rec_h=0.42648
[100/250]
f1_h=0.51859	prec_h=0.56567	rec_h=0.47874
[150/250]
f1_h=0.51398	prec_h=0.55844	rec_h=0.47609
[200/250]
f1_h=0.50229	prec_h=0.52042	rec_h=0.48539
[250/250]
f1_h=0.51333	prec_h=0.52889	rec_h=0.49867


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
h_f1-score,▁▇▇▇███▇█▇███████▇▇██▇█████████▇████▇█▇█
h_precision,█▅▄▃▃▂▂▃▂▃▂▂▂▂▂▂▁▂▂▁▁▂▂▂▂▁▂▂▁▁▁▁▁▂▂▁▁▁▂▂
h_recall,▁▆▆▆▇▇▇▇█▇▇▇▇▇▇▇▇▇▇██▇▇▇████▇██▇██▇▇▇▇▇▇
train_loss,█▇▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▇▁▂▃▁▃▆▂▇▅▅▅▆▆▄▄▆▇█▆█▅▆▅▆▄█▆▆▄▄▄▇▆▃▄▆▆▅▄

0,1
epoch,249.0
h_f1-score,0.51333
h_precision,0.52889
h_recall,0.49867
train_loss,0.05861
val_loss,5.24164


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ut60amw5 with config:
[34m[1mwandb[0m: 	alpha: 0.9913930660362298
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta: 0.5496886952192024
[34m[1mwandb[0m: 	beta1: 0.9071609101063982
[34m[1mwandb[0m: 	learning_rate: 0.00091771763122179
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	threshold: 0.7726805360531108


[50/250]
f1_h=0.48289	prec_h=0.54349	rec_h=0.43446
[100/250]
f1_h=0.50268	prec_h=0.52967	rec_h=0.47830
[150/250]
f1_h=0.48888	prec_h=0.53834	rec_h=0.44774
[200/250]
f1_h=0.51152	prec_h=0.53314	rec_h=0.49159
[250/250]
f1_h=0.52120	prec_h=0.53712	rec_h=0.50620


VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
h_f1-score,▁▆▇█▇█▇█▇█▇▇▇██▇▇▇██▇█▇▇█▇██▇▇▇██████▇██
h_precision,▅█▄▃▃▃▃▁▂▂▂▃▂▂▂▂▃▂▂▃▂▃▂▂▂▃▃▃▃▂▂▂▃▂▂▂▃▁▂▂
h_recall,▁▄▆█▇▇▇█▇█▇▇▇▇▇▇▇▇▇█▇▇▇▆▇▇▇▇▇▇▇█▇██▇▇▇██
train_loss,█▇▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▃▃▂▃▃▆▅▄▅▄▃▄▆▄▄▄▅▄▄▃▃▄▂▁▂▂▂▃▂▄▁▄▂▄▅▅▃▁

0,1
epoch,249.0
h_f1-score,0.5212
h_precision,0.53712
h_recall,0.5062
train_loss,0.09978
val_loss,4.7526


[34m[1mwandb[0m: Agent Starting Run: 7i923cbe with config:
[34m[1mwandb[0m: 	alpha: 0.9984195187612744
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta: 0.6670966656987594
[34m[1mwandb[0m: 	beta1: 0.9286959301675918
[34m[1mwandb[0m: 	learning_rate: 0.00023905460602819623
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	threshold: 0.6635299249934175


[50/250]
f1_h=0.50994	prec_h=0.54781	rec_h=0.47697
[100/250]
f1_h=0.47670	prec_h=0.56081	rec_h=0.41453
[150/250]
f1_h=0.49610	prec_h=0.57501	rec_h=0.43623
[200/250]
f1_h=0.50655	prec_h=0.54822	rec_h=0.47077
[250/250]
f1_h=0.49201	prec_h=0.55273	rec_h=0.44331


VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
h_f1-score,▁▆▆▇▇▇█▇▇██▇█▇██▇███▇████▇▇███████▇▇█▇█▇
h_precision,▄▇█▇▄▅▄▃▂▂▃▂▁▃▂▃▃▁▂▂▃▂▂▂▂▂▁▁▂▂▁▂▂▂▂▃▃▁▃▃
h_recall,▁▄▅▆▆▆▇▆▆█▇▇█▇▇▇▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
train_loss,█▇▆▅▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▃▂▄▃▁▂▃▃▅▄▆▅▅▂▃▇▅▅▆▆▅▅▅▄▇▃▇▆▅▆▇█▇▅▇▅▅▅

0,1
epoch,249.0
h_f1-score,0.49201
h_precision,0.55273
h_recall,0.44331
train_loss,0.07229
val_loss,5.59022


[34m[1mwandb[0m: Agent Starting Run: pejfvxr9 with config:
[34m[1mwandb[0m: 	alpha: 0.9340969856911232
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	beta: 0.7910533622555236
[34m[1mwandb[0m: 	beta1: 0.8693688237118415
[34m[1mwandb[0m: 	learning_rate: 0.0001609740373469136
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	threshold: 0.6247977280702973


[50/250]
f1_h=0.51248	prec_h=0.55916	rec_h=0.47298
[100/250]
f1_h=0.50681	prec_h=0.55065	rec_h=0.46944


[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)


[150/250]
f1_h=0.50969	prec_h=0.56257	rec_h=0.46590
[200/250]
f1_h=0.52095	prec_h=0.53315	rec_h=0.50930
[250/250]
f1_h=0.50107	prec_h=0.54199	rec_h=0.46590


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
h_f1-score,▁▅▇▇▇██▇█▇▇████▇█▇▇▇█████████▇▇▇▇██▇█▇▇█
h_precision,▆█▅▅▄▄▃▃▁▃▃▃▃▂▂▃▃▃▂▃▃▃▃▂▃▃▂▃▂▃▃▃▃▂▂▃▂▂▃▂
h_recall,▁▄▆▆▆▇▇▇█▇▇▇▇█▇▇▇▇▇▇▇▇██▇▇▇▇▇▇▇▆▇▇▇▇▇▇▇▇
train_loss,█▇▆▄▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,▄▁▃▂▂▂▃▂▄▃▃▄▅▇▅▄▃▄▅▆▆▃▆▇▆▃▄▄▄▁▄▄▅▅█▄▇▃▃▄

0,1
epoch,249.0
h_f1-score,0.50107
h_precision,0.54199
h_recall,0.4659
train_loss,0.09316
val_loss,7.56273


[34m[1mwandb[0m: Agent Starting Run: 7rgiwnxx with config:
[34m[1mwandb[0m: 	alpha: 0.6528033057378441
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	beta: 0.9756793544083928
[34m[1mwandb[0m: 	beta1: 0.938711279814244
[34m[1mwandb[0m: 	learning_rate: 0.0004208487209873777
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: 	threshold: 0.7831049814244941


[50/250]
f1_h=0.52661	prec_h=0.55676	rec_h=0.49956
[100/250]
f1_h=0.49788	prec_h=0.53056	rec_h=0.46900
[150/250]
f1_h=0.50048	prec_h=0.54795	rec_h=0.46058
[200/250]
f1_h=0.51411	prec_h=0.54312	rec_h=0.48804
[250/250]
f1_h=0.50643	prec_h=0.54794	rec_h=0.47077


VBox(children=(Label(value='0.006 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.8071059079771304, max=1.0…

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
h_f1-score,▁▅▇▇█▇███▇███▇▇██████████▇████▇██████▇██
h_precision,▅█▅▅▅▄▃▃▃▃▂▂▂▃▃▃▂▂▃▂▂▂▃▄▃▃▂▃▂▂▂▃▃▃▁▂▃▂▁▂
h_recall,▁▄▆▆▇▇▇█▇▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇█▇▇█▇▇▇▇▇█▇
train_loss,█▇▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▄▅▂▂▂▃▂▃▃▅▁▂▁▃▅▃▃▃▅▄▃▂▃▂▃▄▅▄▃▃▄▃▄▄▃▂▆▄

0,1
epoch,249.0
h_f1-score,0.50643
h_precision,0.54794
h_recall,0.47077
train_loss,0.08884
val_loss,8.00998


In [12]:
model = Baseline()
model.to(device)

Baseline(
  (linear_level1): Sequential(
    (0): Linear(in_features=1024, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.15, inplace=False)
    (6): Linear(in_features=512, out_features=128, bias=True)
    (7): ReLU()
  )
  (linear_level2): Sequential(
    (0): Linear(in_features=1024, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.15, inplace=False)
    (6): Linear(in_features=512, out_features=128, bias=True)
    (7): ReLU()
  )
  (linear_level3): Sequential(
    (0): Linear(in_features=1024, out_features=1024, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.15, inplace=False)
    (3): Linear(in_features=1024, out_features=512, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.15, inplace=False)
    (6): Linear(

In [13]:
model.load_state_dict(torch.load('./models/subtask1a/XLNet-200/fanciful-sweep-1.pth', map_location=device))

<All keys matched successfully>

In [14]:
from tqdm import tqdm
import json
import subprocess

bulgarian_pred_file_path = './Predictions/bulgarian_predictions_subtask1.json'
bulgarian_gold_file_path = './test_labels_ar_bg_md_version2/test_subtask1_bg.json'
evaluator_script = './scorer-baseline/subtask_1_2a.py'

bg_test_data = process_test_json(bulgarian_gold_file_path)


bg_test_dataset = TestDataSet(bg_test_data, './TextFeatures/subtask1a/XLNet/bg_test_text_features.pkl')
bg_test_dataloader = DataLoader(bg_test_dataset, batch_size=64, shuffle=True)

evaluate_model(model, bg_test_dataloader, bulgarian_pred_file_path, bulgarian_gold_file_path,
               evaluator_script, id2leaf_label, validation=False)

In [15]:
en_pred_file_path = './Predictions/en_predictions_subtask1.txt'

en_test_data = process_test_json('./test_data/english/en_subtask1_test_unlabeled.json')

en_test_dataset = TestDataSet(en_test_data, './TextFeatures/subtask1a/XLNet/en_test_text_features.pkl')
en_test_dataloader = DataLoader(en_test_dataset, batch_size=64, shuffle=True)

evaluate_model(model, en_test_dataloader, en_pred_file_path, None, evaluator_script, id2leaf_label, validation=False)