In [4]:
import time
import numpy as np
import pandas as pd
import pickle
import math
import torch
import datasets

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

from transformers import  XLMRobertaForSequenceClassification, XLMRobertaTokenizer, DataCollatorWithPadding

# Local checkpoint directory passed to `from_pretrained` for both tokenizer and model.
model_path = "./models/xlm-roberta-base"

# Directory path string; not referenced by the cells visible in this part of the notebook.
path = "./data/emb/"

# Presumably the language codes used per translation model — TODO confirm against the
# code that consumes this mapping.
model_emb_langs = dict(
    mBART=["ta_IN", "xh_ZA", "vi_VN"],
    m2m100=["ta", "xh", "vi"],
)

# Upper bound on the number of claim/label rows kept after loading.
limit = 50_000

# Class index <-> label name mappings handed to the classification model.
id2label = dict(enumerate(("False", "True")))
label2id = {name: index for index, name in id2label.items()}

In [5]:
# Tokenizer and classifier are both loaded from the same local checkpoint directory.
tokenizer = XLMRobertaTokenizer.from_pretrained(model_path)

# Two-label sequence classifier, configured with the notebook's label mappings.
model = XLMRobertaForSequenceClassification.from_pretrained(
    model_path,
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
)

# Collator built around the tokenizer; it is handed to the Trainer further down.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at ./models/xlm-roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
import evaluate
import numpy as np

# Load the four metrics used by compute_metrics, in the order
# accuracy -> precision -> recall -> f1.
accuracy, precision, recall, f1 = (
    evaluate.load(metric_name)
    for metric_name in ("accuracy", "precision", "recall", "f1")
)

# Module-level history: compute_metrics appends one metrics dict per call.
bro = []

def compute_metrics(eval_pred):
    """Score one evaluation pass with accuracy, precision, recall and F1.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` is reduced
            with ``np.argmax(..., axis=1)`` before scoring (assumes one row of
            per-class scores per sample — TODO confirm against the caller).

    Returns:
        dict: The four scores under the keys ``"accruacy"``, ``"precision"``,
        ``"recall"`` and ``"f1"``.

    Side effects:
        Appends a separate dict with the same scores to the module-level
        list ``bro``.
    """
    raw_scores, references = eval_pred
    predicted_ids = np.argmax(raw_scores, axis=1)

    # (output key, metric object, key inside the metric's result dict).
    # NOTE(review): "accruacy" is misspelt, but the logged and saved results in this
    # notebook use "eval_accruacy", so the spelling is deliberately left as-is.
    metric_table = (
        ("accruacy", accuracy, "accuracy"),
        ("precision", precision, "precision"),
        ("recall", recall, "recall"),
        ("f1", f1, "f1"),
    )
    scores = {
        out_key: metric.compute(predictions=predicted_ids, references=references)[result_key]
        for out_key, metric, result_key in metric_table
    }

    # Store an independent copy so the history entry is not the returned object.
    bro.append(dict(scores))

    return scores

In [7]:
# Claims come from a pipe-separated CSV; only the first `limit` entries are kept.
text = (
    pd.read_csv("./data/data.csv", sep="|")["claim"]
    .to_numpy()
    .tolist()[:limit]
)

# Labels: True/False replaced by 1/0, flattened to 1-D, cut to the same `limit`.
labels = (
    pd.read_csv("./data/labels.csv")
    .replace([True, False], [1, 0])
    .to_numpy()
    .ravel()[:limit]
)

# First split: 75 % train, 25 % held out.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    text, labels, test_size=0.25, random_state=42
)
# Second split: the held-out part is halved into test and validation.
x_test, x_valid, y_test, y_valid = train_test_split(
    x_holdout, y_holdout, test_size=0.5, random_state=42
)

In [8]:
from datasets import Dataset

# Number of leading rows kept from every split. The notebook currently runs on
# a tiny subset; raise this value (or pass max_rows=None) to use full splits.
SUBSET_ROWS = 15


def _build_split(claims, targets, max_rows=SUBSET_ROWS):
    """Wrap one split in a `Dataset` with "claim" and "label" columns.

    Args:
        claims: Sequence of claim texts for the split.
        targets: Sequence of labels aligned with `claims`.
        max_rows: Keep at most this many leading rows; ``None`` keeps every row.

    Returns:
        The (possibly shortened) `Dataset`.
    """
    split = Dataset.from_dict({"claim": claims, "label": targets})
    if max_rows is None:
        return split
    # Clamp to the split size so a split shorter than `max_rows` is used whole
    # instead of failing on an out-of-range index.
    return split.select(range(min(max_rows, len(split))))


ds_train = _build_split(x_train, y_train)
ds_test = _build_split(x_test, y_test)
ds_val = _build_split(x_valid, y_valid)

In [9]:
def tok_data(data):
    """Tokenize the "claim" column of a batch.

    Sequences are truncated to 512 tokens but not padded here. The Trainer in
    this notebook is given a `DataCollatorWithPadding`, which pads each batch
    when it is assembled, so padding every example to 512 tokens up front only
    adds padding tokens that the model then has to process.

    Args:
        data: Mapping with a "claim" entry holding the text(s) to tokenize.

    Returns:
        Whatever the tokenizer call returns for those texts.
    """
    return tokenizer(data["claim"], truncation=True, max_length=512)

In [10]:
# Apply tok_data to every split in batched mode, in train -> test -> val order.
tok_train, tok_test, tok_val = (
    split.map(tok_data, batched=True)
    for split in (ds_train, ds_test, ds_val)
)

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

In [11]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    # Output locations.
    output_dir="finetuned_XLMRoBERTa",
    logging_dir="test",
    push_to_hub=False,
    # Optimisation settings.
    optim="adamw_torch",
    learning_rate=1e-5,
    weight_decay=0.01,
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    do_train=True,
    # Evaluation, logging and checkpointing all use the "epoch" strategy.
    evaluation_strategy="epoch",
    logging_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
)

# Training uses the tokenized train split; the validation split is the
# evaluation set during training. The test split is scored separately.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tok_train,
    eval_dataset=tok_val,
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# `t` keeps the value returned by `trainer.train()`.
t = trainer.train()

Epoch,Training Loss,Validation Loss,Accruacy,Precision,Recall,F1
1,0.7946,0.724545,0.4,0.4,1.0,0.571429
2,0.7792,0.720735,0.4,0.4,1.0,0.571429


In [12]:
# Score the trained model on the tokenized test split (the validation split was
# the Trainer's eval_dataset during training) and print the metrics dict.
# NOTE(review): the name `eval` shadows Python's built-in eval() for the rest of
# the kernel session; rename it once no other cell depends on this name.
eval = trainer.evaluate(eval_dataset=tok_test)
print(eval)

{'eval_loss': 0.6592999696731567, 'eval_accruacy': 0.6666666666666666, 'eval_precision': 0.6666666666666666, 'eval_recall': 1.0, 'eval_f1': 0.8, 'eval_runtime': 5.9496, 'eval_samples_per_second': 2.521, 'eval_steps_per_second': 0.168, 'epoch': 2.0}


In [1]:
import pandas as pd

# Pickled results table for one run (file name suggests Tamil / mBART — TODO confirm).
results_file = "results/results_data_translated_ta_mBART.pickle"

# NOTE(review): unpickling can execute arbitrary code; only load files this
# project produced itself.
r_va = pd.read_pickle(results_file)

print(r_va)

   epoch                                            results
0      0  {'loss': 1.0122, 'learning_rate': 5e-06, 'epoc...
1      1  {'eval_loss': 0.5542508363723755, 'eval_accrua...
2      2  {'loss': 0.8893, 'learning_rate': 0.0, 'epoch'...
3      3  {'eval_loss': 0.5552643537521362, 'eval_accrua...
4      4  {'train_runtime': 128.1849, 'train_samples_per...
5      5  {'eval_loss': 0.5552643537521362, 'eval_accrua...


In [32]:
# Show the "results" dict stored in the row at position 7.
# NOTE(review): the frame printed by the preceding cell has only six rows
# (positions 0-5), so on that frame this lookup would raise IndexError. The saved
# output (epoch 4.0, step 28) presumably came from a different `r_va` loaded in
# an earlier kernel state — confirm which results file it belongs to.
r_va.iloc[7]["results"]

{'eval_loss': 0.5822702646255493,
 'eval_accruacy': 0.86,
 'eval_precision': 0.84375,
 'eval_recall': 0.9310344827586207,
 'eval_f1': 0.8852459016393444,
 'eval_runtime': 0.4211,
 'eval_samples_per_second': 118.724,
 'eval_steps_per_second': 9.498,
 'epoch': 4.0,
 'step': 28}