In [42]:
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader

from datasets import load_metric,load_dataset,Dataset

import transformers
from transformers import AutoTokenizer, DataCollatorWithPadding,RobertaForSequenceClassification,AdamW,get_scheduler,TrainingArguments,Trainer


import itertools
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split,StratifiedKFold
from tqdm.auto import tqdm, trange

import csv
import gc

# Name of the pretrained checkpoint that every tokenizer/model load in this notebook uses.
model_checkpoint = 'roberta-base'

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Silence everything below ERROR level coming from the transformers library.
transformers.logging.set_verbosity(transformers.logging.ERROR)

# Batch size shared by training (per device) and the evaluation DataLoader.
BATCH_SIZE = 16

In [43]:
def clean_memory():
    """Release memory that is no longer referenced.

    Runs a Python garbage-collection pass and then asks PyTorch to empty its
    CUDA cache. Returns nothing.
    """
    # Collect first so that objects dropped by Python are gone before the
    # CUDA cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()
    
def compute_metrics(testing_dataloader, eval_model=None):
    """Compute the micro-averaged F1 score of a model over a dataloader.

    Args:
        testing_dataloader: iterable of batches. Each batch is a dict of
            tensors that is forwarded to the model as keyword arguments and
            must contain a ``"labels"`` entry used as the reference.
        eval_model: model to evaluate. When omitted, the notebook-level
            ``model`` is used, so existing single-argument calls behave as
            before.

    Returns:
        The result of the ``f1`` metric's ``compute(average='micro')``;
        callers read the score from its ``'f1'`` key.
    """
    if eval_model is None:
        # Backward-compatible fallback: evaluate whatever `model` currently
        # refers to at notebook level.
        eval_model = model

    metric = load_metric("f1")

    eval_model.eval()
    # Gradients are never needed during evaluation, so disable them for the
    # whole loop rather than per batch.
    with torch.no_grad():
        for batch in testing_dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            logits = eval_model(**batch).logits
            predictions = torch.argmax(logits, dim=-1)
            metric.add_batch(predictions=predictions, references=batch["labels"])

    return metric.compute(average='micro')

## Data preprocessing

In [44]:
# Read the labelled sentences from the BABE CSV (semicolon-separated).
raw_labels = pd.read_csv('../data/BABE/final_labels_SG2.csv', sep=';')

# Integer class ids for the two labels that are kept.
mapping = {'Non-biased': 0, 'Biased': 1}

# Build the modelling frame in one non-mutating chain. The original cell
# called replace/rename with inplace=True on a boolean-filtered slice, which is
# the pattern pandas flags with SettingWithCopyWarning. The row index is left
# untouched so downstream cells see the same frame as before.
data = (
    raw_labels[['text', 'label_bias']]
    # Rows labelled 'No agreement' are discarded.
    .loc[lambda frame: frame['label_bias'] != 'No agreement']
    .replace({'label_bias': mapping})
    .rename(columns={'text': 'sentence', 'label_bias': 'label'})
)

## Training

In [45]:
# Stratified 5-fold splitter; shuffling with a fixed random_state keeps the
# fold assignment the same across runs.
skfold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [46]:
# Tokenizer and classifier both come from the same pretrained checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# `.to()` returns the module itself, so loading and device placement are chained.
# NOTE(review): the cross-validation loop rebinds `model` with a fresh instance
# for every fold, so this instance is only used if that loop has not been run.
model = RobertaForSequenceClassification.from_pretrained(model_checkpoint).to(device)

In [47]:
# Hyper-parameters shared by the Trainer of every fold.
training_args = TrainingArguments(
    # NOTE(review): outputs go to the parent directory — confirm this is intended.
    output_dir='../',
    per_device_train_batch_size=BATCH_SIZE,
    num_train_epochs=10,
    weight_decay=5e-5,
    warmup_steps=0,
    logging_steps=50,
    save_total_limit=2,
    disable_tqdm=False,
)

In [48]:
# Collator built around the notebook's tokenizer; it is handed to both the
# Trainer and the evaluation DataLoader to assemble batches.
data_collator = DataCollatorWithPadding(tokenizer)

In [49]:
def tokenize(data):
    """Tokenize the ``'sentence'`` field of ``data`` with truncation enabled.

    ``data`` is whatever mapping the caller passes in (here, the batches that
    ``Dataset.map(..., batched=True)`` supplies); only its ``'sentence'`` entry
    is read. Returns the tokenizer's output unchanged.
    """
    return tokenizer(data['sentence'], truncation=True)

In [50]:
# Convert the DataFrame into a Hugging Face Dataset. preserve_index=False keeps
# the pandas index out of the dataset, so no `__index_level_0__` column is
# created. The original cell removed that column by name, which raises when
# `data` carries a default RangeIndex (the column then never exists).
tokenized_data = Dataset.from_pandas(data, preserve_index=False)
tokenized_data = tokenized_data.map(tokenize, batched=True)

# The raw text is dropped once it has been tokenized; the remaining columns
# are the label and the tokenizer outputs.
tokenized_data = tokenized_data.remove_columns(['sentence'])
tokenized_data.set_format("torch")

  0%|          | 0/4 [00:00<?, ?ba/s]

### 5-fold CV

In [51]:
# Collector for the per-fold F1 scores produced by the cross-validation loop.
f1_scores = list()

In [None]:
# Start from an empty list every time this cell runs; otherwise re-running it
# appends a second set of fold scores to the previous run's results and the
# reported mean is computed over duplicated folds.
f1_scores = []

# Stratify on the label column (passed as a Series rather than a one-column frame).
for train_index, val_index in skfold.split(data[['sentence']], data['label']):

    # Materialise the rows of this fold as standalone datasets.
    token_train = Dataset.from_dict(tokenized_data[train_index])
    token_valid = Dataset.from_dict(tokenized_data[val_index])

    # Every fold starts again from the pretrained checkpoint.
    model = RobertaForSequenceClassification.from_pretrained(model_checkpoint)
    trainer = Trainer(
        model,
        training_args,
        train_dataset=token_train,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )
    trainer.train()

    # Evaluation on the held-out fold; compute_metrics reads the global `model`
    # that was just trained.
    eval_dataloader = DataLoader(token_valid, batch_size=BATCH_SIZE, collate_fn=data_collator)
    f1_scores.append(compute_metrics(eval_dataloader)['f1'])

    # Drop this fold's trainer and release cached memory before the next fold
    # is set up. `model` is intentionally kept: the inference cells further
    # down use the model left over from the last fold.
    del trainer
    clean_memory()


In [54]:
# Average of the per-fold F1 scores (displayed as the cell's output).
np.asarray(f1_scores).mean()

0.8045175999555136

### Inference experiments

In [35]:
# Example inputs tried in this experiment. The original cell assigned both to
# `sentence` back to back, so the first one was a dead store; keeping them in a
# list preserves both while making the selection explicit.
example_sentences = [
    'Orange Is the New Black" star Yael Stone is renouncing her U.S. green card to return to her native Australia in order to fight climate change.',
    'This might be biased but mustache suits you.',
]
sentence = example_sentences[-1]  # same sentence the original cell ended up classifying

# Tokenize into PyTorch tensors and use the encoding returned by `.to(device)`
# instead of relying on the call mutating `toksentence` in place.
toksentence = tokenizer(sentence, truncation=True, return_tensors="pt").to(device)

# Forward pass only: evaluation mode, no gradient tracking.
model.eval()
with torch.no_grad():
    output = model(**toksentence)

In [36]:
# Human-readable names for the two class ids.
label_names = {0:'unbiased',1:'biased'}

# Softmax over the class dimension, then pick the highest-scoring class per input.
probabilities = F.softmax(output.logits, dim=1)
classification = probabilities.argmax(dim=1)

# Only one sentence was passed in, so report the first (and only) prediction.
predicted_id = classification[0].item()
print(sentence, ': ', label_names[predicted_id])

This might be biased but mustache suits you. :  biased
