### Setup

In [1]:
import csv
import torch
import numpy as np
import pandas as pd
from transformers import (BertForSequenceClassification, BertTokenizer,
                          RobertaForSequenceClassification, RobertaTokenizer,
                          XLMForSequenceClassification, XLMTokenizer,
                          XLNetForSequenceClassification, XLNetTokenizer,
                          DistilBertForSequenceClassification, DistilBertTokenizer,
                          AlbertForSequenceClassification, AlbertTokenizer,
                          AdamW, get_linear_schedule_with_warmup
                          )

#tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')

if torch.cuda.is_available():    
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm


### Load Data

In [2]:
def encode_label(label:str):
    if label == 'true': return 0
    if label == 'mostly-true': return 1
    if label == 'barely-true': return 2
    if label == 'half-true': return 3
    if label == 'false': return 4
    if label == 'pants-fire': return 5
    return -1

def load_df(liar_path:str):
    df = pd.read_csv(liar_path, sep='\t', header=None, quoting=csv.QUOTE_NONE, usecols=[2,3,5,14,15]).dropna()
    df = df.rename(columns={2:'target'})    
    df['text'] = df[3] + '. ' + df[5] + '. ' + df[14] + '. ' + df[15]
    df['target'] = df['target'].apply(encode_label)   
    return df[['text', 'target']]

def tokenize(df):
    input_ids = []
    attention_masks = []

    for txt in df['text'].tolist():
        encoded_text = tokenizer.encode_plus(
                            txt,
                            add_special_tokens = True,
                            max_length = 400,
                            pad_to_max_length = True,
                            return_attention_mask = True,
                            return_tensors = 'pt'
                    )
        input_ids.append(encoded_text['input_ids'])
        attention_masks.append(encoded_text['attention_mask'])

    input_ids = torch.cat(input_ids, dim=0)
    attention_masks = torch.cat(attention_masks, dim=0)

    labels = torch.tensor(torch.from_numpy(df['target'].to_numpy()))
    return input_ids, attention_masks, labels

def accuracy(pred, actual):
    pred_flat = np.argmax(pred, axis=1).flatten()
    labels_flat = actual.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

train_df = load_df('../data/liar_plus/train2.tsv')
test_df = load_df('../data/liar_plus/test2.tsv')
valid_df = load_df('../data/liar_plus/val2.tsv')

In [3]:
train_input_ids, train_attention_masks, train_labels = tokenize(train_df)
valid_input_ids, valid_attention_masks, valid_labels = tokenize(valid_df)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
  labels = torch.tensor(torch.from_numpy(df['target'].to_numpy()))


In [4]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_labels)
valid_dataset = TensorDataset(valid_input_ids, valid_attention_masks, valid_labels)

epochs = 3
batch_size = 8
train_dataloader = DataLoader(
            train_dataset,
            sampler = RandomSampler(train_dataset),
            batch_size = batch_size)
valid_dataloader = DataLoader(
            valid_dataset,
            sampler = SequentialSampler(valid_dataset),
            batch_size = batch_size)


### Model

In [5]:
# model = BertForSequenceClassification.from_pretrained(
#     "bert-base-uncased", 
#     num_labels = 6, 
#     output_attentions = False,
#     output_hidden_states = False
# )

model = XLNetForSequenceClassification.from_pretrained(
    "xlnet-base-cased",
    num_labels = 6, 
    output_attentions = False,
    output_hidden_states = False)

desc = model.cuda()
optimizer = AdamW(model.parameters(), lr = 2e-5, eps = 1e-8)
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0,
                                            num_training_steps = len(train_dataloader) * epochs)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.bias', 'logits_proj.weight', 'sequence_summary.summary.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

bert-base-uncaed:
- seed = 30
- lr = 2e-5, eps = 1e-8

In [6]:
import random
import numpy as np

seed_val = 30
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

training_stats = []

for epoch_i in range(0, epochs):
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    total_train_loss = 0
    model.train()

    for step, batch in enumerate(train_dataloader):

        if step % 100 == 0 and not step == 0:
            print('Batch {} of {}.'.format(step, len(train_dataloader)))

        batch = tuple(t.to(device) for t in batch)
        b_ids, b_mask, b_labels = batch

        model.zero_grad()        
        loss, logits = model(b_ids, 
                             token_type_ids=None, 
                             attention_mask=b_mask, 
                             labels=b_labels,
                             return_dict=False)
        
        total_train_loss += loss.item()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_train_loss / len(train_dataloader) 
               
    print("Avg. Train Loss: {0:.2f}".format(avg_train_loss))
    print("=====Validating====")

    model.eval()
    total_eval_accuracy = 0
    total_eval_loss = 0
    nb_eval_steps = 0

    for batch in valid_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_ids, b_mask, b_labels = batch
        
        with torch.no_grad():        
            (loss, logits, _) = model(b_ids, 
                                   token_type_ids=None, 
                                   attention_mask=b_mask,
                                   labels=b_labels,
                                   return_dict=False)
        total_eval_loss += loss.item()

        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        total_eval_accuracy += accuracy(logits, label_ids)
        
    avg_val_accuracy = total_eval_accuracy / len(valid_dataloader)
    print("Avg. Val Acc: {0:.2f}".format(avg_val_accuracy))

    avg_val_loss = total_eval_loss / len(valid_dataloader)
    print("Val Loss: {0:.2f}".format(avg_val_loss))

    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy
        }
    )

print("")
print("Training complete!")

Batch 100 of 1257.
Batch 200 of 1257.
Batch 300 of 1257.
Batch 400 of 1257.
Batch 500 of 1257.
Batch 600 of 1257.
Batch 700 of 1257.
Batch 800 of 1257.
Batch 900 of 1257.
Batch 1000 of 1257.
Batch 1100 of 1257.
Batch 1200 of 1257.
Avg. Train Loss: 1.78
=====Validating====
Avg. Val Acc: 0.19
Val Loss: 1.76
Batch 100 of 1257.
Batch 200 of 1257.
Batch 300 of 1257.
Batch 400 of 1257.
Batch 500 of 1257.
Batch 600 of 1257.
Batch 700 of 1257.
Batch 800 of 1257.
Batch 900 of 1257.
Batch 1000 of 1257.
Batch 1100 of 1257.
Batch 1200 of 1257.
Avg. Train Loss: 1.74
=====Validating====
Avg. Val Acc: 0.27
Val Loss: 1.68
Batch 100 of 1257.
Batch 200 of 1257.
Batch 300 of 1257.
Batch 400 of 1257.
Batch 500 of 1257.
Batch 600 of 1257.
Batch 700 of 1257.
Batch 800 of 1257.
Batch 900 of 1257.
Batch 1000 of 1257.
Batch 1100 of 1257.
Batch 1200 of 1257.
Avg. Train Loss: 1.64
=====Validating====
Avg. Val Acc: 0.27
Val Loss: 1.65

Training complete!


### Evaluation

In [7]:
test_input_ids, test_attention_masks, test_labels = tokenize(test_df)
test_dataset = TensorDataset(test_input_ids, test_attention_masks, test_labels)
test_dataloader = DataLoader(
            test_dataset,
            sampler = RandomSampler(test_dataset),
            batch_size = batch_size)
model.eval()
pred_probas , labels = [], []

for (step, batch) in enumerate(test_dataloader):
  
    if step % 50 == 0 and not step == 0:
        print('Batch {} of {}.'.format(step, len(test_dataloader)))
    
    batch = tuple(t.to(device) for t in batch)
    b_ids, b_mask, b_labels = batch
  
    with torch.no_grad():
        outputs = model(b_ids, token_type_ids=None, attention_mask=b_mask)

    logits = outputs[0]
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()
  
    pred_probas.append(logits)
    labels.append(label_ids)

  labels = torch.tensor(torch.from_numpy(df['target'].to_numpy()))


Batch 50 of 156.
Batch 100 of 156.
Batch 150 of 156.


In [8]:
preds = np.concatenate(pred_probas, axis=0)
ytrue = np.concatenate(labels, axis=0)
ypred = np.argmax(preds, axis=1)

In [9]:
from sklearn import metrics

f1 = metrics.f1_score(ytrue, ypred, average='macro')
acc = metrics.accuracy_score(ytrue, ypred)
precision = metrics.precision_score(ytrue, ypred, average='macro')
recall = metrics.recall_score(ytrue, ypred, average='macro')
cmatrix = metrics.confusion_matrix(ytrue, ypred)
print('F1 score: {:.3}, Accuracy: {:.3}, Precision: {:.3}, Recall: {:.3}, \n Confusion Matrix: \n {}'.format(f1, acc, precision, recall, cmatrix))

F1 score: 0.285, Accuracy: 0.295, Precision: 0.314, Recall: 0.287, 
 Confusion Matrix: 
 [[ 68  46   2  59  21   5]
 [ 53  76   5  75  27   1]
 [ 20  34  21  94  33   8]
 [ 30  57  14 120  37   1]
 [ 36  26  23  83  59  19]
 [ 11   5   6  23  21  23]]
