# Imports

In [None]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertForSequenceClassification, BertTokenizer, RobertaTokenizer, RobertaForSequenceClassification
import torch
from sklearn.model_selection import train_test_split
from torch.optim import AdamW
from torch.nn.utils.clip_grad import clip_grad_norm_
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
# Mount Google Drive at /content/drive; the dataset paths read later in this
# notebook live under that mount point.
# NOTE(review): the google.colab package is presumably only importable inside a
# Colab runtime — this cell will need replacing when running elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Data Preprocessing

In [None]:
# Directory on the mounted Drive that holds the three LIAR split files.
LIAR_DIR = "/content/drive/MyDrive/liar_dataset"

# The TSV files carry no header row, so column names are supplied here, in file order.
LIAR_COLUMNS = [
    "json_ID", "label", "statement", "subject", "speaker", "job_title", "state", "party",
    "barely_true", "false", "half_true", "mostly_true", "pants_on_fire", "source",
]


def load_liar_split(split_name):
    """Read one LIAR split into a DataFrame with the shared column schema.

    Args:
        split_name: File stem of the split inside ``LIAR_DIR``
            (``"train"``, ``"valid"`` or ``"test"``).

    Returns:
        pandas.DataFrame with the columns listed in ``LIAR_COLUMNS``.
    """
    # NOTE(review): pandas' default quote handling may merge rows when a statement
    # contains an unbalanced double quote; compare the resulting row counts with the
    # dataset's published sizes and consider quoting=csv.QUOTE_NONE — TODO confirm.
    return pd.read_csv(f"{LIAR_DIR}/{split_name}.tsv", sep="\t", header=None, names=LIAR_COLUMNS)


train = load_liar_split("train")
val = load_liar_split("valid")  # the validation file is named valid.tsv
test = load_liar_split("test")

In [None]:
# Description of the TSV format (name used in this notebook in parentheses):

# Column 1: the ID of the statement ([ID].json) (json_ID).
# Column 2: the label (label).
# Column 3: the statement (statement).
# Column 4: the subject(s) (subject).
# Column 5: the speaker (speaker).
# Column 6: the speaker's job title (job_title).
# Column 7: the state info (state).
# Column 8: the party affiliation (party).
# Columns 9-13: the speaker's total credit history counts, including the current statement.
# 9: barely true counts (barely_true).
# 10: false counts (false).
# 11: half true counts (half_true).
# 12: mostly true counts (mostly_true).
# 13: pants on fire counts (pants_on_fire).
# Column 14: the context, i.e. venue / location of the speech or statement (source).

# Show the first rows of the training split as a sanity check on the column mapping.
train.head()

Unnamed: 0,json_ID,label,statement,subject,speaker,job_title,state,party,barely_true,false,half_true,mostly_true,pants_on_fire,source
0,2635.json,false,Says the Annies List political group supports ...,abortion,dwayne-bohac,State representative,Texas,republican,0.0,1.0,0.0,0.0,0.0,a mailer
1,10540.json,half-true,When did the decline of coal start? It started...,"energy,history,job-accomplishments",scott-surovell,State delegate,Virginia,democrat,0.0,0.0,1.0,1.0,0.0,a floor speech.
2,324.json,mostly-true,"Hillary Clinton agrees with John McCain ""by vo...",foreign-policy,barack-obama,President,Illinois,democrat,70.0,71.0,160.0,163.0,9.0,Denver
3,1123.json,false,Health care reform legislation is likely to ma...,health-care,blog-posting,,,none,7.0,19.0,3.0,5.0,44.0,a news release
4,9028.json,half-true,The economic turnaround started at the end of ...,"economy,jobs",charlie-crist,,Florida,democrat,15.0,9.0,20.0,19.0,2.0,an interview on CNN


In [None]:
# Load the tokenizer for the 'roberta-large' checkpoint; the model loaded later
# in this notebook uses the same checkpoint name.
# Previous BERT variant, kept for reference:
# tokenizer = BertTokenizer.from_pretrained('bert-large-uncased', do_lower_case=True)
# NOTE(review): do_lower_case=True looks carried over from the uncased BERT line above;
# roberta-large is presumably a cased vocabulary — confirm whether the flag has any
# effect here or should be dropped.
tokenizer = RobertaTokenizer.from_pretrained('roberta-large', do_lower_case=True)

# Class index for each LIAR label string.
LABEL_TO_ID = {"pants-fire": 0, "false": 1, "barely-true": 2, "half-true": 3, "mostly-true": 4, "true": 5}


def preprocess_data(df, max_length=128):
    """Tokenize the statements of a split and encode its labels as class indices.

    Args:
        df: DataFrame with a ``statement`` column (text) and a ``label`` column
            holding one of the keys of ``LABEL_TO_ID``.
        max_length: Length every sequence is padded / truncated to. Defaults to 128.

    Returns:
        ``(tokens, labels)`` where ``tokens`` is the tokenizer output (PyTorch tensors)
        and ``labels`` is an int64 tensor with one class index per row.

    Raises:
        ValueError: If any row has a label that is missing or not in ``LABEL_TO_ID``.
    """
    # Tokenize statements (uses the module-level `tokenizer`).
    tokens = tokenizer(
        list(df['statement']),
        padding='max_length',
        truncation=True,
        max_length=max_length,
        return_tensors='pt',
    )

    label_ids = df['label'].map(LABEL_TO_ID)
    # Unmapped labels become NaN, which would silently turn the whole tensor into
    # floats and only fail later inside the loss; fail here with a clear message.
    unmapped = label_ids.isna()
    if unmapped.any():
        offending = sorted(df.loc[unmapped, 'label'].astype(str).unique())
        raise ValueError(f"Unknown or missing labels in 'label' column: {offending}")

    labels = torch.tensor(label_ids.astype('int64').values)
    return tokens, labels

# Tokenize and label-encode every split (train, then validation, then test).
(train_tokens, train_labels), (val_tokens, val_labels), (test_tokens, test_labels) = [
    preprocess_data(split_df) for split_df in (train, val, test)
]

In [None]:
# train_tokens['input_ids'][0]

In [None]:
class FakeNewsDataset(Dataset):
    """Map-style dataset pairing tokenized statements with their class labels.

    Args:
        tokens: Mapping with ``'input_ids'`` and ``'attention_mask'`` entries,
            each indexable by example position.
        labels: Indexable sequence of labels, one per example.

    Raises:
        ValueError: If ``input_ids``, ``attention_mask`` and ``labels`` do not all
            contain the same number of examples.
    """

    def __init__(self, tokens, labels):
        self.input_ids = tokens['input_ids']
        self.attention_mask = tokens['attention_mask']
        self.labels = labels

        # A length mismatch would otherwise surface later as an index error (or as
        # silently dropped examples), far from the place the data was assembled.
        n_ids, n_mask, n_labels = len(self.input_ids), len(self.attention_mask), len(self.labels)
        if not (n_ids == n_mask == n_labels):
            raise ValueError(
                "Mismatched lengths: "
                f"input_ids={n_ids}, attention_mask={n_mask}, labels={n_labels}"
            )

    def __len__(self):
        """Return the number of examples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the example at ``idx`` as a dict with the keys the model's forward call uses."""
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attention_mask[idx],
            'labels': self.labels[idx]
        }

# Wrap each split's tokens and labels in a dataset object.
train_dataset, val_dataset, test_dataset = [
    FakeNewsDataset(split_tokens, split_labels)
    for split_tokens, split_labels in (
        (train_tokens, train_labels),
        (val_tokens, val_labels),
        (test_tokens, test_labels),
    )
]

In [None]:
# Number of examples per batch, shared by all three loaders.
batch_size = 16

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader, test_loader = [
    DataLoader(dataset=eval_dataset, batch_size=batch_size)
    for eval_dataset in (val_dataset, test_dataset)
]

In [None]:
# True  -> the encoder weights are trained together with the classification head.
# False -> the encoder is frozen and only the classification head is trained.
fine_tune = True

# Previous BERT variant, kept for reference:
# model = BertForSequenceClassification.from_pretrained('bert-large-uncased', num_labels=6)
# for param in model.bert.parameters():
#     param.requires_grad = fine_tune

# Pre-trained RoBERTa with a 6-way classification head.
model = RobertaForSequenceClassification.from_pretrained('roberta-large', num_labels=6)
# Toggle gradient tracking for every encoder parameter in one call.
model.roberta.requires_grad_(fine_tune)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)
print(f"Using: {device}")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Using: cuda


In [None]:
# Count only the weights that currently require gradients.
trainable_sizes = [weight.numel() for weight in model.parameters() if weight.requires_grad]
total_parameters = sum(trainable_sizes)
print(f"Total Parameters (fine_tuning = {fine_tune}): {total_parameters}")

# Counts recorded from earlier runs:
#   BERT-base      (fine_tune = True):    109486854
#   BERT-base      (fine_tune = False):   4614
#   BERT-large     (fine_tuning = True):  335148038
#   ROBERTA-large  (fine_tuning = True):  355365894

Total Parameters (fine_tuning = True): 355365894


In [None]:
epochs = 5
# Print a progress line every `log_every` batches instead of one line per batch,
# which would flood the cell output.
log_every = 50

# Optimizer: only parameters that still require gradients are updated, so a
# frozen encoder (fine_tune = False) is left untouched.
optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), eps = 1e-08, lr=1e-5)

# Training loop
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    # Re-enter training mode every epoch so the loop stays correct even if an
    # evaluation pass (which switches the model to eval mode) runs between epochs.
    model.train()
    running_loss = 0.0
    num_batches = 0

    for batch in train_loader:
        # Move to device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        # Forward pass; passing labels makes the model return the loss as well.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        # Cap the global gradient norm at 1.0 before the update step.
        clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1
        if num_batches % log_every == 0:
            print(f"Epoch: {epoch+1}; Step: {num_batches}; Mean loss: {running_loss / num_batches:.4f}")

    # Guard against an empty loader so the summary never divides by zero.
    if num_batches:
        print(f"Epoch: {epoch+1}; Mean loss: {running_loss / num_batches:.4f}")
    else:
        print(f"Epoch: {epoch+1}; no batches in train_loader")


Epoch 1/5
Epoch: 1; Loss: 1.7436107397079468
Epoch: 1; Loss: 1.7394465208053589
Epoch: 1; Loss: 1.809720754623413
Epoch: 1; Loss: 1.8493081331253052
Epoch: 1; Loss: 1.9003818035125732
Epoch: 1; Loss: 1.914628505706787
Epoch: 1; Loss: 1.8500354290008545
Epoch: 1; Loss: 1.8578991889953613
Epoch: 1; Loss: 1.6941049098968506
Epoch: 1; Loss: 1.824720859527588
Epoch: 1; Loss: 1.7831525802612305
Epoch: 1; Loss: 1.8257478475570679
Epoch: 1; Loss: 1.7535582780838013
Epoch: 1; Loss: 1.769162654876709
Epoch: 1; Loss: 1.960245132446289
Epoch: 1; Loss: 1.6681666374206543
Epoch: 1; Loss: 1.8073978424072266
Epoch: 1; Loss: 1.8584266901016235
Epoch: 1; Loss: 1.9005889892578125
Epoch: 1; Loss: 1.84257173538208
Epoch: 1; Loss: 1.7864519357681274
Epoch: 1; Loss: 1.7841532230377197
Epoch: 1; Loss: 1.6786938905715942
Epoch: 1; Loss: 1.8141645193099976
Epoch: 1; Loss: 1.8137214183807373
Epoch: 1; Loss: 1.7647291421890259
Epoch: 1; Loss: 1.8708306550979614
Epoch: 1; Loss: 1.8296353816986084
Epoch: 1; Loss: 1

In [None]:
def evaluate_model(model, val_loader, device):
    """Evaluate a 6-class sequence classifier on a data loader and print metrics.

    Prints the accuracy, a per-class classification report and the confusion matrix.
    The model is switched to evaluation mode and is left in that mode on return.

    Args:
        model: Model whose forward call accepts ``input_ids`` and ``attention_mask``
            and returns an object with a ``logits`` attribute.
        val_loader: Iterable of batches (dicts with ``'input_ids'``,
            ``'attention_mask'`` and ``'labels'`` tensors); any split can be passed.
        device: Device the model lives on; inputs are moved there before the forward pass.

    Returns:
        The accuracy over all examples in ``val_loader``.
    """
    class_names = ['pants-on-fire', 'false', 'barely-true', 'half-true', 'mostly-true', 'true']
    # Fix the class ids explicitly: without `labels=` scikit-learn infers the classes
    # from the data, so the report raises (6 names vs. fewer classes) and the confusion
    # matrix changes shape whenever a class is absent from both truth and predictions.
    class_ids = list(range(len(class_names)))

    model.eval()  # Set model to evaluation mode
    predictions, true_labels = [], []

    with torch.no_grad():  # No gradient calculation
        for batch in val_loader:
            # Only the model inputs need to be on the model's device.
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # Get model outputs
            logits = model(input_ids, attention_mask=attention_mask).logits

            # Get the predicted class (highest score)
            preds = torch.argmax(logits, dim=1)
            predictions.extend(preds.cpu().tolist())
            # Labels are only compared on the host, so skip the device round-trip.
            true_labels.extend(batch['labels'].tolist())

    # Calculate metrics
    acc = accuracy_score(true_labels, predictions)
    print(f"Accuracy: {acc:.4f}")
    print("\nClassification Report:")
    print(classification_report(
        true_labels,
        predictions,
        labels=class_ids,
        target_names=class_names,
        zero_division=0,  # report 0 (without warnings) for classes that are never predicted
    ))

    print("\nConfusion Matrix:")
    print(confusion_matrix(true_labels, predictions, labels=class_ids))

    return acc

In [None]:
# Score the trained model on the validation split first, then on the test split.
val_accuracy, test_accuracy = [
    evaluate_model(model, split_loader, device)
    for split_loader in (val_loader, test_loader)
]

In [None]:
# bert-base-uncased
# Fine Tune
# epochs = 5
# optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), eps = 1e-08, lr=1e-5)
# batch_size = 16

# Validation
# Accuracy: 0.2679
# Classification Report:
#                precision    recall  f1-score   support

# pants-on-fire       0.38      0.20      0.26       116
#         false       0.27      0.32      0.30       263
#   barely-true       0.24      0.14      0.17       237
#     half-true       0.31      0.27      0.29       248
#   mostly-true       0.28      0.33      0.31       251
#          true       0.20      0.33      0.25       169

#      accuracy                           0.27      1284
#     macro avg       0.28      0.26      0.26      1284
#  weighted avg       0.28      0.27      0.26      1284


# Confusion Matrix:
# [[23 37 11 16  9 20]
#  [13 84 35 38 48 45]
#  [10 72 32 36 43 44]
#  [ 8 50 27 67 53 43]
#  [ 4 35 24 39 83 66]
#  [ 3 28  3 23 57 55]]

# Testing
# Accuracy: 0.2676
# Classification Report:
#                precision    recall  f1-score   support

# pants-on-fire       0.34      0.13      0.19        92
#         false       0.28      0.32      0.30       249
#   barely-true       0.25      0.14      0.18       212
#     half-true       0.27      0.22      0.24       265
#   mostly-true       0.24      0.34      0.28       241
#          true       0.29      0.38      0.33       208

#      accuracy                           0.27      1267
#     macro avg       0.28      0.25      0.25      1267
#  weighted avg       0.27      0.27      0.26      1267


# Confusion Matrix:
# [[12 30  9 10 17 14]
#  [ 6 79 26 37 52 49]
#  [ 7 48 30 48 55 24]
#  [ 6 60 25 57 79 38]
#  [ 1 34 17 37 83 69]
#  [ 3 31 12 22 62 78]]


# bert-large-uncased
# Fine Tune
# epochs = 5
# optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), eps = 1e-08, lr=1e-5)
# batch_size = 16

# Validation

# Accuracy: 0.2757
# Classification Report:
#                precision    recall  f1-score   support

# pants-on-fire       0.35      0.15      0.21       116
#         false       0.32      0.33      0.33       263
#   barely-true       0.27      0.19      0.22       237
#     half-true       0.24      0.29      0.26       248
#   mostly-true       0.29      0.33      0.31       251
#          true       0.23      0.31      0.27       169

#      accuracy                           0.28      1284
#     macro avg       0.28      0.26      0.27      1284
#  weighted avg       0.28      0.28      0.27      1284


# Confusion Matrix:
# [[17 36 19 16 15 13]
#  [14 87 44 44 33 41]
#  [ 5 52 44 66 35 35]
#  [ 5 39 33 71 64 36]
#  [ 6 34 16 63 82 50]
#  [ 2 21  8 32 53 53]]

# Testing

# Accuracy: 0.2636
# Classification Report:
#                precision    recall  f1-score   support

# pants-on-fire       0.40      0.18      0.25        92
#         false       0.28      0.27      0.27       249
#   barely-true       0.24      0.16      0.19       212
#     half-true       0.25      0.26      0.26       265
#   mostly-true       0.26      0.34      0.29       241
#          true       0.27      0.31      0.29       208

#      accuracy                           0.26      1267
#     macro avg       0.28      0.25      0.26      1267
#  weighted avg       0.27      0.26      0.26      1267


# Confusion Matrix:
# [[17 31 12  9 16  7]
#  [ 9 67 28 52 43 50]
#  [ 2 45 33 63 44 25]
#  [ 6 46 37 70 65 41]
#  [ 3 28 16 60 83 51]
#  [ 5 26 14 28 71 64]]