# 2024 COMP90042 Project
*Make sure you change the file name with your group id.*

# Readme
*If there is something to be noted for the marker, please mention here.*

*If you are planning to implement a program with Object Oriented Programming style, please put those the bottom of this ipynb file*

# 1.DataSet Processing
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [None]:
# prompt: mount drive



Mounted at /content/drive


In [1]:
import pandas as pd

import torch
import math
import json
import nltk
import numpy as np


import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from sklearn.metrics import accuracy_score, classification_report
from collections import Counter, OrderedDict


from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [2]:
from google.colab import drive
drive.mount('/content/drive')

evidences = pd.read_json('/content/drive/MyDrive/nlp/data/evidence.json', orient='index')
train_claims = pd.read_json('/content/drive/MyDrive/nlp/data/train-claims.json', orient='index')
dev_claims = pd.read_json('/content/drive/MyDrive/nlp/data/dev-claims.json', orient='index')

#update column names
evidences.reset_index(inplace=True)
evidences.columns = ['evidence_id', 'evidence_text']

train_claims.reset_index(inplace=True)
train_claims.rename(columns={'index': 'claim_id'}, inplace=True)

dev_claims.reset_index(inplace=True)
dev_claims.rename(columns={'index': 'claim_id'}, inplace=True)

evidence_id = evidences['evidence_id']
evidence_text = evidences['evidence_text']
evidence_idx = evidences.index.tolist()

evidence_id_dict = dict(zip(evidence_id, evidence_idx))

train_claims_text = train_claims['claim_text']
train_evidence_ids = train_claims['evidences']
train_claim_labels = train_claims['claim_label']
#map evidence_id to their corrosponding index for faster processing
train_evidence_idxs = train_evidence_ids.apply(lambda x: [evidence_id_dict[evidence_id] for evidence_id in x])

dev_claims_text = dev_claims['claim_text']
dev_claim_labels = dev_claims['claim_label']
dev_evidence_ids = dev_claims['evidences']
dev_evidence_idxs = dev_evidence_ids.apply(lambda x: [evidence_id_dict[evidence_id] for evidence_id in x])

Mounted at /content/drive


In [None]:
test_claims = pd.read_json('/content/drive/MyDrive/nlp/data/test-claims-unlabelled.json', orient='index')
test_claims.reset_index(inplace=True)
test_claims.columns = ['claim_id', 'claim_text']
test_claims_text = test_claims['claim_text']
test_claims_id = test_claims['claim_id']

In [None]:
dev_claim_ids = dev_claims['claim_id']

In [3]:
import json
dev_evidence_indices = json.load(open("/content/drive/MyDrive/nlp/data/reranked_indices.json", "r"))
test_evidence_indices = json.load(open("/content/drive/MyDrive/nlp/data/test_reranked_indices.json", "r"))

In [4]:
dev_k_indices = [sublist[:5] if len(sublist) >= 5 else sublist + [None] * (5 - len(sublist)) for sublist in dev_evidence_indices]
test_k_indices = [sublist[:5] if len(sublist) >= 5 else sublist + [None] * (5 - len(sublist)) for sublist in test_evidence_indices]

In [5]:
tt = TweetTokenizer()
stopwords = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_data(text):
    tokens = tt.tokenize(text.lower())
    return tokens

train_claims_text_processed = train_claims_text.apply(preprocess_data)
dev_claims_text_processed = dev_claims_text.apply(preprocess_data)
evidence_text_processed = evidence_text.apply(preprocess_data)


In [6]:
def build_vocab(texts, min_freq=3):
    # Count all the words
    word_freq = Counter()
    for text in texts:
        word_freq.update(text)

    # Start vocab from special tokens
    vocab = OrderedDict({
        "<pad>": 0,
        "<unk>": 1,
        "<sos>": 2,
        "<eos>": 3
    })
    index = 4  # Start indexing from 4 because 0-3 are reserved for special tokens
    for word, freq in word_freq.items():
        if freq >= min_freq:  # Only include words that meet the frequency threshold
            vocab[word] = index
            index += 1

    return vocab

# Build vocabulary using only evidence texts and applying the frequency threshold
vocab = build_vocab(evidence_text_processed, min_freq=3)

In [7]:
label_map = {
    "REFUTES": 0,
    "SUPPORTS": 1,
    "NOT_ENOUGH_INFO": 2,
    "DISPUTED": 3
}
train_claim_labels = train_claims['claim_label'].map(label_map)
dev_claim_labels = dev_claims['claim_label'].map(label_map)

In [42]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import random


def text_to_indices(text, vocab):
    return [vocab.get(word, vocab["<unk>"]) for word in text]

class ClaimEvidenceDataset(Dataset):
    def __init__(self, claims, evidence_indices, evidences, claim_labels, vocab):
        self.claims = claims
        self.evidence_indices = evidence_indices
        self.evidences = evidences
        self.claim_labels = claim_labels
        self.vocab = vocab
        self.pairs = self.create_pairs()

    def create_pairs(self):
        pairs = []
        for idx, claim in enumerate(self.claims):
            label = self.claim_labels[idx]
            if label == 3:  # Randomly assign 0 (REFUTES) or 1 (SUPPORTS) for "DISPUTED"
                label = random.choice([0, 1, 2])
            if label in [0, 1, 2]:  # Only consider REFUTES (0), SUPPORTS (1), and NOT ENOUGH INFO (2)
                candidate_pos_indices = self.evidence_indices[idx]
                relevant_evidences = [self.evidences[i] for i in candidate_pos_indices]
                for evidence in relevant_evidences:
                    pairs.append((claim, evidence, label))
        return pairs

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        claim, evidence, label = self.pairs[idx]
        claim_indices = text_to_indices(claim, self.vocab)
        evidence_indices = text_to_indices(evidence, self.vocab)
        claim_indices = [self.vocab["<sos>"]] + claim_indices + [self.vocab["<eos>"]]
        evidence_indices = [self.vocab["<sos>"]] + evidence_indices + [self.vocab["<eos>"]]
        return claim_indices, evidence_indices, label

def custom_collate_fn(batch):
    claims, evidences, labels = zip(*batch)
    claims_tensor = pad_sequence([torch.tensor(seq, dtype=torch.long) for seq in claims], batch_first=True, padding_value=vocab["<pad>"]).to(device)
    evidences_tensor = pad_sequence([torch.tensor(seq, dtype=torch.long) for seq in evidences], batch_first=True, padding_value=vocab["<pad>"]).to(device)
    labels_tensor = torch.tensor(labels, dtype=torch.float).to(device)
    return claims_tensor, evidences_tensor, labels_tensor


# Prepare the data
train_dataset = ClaimEvidenceDataset(train_claims_text_processed, train_evidence_idxs, evidence_text_processed, train_claim_labels, vocab)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=custom_collate_fn)
dev_dataset = ClaimEvidenceDataset(dev_claims_text_processed, dev_evidence_idxs, evidence_text_processed, dev_claim_labels, vocab)
dev_loader = DataLoader(dev_dataset, batch_size=128, shuffle=False, collate_fn=custom_collate_fn)


# 2. Model Implementation
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

## 2.1 Bi-Directional GRU with Attention

In [10]:
class ClaimEvidenceAttnModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, pad_idx, dropout=0.7):
        super(ClaimEvidenceAttnModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 4, 3)  # Three classes
        self.dropout = nn.Dropout(dropout)
        self.pad_idx = pad_idx
        self.attn_claims = nn.Linear(hidden_dim * 2, 1, bias=False)
        self.attn_evidences = nn.Linear(hidden_dim * 2, 1, bias=False)

    def attention(self, gru_output, attn_layer, mask):
        attn_energies = attn_layer(gru_output).squeeze(2)
        attn_energies = attn_energies.masked_fill(mask.squeeze(2) == 0, -1e10)
        attn_weights = F.softmax(attn_energies, dim=1)
        context = torch.bmm(attn_weights.unsqueeze(1), gru_output).squeeze(1)
        return context

    def forward(self, claims, evidences):
        embedded_claims = self.embedding(claims)
        embedded_claims = self.dropout(embedded_claims)
        claims_mask = (claims != self.pad_idx).unsqueeze(2).float()
        embedded_claims *= claims_mask

        gru_output_claims, _ = self.gru(embedded_claims)
        claims_context = self.attention(gru_output_claims, self.attn_claims, claims_mask)

        embedded_evidences = self.embedding(evidences)
        embedded_evidences = self.dropout(embedded_evidences)
        evidences_mask = (evidences != self.pad_idx).unsqueeze(2).float()
        embedded_evidences *= evidences_mask

        gru_output_evidences, _ = self.gru(embedded_evidences)
        evidences_context = self.attention(gru_output_evidences, self.attn_evidences, evidences_mask)

        combined_representation = torch.cat((claims_context, evidences_context), dim=1)
        combined_representation = self.dropout(combined_representation)
        logits = self.fc(combined_representation)

        return logits




## 2.2 Transformer with self-attention

In [27]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class PositionalEncoding(nn.Module):
    def __init__(self, emb_size: int, dropout: float, maxlen: int = 5000):
        super(PositionalEncoding, self).__init__()
        den = torch.exp(-torch.arange(0, emb_size, 2) * math.log(10000) / emb_size)
        pos = torch.arange(0, maxlen).reshape(maxlen, 1)
        pos_embedding = torch.zeros((maxlen, emb_size))
        pos_embedding[:, 0::2] = torch.sin(pos * den)
        pos_embedding[:, 1::2] = torch.cos(pos * den)
        pos_embedding = pos_embedding.unsqueeze(0)

        self.dropout = nn.Dropout(dropout)
        self.register_buffer('pos_embedding', pos_embedding)

    def forward(self, token_embedding: torch.Tensor):
        return self.dropout(token_embedding + self.pos_embedding[:, :token_embedding.size(1), :])


class TokenEmbedding(nn.Module):
    def __init__(self, vocab_size: int, emb_size):
        super(TokenEmbedding, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.emb_size = emb_size

    def forward(self, tokens: torch.Tensor):
        return self.embedding(tokens.long()) * math.sqrt(self.emb_size)


class TransformerEncoder(nn.Module):
    def __init__(self, emb_size: int, num_heads: int, ff_hidden_size: int, dropout: float):
        super(TransformerEncoder, self).__init__()
        self.attention = nn.MultiheadAttention(emb_size, num_heads, dropout=dropout)
        self.ff = nn.Sequential(
            nn.Linear(emb_size, ff_hidden_size),
            nn.ReLU(),
            nn.Linear(ff_hidden_size, emb_size)
        )
        self.layernorm1 = nn.LayerNorm(emb_size)
        self.layernorm2 = nn.LayerNorm(emb_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask):
        attn_output, _ = self.attention(x, x, x, key_padding_mask=mask)
        x = self.layernorm1(x + self.dropout(attn_output))
        ff_output = self.ff(x)
        x = self.layernorm2(x + self.dropout(ff_output))
        return x


class ClaimVerificationTransformer(nn.Module):
    def __init__(self, emb_size: int, vocab_size: int, num_heads: int, ff_hidden_size: int, num_encoder_layers: int, num_classes: int, dropout: float = 0.1, maxlen: int = 5000, pad_idx: int = 0):
        super(ClaimVerificationTransformer, self).__init__()
        self.embedding = TokenEmbedding(vocab_size, emb_size)
        self.positional_encoding = PositionalEncoding(emb_size, dropout, maxlen)
        self.encoder_layers = nn.ModuleList([TransformerEncoder(emb_size, num_heads, ff_hidden_size, dropout) for _ in range(num_encoder_layers)])
        self.fc = nn.Linear(emb_size, num_classes)
        self.pad_idx = pad_idx

    def forward(self, claims: torch.Tensor, evidences: torch.Tensor):
        # Embed and encode claims
        embedded_claims = self.embedding(claims)
        embedded_claims = self.positional_encoding(embedded_claims)
        claims_mask = (claims == self.pad_idx)

        for layer in self.encoder_layers:
            embedded_claims = layer(embedded_claims.transpose(0, 1), claims_mask).transpose(0, 1)

        claims_encoded = torch.mean(embedded_claims, dim=1)

        # Embed and encode evidences
        embedded_evidences = self.embedding(evidences)
        embedded_evidences = self.positional_encoding(embedded_evidences)
        evidences_mask = (evidences == self.pad_idx)

        for layer in self.encoder_layers:
            embedded_evidences = layer(embedded_evidences.transpose(0, 1), evidences_mask).transpose(0, 1)

        evidences_encoded = torch.mean(embedded_evidences, dim=1)

        # Combine claim and evidence representations
        combined_representation = claims_encoded + evidences_encoded
        logits = self.fc(combined_representation)
        return logits





## 2.3 Training function

In [16]:
def train_model(model, train_loader, dev_loader, criterion, optimizer, device, num_epochs=10, grad_clip=1.0):
    model = model.to(device)
    criterion = criterion.to(device)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, verbose=True)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for claims, evidences, labels in train_loader:
            claims = claims.to(device)
            evidences = evidences.to(device)
            labels = labels.to(device).long()

            optimizer.zero_grad()
            logits = model(claims, evidences)
            loss = criterion(logits, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)
        print(f'Epoch {epoch + 1}/{num_epochs} - Training Loss: {avg_train_loss}')

        model.eval()
        total_val_loss = 0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for claims, evidences, labels in dev_loader:
                claims = claims.to(device)
                evidences = evidences.to(device)
                labels = labels.to(device).long()

                logits = model(claims, evidences)
                val_loss = criterion(logits, labels)
                total_val_loss += val_loss.item()

                preds = torch.argmax(logits, dim=1).cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels.cpu().numpy())


        avg_val_loss = total_val_loss / len(dev_loader)
        accuracy = accuracy_score(all_labels, all_preds)
        f1 = classification_report(all_labels, all_preds, target_names=['REFUTES', 'SUPPORTS', 'NOT_ENOUGH_INFO'], zero_division=0, output_dict=True)['macro avg']['f1-score']

        print(f'Epoch {epoch + 1}/{num_epochs} - Validation Loss: {avg_val_loss}')
        print(f'Accuracy: {accuracy}')
        print(f'F1 Score: {f1}')
        print(classification_report(all_labels, all_preds, target_names=['REFUTES', 'SUPPORTS', 'NOT_ENOUGH_INFO'], zero_division=0))

        scheduler.step(avg_val_loss)

### GRU Training

In [47]:
# Exclude the "DISPUTED" label from the calculations
filtered_labels = train_claim_labels[train_claim_labels != 3]

# Count the occurrences of each label
class_counts = np.bincount(filtered_labels)
total_samples = len(filtered_labels)

# Calculate class weights
class_weights = total_samples / (len(class_counts) * class_counts)
# Convert the class weights to a tensor
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)
# Example usage with your existing dataset and dataloaders:
train_dataset = ClaimEvidenceDataset(train_claims_text_processed, train_evidence_idxs, evidence_text_processed, train_claim_labels, vocab)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, collate_fn=custom_collate_fn)
dev_dataset = ClaimEvidenceDataset(dev_claims_text_processed, dev_evidence_idxs, evidence_text_processed, dev_claim_labels, vocab)
dev_loader = DataLoader(dev_dataset, batch_size=128, shuffle=False, collate_fn=custom_collate_fn)

gru_model = ClaimEvidenceAttnModel(vocab_size=len(vocab), embedding_dim=300, hidden_dim=1024, pad_idx=vocab['<pad>'])
#model = ClaimVerificationTransformer(emb_size=512, vocab_size=len(vocab), num_classes =3,dropout=0.7, pad_idx=vocab["<pad>"])
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer = torch.optim.Adam(gru_model.parameters(), lr=0.0001)

train_model(gru_model, train_loader, dev_loader, criterion, optimizer, device=device, num_epochs=10)



Epoch 1/10 - Training Loss: 1.047194480895996
Epoch 1/10 - Validation Loss: 1.086488664150238
Accuracy: 0.4480651731160896
F1 Score: 0.2062822315986873
                 precision    recall  f1-score   support

        REFUTES       0.00      0.00      0.00        92
       SUPPORTS       0.00      0.00      0.00       179
NOT_ENOUGH_INFO       0.45      1.00      0.62       220

       accuracy                           0.45       491
      macro avg       0.15      0.33      0.21       491
   weighted avg       0.20      0.45      0.28       491

Epoch 2/10 - Training Loss: 1.0041954214756306
Epoch 2/10 - Validation Loss: 1.0712772607803345
Accuracy: 0.4684317718940937
F1 Score: 0.2776992905182058
                 precision    recall  f1-score   support

        REFUTES       0.37      0.08      0.13        92
       SUPPORTS       0.78      0.04      0.07       179
NOT_ENOUGH_INFO       0.47      0.98      0.63       220

       accuracy                           0.47       491
     

### Transformer Training

In [48]:
transformer_model = ClaimVerificationTransformer(emb_size=300, vocab_size=len(vocab), num_classes =3, num_heads=6, ff_hidden_size=1024, num_encoder_layers=6,dropout=0.7, pad_idx=vocab["<pad>"])
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer = torch.optim.Adam(transformer_model.parameters(), lr=0.0001)

train_model(transformer_model, train_loader, dev_loader, criterion, optimizer, device=device, num_epochs=15)



Epoch 1/15 - Training Loss: 1.1000692028265733
Epoch 1/15 - Validation Loss: 1.1579849421977997
Accuracy: 0.19959266802443992
F1 Score: 0.15561503746600477
                 precision    recall  f1-score   support

        REFUTES       0.18      0.86      0.30        92
       SUPPORTS       0.53      0.05      0.09       179
NOT_ENOUGH_INFO       0.32      0.05      0.08       220

       accuracy                           0.20       491
      macro avg       0.34      0.32      0.16       491
   weighted avg       0.37      0.20      0.12       491

Epoch 2/15 - Training Loss: 1.0641530339534466
Epoch 2/15 - Validation Loss: 1.155857801437378
Accuracy: 0.23014256619144602
F1 Score: 0.1888466834772967
                 precision    recall  f1-score   support

        REFUTES       0.18      0.74      0.29        92
       SUPPORTS       1.00      0.01      0.01       179
NOT_ENOUGH_INFO       0.41      0.20      0.27       220

       accuracy                           0.23       491
 

# 3.Testing and Evaluation
(You can add as many code blocks and text blocks as you need. However, YOU SHOULD NOT MODIFY the section title)

In [51]:
from sklearn.metrics import accuracy_score, classification_report
import torch
from torch.nn.utils.rnn import pad_sequence
from collections import Counter

def evaluate_model(model, claims, evidence_idxs, evidences, claim_labels, vocab, pad_idx, device):
    model.eval()
    all_preds = []
    all_labels = []
    all_evidence_predictions = []
    all_evidence_probs = []

    for claim_tokens, evidence_idx_list, true_label in zip(claims, evidence_idxs, claim_labels):
        # Numericalize the claim tokens
        claim_indices = text_to_indices(claim_tokens, vocab)
        claim_indices = [vocab["<sos>"]] + claim_indices + [vocab["<eos>"]]
        claim_tensor = torch.tensor(claim_indices, dtype=torch.long).unsqueeze(0).to(device)  # Add batch dimension

        evidence_tensors = []
        for idx in evidence_idx_list:
            evidence_tokens = evidences[idx]
            evidence_indices = text_to_indices(evidence_tokens, vocab)
            evidence_indices = [vocab["<sos>"]] + evidence_indices + [vocab["<eos>"]]
            evidence_tensor = torch.tensor(evidence_indices, dtype=torch.long).to(device)
            evidence_tensors.append(evidence_tensor)

        # Pad evidence tensors to the same length
        evidence_tensors_padded = pad_sequence(evidence_tensors, batch_first=True, padding_value=pad_idx).to(device)

        evidence_predictions = []
        evidence_probs = []
        with torch.no_grad():
            for evidence_tensor in evidence_tensors_padded:
                evidence_tensor = evidence_tensor.unsqueeze(0)  # Add batch dimension
                logits = model(claim_tensor, evidence_tensor)
                preds = torch.argmax(logits, dim=1).cpu().numpy()
                evidence_predictions.extend(preds)
                #prob = torch.sigmoid(logits).item()
                #evidence_probs.append(prob)


        aggregated_prediction = aggregate_predictions(evidence_predictions)
        all_preds.append(aggregated_prediction)
        all_labels.append(true_label)
        all_evidence_predictions.append(evidence_predictions)
        all_evidence_probs.append(evidence_probs)

    accuracy = accuracy_score(all_labels, all_preds)
    report = classification_report(all_labels, all_preds, target_names=['REFUTES', 'SUPPORTS', 'NOT ENOUGH INFO', 'DISPUTED'], zero_division=0)
    return accuracy, report

def aggregate_predictions(evidence_predictions):
    counter = Counter(evidence_predictions)
    num_supports = counter[1]
    num_refutes = counter[0]

    # Handle conflicts: if both SUPPORTS and REFUTES are present
    if num_supports > 0 and num_refutes > 0:
        return 3  # DISPUTED if there are both SUPPORTS and REFUTES

    # Determine the class if all predictions are either SUPPORTS or REFUTES (with or without NOT ENOUGH INFO)
    if num_supports > 0 and num_refutes == 0:
        return 1  # SUPPORTS

    if num_refutes > 0 and num_supports == 0:
        return 0  # REFUTES

    # Default to NOT ENOUGH INFO if there are no SUPPORTS or REFUTES
    return 2


# Evaluate the model on the training set
gru_accuracy, gru_report = evaluate_model(gru_model, dev_claims_text_processed, dev_k_indices, evidence_text_processed, dev_claim_labels, vocab, vocab["<pad>"], device)
print(f'GRU Model Accuracy: {gru_accuracy}')
print(f'GRU Model Classification Report:\n {gru_report}')

tran_accuracy, tran_report = evaluate_model(transformer_model, dev_claims_text_processed, dev_k_indices, evidence_text_processed, dev_claim_labels, vocab, vocab["<pad>"], device)
print(f'Transformer Model Accuracy: {tran_accuracy}')
print(f'Transformer Model Classification Report:\n {tran_report}')


GRU Model Accuracy: 0.4090909090909091
GRU Model Classification Report:
                  precision    recall  f1-score   support

        REFUTES       0.24      0.37      0.29        27
       SUPPORTS       0.56      0.51      0.54        68
NOT ENOUGH INFO       0.38      0.37      0.37        41
       DISPUTED       0.25      0.17      0.20        18

       accuracy                           0.41       154
      macro avg       0.36      0.35      0.35       154
   weighted avg       0.42      0.41      0.41       154

Transformer Model Accuracy: 0.42207792207792205
Transformer Model Classification Report:
                  precision    recall  f1-score   support

        REFUTES       0.50      0.30      0.37        27
       SUPPORTS       0.50      0.54      0.52        68
NOT ENOUGH INFO       0.37      0.39      0.38        41
       DISPUTED       0.19      0.22      0.21        18

       accuracy                           0.42       154
      macro avg       0.39      0.

## Object Oriented Programming codes here

*You can use multiple code snippets. Just add more if needed*