In [None]:
# Install the third-party packages imported below; --quiet suppresses most pip output.
# NOTE(review): versions are unpinned, so a fresh run may resolve different releases.
!pip install transformers torch scikit-learn matplotlib seaborn tqdm --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m35.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix,
    classification_report, roc_curve
)
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, get_scheduler
from torch.optim import AdamW

In [None]:
# Compute device shared by every cell below: CUDA when torch reports it as
# available, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Checkpoint identifier handed to AutoTokenizer / AutoModel.from_pretrained.
MODEL_NAME = "xlm-roberta-base"

In [None]:
class TextDataset(Dataset):
    """Map-style dataset that tokenizes one text per lookup.

    Each item is a dict holding every tensor the tokenizer returns (with the
    leading batch dimension of size 1 removed) plus a float ``labels`` tensor.
    """

    def __init__(self, texts, labels, tokenizer, max_len):
        """Store the parallel ``texts`` / ``labels`` sequences and tokenizer settings."""
        self.texts, self.labels = texts, labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Number of samples, taken from ``texts``."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenize sample ``idx`` (padded/truncated to ``max_len``) and attach its label."""
        encoded = self.tokenizer(
            self.texts[idx],
            padding='max_length',
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )
        sample = {}
        for name, tensor in encoded.items():
            # return_tensors="pt" yields a leading dim of 1; drop it so the
            # DataLoader can stack samples into a batch itself.
            sample[name] = tensor.squeeze(0)
        # Float dtype because the label feeds BCEWithLogitsLoss later on.
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return sample

In [None]:
def load_data(train_path, test_path, tokenizer, max_len=64, batch_size=16):
    """Build the train and test DataLoaders from two CSV files.

    Both CSVs must provide a ``text`` column and a ``label`` column.

    Args:
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.
        tokenizer: Tokenizer forwarded to ``TextDataset``.
        max_len: Padding/truncation length forwarded to ``TextDataset``.
        batch_size: Batch size used for both loaders (defaults to the
            previously hard-coded 16).

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    def _dataset_from_csv(csv_path):
        # One place for the CSV -> TextDataset conversion so the two splits
        # cannot drift apart.
        frame = pd.read_csv(csv_path)
        return TextDataset(frame['text'].tolist(), frame['label'].tolist(), tokenizer, max_len)

    train_loader = DataLoader(_dataset_from_csv(train_path), batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(_dataset_from_csv(test_path), batch_size=batch_size)
    return train_loader, test_loader


In [None]:
class TransformerClassifier(nn.Module):
    """Pretrained encoder followed by dropout and a single-logit linear head."""

    def __init__(self, model_name):
        """Load the ``model_name`` encoder and size the head from its hidden size."""
        super().__init__()
        self.transformer = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        hidden_size = self.transformer.config.hidden_size
        self.classifier = nn.Linear(hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        """Return one raw logit per sequence (no sigmoid applied)."""
        encoder_out = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        # Use the hidden state at sequence position 0 as the sequence summary.
        first_token = encoder_out.last_hidden_state[:, 0]
        logits = self.classifier(self.dropout(first_token))
        # Drop the trailing size-1 dimension produced by the 1-unit head.
        return logits.squeeze(-1)

In [None]:
def _forward_batch(model, batch):
    """Move one batch onto DEVICE, run the model, and return ``(logits, labels)``."""
    input_ids = batch['input_ids'].to(DEVICE)
    attention_mask = batch['attention_mask'].to(DEVICE)
    labels = batch['labels'].to(DEVICE)
    return model(input_ids, attention_mask), labels


def train_and_evaluate(train_loader, test_loader, model_name="xlm-roberta-base", save_dir="results/xlm-roberta",
                       epochs=6, lr=2e-5, pos_weight=9.0, patience=2):
    """Fine-tune a ``TransformerClassifier`` and report metrics for its best checkpoint.

    After every epoch the loss on ``test_loader`` is computed; whenever it
    improves, the weights are saved to ``<save_dir>/best_model.pt``. Training
    stops early after ``patience`` consecutive epochs without improvement. The
    saved checkpoint is then reloaded, scored on ``test_loader`` at a 0.5
    probability threshold, and the confusion matrix and ROC curve are written
    to ``save_dir`` as PNG files.

    NOTE(review): ``test_loader`` drives both checkpoint selection / early
    stopping and the final metrics, so the reported numbers are selected on
    the same data they are measured on. Consider a separate validation split.

    Args:
        train_loader: DataLoader yielding dicts with ``input_ids``,
            ``attention_mask`` and ``labels``.
        test_loader: DataLoader with the same batch layout.
        model_name: Checkpoint name passed to ``TransformerClassifier``.
        save_dir: Directory for the checkpoint and plots (created if missing).
        epochs: Maximum number of training epochs; also sizes the LR schedule.
        lr: AdamW learning rate.
        pos_weight: Positive-class weight passed to ``BCEWithLogitsLoss``.
        patience: Non-improving epochs tolerated before stopping early.

    Returns:
        Dict with ``accuracy``, ``precision``, ``recall``, ``f1``, ``roc_auc``
        and ``best_val_loss``.

    Raises:
        ValueError: If ``epochs`` is smaller than 1 (no checkpoint would be written).
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    os.makedirs(save_dir, exist_ok=True)
    checkpoint_path = os.path.join(save_dir, "best_model.pt")

    model = TransformerClassifier(model_name).to(DEVICE)
    optimizer = AdamW(model.parameters(), lr=lr)

    pos_weight_tensor = torch.tensor([pos_weight], dtype=torch.float).to(DEVICE)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight_tensor)

    # The schedule length is derived from `epochs` so the two can never disagree.
    num_training_steps = len(train_loader) * epochs
    scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

    best_val_loss = float("inf")
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            logits, labels = _forward_batch(model, batch)
            loss = criterion(logits, labels)

            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

            epoch_loss += loss.item()

        avg_epoch_loss = epoch_loss / len(train_loader)
        print(f"Epoch {epoch+1} Loss: {avg_epoch_loss:.4f}")

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in test_loader:
                logits, labels = _forward_batch(model, batch)
                val_loss += criterion(logits, labels).item()

        avg_val_loss = val_loss / len(test_loader)
        print(f"Validation Loss: {avg_val_loss:.4f}")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            torch.save(model.state_dict(), checkpoint_path)
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("⏹️ Early stopping triggered.")
                break

    # map_location keeps the reload working even if DEVICE differs from the
    # device the tensors were saved from.
    model.load_state_dict(torch.load(checkpoint_path, map_location=DEVICE))
    model.eval()
    all_preds, all_probs, all_labels = [], [], []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            logits, labels = _forward_batch(model, batch)
            probs = torch.sigmoid(logits)
            preds = (probs > 0.5).int()

            all_preds.extend(preds.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    prec = precision_score(all_labels, all_preds)
    rec = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_probs)

    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1-score:  {f1:.4f}")
    print(f"ROC AUC:   {auc:.4f}")
    print(classification_report(all_labels, all_preds, zero_division=0))

    cm = confusion_matrix(all_labels, all_preds)
    plt.figure()
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title("Confusion Matrix for RoBERTa - The Best Variant")
    plt.savefig(os.path.join(save_dir, "confusion_matrix.png"))
    plt.close()

    fpr, tpr, _ = roc_curve(all_labels, all_probs)
    plt.figure()
    plt.plot(fpr, tpr, label=f'AUC = {auc:.4f}')
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
    plt.title("ROC Curve for RoBERTa - The Best Variant")
    plt.legend()
    plt.savefig(os.path.join(save_dir, "roc_curve.png"))
    plt.close()

    return {
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1": f1,
        "roc_auc": auc,
        "best_val_loss": best_val_loss,
    }

In [None]:
def run_pipeline_xlmr(train_path, test_path, max_len=64, save_dir="results/xlm-roberta"):
    """Run the full MODEL_NAME pipeline on one train/test CSV pair.

    Loads the MODEL_NAME tokenizer, builds the loaders with ``load_data`` and
    hands them to ``train_and_evaluate``.

    Args:
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.
        max_len: Token length forwarded to ``load_data``.
        save_dir: Output directory forwarded to ``train_and_evaluate``. Pass a
            distinct directory per dataset variant; otherwise each run
            overwrites the previous run's checkpoint and plots.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    train_loader, test_loader = load_data(train_path, test_path, tokenizer, max_len)
    train_and_evaluate(train_loader, test_loader, model_name=MODEL_NAME, save_dir=save_dir)

In [11]:
# Colab-only: trigger a browser download of the saved checkpoint.
# The file is written by train_and_evaluate, so this cell only works after a
# training run has finished (assumes the working directory is /content — TODO confirm).
# NOTE(review): this import sits outside the notebook's import cell and the cell
# appears before the training calls; consider moving both to the end.
from google.colab import files
files.download('/content/results/xlm-roberta/best_model.pt')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Train and evaluate on the "variant1_raw" train/test CSV pair (default max_len=64).
run_pipeline_xlmr("v1_training_variant1_raw.csv", "v1_test_variant1_raw.csv")

Epoch 1: 100%|██████████| 628/628 [02:38<00:00,  3.97it/s]


Epoch 1 Loss: 1.1404
Validation Loss: 1.2163


Epoch 2: 100%|██████████| 628/628 [02:36<00:00,  4.00it/s]


Epoch 2 Loss: 1.1436
Validation Loss: 1.2626


Epoch 3: 100%|██████████| 628/628 [02:37<00:00,  4.00it/s]


Epoch 3 Loss: 0.8911
Validation Loss: 0.7860


Epoch 4: 100%|██████████| 628/628 [02:37<00:00,  3.98it/s]


Epoch 4 Loss: 0.6325
Validation Loss: 0.8103


Epoch 5: 100%|██████████| 628/628 [02:37<00:00,  3.99it/s]


Epoch 5 Loss: 0.4677
Validation Loss: 0.8537
⏹️ Early stopping triggered.


Evaluating: 100%|██████████| 63/63 [00:03<00:00, 18.64it/s]


Accuracy:  0.8430
Precision: 0.4542
Recall:    0.8507
F1-score:  0.5922
ROC AUC:   0.9066
              precision    recall  f1-score   support

         0.0       0.97      0.84      0.90       866
         1.0       0.45      0.85      0.59       134

    accuracy                           0.84      1000
   macro avg       0.71      0.85      0.75      1000
weighted avg       0.90      0.84      0.86      1000



In [None]:
# Train and evaluate on the "variant2_light" train/test CSV pair (default max_len=64).
run_pipeline_xlmr("v1_training_variant2_light.csv", "v1_test_variant2_light.csv")

Epoch 1: 100%|██████████| 628/628 [02:36<00:00,  4.02it/s]


Epoch 1 Loss: 1.1943
Validation Loss: 1.0922


Epoch 2: 100%|██████████| 628/628 [02:35<00:00,  4.03it/s]


Epoch 2 Loss: 0.8833
Validation Loss: 0.8838


Epoch 3: 100%|██████████| 628/628 [02:35<00:00,  4.03it/s]


Epoch 3 Loss: 0.6625
Validation Loss: 0.9930


Epoch 4: 100%|██████████| 628/628 [02:35<00:00,  4.05it/s]


Epoch 4 Loss: 0.5297
Validation Loss: 1.3047
⏹️ Early stopping triggered.


Evaluating: 100%|██████████| 63/63 [00:03<00:00, 19.07it/s]


Accuracy:  0.8090
Precision: 0.3956
Recall:    0.8060
F1-score:  0.5307
ROC AUC:   0.8865
              precision    recall  f1-score   support

         0.0       0.96      0.81      0.88       866
         1.0       0.40      0.81      0.53       134

    accuracy                           0.81      1000
   macro avg       0.68      0.81      0.71      1000
weighted avg       0.89      0.81      0.83      1000



In [None]:
# Train and evaluate on the "variant3_full" train/test CSV pair (default max_len=64).
run_pipeline_xlmr("v1_training_variant3_full.csv", "v1_test_variant3_full.csv")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Epoch 1: 100%|██████████| 626/626 [02:34<00:00,  4.05it/s]


Epoch 1 Loss: 1.1542
Validation Loss: 1.4331


Epoch 2: 100%|██████████| 626/626 [02:40<00:00,  3.89it/s]


Epoch 2 Loss: 1.1414
Validation Loss: 1.4013


Epoch 3: 100%|██████████| 626/626 [02:43<00:00,  3.84it/s]


Epoch 3 Loss: 1.0216
Validation Loss: 1.4571


Epoch 4: 100%|██████████| 626/626 [02:42<00:00,  3.84it/s]


Epoch 4 Loss: 0.8524
Validation Loss: 1.1603


Epoch 5: 100%|██████████| 626/626 [02:43<00:00,  3.84it/s]


Epoch 5 Loss: 0.7133
Validation Loss: 1.3061


Epoch 6: 100%|██████████| 626/626 [02:42<00:00,  3.85it/s]


Epoch 6 Loss: 0.6058
Validation Loss: 1.4186
⏹️ Early stopping triggered.


Evaluating: 100%|██████████| 63/63 [00:03<00:00, 18.29it/s]


Accuracy:  0.8148
Precision: 0.3942
Recall:    0.7090
F1-score:  0.5067
ROC AUC:   0.8432
              precision    recall  f1-score   support

         0.0       0.95      0.83      0.89       865
         1.0       0.39      0.71      0.51       134

    accuracy                           0.81       999
   macro avg       0.67      0.77      0.70       999
weighted avg       0.87      0.81      0.84       999

