In [None]:
!pip install transformers torch sacremoses scikit-learn matplotlib seaborn tqdm --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m85.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m55.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m44.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix,
    classification_report, roc_curve
)
from torch import nn
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModel, get_scheduler
from torch.optim import AdamW

In [None]:
# Reproducibility: seed the global NumPy and PyTorch RNGs once, before any model
# is built or any DataLoader shuffles. GPU kernels may still be nondeterministic.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Checkpoint name handed to AutoTokenizer / AutoModel.from_pretrained (HerBERT base, cased).
MODEL_NAME = "allegro/herbert-base-cased"

In [None]:
class TextDataset(Dataset):
    """Dataset that tokenizes one text per lookup and attaches its label.

    Args:
        texts: Indexable collection of inputs; element ``idx`` is passed to the tokenizer.
        labels: Indexable collection of targets, aligned with ``texts`` by index.
        tokenizer: Callable invoked with the text plus ``padding``, ``truncation``,
            ``max_length`` and ``return_tensors="pt"``; must return a mapping of tensors.
        max_len: Value forwarded to the tokenizer as ``max_length``.
    """

    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_len = tokenizer, max_len

    def __len__(self):
        """Return the number of samples (the number of texts)."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenizer outputs for sample ``idx`` plus a float ``'labels'`` tensor."""
        tokenized = self.tokenizer(
            self.texts[idx],
            return_tensors="pt",
            max_length=self.max_len,
            truncation=True,
            padding='max_length',
        )
        sample = {}
        for field, tensor in tokenized.items():
            # Drop the leading size-1 axis (presumably the batch axis the tokenizer
            # adds for a single text) so the DataLoader can stack samples itself.
            sample[field] = tensor.squeeze(0)
        # The label is stored as float; the training loss in this notebook is BCE-with-logits.
        sample['labels'] = torch.tensor(self.labels[idx], dtype=torch.float)
        return sample

In [5]:
def load_data(train_path, test_path, tokenizer, max_len=64, batch_size=16):
    """Read the train/test CSV files and wrap them in DataLoaders.

    Both CSV files must contain a ``text`` column and a ``label`` column.

    Args:
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.
        tokenizer: Tokenizer forwarded to ``TextDataset``.
        max_len: Sequence length forwarded to ``TextDataset``.
        batch_size: Batch size used for both loaders (default 16, the value
            that used to be hard-coded).

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.

    Raises:
        KeyError: If either CSV lacks the ``text`` or ``label`` column.
    """
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    # Fail early with a message that names the offending file instead of a bare KeyError.
    for path, frame in ((train_path, train_df), (test_path, test_df)):
        missing = {'text', 'label'} - set(frame.columns)
        if missing:
            raise KeyError(f"{path} is missing required column(s): {sorted(missing)}")

    def _build_dataset(frame):
        # One construction path for both splits keeps them tokenized identically.
        return TextDataset(frame['text'].tolist(), frame['label'].tolist(), tokenizer, max_len)

    train_loader = DataLoader(_build_dataset(train_df), batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(_build_dataset(test_df), batch_size=batch_size)
    return train_loader, test_loader

In [None]:
class TransformerClassifier(nn.Module):
    """Pretrained encoder followed by dropout and a single-logit linear head.

    Args:
        model_name: Checkpoint name passed to ``AutoModel.from_pretrained``.
    """

    def __init__(self, model_name):
        super().__init__()
        encoder = AutoModel.from_pretrained(model_name)
        self.transformer = encoder
        self.dropout = nn.Dropout(p=0.3)
        # One output unit: the head emits a single raw logit per sequence.
        self.classifier = nn.Linear(in_features=encoder.config.hidden_size, out_features=1)

    def forward(self, input_ids, attention_mask):
        """Return one logit per input sequence (the trailing size-1 axis is squeezed away)."""
        encoded = self.transformer(input_ids=input_ids, attention_mask=attention_mask)
        # Use the hidden state at sequence position 0 as the sequence representation.
        first_token_state = encoded.last_hidden_state[:, 0]
        logits = self.classifier(self.dropout(first_token_state))
        return logits.squeeze(-1)

In [None]:
def _batch_to_device(batch):
    """Return ``(input_ids, attention_mask, labels)`` from a batch dict, moved to ``DEVICE``."""
    return (
        batch['input_ids'].to(DEVICE),
        batch['attention_mask'].to(DEVICE),
        batch['labels'].to(DEVICE),
    )


def train_and_evaluate(train_loader, test_loader, model_name="allegro/herbert-base-cased", save_dir="results/herbert-base-cased",
                       epochs=6, patience=2, pos_weight=9.0, lr=2e-5, threshold=0.5):
    """Fine-tune a ``TransformerClassifier``, keep the best checkpoint, and report metrics.

    Trains with AdamW, a linear learning-rate schedule without warmup, and a
    positively weighted BCE-with-logits loss. After every epoch the loss on
    ``test_loader`` is computed; the weights with the lowest such loss are saved to
    ``<save_dir>/best_model.pt`` and training stops early after ``patience``
    consecutive epochs without improvement. The best checkpoint is then reloaded,
    metrics are printed, and a confusion matrix and ROC curve are written as PNG
    files into ``save_dir``.

    NOTE: ``test_loader`` drives both checkpoint selection / early stopping and
    the final metrics, so the reported numbers are selected on the same data they
    are measured on. Pass a separate validation split for unbiased estimates.

    Args:
        train_loader: DataLoader yielding dicts with ``input_ids``,
            ``attention_mask`` and ``labels``.
        test_loader: DataLoader with the same batch layout, used for validation
            loss and for the final evaluation.
        model_name: Checkpoint name forwarded to ``TransformerClassifier``.
        save_dir: Directory for the checkpoint and plots (created if missing).
        epochs: Maximum number of training epochs; also sizes the LR schedule.
        patience: Number of consecutive non-improving epochs that triggers early stopping.
        pos_weight: Weight applied to the positive class in ``BCEWithLogitsLoss``.
        lr: AdamW learning rate.
        threshold: Probability above which a sample is predicted positive.

    Returns:
        dict with keys ``accuracy``, ``precision``, ``recall``, ``f1``,
        ``roc_auc`` and ``best_val_loss``.

    Raises:
        ValueError: If either loader yields no batches.
        RuntimeError: If no epoch produced a checkpoint (e.g. the validation loss
            was NaN), so that a stale file from an earlier run is never loaded.
    """
    if len(train_loader) == 0 or len(test_loader) == 0:
        raise ValueError("train_loader and test_loader must each contain at least one batch")

    os.makedirs(save_dir, exist_ok=True)
    best_model_path = os.path.join(save_dir, "best_model.pt")

    model = TransformerClassifier(model_name).to(DEVICE)
    optimizer = AdamW(model.parameters(), lr=lr)

    criterion = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([pos_weight], dtype=torch.float, device=DEVICE)
    )

    # The schedule length is derived from the same `epochs` value that bounds the loop.
    num_training_steps = len(train_loader) * epochs
    scheduler = get_scheduler("linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

    best_val_loss = float("inf")
    patience_counter = 0
    checkpoint_saved = False

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            input_ids, attention_mask, labels = _batch_to_device(batch)

            logits = model(input_ids, attention_mask)
            loss = criterion(logits, labels)

            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

            epoch_loss += loss.item()

        avg_epoch_loss = epoch_loss / len(train_loader)
        print(f"Epoch {epoch+1} Loss: {avg_epoch_loss:.4f}")

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in test_loader:
                input_ids, attention_mask, labels = _batch_to_device(batch)
                val_loss += criterion(model(input_ids, attention_mask), labels).item()

        avg_val_loss = val_loss / len(test_loader)
        print(f"Validation Loss: {avg_val_loss:.4f}")

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            torch.save(model.state_dict(), best_model_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("⏹️ Early stopping triggered.")
                break

    if not checkpoint_saved:
        # Without this guard a best_model.pt left over from a previous run would be loaded.
        raise RuntimeError(
            f"No checkpoint was written to {best_model_path} during this run; "
            "the validation loss never improved (is it NaN?)."
        )

    # map_location keeps the reload working even if DEVICE differs from the saving device.
    model.load_state_dict(torch.load(best_model_path, map_location=DEVICE))
    model.eval()
    all_preds, all_probs, all_labels = [], [], []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            input_ids, attention_mask, labels = _batch_to_device(batch)

            probs = torch.sigmoid(model(input_ids, attention_mask))
            preds = (probs > threshold).int()

            all_preds.extend(preds.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    prec = precision_score(all_labels, all_preds)
    rec = recall_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_probs)

    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1-score:  {f1:.4f}")
    print(f"ROC AUC:   {auc:.4f}")
    print(classification_report(all_labels, all_preds, zero_division=0))

    cm = confusion_matrix(all_labels, all_preds)
    plt.figure()
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title("Confusion Matrix for HerBERT - The Best Variant")
    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    plt.savefig(os.path.join(save_dir, "confusion_matrix.png"))
    plt.close()

    fpr, tpr, _ = roc_curve(all_labels, all_probs)
    plt.figure()
    plt.plot(fpr, tpr, label=f'AUC = {auc:.4f}')
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    plt.title("ROC Curve for HerBERT - The Best Variant")
    plt.xlabel("False positive rate")
    plt.ylabel("True positive rate")
    plt.legend()
    plt.savefig(os.path.join(save_dir, "roc_curve.png"))
    plt.close()

    return {
        "accuracy": acc,
        "precision": prec,
        "recall": rec,
        "f1": f1,
        "roc_auc": auc,
        "best_val_loss": best_val_loss,
    }

In [8]:
# ▶️ Run
def run_pipeline_herbert(train_path, test_path, max_len=64, save_dir="results/herbert-base-cased"):
    """Run the full HerBERT pipeline for one train/test CSV pair.

    Loads the tokenizer for ``MODEL_NAME``, builds the data loaders, then trains
    and evaluates the classifier.

    Args:
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.
        max_len: Token sequence length forwarded to ``load_data``.
        save_dir: Directory that receives the checkpoint and plots. Every call
            that keeps the default writes to the same directory and overwrites
            earlier results, so pass a distinct directory per dataset variant
            to keep them side by side.

    Returns:
        Whatever ``train_and_evaluate`` returns.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    train_loader, test_loader = load_data(train_path, test_path, tokenizer, max_len)
    return train_and_evaluate(train_loader, test_loader, model_name=MODEL_NAME, save_dir=save_dir)

In [15]:
# Download the best checkpoint through the browser (Google Colab only).
# This cell sits before the cells that call run_pipeline_herbert, so on a fresh
# Restart & Run All the checkpoint does not exist yet; the guard reports that
# instead of aborting the notebook.
from google.colab import files

BEST_MODEL_PATH = '/content/results/herbert-base-cased/best_model.pt'
if os.path.exists(BEST_MODEL_PATH):
    files.download(BEST_MODEL_PATH)
else:
    print(f"Checkpoint not found at {BEST_MODEL_PATH}; run the training cells first.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Train and evaluate on the "variant1_raw" train/test CSV pair.
run_pipeline_herbert(
    train_path="v1_training_variant1_raw.csv",
    test_path="v1_test_variant1_raw.csv",
)

Some weights of the model checkpoint at allegro/herbert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.sso.sso_relationship.bias', 'cls.sso.sso_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Epoch 1: 100%|██████████| 628/628 [02:02<00:00,  5.13it/s]


Epoch 1 Loss: 1.1334
Validation Loss: 1.4547


Epoch 2: 100%|██████████| 628/628 [02:01<00:00,  5.16it/s]


Epoch 2 Loss: 1.0074
Validation Loss: 0.7459


Epoch 3: 100%|██████████| 628/628 [02:01<00:00,  5.15it/s]


Epoch 3 Loss: 0.5963
Validation Loss: 0.6373


Epoch 4: 100%|██████████| 628/628 [02:02<00:00,  5.14it/s]


Epoch 4 Loss: 0.4272
Validation Loss: 0.7924


Epoch 5: 100%|██████████| 628/628 [02:01<00:00,  5.15it/s]


Epoch 5 Loss: 0.3158
Validation Loss: 0.8294
⏹️ Early stopping triggered.


Evaluating: 100%|██████████| 63/63 [00:03<00:00, 18.06it/s]


Accuracy:  0.8850
Precision: 0.5438
Recall:    0.8806
F1-score:  0.6724
ROC AUC:   0.9513
              precision    recall  f1-score   support

         0.0       0.98      0.89      0.93       866
         1.0       0.54      0.88      0.67       134

    accuracy                           0.89      1000
   macro avg       0.76      0.88      0.80      1000
weighted avg       0.92      0.89      0.90      1000



In [10]:
# Train and evaluate on the "variant2_light" train/test CSV pair.
run_pipeline_herbert(
    train_path="v1_training_variant2_light.csv",
    test_path="v1_test_variant2_light.csv",
)

Some weights of the model checkpoint at allegro/herbert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.sso.sso_relationship.bias', 'cls.sso.sso_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Epoch 1: 100%|██████████| 628/628 [01:56<00:00,  5.38it/s]


Epoch 1 Loss: 0.7648
Validation Loss: 0.9408


Epoch 2: 100%|██████████| 628/628 [01:57<00:00,  5.35it/s]


Epoch 2 Loss: 0.4629
Validation Loss: 0.8831


Epoch 3: 100%|██████████| 628/628 [01:57<00:00,  5.36it/s]


Epoch 3 Loss: 0.3322
Validation Loss: 1.4833


Epoch 4: 100%|██████████| 628/628 [01:56<00:00,  5.37it/s]


Epoch 4 Loss: 0.1978
Validation Loss: 2.3668
⏹️ Early stopping triggered.


Evaluating: 100%|██████████| 63/63 [00:03<00:00, 18.72it/s]


Accuracy:  0.8910
Precision: 0.5668
Recall:    0.7910
F1-score:  0.6604
ROC AUC:   0.9291
              precision    recall  f1-score   support

         0.0       0.97      0.91      0.94       866
         1.0       0.57      0.79      0.66       134

    accuracy                           0.89      1000
   macro avg       0.77      0.85      0.80      1000
weighted avg       0.91      0.89      0.90      1000



In [14]:
# Train and evaluate on the "variant3_full" train/test CSV pair.
run_pipeline_herbert(
    train_path="v1_training_variant3_full.csv",
    test_path="v1_test_variant3_full.csv",
)

Some weights of the model checkpoint at allegro/herbert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.sso.sso_relationship.bias', 'cls.sso.sso_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Epoch 1: 100%|██████████| 626/626 [01:55<00:00,  5.41it/s]


Epoch 1 Loss: 0.9047
Validation Loss: 0.9908


Epoch 2: 100%|██████████| 626/626 [01:56<00:00,  5.38it/s]


Epoch 2 Loss: 0.6448
Validation Loss: 0.9852


Epoch 3: 100%|██████████| 626/626 [01:56<00:00,  5.38it/s]


Epoch 3 Loss: 0.4640
Validation Loss: 1.2749


Epoch 4: 100%|██████████| 626/626 [01:56<00:00,  5.38it/s]


Epoch 4 Loss: 0.3153
Validation Loss: 1.6984
⏹️ Early stopping triggered.


Evaluating: 100%|██████████| 63/63 [00:03<00:00, 18.46it/s]


Accuracy:  0.8579
Precision: 0.4780
Recall:    0.6493
F1-score:  0.5506
ROC AUC:   0.8841
              precision    recall  f1-score   support

         0.0       0.94      0.89      0.92       865
         1.0       0.48      0.65      0.55       134

    accuracy                           0.86       999
   macro avg       0.71      0.77      0.73       999
weighted avg       0.88      0.86      0.87       999

