In [33]:
# Imports & Settings
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from torch.optim import AdamW
from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForSequenceClassification,
    get_linear_schedule_with_warmup,
)

# Reproducibility: seed Python, NumPy and PyTorch (CPU + all CUDA devices) with one value
seed = 42
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_rng(seed)

# Hyperparameters
data_path = "Cleaned_Tickets.csv"           # input CSV read by the data-loading cell
pretrained = "microsoft/deberta-v3-large"   # checkpoint name passed to from_pretrained
batch_size = 16
max_length = 256                            # token length used for truncation / padding
lr = 2e-5
num_epochs = 10
patience = 2                                # default patience for EarlyStopping

# Prefer the GPU when one is visible, otherwise fall back to CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [34]:
# %% Load & Prepare Data
# Read the CSV at `data_path` into a DataFrame. Later cells read its
# "subject", "body", "tag_1", "tag_2" and "tag_3" columns.
df = pd.read_csv(data_path)

In [35]:
# Combine subject + body. Missing values are treated as empty strings so that
# "text" is always a str: NaN + " " + str would otherwise yield NaN, which the
# tokenizer cannot encode. The outer strip removes the dangling space left
# when one of the two parts is empty.
df["text"] = (
    df["subject"].fillna("").astype(str).str.strip()
    + " "
    + df["body"].fillna("").astype(str).str.strip()
).str.strip()

# Build the per-ticket list of tags, skipping empty tag slots: a NaN mixed in
# with string tags would make sorted() below raise TypeError.
df["tags_list"] = pd.Series(
    [
        [tag for tag in row if pd.notna(tag)]
        for row in df[["tag_1", "tag_2", "tag_3"]].values.tolist()
    ],
    index=df.index,
)
# Sorted vocabulary of every distinct tag seen in any slot
all_tags       = sorted({t for tags in df["tags_list"] for t in tags})

In [36]:
# Binarize tags
# Passing classes=all_tags fixes the label ordering of the binarizer to the
# order of `all_tags`, so column j of `y` corresponds to all_tags[j].
mlb = MultiLabelBinarizer(classes=all_tags)
y = mlb.fit_transform(df["tags_list"])    # shape = [N, num_tags]

In [37]:
# Train / Val / Test split
texts = df["text"].tolist()

# First cut: hold out 10% of all rows as the test set
X_temp, X_test, y_temp, y_test = train_test_split(
    texts,
    y,
    test_size=0.10,
    random_state=seed,
    shuffle=True,
)

# Second cut: hold out 10% of the remaining rows as the validation set
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.10,
    random_state=seed,
    shuffle=True,
)

In [38]:
# %% Tokenizer & Dataset
# Load the tokenizer from the same checkpoint name as the model (`pretrained`)
# so that changing the hyperparameter can never leave the two out of sync.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained,
    use_fast=False,
    do_lower_case=True   # if you want all lowercase inputs
)

class MultiLabelDataset(Dataset):
    """Dataset that tokenizes one text per lookup and pairs it with its label row.

    Args:
        texts: indexable collection of input strings.
        labels: indexable collection of label rows, aligned with `texts`.
        tokenizer: callable invoked as
            `tokenizer(text, truncation=..., padding=..., max_length=..., return_tensors="pt")`
            and returning a mapping of tensors.
        max_length: length every encoding is truncated / padded to.
    """

    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts, self.labels = texts, labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Number of examples, taken from the length of `texts`."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenizer outputs for example `idx` plus a float `labels` tensor."""
        encoded = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )

        sample = {}
        for key, tensor in encoded.items():
            # return_tensors="pt" adds a leading batch axis of size 1; drop it
            # so the DataLoader can stack examples into a batch itself.
            sample[key] = tensor.squeeze(0)

        sample["labels"] = torch.tensor(self.labels[idx], dtype=torch.float)
        return sample

# Wrap each split in a dataset sharing the same tokenizer and max_length
train_ds, val_ds, test_ds = (
    MultiLabelDataset(split_texts, split_labels, tokenizer, max_length)
    for split_texts, split_labels in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

# Only the training loader shuffles; validation and test keep dataset order
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=0)
val_loader, test_loader = (
    DataLoader(split_ds, batch_size=batch_size, shuffle=False, num_workers=0)
    for split_ds in (val_ds, test_ds)
)

In [39]:
# %% Model, Loss, Optimizer, Scheduler
# One output logit per tag, configured as a multi-label problem
config = AutoConfig.from_pretrained(
    pretrained,
    num_labels=len(all_tags),
    problem_type="multi_label_classification",
)
model = AutoModelForSequenceClassification.from_pretrained(
    pretrained,
    config=config,
    weights_only=True,
)
model.to(device)

# The loss takes raw logits; the sigmoid is applied inside BCEWithLogitsLoss
criterion = nn.BCEWithLogitsLoss()
optimizer = AdamW(model.parameters(), lr=lr)

# The scheduler is stepped once per batch, so its horizon is batches * epochs;
# the first 10% of those steps are warmup.
total_steps = len(train_loader) * num_epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=int(0.1 * total_steps),
    num_training_steps=total_steps,
)

Some weights of DebertaV2ForSequenceClassification were not initialized from the model checkpoint at microsoft/deberta-v3-large and are newly initialized: ['classifier.bias', 'classifier.weight', 'pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [40]:
# %% Early Stopping
class EarlyStopping:
    """Track a monitored metric and flag when it has stopped improving.

    Args:
        patience: number of consecutive non-improving `step` calls after which
            `should_stop` is set. Defaults to the module-level `patience`.
        mode: "max" means larger is better; any other value means smaller is better.
        delta: margin by which the metric must beat the best value to count as
            an improvement.
    """

    def __init__(self, patience=patience, mode="max", delta=0.0):
        self.patience = patience
        self.mode = mode
        self.delta = delta
        self.best = None          # best metric seen so far (None until the first step)
        self.counter = 0          # consecutive steps without improvement
        self.should_stop = False  # set to True once patience is exhausted

    def step(self, metric):
        """Record one new metric value and update `best`, `counter` and `should_stop`."""
        if self.best is None:
            # The first observation always counts as an improvement
            improved = True
        elif self.mode == "max":
            improved = metric > self.best + self.delta
        else:
            improved = metric < self.best - self.delta

        if improved:
            self.best = metric
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.should_stop = True

In [41]:
# %% Training & Evaluation Functions
def train_epoch():
    """Run one training pass over `train_loader` and return the mean batch loss.

    Uses the module-level `model`, `criterion`, `optimizer`, `scheduler` and
    `device`. The optimizer and the scheduler are both stepped once per batch.
    """
    model.train()
    running_loss = 0.0

    for batch in train_loader:
        ids     = batch["input_ids"].to(device)
        mask    = batch["attention_mask"].to(device)
        targets = batch["labels"].to(device)

        optimizer.zero_grad()
        logits     = model(input_ids=ids, attention_mask=mask).logits
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()
        scheduler.step()

        running_loss += batch_loss.item()

    return running_loss / len(train_loader)

def eval_model(loader):
    """Evaluate the module-level `model` on `loader` without gradient tracking.

    A tag is predicted positive when its sigmoid probability exceeds 0.5.
    Precision, recall and F1 are micro-averaged over all tags.

    Args:
        loader: iterable of batches with "input_ids", "attention_mask" and
            "labels" entries.

    Returns:
        dict with keys "loss" (mean batch loss), "precision", "recall", "f1",
        and the stacked "y_true" / "y_pred" arrays for the whole loader.
    """
    model.eval()
    total_loss = 0.0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in loader:
            input_ids      = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels         = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits  = outputs.logits
            loss    = criterion(logits, labels)
            total_loss += loss.item()

            probs = torch.sigmoid(logits).cpu().numpy()
            preds = (probs > 0.5).astype(int)

            all_preds.append(preds)
            all_labels.append(labels.cpu().numpy())

    avg_loss = total_loss / len(loader)
    y_true   = np.vstack(all_labels)
    y_pred   = np.vstack(all_preds)
    # zero_division=0 keeps the score at 0 (the value sklearn already falls
    # back to) but without an UndefinedMetricWarning when nothing is predicted
    # positive, matching the classification_report call used for the test set.
    precision = precision_score(y_true, y_pred, average="micro", zero_division=0)
    recall    = recall_score(y_true, y_pred, average="micro", zero_division=0)
    f1        = f1_score(y_true, y_pred, average="micro", zero_division=0)
    return {
        "loss":      avg_loss,
        "precision": precision,
        "recall":    recall,
        "f1":        f1,
        "y_true":    y_true,
        "y_pred":    y_pred
    }

In [42]:
# %% Main Training Loop
early_stopper = EarlyStopping()
best_f1       = float("-inf")
checkpoint    = "best_pretrained_model.pt"

for epoch in range(1, num_epochs + 1):
    print(f"\n→ Epoch {epoch}", flush=True)
    tr_loss = train_epoch()
    val_res = eval_model(val_loader)
    val_f1  = val_res["f1"]

    # Save the weights whenever validation F1 sets a new record
    if val_f1 > best_f1:
        best_f1 = val_f1
        torch.save(model.state_dict(), checkpoint)
        print(f"  New best Val F1={best_f1:.4f} → checkpoint saved.")

    epoch_summary = (
        f"  Train Loss: {tr_loss:.4f} | "
        f"Val Loss: {val_res['loss']:.4f} | "
        f"Val F1: {val_f1:.4f} | "
        f"P: {val_res['precision']:.4f} | R: {val_res['recall']:.4f}"
    )
    print(epoch_summary)

    # Stop once validation F1 has failed to improve for `patience` epochs in a row
    early_stopper.step(val_f1)
    if early_stopper.should_stop:
        print("Early stopping triggered.")
        break


→ Epoch 1


KeyboardInterrupt: 

In [None]:
# %% Final Test Evaluation
# Restore the weights saved by the training loop. The checkpoint is a plain
# state dict, so weights_only=True is sufficient and restricts unpickling to
# tensor data instead of executing arbitrary pickled objects.
# NOTE(review): if training was interrupted before any checkpoint was written,
# this loads whatever file an earlier run left at `checkpoint` (or fails).
model.load_state_dict(torch.load(checkpoint, map_location=device, weights_only=True))
test_res = eval_model(test_loader)

print("\n--- Test Set Performance ---")
print(f"Precision: {test_res['precision']:.4f}")
print(f"Recall   : {test_res['recall']:.4f}")
print(f"Micro F1 : {test_res['f1']:.4f}\n")

# Per-tag breakdown; zero_division=0 reports 0 for tags with no predictions
# or no true samples instead of emitting warnings.
print("Detailed per-tag report:")
print(classification_report(
    test_res["y_true"],
    test_res["y_pred"],
    target_names=all_tags,
    zero_division=0
))