# Zero-Shot Classification: mDeBERTa
## Model: MoritzLaurer/mDeBERTa-v3-base-mnli-xnli

NLI-basierte Zero-Shot-Klassifikation auf dem Volltext der Artikel.

**Voraussetzungen:** GPU-Runtime aktiviert, `HF_TOKEN` in Colab Secrets hinterlegt.

In [None]:
# === SETUP (identical in every notebook) ===
import os, sys

# Clone the project repository on first run, otherwise pull the latest changes
REPO = "/content/news_articles_classification_thesis"
if not os.path.exists(REPO):
    !git clone https://github.com/ZorbeyOezcan/news_articles_classification_thesis.git {REPO}
else:
    !cd {REPO} && git pull -q

# Dependencies
!pip install -q transformers datasets huggingface_hub scikit-learn matplotlib seaborn tqdm pandas

# Mount Google Drive (persistent reports)
from google.colab import drive
drive.mount("/content/drive", force_remount=False)

# Make pipeline_utils importable by putting its directory first on sys.path
PIPELINE_DIR = f"{REPO}/Python/classification_pipeline"
if PIPELINE_DIR not in sys.path:
    sys.path.insert(0, PIPELINE_DIR)

import importlib
import pipeline_utils as pu
# NOTE(review): presumably reloaded so that freshly pulled changes are picked
# up when the module was already imported in this session — confirm
importlib.reload(pu)

# Hugging Face login using the HF_TOKEN entry from Colab's userdata store
from huggingface_hub import login
from google.colab import userdata
login(token=userdata.get("HF_TOKEN"))

print(f"Reports-Ordner: {pu.REPORTS_DIR}")
print("Setup abgeschlossen.")

In [None]:
# ===== MODEL CONFIG =====
MODEL_ID = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"
MODEL_SHORT_NAME = "mdeberta"
MODEL_TYPE = "zero-shot"  # one of: "zero-shot", "few-shot", "fine-tuned"

# Split to evaluate on: "test" or "eval"
EVALUATE_ON = "test"

# Classification batch size (smaller = less VRAM, slower)
BATCH_SIZE = 8

# ===== NLI CONFIG =====
# Hypothesis template used for zero-shot NLI
HYPOTHESIS_TEMPLATE = "Dieser Text handelt von {}."

# Candidate labels: None means "use the labels from the dataset";
# alternatively provide a custom list, e.g. ["Label A", "Label B", ...]
CANDIDATE_LABELS = None

# ===== MODEL INFO (for the report) =====
MODEL_INFO = dict(
    huggingface_id=MODEL_ID,
    language="Multilingual (100+ Sprachen, inkl. Deutsch)",
    max_tokens=512,
    parameters="278M",
    notes="mDeBERTa unterstützt KEIN FP16 (NaN). Immer FP32 verwenden.",
)

In [None]:
# Load the data with a percentage-based split
data = pu.load_data(split_mode="percentage", eval_fraction=0.2, random_seed=42, load_raw=False)

# Select the split chosen in the config
eval_df = data[EVALUATE_ON]

# Fall back to the values of the dataset's label mapping when no custom
# candidate labels were configured
CANDIDATE_LABELS = (
    [*data["label_mapping"].values()]
    if CANDIDATE_LABELS is None
    else CANDIDATE_LABELS
)

print(
    f"\nEvaluiere auf '{EVALUATE_ON}' Split: {len(eval_df)} Artikel",
    f"Labels: {len(CANDIDATE_LABELS)}",
    sep="\n",
)

In [None]:
# Load the model
import torch
from transformers import pipeline as hf_pipeline

# Use device index 0 when CUDA is available, otherwise -1
if torch.cuda.is_available():
    device = 0
else:
    device = -1

classifier = hf_pipeline("zero-shot-classification", model=MODEL_ID, device=device)

# Echo what was loaded and where it runs
print(f"Modell geladen: {MODEL_ID}")
print(f"Device: {'GPU' if device == 0 else 'CPU'}")
print(f"Tokenizer max length: {classifier.tokenizer.model_max_length}")

In [None]:
# Klassifikation (Volltext)
from tqdm.auto import tqdm

def classify_batch(texts, batch_size=BATCH_SIZE, fallback_label="Andere"):
    """Classify texts with the zero-shot pipeline, showing a progress bar.

    Uses the notebook globals ``classifier``, ``CANDIDATE_LABELS`` and
    ``HYPOTHESIS_TEMPLATE``.

    Args:
        texts: Sequence of article texts. Entries that are not strings or
            that contain only whitespace are not sent to the model.
        batch_size: Number of texts handed to the pipeline per call. It is
            also forwarded as the pipeline's ``batch_size`` argument;
            without that the pipeline runs at its own default batch size
            and this setting would only affect progress-bar granularity.
        fallback_label: Label assigned to texts that were skipped as empty.

    Returns:
        A list with one predicted label per input text, in input order.
    """
    predictions = [None] * len(texts)

    # Remember the original positions of the texts that go to the model so
    # the results can be written back in input order. Non-string entries
    # (e.g. None/NaN) are treated like empty texts instead of crashing.
    non_empty_indices = [
        i for i, t in enumerate(texts) if isinstance(t, str) and t.strip()
    ]

    for start in tqdm(range(0, len(non_empty_indices), batch_size), desc="Classifying"):
        batch_indices = non_empty_indices[start:start + batch_size]
        batch_texts = [texts[i] for i in batch_indices]

        results = classifier(
            batch_texts,
            candidate_labels=CANDIDATE_LABELS,
            hypothesis_template=HYPOTHESIS_TEMPLATE,
            multi_label=False,
            batch_size=batch_size,
        )
        # Normalise a bare result dict to a one-element list
        if isinstance(results, dict):
            results = [results]

        # The first entry of "labels" is taken as the prediction
        for idx, r in zip(batch_indices, results):
            predictions[idx] = r["labels"][0]

    # Everything still None was skipped above and receives the fallback label
    empty_count = predictions.count(None)
    if empty_count > 0:
        print(f"  {empty_count} leere Texte -> '{fallback_label}'")
    return [p if p is not None else fallback_label for p in predictions]


# Gold labels and model inputs; missing article texts become empty strings
true_labels = eval_df["label"].tolist()
texts = eval_df["text"].fillna("").tolist()

# Only the classification call runs inside the timer context
timer = pu.ExperimentTimer()
with timer:
    predictions = classify_batch(texts)

# Report duration and throughput
print(
    f"\nKlassifikation abgeschlossen: {timer.duration_formatted}",
    f"Durchsatz: {timer.articles_per_second(len(texts)):.2f} Artikel/Sekunde",
    sep="\n",
)

In [None]:
# Evaluation: compare the predictions with the gold labels
_eval_options = {"labels": CANDIDATE_LABELS, "experiment_name": EVALUATE_ON}
metrics = pu.evaluate(true_labels, predictions, **_eval_options)

# Print the metrics under a heading that names the evaluated split
_metrics_heading = f"Zero-Shot mDeBERTa — {EVALUATE_ON} Split"
pu.print_metrics(metrics, _metrics_heading)

In [None]:
# Confusion matrix, titled with the evaluated split
_cm_title = f"Zero-Shot mDeBERTa ({EVALUATE_ON})"
pu.plot_confusion_matrix(metrics, title=_cm_title)

In [None]:
# Generate the report
# Free-text note passed to the report as experiment_notes
_experiment_notes = (
    "Zero-Shot NLI-Klassifikation auf Volltext. "
    "Texte werden automatisch auf 512 Tokens gekürzt (inverted pyramid)."
)

# Everything handed to the report generator, in the original keyword order
_report_kwargs = {
    "model_name": f"{MODEL_SHORT_NAME}_zeroshot",
    "model_type": MODEL_TYPE,
    "metrics": metrics,
    "timer": timer,
    "model_info": MODEL_INFO,
    "candidate_labels": CANDIDATE_LABELS,
    "hypothesis_template": HYPOTHESIS_TEMPLATE,
    "split_config": data["split_config"],
    "label_mapping": data["label_mapping"],
    "experiment_notes": _experiment_notes,
}
report_path = pu.generate_report(**_report_kwargs)

print(f"\nReport gespeichert: {report_path}")

In [None]:
# Summary: key figures of this run framed by two 70-character rules
_summary_lines = [
    "=" * 70,
    f"  Model:           {MODEL_ID}",
    f"  Type:            {MODEL_TYPE}",
    f"  Split:           {EVALUATE_ON} ({len(eval_df)} Artikel)",
    f"  F1 Macro:        {metrics['f1_macro']:.4f}",
    f"  F1 Weighted:     {metrics['f1_weighted']:.4f}",
    f"  Accuracy:        {metrics['accuracy']:.4f}",
    f"  Dauer:           {timer.duration_formatted}",
    f"  Artikel/Sek:     {timer.articles_per_second(len(eval_df)):.2f}",
    "=" * 70,
]
print(*_summary_lines, sep="\n")