In [None]:
from datasets import load_dataset

# Load the customer-support ticket dataset and take a first look at its structure.
ds = load_dataset("Tobi-Bueck/customer-support-tickets")  # downloads once then caches
print(ds)                          # shows splits/sizes
print(ds["train"].column_names)    # view columns
ds["train"][0]                     # inspect one example (last expression -> rendered as the cell output)

README.md: 0.00B [00:00, ?B/s]

aa_dataset-tickets-multi-lang-5-2-50-ver(…):   0%|          | 0.00/26.0M [00:00<?, ?B/s]

dataset-tickets-multi-lang-4-20k.csv:   0%|          | 0.00/18.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/48587 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['subject', 'body', 'answer', 'type', 'queue', 'priority', 'language', 'version', 'tag_1', 'tag_2', 'tag_3', 'tag_4', 'tag_5', 'tag_6', 'tag_7', 'tag_8'],
        num_rows: 48587
    })
})
['subject', 'body', 'answer', 'type', 'queue', 'priority', 'language', 'version', 'tag_1', 'tag_2', 'tag_3', 'tag_4', 'tag_5', 'tag_6', 'tag_7', 'tag_8']


{'subject': 'Wesentlicher Sicherheitsvorfall',
 'body': 'Sehr geehrtes Support-Team,\\n\\nich möchte einen gravierenden Sicherheitsvorfall melden, der gegenwärtig mehrere Komponenten unserer Infrastruktur betrifft. Betroffene Geräte umfassen Projektoren, Bildschirme und Speicherlösungen auf Cloud-Plattformen. Der Grund für die Annahme ist, dass der Vorfall eine potenzielle Datenverletzung im Zusammenhang mit einer Cyberattacke darstellt, was ein erhebliches Risiko für sensible Informationen und den laufenden Geschäftsbetrieb unserer Organisation bedeutet.\\n\\nUnsere initialen Untersuchungen haben ungewöhnliche Aktivitäten und Abweichungen bei den Geräten ergeben. Trotz der Umsetzung unserer standardisierten Behebungs- und Eindämmungsmaßnahmen konnte die Bedrohung bislang nicht vollständig eliminiert.',
 'answer': 'Vielen Dank für die Meldung des kritischen Sicherheitsvorfalls und die Bereitstellung der Übersicht über die betroffenen Geräte sowie der ergriffenen ersten Maßnahmen. Wir e

In [None]:
def preview(idx=0):
    """Print a short, human-readable summary of one ticket from ``ds["train"]``.

    Args:
        idx: Position of the ticket in the training split (defaults to the first row).

    Prints the subject, the first 400 characters of the body, the
    language/priority/type fields and the list of non-empty tags. Returns None.
    """
    empty_markers = (None, "", "null")
    row = ds["train"][idx]

    # tag_1 .. tag_8 are separate columns; keep only the slots that are actually filled.
    tags = []
    for slot in range(1, 9):
        value = row.get(f"tag_{slot}")
        if value not in empty_markers:
            tags.append(value)

    print("subject:", row["subject"])
    body_excerpt = (row["body"] or "")[:400]  # body may be missing -> treat as empty text
    print("body   :", body_excerpt)
    print("language:", row["language"], "| priority:", row["priority"], "| type:", row["type"])
    print("tags   :", tags)

# Eyeball the first three training tickets.
for i in range(3):
    preview(i)


subject: Wesentlicher Sicherheitsvorfall
body   : Sehr geehrtes Support-Team,\n\nich möchte einen gravierenden Sicherheitsvorfall melden, der gegenwärtig mehrere Komponenten unserer Infrastruktur betrifft. Betroffene Geräte umfassen Projektoren, Bildschirme und Speicherlösungen auf Cloud-Plattformen. Der Grund für die Annahme ist, dass der Vorfall eine potenzielle Datenverletzung im Zusammenhang mit einer Cyberattacke darstellt, was ein erheblich
language: de | priority: high | type: Incident
tags   : ['Security', 'Outage', 'Disruption', 'Data Breach']
subject: Account Disruption
body   : Dear Customer Support Team,\n\nI am writing to report a significant problem with the centralized account management portal, which currently appears to be offline. This outage is blocking access to account settings, leading to substantial inconvenience. I have attempted to log in multiple times using different browsers and devices, but the issue persists.\n\nCould you please provide an update on th
lan

In [None]:
import pandas as pd

# Work on a pandas copy of the full (all-language) training split.
df = ds["train"].to_pandas()
# Every column whose name starts with "tag_" holds one tag slot.
tag_cols = [c for c in df.columns if c.startswith("tag_")]
# Flatten all tag columns, remove empties/"null"
labels = sorted(set(x for x in df[tag_cols].values.ravel() if x not in (None, "", "null")))
print("Number of unique tags:", len(labels))
labels[:30]  # peek first 30

Number of unique tags: 2150


['2019',
 'AES',
 'AI',
 'API',
 'API Integration',
 'AR',
 'AWS',
 'Abnormal',
 'Abonnement',
 'Abrechnungssystem',
 'Access',
 'Access Control',
 'Access Controls',
 'Access Difficulty',
 'Access Issue',
 'Access Log',
 'Access Management',
 'Access Restriction',
 'Access-Control',
 'AccessControl',
 'AccessControls',
 'AccessManagement',
 'Access_Control',
 'Accessibility',
 'Accessory',
 'Account',
 'Accounting',
 'Accrual',
 'Accrued',
 'Accuracy']

In [None]:
from datasets import DatasetDict

# 1. Keep only English tickets
# Rows are kept when their "language" field equals "en"; `ds` itself is left untouched.
ds_en = ds["train"].filter(lambda x: x["language"] == "en")


Filter:   0%|          | 0/48587 [00:00<?, ? examples/s]

In [None]:
# 2. Build a text field (subject + body)
def build_text(example):
    """Add a ``"text"`` field made of the ticket subject and body.

    Args:
        example: A single ticket row (dict-like) with ``"subject"`` and ``"body"`` keys.

    Returns:
        The same ``example`` object, now carrying ``example["text"]``.
        A missing/empty subject or body contributes an empty string, so the
        separating space is always present.
    """
    parts = [example[field] or "" for field in ("subject", "body")]
    example["text"] = " ".join(parts)
    return example

# Apply build_text to every English ticket so each row gains a "text" column.
ds_en = ds_en.map(build_text)


Map:   0%|          | 0/28261 [00:00<?, ? examples/s]

In [None]:
# 3. Collect non-null tags
# Names of the per-slot tag columns ("tag_...") currently present on ds_en.
# Note: this rebinds the notebook-level name `tag_cols`, which collect_tags reads.
tag_cols = [c for c in ds_en.column_names if c.startswith("tag_")]

def collect_tags(example):
    """Gather the filled tag slots of one ticket into a single ``"tags"`` list.

    Args:
        example: A ticket row (dict-like) containing every column named in the
            notebook-level ``tag_cols`` list.

    Returns:
        The same ``example`` object with ``example["tags"]`` set to the values of
        the tag columns, in column order, skipping ``None``, ``""`` and ``"null"``.
    """
    missing = (None, "", "null")
    kept = []
    for column in tag_cols:
        value = example[column]
        if value not in missing:
            kept.append(value)
    example["tags"] = kept
    return example

# Apply collect_tags to every English ticket so each row gains a "tags" list column.
ds_en = ds_en.map(collect_tags)


Map:   0%|          | 0/28261 [00:00<?, ? examples/s]

In [None]:
# 4. Keep only text + tags
# Everything except the two derived columns is dropped, including the original tag_* columns.
ds_en = ds_en.remove_columns([c for c in ds_en.column_names if c not in ["text", "tags"]])

# 5. Train/val/test split
# First hold out 20 %, then cut that hold-out in half -> 80 % train / 10 % validation / 10 % test.
# Both splits use a fixed seed (42).
ds_split = ds_en.train_test_split(test_size=0.2, seed=42)
test_valid = ds_split["test"].train_test_split(test_size=0.5, seed=42)
ds_clean = DatasetDict({
    "train": ds_split["train"],
    "validation": test_valid["train"],
    "test": test_valid["test"]
})

ds_clean

DatasetDict({
    train: Dataset({
        features: ['text', 'tags'],
        num_rows: 22608
    })
    validation: Dataset({
        features: ['text', 'tags'],
        num_rows: 2826
    })
    test: Dataset({
        features: ['text', 'tags'],
        num_rows: 2827
    })
})

In [None]:
# Collect all unique tags
# Only the training split is scanned, so tags that occur solely in validation/test are not included.
all_tags = sorted({tag for tags in ds_clean["train"]["tags"] for tag in tags})
len(all_tags), all_tags[:30]  # number of tags + peek first 30

(1433,
 ['2019',
  'AI',
  'API',
  'API Integration',
  'AWS',
  'Access',
  'Access Control',
  'Access Controls',
  'Access Log',
  'Access Management',
  'Access Restriction',
  'Access-Control',
  'AccessControl',
  'AccessControls',
  'AccessManagement',
  'Accessibility',
  'Accessory',
  'Account',
  'Accounting',
  'Accrued',
  'Accuracy',
  'Action',
  'ActionTeam',
  'Activation',
  'ActiveCampaign',
  'Ad',
  'Ad Spend',
  'Ad Spending',
  'AdBlocker',
  'AdBlocking'])

In [None]:
from collections import Counter

# Count how often each tag occurs in the training split and keep the 50 most frequent ones
# as the candidate label set.
tag_counter = Counter(tag for tags in ds_clean["train"]["tags"] for tag in tags)
top_tags = [t for t, _ in tag_counter.most_common(50)]
print(top_tags)


['Tech Support', 'IT', 'Performance', 'Feedback', 'Documentation', 'Bug', 'Security', 'Feature', 'Disruption', 'Outage', 'Technical', 'Network', 'Product', 'Sales', 'Resolution', 'Guidance', 'Recovery', 'Billing', 'Crash', 'Hardware', 'Integration', 'Payment', 'Marketing', 'Maintenance', 'Customer', 'Support', 'Software', 'Incident', 'Account', 'Strategy', 'Virus', 'Breach', 'Alert', 'Compliance', 'Analytics', 'Follow-Up', 'Refund', 'Campaign', 'Investigation', 'Login', 'Issue', 'Lead', 'Training', 'Encryption', 'Notification', 'Troubleshooting', 'Compatibility', 'Server', 'Assistance', 'Healthcare']


In [None]:
from collections import Counter

# NOTE(review): this cell repeats the computation of the previous cell (same counter, same
# top-50 list); it rebinds `tag_counter` and `top_tags` to equal values and could be removed.
tag_counter = Counter(tag for tags in ds_clean["train"]["tags"] for tag in tags)
top_tags = [t for t, _ in tag_counter.most_common(50)]
print(top_tags)


['Tech Support', 'IT', 'Performance', 'Feedback', 'Documentation', 'Bug', 'Security', 'Feature', 'Disruption', 'Outage', 'Technical', 'Network', 'Product', 'Sales', 'Resolution', 'Guidance', 'Recovery', 'Billing', 'Crash', 'Hardware', 'Integration', 'Payment', 'Marketing', 'Maintenance', 'Customer', 'Support', 'Software', 'Incident', 'Account', 'Strategy', 'Virus', 'Breach', 'Alert', 'Compliance', 'Analytics', 'Follow-Up', 'Refund', 'Campaign', 'Investigation', 'Login', 'Issue', 'Lead', 'Training', 'Encryption', 'Notification', 'Troubleshooting', 'Compatibility', 'Server', 'Assistance', 'Healthcare']


In [None]:
from transformers import pipeline

# Load a zero-shot classification model
# `classifier` is the notebook-level pipeline object that the zero-shot predict_top3 helper calls.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Function to predict top-3 tags
def predict_top3(text, candidate_labels, top_k=3):
    """Return the highest-scoring zero-shot labels for one ticket.

    Args:
        text: Ticket text to classify.
        candidate_labels: Labels the notebook-level ``classifier`` pipeline may choose from.
        top_k: Maximum number of labels to return (default 3).

    Returns:
        A list with at most ``top_k`` labels, taken from the front of the
        pipeline's ``"labels"`` list (the zero-shot pipeline orders that list
        by descending score). When fewer than ``top_k`` candidates are
        supplied, all of them are returned instead of raising ``IndexError``.

    Note:
        Later cells in this notebook define other functions with the same name
        and a different signature; after those cells run, this definition is
        no longer the one bound to ``predict_top3``.
    """
    result = classifier(text, candidate_labels, multi_label=True)
    # Slice instead of indexing 0..2 so short candidate lists do not blow up.
    return list(result["labels"][:max(top_k, 0)])

# Correct way to iterate over first 20 examples
# Rows are fetched one at a time by integer index; each ticket is scored against all of `top_tags`.
preds, trues = [], []
for i in range(20):
    ex = ds_clean["validation"][i]
    pred = predict_top3(ex["text"], top_tags)
    preds.append(pred)
    trues.append(ex["tags"])

# Show first 5
# Only the first 120 characters of each ticket are printed, next to its true and predicted tags.
for i in range(5):
    print("Ticket:", ds_clean["validation"][i]["text"][:120])
    print("True:", trues[i])
    print("Pred:", preds[i])
    print("----")

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Ticket: Concern Regarding Medical Data Encryption There has been an unexpected failure in the medical data encryption process. I
True: ['Technical', 'Security', 'Bug', 'Crash', 'Documentation', 'Resolution', 'Feedback']
Pred: ['Encryption', 'Assistance', 'Healthcare']
----
Ticket: Reported Crash in Digital Campaign Management System Customer Support reports an unexpected failure of the digital campa
True: ['Crash', 'Performance', 'Outage', 'Disruption', 'Recovery', 'Software']
Pred: ['Crash', 'Customer', 'Assistance']
----
Ticket: Report on Inaccurate Digital Campaign Metrics Dear Customer Support,\n\nI am contacting you to report an issue with my d
True: ['Technical', 'Customer', 'Bug', 'Integration', 'DigitalCampaign', 'Metrics', 'Zapier']
Pred: ['Issue', 'Support', 'Customer']
----
Ticket: Assistance Requested Customer support, could you please offer detailed digital strategy options to effectively boost bra
True: ['Customer Support', 'Feedback', 'Sales', 'Lead', 'IT', 'Tech Support

In [None]:
# Try only 5 examples, shorter text, and 20 labels
# `small_tags` is the 20 most frequent tags (top_tags is ordered by frequency).
small_tags = top_tags[:20]

for i in range(5):
    ex = ds_clean["validation"][i]
    # Only the first 300 characters of the ticket are sent to the classifier.
    pred = predict_top3(ex["text"][:300], small_tags)
    print("True:", ex["tags"])
    print("Pred:", pred)
    print("----")


True: ['Technical', 'Security', 'Bug', 'Crash', 'Documentation', 'Resolution', 'Feedback']
Pred: ['IT', 'Product', 'Feature']
----
True: ['Crash', 'Performance', 'Outage', 'Disruption', 'Recovery', 'Software']
Pred: ['Crash', 'IT', 'Disruption']
----
True: ['Technical', 'Customer', 'Bug', 'Integration', 'DigitalCampaign', 'Metrics', 'Zapier']
Pred: ['Feature', 'Feedback', 'Product']
----
True: ['Customer Support', 'Feedback', 'Sales', 'Lead', 'IT', 'Tech Support']
Pred: ['Guidance', 'Sales', 'Feedback']
----
True: ['Outage', 'Network', 'Performance', 'IT', 'Tech Support']
Pred: ['Outage', 'Disruption', 'Technical']
----


In [None]:
# Rebind `classifier` to a different zero-shot checkpoint (valhalla/distilbart-mnli-12-1).
# NOTE(review): no later cell in the visible notebook calls `classifier` or the zero-shot
# predict_top3 that reads it, so this pipeline appears to be loaded but never used — confirm.
classifier = pipeline("zero-shot-classification",
                      model="valhalla/distilbart-mnli-12-1")


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/890M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/890M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
from collections import Counter

# Recount tag frequencies on the training split and keep the 20 most frequent tags;
# `top20_tags` is the candidate list given to the few-shot prompt further down.
tag_counter = Counter(tag for tags in ds_clean["train"]["tags"] for tag in tags)
top20_tags = [t for t,_ in tag_counter.most_common(20)]
print(top20_tags)


['Tech Support', 'IT', 'Performance', 'Feedback', 'Documentation', 'Bug', 'Security', 'Feature', 'Disruption', 'Outage', 'Technical', 'Network', 'Product', 'Sales', 'Resolution', 'Guidance', 'Recovery', 'Billing', 'Crash', 'Hardware']


In [None]:
import os
from getpass import getpass

# Never write an access token into the notebook itself: notebooks are shared and committed,
# and a literal here would also overwrite a valid HF_TOKEN already set in the environment.
# Use the existing environment variable when present; otherwise ask for the token
# interactively (the input is not echoed and is not stored in the notebook file).
if not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = getpass("Hugging Face token: ")

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load model + tokenizer from Hugging Face
# Note: this rebinds the notebook-level names `tokenizer` and `model`.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",   # let the library decide device placement
    torch_dtype="auto"   # NOTE(review): "auto" is documented as using the checkpoint's stored dtype — confirm
)

# Create pipeline
# NOTE(review): `device_map` is passed again although an already-loaded model object is
# supplied — confirm whether it has any effect here.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto"
)

# Few-shot examples
# Hand-written (ticket text, tags) pairs that build_prompt inserts verbatim into the prompt.
# NOTE(review): some of these tags ("Account", "Login", "Refund", "Payment") are not part of
# the 20-tag candidate list printed for `top20_tags` above, so the examples demonstrate
# answers outside the candidate set that the prompt asks the model to choose from.
examples = [
    {
        "text": "I can’t log into my account, password reset not working",
        "tags": ["Account", "Login", "Technical"]
    },
    {
        "text": "I was charged twice for my subscription and need a refund",
        "tags": ["Billing", "Refund", "Payment"]
    },
    {
        "text": "The app keeps crashing when I upload a file",
        "tags": ["Bug", "Crash", "Technical"]
    }
]

def build_prompt(ticket_text, candidate_tags):
    """Assemble the few-shot classification prompt for one ticket.

    Args:
        ticket_text: Text of the ticket to classify.
        candidate_tags: Iterable of tag strings listed as allowed answers.

    Returns:
        A single prompt string: task instruction, comma-separated candidate
        tags, every entry of the notebook-level ``examples`` list rendered as
        a ``Ticket:`` / ``Tags:`` pair, and finally the new ticket followed by
        an open ``Tags:`` for the model to complete.

    Note:
        A later cell defines ``build_prompt`` again with a stricter final
        instruction; once that cell has run, it replaces this definition.
    """
    header = (
        "Classify the following support ticket into the most relevant 3 tags.\n"
        f"Candidate tags: {', '.join(candidate_tags)}\n\n"
        "Examples:\n"
    )
    shots = "".join(
        f"Ticket: {shot['text']}\nTags: {', '.join(shot['tags'])}\n\n"
        for shot in examples
    )
    return f"{header}{shots}Now classify this ticket:\nTicket: {ticket_text}\nTags:"

# Test with one sample
# Uses the first ticket of the test split and the 20 most frequent tags as candidates.
sample = ds_clean["test"][0]["text"]
prompt = build_prompt(sample, top20_tags)

# Greedy decoding (do_sample=False), at most 100 new tokens.
response = generator(prompt, max_new_tokens=100, do_sample=False)

print("Ticket:", sample[:200])
print("True tags:", ds_clean["test"][0]["tags"])
# As the captured output of this cell shows, "generated_text" starts with the full prompt,
# so this line prints the prompt echo as well, not just the predicted tags.
print("Predicted tags:", response[0]["generated_text"])


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Ticket: Concern Regarding Access to Medical Data Hello Customer Support, I am contacting you to address an unexpected problem with access to my medical data. It is possible that this issue is a result of a ha
True tags: ['Technical', 'Customer', 'Security', 'Access', 'Hardware', 'Software', 'Resolution', 'Feedback']
Predicted tags: Classify the following support ticket into the most relevant 3 tags.
Candidate tags: Tech Support, IT, Performance, Feedback, Documentation, Bug, Security, Feature, Disruption, Outage, Technical, Network, Product, Sales, Resolution, Guidance, Recovery, Billing, Crash, Hardware

Examples:
Ticket: I can’t log into my account, password reset not working
Tags: Account, Login, Technical

Ticket: I was charged twice for my subscription and need a refund
Tags: Billing, Refund, Payment

Ticket: The app keeps crashing when I upload a file
Tags: Bug, Crash, Technical

Now classify this ticket:
Ticket: Concern Regarding Access to Medical Data Hello Customer Support, I 

In [None]:
def build_prompt(ticket_text, candidate_tags):
    """Assemble the few-shot prompt, ending with a strict "only output 3 tags" instruction.

    Args:
        ticket_text: Text of the ticket to classify.
        candidate_tags: Iterable of tag strings listed as allowed answers.

    Returns:
        The prompt string: task instruction, candidate tags, each entry of the
        notebook-level ``examples`` list as a ``Ticket:`` / ``Tags:`` pair, the
        stricter instruction, the new ticket, and a trailing open ``Tags:``.
    """
    lines = [
        "Classify the following support ticket into the most relevant 3 tags.",
        f"Candidate tags: {', '.join(candidate_tags)}",
        "",
        "Examples:",
    ]
    for shot in examples:
        # Each example is followed by one blank line.
        lines += [f"Ticket: {shot['text']}", f"Tags: {', '.join(shot['tags'])}", ""]
    # stricter instruction here
    lines += [
        "Now classify this ticket. Only output the 3 tags separated by commas.",
        f"Ticket: {ticket_text}",
        "Tags:",
    ]
    return "\n".join(lines)


In [None]:
import re

def extract_tags(output):
    """Pull up to three comma-separated tags out of generated text.

    Args:
        output: Text to scan for a ``Tags:`` marker.

    Returns:
        The first three comma-separated items (whitespace-stripped) that follow
        the first ``Tags:`` marker found in ``output``; ``[]`` when there is no
        marker.

    Note:
        A later cell defines ``extract_tags`` again; after Run All that later
        definition is the one in effect.
    """
    found = re.search(r"Tags:\s*(.*)", output)
    if found is None:
        return []
    # Only the first three items are kept, each with surrounding spaces removed.
    pieces = found.group(1).split(",")
    return [piece.strip() for piece in pieces[:3]]


In [None]:
import re

def extract_tags(output, max_tags=3):
    """Pull the predicted tags out of generated text.

    The text-generation output parsed in this notebook begins with the prompt,
    and that prompt already contains few-shot ``Tags:`` lines. Searching from
    the start would therefore return the first example's tags instead of the
    model's answer. To avoid that, only the text after the last
    ``"Now classify this ticket"`` instruction (the wording used by
    ``build_prompt``) is searched; when that instruction is absent the whole
    text is searched, as before.

    Args:
        output: Generated text, with or without the echoed prompt.
        max_tags: Maximum number of tags to return (default 3).

    Returns:
        Up to ``max_tags`` whitespace-stripped, non-empty tags from the first
        ``Tags:`` line in the searched region; ``[]`` when no marker is found
        or nothing follows it.
    """
    # rpartition returns the whole string as the tail when the instruction is not present.
    answer_region = output.rpartition("Now classify this ticket")[2]
    match = re.search(r"Tags:\s*(.*)", answer_region)
    if match is None:
        return []
    stripped = (item.strip() for item in match.group(1).split(","))
    # Drop empty items so an empty completion gives [] rather than [""].
    return [item for item in stripped if item][:max(max_tags, 0)]


In [None]:
prompt = build_prompt(sample, top20_tags)

# return_full_text=False asks the pipeline for the newly generated continuation only.
# By default "generated_text" starts with the whole prompt, whose few-shot "Tags:" lines
# come before the model's answer and would be what a search for "Tags:" finds first.
response = generator(prompt, max_new_tokens=50, do_sample=False, return_full_text=False)

raw_output = response[0]["generated_text"]

# The prompt ends with "Tags:", so the continuation is the text that follows that marker;
# put the marker back in front because extract_tags looks for it.
predicted_tags = extract_tags("Tags:" + raw_output)

print("Ticket:", sample[:200])
print("True tags:", ds_clean["test"][0]["tags"])
print("Raw output:", raw_output)
print("Clean predicted tags:", predicted_tags)


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Ticket: Concern Regarding Access to Medical Data Hello Customer Support, I am contacting you to address an unexpected problem with access to my medical data. It is possible that this issue is a result of a ha
True tags: ['Technical', 'Customer', 'Security', 'Access', 'Hardware', 'Software', 'Resolution', 'Feedback']
Raw output: Classify the following support ticket into the most relevant 3 tags.
Candidate tags: Tech Support, IT, Performance, Feedback, Documentation, Bug, Security, Feature, Disruption, Outage, Technical, Network, Product, Sales, Resolution, Guidance, Recovery, Billing, Crash, Hardware

Examples:
Ticket: I can’t log into my account, password reset not working
Tags: Account, Login, Technical

Ticket: I was charged twice for my subscription and need a refund
Tags: Billing, Refund, Payment

Ticket: The app keeps crashing when I upload a file
Tags: Bug, Crash, Technical

Now classify this ticket. Only output the 3 tags separated by commas.
Ticket: Concern Regarding Access to

In [None]:
# -------------------------------
# Fine-tuning for Tag Prediction
# -------------------------------
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np

In [None]:
# Prepare dataset for training
# Note: this rebinds the notebook-level `tokenizer` (previously the Mistral tokenizer)
# to the DistilBERT tokenizer used for fine-tuning.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
# Encode labels (multi-label classification since tickets can have multiple tags)
# `df` is the pandas copy of the full training split, so the binarizer is fitted on
# tickets of every language, not only on the English subset used earlier.
mlb = MultiLabelBinarizer()
# Collect the tag_* columns of each row into one list (adds a "tags" column to df in place) ...
df["tags"] = df[tag_cols].values.tolist()
# ... and drop the empty slots.
df["tags"] = df["tags"].apply(lambda x: [t for t in x if t not in (None, "", "null")])
# Y is the multi-hot label matrix: one row per ticket, one column per entry of mlb.classes_.
Y = mlb.fit_transform(df["tags"])

In [None]:
def tokenize(batch):
    """Tokenize the ``"body"`` column of a batch for DistilBERT fine-tuning.

    Args:
        batch: Mapping with a ``"body"`` list, as passed by ``Dataset.map(..., batched=True)``.

    Returns:
        The output of the notebook-level ``tokenizer`` for the batch, padded and
        truncated to exactly 256 tokens per ticket.
    """
    # Missing bodies become empty strings and everything else is coerced to str,
    # because the tokenizer only accepts strings.
    texts = ["" if t is None else str(t) for t in batch["body"]]
    return tokenizer(texts, padding="max_length", truncation=True, max_length=256)

# `tokenize` is applied in the next cell, right after `dataset` is created. Calling
# `dataset.map(tokenize, ...)` here would fail on a fresh kernel (Restart & Run All)
# because `dataset` does not exist yet at this point.


Map:   0%|          | 0/48587 [00:00<?, ? examples/s]

In [None]:
from datasets import Dataset
# Build the fine-tuning dataset from the ticket body and its tag list only.
dataset = Dataset.from_pandas(df[["body", "tags"]])
# Add a float32 multi-hot "labels" vector per row (one mlb.transform call per ticket).
dataset = dataset.map(lambda x: {"labels": mlb.transform([x["tags"]])[0].astype("float32")}, batched=False)

# Add the tokenizer outputs for the "body" column.
dataset = dataset.map(tokenize, batched=True)

# 80/20 split with a fixed seed; from here on `dataset` has "train" and "test" splits.
dataset = dataset.train_test_split(test_size=0.2, seed=42)


Map:   0%|          | 0/48587 [00:00<?, ? examples/s]

Map:   0%|          | 0/48587 [00:00<?, ? examples/s]

In [None]:
# Convert to pytorch format
# The raw tag lists are no longer needed once "labels" exists. Note that this cell is not
# re-runnable as is: a second run fails because "tags" has already been removed.
dataset = dataset.remove_columns(["tags"])
dataset.set_format("torch")


In [None]:
# Model for multi-label classification
# One output per entry of mlb.classes_. Note: this rebinds the notebook-level `model`
# (previously the Mistral causal LM) to the DistilBERT classifier.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=len(mlb.classes_),
    problem_type="multi_label_classification"
)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Define metrics
from sklearn.metrics import f1_score, accuracy_score

def compute_metrics(eval_pred):
    """Turn raw evaluation logits into multi-label accuracy and micro-F1.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        ``{"accuracy": ..., "f1": ...}`` where a label counts as predicted when
        its sigmoid probability is strictly greater than 0.5, ``"f1"`` is
        ``f1_score(..., average="micro")`` and ``"accuracy"`` is
        ``accuracy_score`` on the full label matrices.
    """
    logits, labels = eval_pred
    probabilities = torch.sigmoid(torch.tensor(logits))
    preds = (probabilities > 0.5).int().numpy()
    return {
        "accuracy": accuracy_score(labels, preds),
        "f1": f1_score(labels, preds, average="micro"),
    }


In [None]:
# -----------------------------
# Training setup (old API style)
# -----------------------------
# Hyper-parameters for the fine-tuning run: 2 epochs, batch size 8, lr 2e-5, weight decay 0.01.
# Checkpoints go to ./results and logs to ./logs.
training_args = TrainingArguments(
    output_dir="./results",
    do_eval=True,                     # enable evaluation
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir="./logs",
)

# NOTE(review): the captured output shows a warning pointing at this Trainer(...) call —
# presumably about the `tokenizer=` argument; confirm against the installed transformers version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],     # the 20 % hold-out created by train_test_split
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Train model
trainer.train()

# Evaluate AFTER training
# The reported "eval_accuracy" / "eval_f1" come from compute_metrics on dataset["test"].
results = trainer.evaluate()
print("Fine-tuned model performance:", results)


  trainer = Trainer(


Step,Training Loss
500,0.1272
1000,0.0084
1500,0.0076
2000,0.0072
2500,0.0067
3000,0.0062
3500,0.0057
4000,0.0056
4500,0.0051
5000,0.0051


Fine-tuned model performance: {'eval_loss': 0.004320092033594847, 'eval_accuracy': 0.11061946902654868, 'eval_f1': 0.6506473150925001, 'eval_runtime': 84.4189, 'eval_samples_per_second': 115.116, 'eval_steps_per_second': 14.393, 'epoch': 2.0}


In [None]:
import torch
import numpy as np

# Function to get top-3 predictions
def predict_top3(texts, model, tokenizer, mlb, top_k=3):
    """Return the ``top_k`` most probable tags for each text using the fine-tuned model.

    Args:
        texts: List of ticket texts.
        model: Sequence-classification model; it is switched to eval mode.
        tokenizer: Tokenizer matching ``model``.
        mlb: Fitted binarizer whose ``classes_`` maps output index to tag name.
        top_k: Number of tags to return per text (default 3).

    Returns:
        One list per input text, each containing ``(tag, probability)`` pairs
        ordered from most to least probable. Probabilities are per-label
        sigmoid outputs.
    """
    model.eval()
    # Run on whatever device the model's weights live on (cpu or cuda).
    device = next(model.parameters()).device

    encoded = tokenizer(texts, padding=True, truncation=True, max_length=256, return_tensors="pt")
    encoded = encoded.to(device)
    with torch.no_grad():
        # Move back to the CPU before converting to numpy.
        probs = torch.sigmoid(model(**encoded).logits).cpu().numpy()

    results = []
    for row in probs:
        # argsort is ascending: take the last top_k indices and reverse them.
        ranked = np.argsort(row)[-top_k:][::-1]
        results.append([(mlb.classes_[i], row[i]) for i in ranked])
    return results

# Example: predict on 5 random tickets
# The sample is drawn from the full `df` (all languages, including rows the model was trained on),
# with a fixed random_state so the same five tickets are picked on every run.
sample_texts = df["body"].sample(5, random_state=42).tolist()
predictions = predict_top3(sample_texts, model, tokenizer, mlb)

for text, preds in zip(sample_texts, predictions):
    print("\nTicket:", text[:120], "...")
    for tag, score in preds:
        print(f"  {tag}: {score:.3f}")



Ticket: I am reaching out for support in understanding how to use data analytics to optimize my investment strategies. Could you ...
  Feedback: 0.885
  Documentation: 0.592
  Feature: 0.494

Ticket: Hello Customer Support, we are encountering inconsistent investment data reporting on the analytics dashboard. The probl ...
  Tech Support: 0.959
  Performance: 0.946
  IT: 0.933

Ticket: I would like to request information on the best security practices for storing medical data on a QNAP TS-453D Network At ...
  Security: 0.980
  Documentation: 0.528
  IT: 0.306

Ticket: The project management SaaS has been crashing frequently during peak usage times, probably due to server overload. Resta ...
  Performance: 0.899
  Outage: 0.870
  Disruption: 0.748

Ticket: Das Tool stürzt während der Datenanalyse ab. Habe bereits versucht, Ressourcen neu zuzuordnen und neu zu starten, jedoch ...
  Bug: 0.929
  Performance: 0.836
  Tech Support: 0.830


In [None]:
from transformers import pipeline
import torch

# Zero-shot pipeline (same checkpoint as the earlier zero-shot experiment), kept under its own
# name so compare_predictions can call it. Runs on GPU 0 when CUDA is available, otherwise on CPU.
zero_shot_pipeline = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=0 if torch.cuda.is_available() else -1
)

# ✅ Fine-tuned prediction function
def predict_top3(texts, model, tokenizer, mlb, top_k=3):
    """Return the ``top_k`` most probable tags for each text using the fine-tuned model.

    This definition replaces the earlier ``predict_top3`` of the same
    signature, so it keeps that version's contract: the model is put in eval
    mode (dropout off, deterministic scores), inputs are truncated to the same
    256 tokens used when tokenizing the training data, and ``top_k`` stays
    configurable.

    Args:
        texts: List of ticket texts.
        model: Sequence-classification model exposing ``.device``.
        tokenizer: Tokenizer matching ``model``.
        mlb: Fitted binarizer whose ``classes_`` array maps output index to tag name.
        top_k: Number of tags to return per text (default 3); values larger
            than the number of labels return every label.

    Returns:
        One list per input text of ``(tag, probability)`` pairs, ordered from
        most to least probable. Probabilities are per-label sigmoid outputs.
    """
    model.eval()
    inputs = tokenizer(
        texts, padding=True, truncation=True, max_length=256, return_tensors="pt"
    ).to(model.device)
    with torch.no_grad():
        probs = torch.sigmoid(model(**inputs).logits).cpu().numpy()

    results = []
    for prob in probs:
        # Descending order first, then cut: also behaves for top_k == 0.
        top_idx = prob.argsort()[::-1][:max(top_k, 0)]
        results.append(list(zip(mlb.classes_[top_idx], prob[top_idx])))
    return results

# ✅ Compare Zero-shot vs Fine-tuned
def compare_predictions(texts, model, tokenizer, mlb, candidate_labels):
    """Print fine-tuned and zero-shot top-3 tags side by side for each text.

    Args:
        texts: List of ticket texts.
        model, tokenizer, mlb: Passed through to ``predict_top3`` for the fine-tuned predictions.
        candidate_labels: Labels offered to the notebook-level ``zero_shot_pipeline``.

    For every ticket this prints a separator, the first 150 characters of the
    text, the fine-tuned ``(label, score)`` pairs, and the three highest-scoring
    zero-shot labels. Returns None.
    """
    # All fine-tuned predictions are computed up front in a single call.
    fine_tuned_preds = predict_top3(texts, model, tokenizer, mlb)
    separator = "=" * 100

    for position, text in enumerate(texts):
        print(separator)
        print(f"Ticket: {text[:150]}...\n")

        # Fine-tuned
        print("📌 Fine-tuned Predictions:")
        for tag, probability in fine_tuned_preds[position]:
            print(f"   {tag}: {probability:.3f}")

        # Zero-shot: one pipeline call per ticket, then keep the three best-scoring labels.
        zs = zero_shot_pipeline(text, candidate_labels, multi_label=True)
        scored = list(zip(zs['labels'], zs['scores']))
        scored.sort(key=lambda pair: pair[1], reverse=True)
        best_three = scored[:3]

        print("\n📌 Zero-Shot Predictions:")
        for tag, probability in best_three:
            print(f"   {tag}: {probability:.3f}")
        print("\n")

# Candidate labels come from your binarizer
# That is every class the binarizer was fitted on, so the zero-shot pipeline is asked to
# score each ticket against the complete tag vocabulary.
candidate_labels = list(mlb.classes_)

# Example: run on 5 random tickets
# Same fixed-seed sample of df["body"] as in the earlier top-3 demo.
sample_texts = df["body"].sample(5, random_state=42).tolist()
compare_predictions(sample_texts, model, tokenizer, mlb, candidate_labels)



Device set to use cuda:0


Ticket: I am reaching out for support in understanding how to use data analytics to optimize my investment strategies. Could you share some tips and resources...

📌 Fine-tuned Predictions:
   Feedback: 0.885
   Documentation: 0.592
   Feature: 0.494

📌 Zero-Shot Predictions:
   Support: 0.970
   Support Inquiry: 0.963
   Support Request: 0.962


Ticket: Hello Customer Support, we are encountering inconsistent investment data reporting on the analytics dashboard. The problem appears to be related to th...

📌 Fine-tuned Predictions:
   Tech Support: 0.959
   Performance: 0.946
   IT: 0.933

📌 Zero-Shot Predictions:
   Investment analysis: 0.995
   Customer Support: 0.995
   Investment Analytics: 0.992


Ticket: I would like to request information on the best security practices for storing medical data on a QNAP TS-453D Network Attached Storage (NAS) device. G...

📌 Fine-tuned Predictions:
   Security: 0.980
   Documentation: 0.528
   IT: 0.306

📌 Zero-Shot Predictions:
   Sensitive Data:

In [None]:
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from transformers import pipeline

# Objects this cell expects from earlier cells:
# - dataset (the tokenized train/test DatasetDict)
# - model, tokenizer (the fine-tuned model and its tokenizer)
# - mlb (the fitted MultiLabelBinarizer)
# - candidate_labels (list of labels for zero-shot)

# The held-out test DataFrame is derived here from the "test" split of `dataset`.
df_test = dataset["test"].to_pandas()

# -----------------------------
# 1. Fine-tuned Predictions
# -----------------------------
def predict_finetuned(texts, model, tokenizer, mlb, threshold=0.5):
    """Predict a multi-hot tag matrix for ``texts`` with the fine-tuned model.

    Args:
        texts: List of ticket texts; ``None`` entries are treated as empty text.
        model: Sequence-classification model exposing ``.device``.
        tokenizer: Tokenizer matching ``model``.
        mlb: Unused here; kept so existing callers do not need to change.
        threshold: A label is predicted when its sigmoid probability is
            greater than or equal to this value (default 0.5).

    Returns:
        Integer numpy array with one row per text and one column per model
        output, holding 1 where the label is predicted and 0 elsewhere.
    """
    # Evaluation mode (dropout off) so repeated calls give the same scores.
    model.eval()
    # Same cleaning as at training time: the tokenizer only accepts strings.
    clean_texts = ["" if t is None else str(t) for t in texts]
    # max_length matches the 256-token truncation applied when the training data was tokenized.
    inputs = tokenizer(
        clean_texts, return_tensors="pt", padding=True, truncation=True, max_length=256
    ).to(model.device)
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.sigmoid(logits).cpu().numpy()
    return (probs >= threshold).astype(int)

# Ground truth: the "labels" column already holds one multi-hot vector per ticket (it was
# produced with mlb.transform when the dataset was built), so stack those rows as they are.
# Passing them through mlb.transform again would treat the 0/1 entries as tag names and
# give a wrong label matrix.
y_true = np.vstack(df_test["labels"].tolist()).astype(int)

# Predict one ticket at a time and collect the multi-hot rows.
y_pred_finetuned = []
for text in df_test["body"].tolist():
    pred = predict_finetuned([text], model, tokenizer, mlb)
    y_pred_finetuned.append(pred[0])
y_pred_finetuned = np.array(y_pred_finetuned)

print("📊 Fine-tuned Model Report")
# zero_division=0: tags with no true or predicted samples report 0.0 without a warning each.
print(classification_report(y_true, y_pred_finetuned, target_names=mlb.classes_, zero_division=0))

# -----------------------------
# 2. Zero-shot Predictions
# -----------------------------
# Use GPU 0 when CUDA is available and fall back to the CPU (-1) otherwise; a hard-coded
# device=0 fails on machines without a GPU.
zero_shot = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=0 if torch.cuda.is_available() else -1,
)

def predict_zeroshot(texts, candidate_labels, threshold=0.5):
    """Predict a multi-hot tag matrix for ``texts`` with the zero-shot pipeline.

    Args:
        texts: Iterable of ticket texts.
        candidate_labels: Labels to score; column ``j`` of the result always
            refers to ``candidate_labels[j]``.
        threshold: A label is predicted when its score is greater than or
            equal to this value (default 0.5).

    Returns:
        Integer numpy array of shape ``(len(texts), len(candidate_labels))``
        with 1 where the label is predicted and 0 elsewhere.
    """
    rows = []
    for text in texts:
        result = zero_shot(text, candidate_labels, multi_label=True)
        # The pipeline returns labels ordered by score, not in the order they were passed in.
        # Look each score up by label so the columns line up with candidate_labels
        # (and therefore with the ground-truth matrix).
        score_by_label = dict(zip(result["labels"], result["scores"]))
        rows.append([1 if score_by_label[label] >= threshold else 0 for label in candidate_labels])
    # Fix the shape explicitly so an empty `texts` still yields a 2-D array.
    return np.array(rows, dtype=int).reshape(len(rows), len(candidate_labels))

# Score every test ticket against every label in candidate_labels.
# NOTE(review): candidate_labels is the full label vocabulary and df_test is the whole
# hold-out split, so this is likely very slow — consider a subsample and/or a shorter label list.
y_pred_zeroshot = predict_zeroshot(df_test["body"].tolist(), candidate_labels, threshold=0.5)

print("\n📊 Zero-shot Model Report")
print(classification_report(y_true, y_pred_zeroshot, target_names=mlb.classes_))

📊 Fine-tuned Model Report


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


                                                                                                            precision    recall  f1-score   support

                                                                                                      2019       0.00      0.00      0.00         0
                                                                                                       AES       0.00      0.00      0.00         0
                                                                                                        AI       0.00      0.00      0.00         0
                                                                                                       API       0.00      0.00      0.00         0
                                                                                           API Integration       0.00      0.00      0.00         0
                                                                                                        AR     

Device set to use cuda:0
