In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from transformers import BertTokenizer, BertModel


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the ticket dataset from the Excel workbook; the relative path means the
# file must sit in the notebook's working directory.
df = pd.read_excel("ticket_dataset_2000.xlsx")  # same name as downloaded file
# Preview the first rows as the cell output.
df.head()


Unnamed: 0,title,description,priority,category
0,Request new monitor,Helpdesk ticket created for request new monito...,Medium,Device Request
1,IP conflict detected,The system shows symptoms of ip conflict detec...,Low,Network Issue
2,Fan making noise,Employee mentions repeated problems related to...,High,Hardware Issue
3,Software installation failed,User is unable to proceed due to software inst...,Medium,Software Issue
4,VPN certificate error,A new issue was reported: vpn certificate erro...,Medium,VPN Issue


In [None]:
# Build the model input by joining title and description with " - ".
# Both columns are cast to str first so non-string cells can be concatenated.
df["text"] = df["title"].astype(str) + " - " + df["description"].astype(str)
# Preview the combined text next to the two label columns.
df[["text", "priority", "category"]].head()


Unnamed: 0,text,priority,category
0,Request new monitor - Helpdesk ticket created ...,Medium,Device Request
1,IP conflict detected - The system shows sympto...,Low,Network Issue
2,Fan making noise - Employee mentions repeated ...,High,Hardware Issue
3,Software installation failed - User is unable ...,Medium,Software Issue
4,VPN certificate error - A new issue was report...,Medium,VPN Issue


In [4]:
# One encoder per target, so each keeps its own class list for decoding
# predicted ids back to label names later on.
cat_encoder, prio_encoder = LabelEncoder(), LabelEncoder()

# Fit each encoder on its label column and store the integer codes alongside.
encoded_columns = {
    "category_encoded": cat_encoder.fit_transform(df["category"]),
    "priority_encoded": prio_encoder.fit_transform(df["priority"]),
}
for column_name, codes in encoded_columns.items():
    df[column_name] = codes

print("Category classes:", cat_encoder.classes_)
print("Priority classes:", prio_encoder.classes_)


Category classes: ['Access Request' 'Device Request' 'Email Issue' 'Hardware Issue'
 'Login Issue' 'Network Issue' 'Power Issue' 'Printer Issue' 'SAP Issue'
 'Software Issue' 'System Performance Issue' 'VPN Issue']
Priority classes: ['Critical' 'High' 'Low' 'Medium']


In [5]:
# Hold out 20% of the tickets for validation; the fixed random_state makes the
# split repeatable between runs.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df["category_encoded"]   # stratified on category only; priority is not part of the stratification
)

# Show the (rows, columns) shape of each split as the cell output.
train_df.shape, val_df.shape


((1600, 7), (400, 7))

In [6]:
class TicketDataset(Dataset):
    """Dataset that tokenizes ticket text on access and attaches both labels.

    Every item is a dict with four entries, in this order:
      - "input_ids" / "attention_mask": tokenizer outputs with the leading
        batch dimension squeezed away,
      - "category" / "priority": ``torch.long`` scalars read from the
        ``category_encoded`` and ``priority_encoded`` columns.
    """

    def __init__(self, dataframe, tokenizer, max_len=128):
        # Drop the incoming index so rows are numbered 0..n-1.
        self.df = dataframe.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df.iloc[idx]

        # Truncate/pad to a fixed length so items can be stacked into batches.
        tokens = self.tokenizer(
            str(record["text"]),
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt",
        )

        sample = {
            field: tokens[field].squeeze(0)
            for field in ("input_ids", "attention_mask")
        }
        sample["category"] = torch.tensor(record["category_encoded"], dtype=torch.long)
        sample["priority"] = torch.tensor(record["priority_encoded"], dtype=torch.long)
        return sample


In [7]:
# Sequence length and batch size are named once so both splits stay in sync.
# predict_ticket tokenizes with the same length (64) at inference time.
MAX_LEN = 64
BATCH_SIZE = 16

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

train_dataset = TicketDataset(train_df, tokenizer, max_len=MAX_LEN)
val_dataset = TicketDataset(val_df, tokenizer, max_len=MAX_LEN)

# Only the training batches are shuffled; validation keeps row order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)


In [8]:
class MultiTaskBERT(nn.Module):
    """Shared BERT encoder feeding two linear heads: category and priority.

    Args:
        n_cat: number of category classes (output width of the category head).
        n_prio: number of priority classes (output width of the priority head).
    """

    def __init__(self, n_cat, n_prio):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")
        self.dropout = nn.Dropout(0.2)

        # Both heads consume the encoder's hidden width (768 for bert-base).
        encoder_width = self.bert.config.hidden_size
        self.category_head = nn.Linear(encoder_width, n_cat)
        self.priority_head = nn.Linear(encoder_width, n_prio)

    def forward(self, input_ids, attention_mask):
        """Return ``(category_logits, priority_logits)`` for a batch."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)

        # Position 0 of the last hidden state is the CLS token representation;
        # the same dropped-out vector is shared by both heads.
        pooled = self.dropout(encoded.last_hidden_state[:, 0, :])

        return self.category_head(pooled), self.priority_head(pooled)


In [10]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Display the selected device as the cell output.
device


device(type='cpu')

In [11]:
# Seed torch's global RNG before any randomness is consumed. The two linear
# heads are randomly initialised here, and dropout and (by default) the
# shuffling DataLoader also draw from torch's RNG during training, so without
# a seed every run produces different losses and accuracies. 42 matches the
# random_state used for the train/validation split.
torch.manual_seed(42)

# Head widths come from the fitted label encoders (one logit per class).
model = MultiTaskBERT(
    n_cat=len(cat_encoder.classes_),
    n_prio=len(prio_encoder.classes_)
)

model.to(device)

# A single optimizer updates the encoder and both heads; the same
# cross-entropy criterion is reused for the category and priority tasks.
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
loss_fn = nn.CrossEntropyLoss()


In [12]:
EPOCHS = 5  # start with 5; you can increase later

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0.0

    for batch in train_loader:
        # Move every tensor of the batch (inputs and both labels) to the device.
        on_device = {key: tensor.to(device) for key, tensor in batch.items()}

        optimizer.zero_grad()

        cat_logits, prio_logits = model(
            on_device["input_ids"],
            on_device["attention_mask"],
        )

        # Joint objective: unweighted sum of the two cross-entropy terms.
        loss = (
            loss_fn(cat_logits, on_device["category"])
            + loss_fn(prio_logits, on_device["priority"])
        )
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Report the mean per-batch loss for the epoch.
    avg_train_loss = train_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{EPOCHS}] - Training Loss: {avg_train_loss:.4f}")


Epoch [1/5] - Training Loss: 2.9247
Epoch [2/5] - Training Loss: 1.4807
Epoch [3/5] - Training Loss: 1.3485
Epoch [4/5] - Training Loss: 1.3259
Epoch [5/5] - Training Loss: 1.2955


In [13]:
# Switch off dropout and gradient tracking for validation.
model.eval()
all_cat_true, all_cat_pred = [], []
all_prio_true, all_prio_pred = [], []

with torch.no_grad():
    for batch in val_loader:
        cat_logits, prio_logits = model(
            batch["input_ids"].to(device),
            batch["attention_mask"].to(device),
        )

        # Ground-truth labels are collected as numpy values without being
        # sent to the device.
        all_cat_true.extend(batch["category"].cpu().numpy())
        all_prio_true.extend(batch["priority"].cpu().numpy())

        # Predicted class = index of the largest logit in each row.
        all_cat_pred.extend(cat_logits.argmax(dim=1).cpu().numpy())
        all_prio_pred.extend(prio_logits.argmax(dim=1).cpu().numpy())

cat_acc = accuracy_score(all_cat_true, all_cat_pred)
prio_acc = accuracy_score(all_prio_true, all_prio_pred)

print(f"Category accuracy: {cat_acc:.3f}")
print(f"Priority accuracy: {prio_acc:.3f}")


Category accuracy: 1.000
Priority accuracy: 0.305


In [15]:
def predict_ticket(title: str, description: str, max_len: int = 64):
    """Predict the category and priority of a single ticket.

    Args:
        title: Ticket title. Non-string values (e.g. None or NaN) are
            stringified, matching how the training text column was built.
        description: Ticket description; stringified the same way.
        max_len: Token length the text is truncated/padded to. Defaults to 64,
            the length the training and validation datasets were built with.

    Returns:
        dict with "category" and "priority" holding the decoded label names.
    """
    model.eval()

    # Same "<title> - <description>" layout as the training text; str() keeps
    # non-string inputs from raising TypeError on concatenation.
    text = str(title) + " - " + str(description)

    encoding = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=max_len,
        return_tensors="pt"
    )

    input_ids      = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    with torch.no_grad():
        cat_logits, prio_logits = model(input_ids, attention_mask)

    # Batch size is 1, so argmax over the class dimension yields one id each.
    cat_id  = cat_logits.argmax(dim=1).item()
    prio_id = prio_logits.argmax(dim=1).item()

    # Map the integer ids back to the original label strings.
    return {
        "category": cat_encoder.inverse_transform([cat_id])[0],
        "priority": prio_encoder.inverse_transform([prio_id])[0],
    }


In [16]:
# Hand-written tickets used to eyeball the model's predictions.
examples = [
    {"title": sample_title, "description": sample_description}
    for sample_title, sample_description in (
        (
            "Laptop overheating",
            "User reports system gets very hot and shuts down after 10 minutes of use.",
        ),
        (
            "Cannot send email",
            "Outlook shows error whenever trying to send emails to external domain.",
        ),
        (
            "VPN keeps disconnecting",
            "During remote work session, VPN drops every 5 minutes and reconnects.",
        ),
        (
            "Request shared drive access",
            "User needs access to finance shared drive to upload reports.",
        ),
    )
]

# Print each ticket followed by its predicted category/priority.
for ex in examples:
    title, description = ex["title"], ex["description"]
    print("Title:", title)
    print("Description:", description)
    print("Prediction:", predict_ticket(title, description))
    print("-" * 70)


Title: Laptop overheating
Description: User reports system gets very hot and shuts down after 10 minutes of use.
Prediction: {'category': 'Hardware Issue', 'priority': 'Critical'}
----------------------------------------------------------------------
Title: Cannot send email
Description: Outlook shows error whenever trying to send emails to external domain.
Prediction: {'category': 'Email Issue', 'priority': 'High'}
----------------------------------------------------------------------
Title: VPN keeps disconnecting
Description: During remote work session, VPN drops every 5 minutes and reconnects.
Prediction: {'category': 'VPN Issue', 'priority': 'Critical'}
----------------------------------------------------------------------
Title: Request shared drive access
Description: User needs access to finance shared drive to upload reports.
Prediction: {'category': 'Access Request', 'priority': 'High'}
----------------------------------------------------------------------


In [17]:
import pickle

# Persist the trained weights (state dict only, not the module object).
torch.save(model.state_dict(), "multitask_bert_ticket.pt")

# Persist both label encoders so predicted ids can be decoded after reloading.
# NOTE: pickle files should only ever be loaded from trusted locations.
for pickle_path, encoder in (
    ("category_encoder.pkl", cat_encoder),
    ("priority_encoder.pkl", prio_encoder),
):
    with open(pickle_path, "wb") as handle:
        pickle.dump(encoder, handle)


In [18]:
# Static routing table, one row per ticket category:
#   (category, team name, [(technician name, id, current_load), ...])
_TEAM_ROSTERS = [
    ("Hardware Issue", "Hardware Support Team",
     [("Raj", 101, 3), ("Nisha", 102, 1), ("Amit", 103, 5)]),
    ("Software Issue", "Software Support Team",
     [("Priya", 201, 0), ("Karan", 202, 4)]),
    ("Network Issue", "Network Team",
     [("Neeraj", 301, 2), ("Shreya", 302, 1)]),
    ("Login Issue", "Access Management Team",
     [("Vikram", 401, 2)]),
    ("Access Request", "IAM Team",
     [("Mahesh", 501, 0), ("Aarti", 502, 3)]),
    ("SAP Issue", "SAP Support Team",
     [("Sunil", 601, 4), ("Megha", 602, 1)]),
    ("Email Issue", "Messaging Team",
     [("Jaspreet", 701, 1)]),
    ("Printer Issue", "Peripheral Support",
     [("Deepak", 801, 2)]),
    ("VPN Issue", "Remote Access Team",
     [("Monika", 901, 3)]),
    ("System Performance Issue", "System Health Team",
     [("Harsh", 1001, 4), ("Rohit", 1002, 1)]),
    ("Power Issue", "Infra Support",
     [("Arun", 1101, 0)]),
    ("Device Request", "Inventory & Procurement Team",
     [("Lakshmi", 1201, 2)]),
]

# Expand the compact rows into the nested dict shape the routing code reads:
# category -> {"team": str, "technicians": [{"name", "id", "current_load"}, ...]}
technician_map = {
    category: {
        "team": team_name,
        "technicians": [
            {"name": tech_name, "id": tech_id, "current_load": load}
            for tech_name, tech_id, load in members
        ],
    }
    for category, team_name, members in _TEAM_ROSTERS
}


In [19]:
def assign_technician(category, priority, tech_map=None):
    """Pick a team and technician for a ticket from its category and priority.

    Args:
        category: Predicted ticket category; used as the key into the roster.
        priority: Predicted priority label ("Critical", "High", "Medium" or
            anything else, which is treated as low priority).
        tech_map: Optional roster with the same shape as the module-level
            ``technician_map``. When omitted, ``technician_map`` is used.

    Returns:
        dict with "team" (team name) and "technician" (the selected roster
        entry). If the category is unknown, or its team has no technicians,
        the "General Support" fallback assignment is returned.
    """
    if tech_map is None:
        tech_map = technician_map

    fallback = {
        "team": "General Support",
        "technician": {"name": "Fallback Technician", "id": 999, "current_load": 0}
    }

    # If the model predicts a category that has no roster entry
    if category not in tech_map:
        return fallback

    team_info = tech_map[category]
    technicians = team_info["technicians"]

    # A team with an empty roster has nobody to index into below; route the
    # ticket to the fallback instead of raising IndexError.
    if not technicians:
        return fallback

    # Sort technicians by current_load, ascending: index 0 is the least busy
    by_load = sorted(technicians, key=lambda tech: tech["current_load"])

    # Priority-based load balancing
    if priority in ("Critical", "High"):
        # Urgent tickets always go to the least busy technician
        selected = by_load[0]
    elif priority == "Medium":
        # Second-least busy, or the only technician on a one-person team
        selected = by_load[min(1, len(by_load) - 1)]
    else:
        # Low priority (and any unrecognised label) goes to the MOST loaded
        # technician. NOTE(review): the earlier comment here read "most free",
        # which contradicts the ascending sort - confirm the intended policy.
        selected = by_load[-1]

    return {
        "team": team_info["team"],
        "technician": selected
    }


In [20]:
def classify_and_assign(title, description):
    """Classify a ticket and route it in one call.

    Runs ``predict_ticket`` on the title/description, feeds the predicted
    category and priority to ``assign_technician``, and returns a dict with
    "category", "priority", "assign_to_team" and "assigned_technician".
    """
    predicted = predict_ticket(title, description)
    routing = assign_technician(predicted["category"], predicted["priority"])

    return {
        "category": predicted["category"],
        "priority": predicted["priority"],
        "assign_to_team": routing["team"],
        "assigned_technician": routing["technician"],
    }


In [21]:
# End-to-end check: classify a hardware-style ticket and route it.
test_title = "Laptop overheating"
test_description = "User reports the laptop becomes extremely hot and shuts down during usage."

result = classify_and_assign(test_title, test_description)
# Display the combined prediction and assignment as the cell output.
result


{'category': 'Hardware Issue',
 'priority': 'Critical',
 'assign_to_team': 'Hardware Support Team',
 'assigned_technician': {'name': 'Nisha', 'id': 102, 'current_load': 1}}

In [22]:
# Second end-to-end check with an email-style ticket; the returned dict is the
# cell output.
test_title = "Outlook cannot send emails"
test_description = "User reports error when sending messages."
classify_and_assign(test_title, test_description)


{'category': 'Email Issue',
 'priority': 'High',
 'assign_to_team': 'Messaging Team',
 'assigned_technician': {'name': 'Jaspreet', 'id': 701, 'current_load': 1}}