In [None]:
import pandas as pd
import numpy as np
from transformers import pipeline
import re
import os
from tqdm import tqdm
import torch

# Two-level ticket taxonomy: every top-level category maps to the list of its
# subcategories. Dict and list order are significant for anything that
# iterates over this mapping to derive labels.
categories = {
    "Order Modifications": [
        "Item Additions",
        "Item Removals",
        "Quantity Adjustments",
        "Late Modification Requests",
    ],
    "Order Cancellations": [
        "Standard Cancellations",
        "Urgent Cancellations",
        "Rescheduled Orders",
    ],
    "Delivery Issues": [
        "Late Deliveries",
        "Missed Deliveries",
        "Incomplete Deliveries",
        "Damaged Goods",
        "Delivery Confirmation Issues",
    ],
    "Pickup Scheduling & Rescheduling": [
        "New Pickup Requests",
        "Rescheduling Pickup",
        "Missed Pickups",
        "Pickup Policy Clarifications",
    ],
    "Product Availability & Substitutions": [
        "Stock Availability Inquiries",
        "Out-of-Stock Notifications",
        "Product Substitution Requests",
        "Special Item Requests",
    ],
    "Grant & Billing Issues": [
        "Grant Fund Usage",
        "Incorrect Grant Deduction",
        "Billing Discrepancies",
        "Payment & Credit Issues",
    ],
    "Training & Account Access": [
        "Training Signups",
        "Missed Training Sessions",
        "Login Issues",
        "New User Account Requests",
    ],
    "Emergency Situations": [
        "Weather-Related Disruptions",
        "Personal/Organization Emergencies",
        "Food Safety Concerns",
        "Unexpected Facility Closures",
    ],
    "Special Requests": [
        "Educational Materials",
        "Large Event Orders",
        "Holiday & Seasonal Adjustments",
    ],
    "Technical Support": [
        "Website & Ordering System Errors",
        "Email & Communication Issues",
        "Data Entry Mistakes",
        "General IT Assistance",
    ],
    # Catch-all bucket for tickets that fit none of the categories above.
    "Other": ["Miscellaneous"],
}

def read_ticket_data(file_path):
    """Load a ticket dump and return its individual tickets.

    The file is read as UTF-8 (undecodable bytes are replaced rather than
    raising) and cut apart at every literal ``END OF TICKET`` marker.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of ticket strings with surrounding whitespace removed;
        chunks that are empty after stripping are dropped.
    """
    with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
        raw_text = handle.read()

    # Strip each chunk once, then keep only the non-empty ones.
    stripped_chunks = (chunk.strip() for chunk in raw_text.split("END OF TICKET"))
    return [chunk for chunk in stripped_chunks if chunk]

def preprocess_ticket(ticket):
    """Remove markup and links from a raw ticket and normalise its whitespace.

    Args:
        ticket: Raw ticket text. ``None`` is treated as an empty ticket and
            any other non-string value is converted with ``str`` first.

    Returns:
        The cleaned, single-spaced text. The placeholder string
        ``"Other/Miscellaneous"`` is returned instead when the ticket is
        blank, is the literal ``"nan"`` (any case), or has no text left
        after markup and links have been removed.
    """
    placeholder = "Other/Miscellaneous"

    if ticket is None:
        return placeholder
    if not isinstance(ticket, str):
        # A float NaN becomes the string "nan", which is handled just below.
        ticket = str(ticket)

    ticket = ticket.strip()
    if ticket == "" or ticket.lower() == "nan":
        return placeholder

    # {color:...}, {color} and {quote} tags, plus {adf}...{adf} spans.
    # NOTE(review): without re.DOTALL an {adf} span that crosses a newline is
    # not matched - confirm whether that is intended.
    ticket = re.sub(r'\{color:[^}]*\}|\{color\}|\{quote\}|\{adf\}.*?\{adf\}', ' ', ticket)
    # Anything that looks like an angle-bracket tag.
    ticket = re.sub(r'<[^>]*>', ' ', ticket)
    # URLs wrapped in exclamation marks (presumably embedded-image markup).
    ticket = re.sub(r'!https?://[^\s!]*!', ' ', ticket)
    # Any square-bracketed span.
    ticket = re.sub(r'\[[^\]]*\]', ' ', ticket)
    # Remaining bare URLs.
    ticket = re.sub(r'https?://\S+', ' ', ticket)
    # Collapse every run of whitespace (including newlines) to one space.
    ticket = re.sub(r'\s+', ' ', ticket)

    ticket = ticket.strip()
    # A ticket made only of markup/links is empty now; report it the same way
    # as a ticket that was blank to begin with.
    return ticket if ticket else placeholder

def generate_class_labels(category_map=None):
    """Build the flat list of ``"<category>/<subcategory>"`` labels.

    Args:
        category_map: Optional mapping of category name to an iterable of
            subcategory names. When omitted, the module-level ``categories``
            mapping is used.

    Returns:
        A list with one ``"<category>/<subcategory>"`` string per
        subcategory, in the iteration order of the mapping and of each
        subcategory list.
    """
    if category_map is None:
        # Looked up at call time so later edits to ``categories`` are seen.
        category_map = categories

    return [
        f"{category}/{subcategory}"
        for category, subcategories in category_map.items()
        for subcategory in subcategories
    ]

def classify_ticket(ticket, classifier, class_labels):
    """Return the best-matching label for a single ticket.

    Args:
        ticket: Ticket text (normally the output of ``preprocess_ticket``).
        classifier: Callable invoked as
            ``classifier(text, class_labels, multi_label=False)`` that
            returns a mapping whose ``'labels'`` entry lists the candidate
            labels; the first entry is taken as the prediction.
        class_labels: Candidate labels handed to ``classifier``.

    Returns:
        ``"Other/Miscellaneous"`` without calling the classifier when the
        ticket is ``None``, blank, the text ``"empty ticket"`` (any case), or
        the ``"Other/Miscellaneous"`` placeholder that ``preprocess_ticket``
        returns for empty tickets; otherwise the first label reported by
        ``classifier``.
    """
    fallback = "Other/Miscellaneous"

    text = "" if ticket is None else ticket.strip()
    # Tickets with no content carry no signal for the model, so short-circuit
    # instead of spending (or failing) a model call on them.
    if not text or text.lower() == "empty ticket" or text == fallback:
        return fallback

    # Use the classifier to predict the category
    result = classifier(ticket, class_labels, multi_label=False)
    return result['labels'][0]

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the raw ticket dump, clean every ticket and build the candidate labels.
tickets = read_ticket_data("raw_description.txt")
preprocessed_tickets = [preprocess_ticket(ticket) for ticket in tickets]
class_labels = generate_class_labels()

# Run on the GPU when torch can see one; otherwise fall back to the CPU so the
# cell still works on machines without CUDA instead of failing at load time.
device = "cuda" if torch.cuda.is_available() else "cpu"

print("Loading zero-shot classification model...")
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=device,
)

Loading zero-shot classification model...


Device set to use cuda


In [13]:
# Classify only the first 100 preprocessed tickets, passed to the pipeline as
# one list; single-label mode (multi_label=False).
result = classifier(
    preprocessed_tickets[:100],
    class_labels,
    multi_label=False,
)

In [14]:
# Keep just the first entry of each prediction's 'labels' list.
output = [prediction['labels'][0] for prediction in result]

In [15]:
# Show the predicted label for each classified ticket.
output

['Other/Miscellaneous',
 'Other/Miscellaneous',
 'Other/Miscellaneous',
 'Order Cancellations/Rescheduled Orders',
 'Order Modifications/Item Additions',
 'Order Modifications/Item Additions',
 'Order Modifications/Item Additions',
 'Delivery Issues/Late Deliveries',
 'Order Cancellations/Rescheduled Orders',
 'Order Cancellations/Rescheduled Orders',
 'Order Modifications/Item Additions',
 'Order Cancellations/Rescheduled Orders',
 'Pickup Scheduling & Rescheduling/Rescheduling Pickup',
 'Order Modifications/Item Additions',
 'Order Cancellations/Rescheduled Orders',
 'Order Cancellations/Rescheduled Orders',
 'Training & Account Access/Training Signups',
 'Pickup Scheduling & Rescheduling/Rescheduling Pickup',
 'Delivery Issues/Missed Deliveries',
 'Order Modifications/Item Additions',
 'Delivery Issues/Damaged Goods',
 'Delivery Issues/Late Deliveries',
 'Order Modifications/Item Removals',
 'Pickup Scheduling & Rescheduling/Pickup Policy Clarifications',
 'Order Modifications/Item 