#### Aim: Ticket Resolution System

- Provide a quick demo of the underlying functionality

In [7]:
import re
import string
import os
import json
from typing import List, Dict, Any, Optional

import pandas as pd
import numpy as np

from sentence_transformers import SentenceTransformer
import hnswlib

from huggingface_hub import InferenceClient

In [3]:
# Existing, already-resolved tickets: load them and report (rows, columns).
resolved_tickets_df = pd.read_csv('../data/combined_data.csv')
print(resolved_tickets_df.shape)

# Newly raised tickets: load them and report (rows, columns) as well.
new_tickets_df = pd.read_csv('../data/new_tickets.csv')
print(new_tickets_df.shape)

(30, 8)
(10, 5)


In [4]:
# updated version with threshold and index initialisation in place
class TicketMatchingSystem:
    """
    Nearest-neighbour search over previously handled tickets.

    Each ticket is turned into a single string (issue + category + description),
    embedded with a sentence-transformer model and stored in an hnswlib cosine
    index. Index labels are the row positions of the tickets in the CSV, so the
    index and the CSV it was built from must always be used together.
    """

    # Fallback embedding size, used only if the model cannot report its own
    # dimension (384 is the output size of all-MiniLM-L6-v2).
    DEFAULT_DIM = 384
    # Minimum hnswlib search-time `ef`; higher values trade speed for recall.
    EF_SEARCH = 50
    # Case-insensitive string spellings that count as a "resolved" flag.
    _TRUE_STRINGS = frozenset({"true", "yes", "y", "1", "resolved"})

    def __init__(self, resolved_tickets_data_path='../data/combined_data.csv', model_name='sentence-transformers/all-MiniLM-L6-v2', index_path=None):
        """
        Initialize the TicketMatchingSystem.

        Args:
            resolved_tickets_data_path (str): Path to CSV file containing resolved ticket data. Required parameter.
            model_name (str): Name of the sentence transformer model.
            index_path (str, optional): Path to a pre-built index. If provided, the index will be
                loaded from disk; otherwise a new index is built from the CSV and saved.

        Raises:
            ValueError: If no data path is given, or if a loaded index does not contain
                the same number of items as the CSV has rows.
            FileNotFoundError: If the data file or the given index file does not exist.
        """
        # Validate inputs before the (slow) model load so mistakes fail fast and
        # never leave a half-initialised instance behind.
        if not resolved_tickets_data_path:
            raise ValueError("Need a data file to initialize the system")
        if not os.path.exists(resolved_tickets_data_path):
            raise FileNotFoundError(f"Resolved tickets data file not found at {resolved_tickets_data_path}")
        if index_path and not os.path.exists(index_path):
            raise FileNotFoundError(f"Index file not found at {index_path}")

        self.model = SentenceTransformer(model_name)
        self.index = None
        self.ticket_ids = []
        self.resolved_tickets_data = None
        # Ask the model for its embedding size instead of assuming MiniLM's 384,
        # so any `model_name` works.
        self.dim = self.model.get_sentence_embedding_dimension() or self.DEFAULT_DIM
        self.index_path = index_path
        self.resolved_tickets_data_path = resolved_tickets_data_path

        if index_path:
            self.load_index(index_path)
            self.load_resolved_tickets_data(resolved_tickets_data_path)
            # Labels in the index are CSV row positions; a size mismatch means the
            # index was built from a different version of the data.
            indexed = self.index.get_current_count()
            if indexed != len(self.ticket_ids):
                raise ValueError(
                    f"Index at {index_path} holds {indexed} items but "
                    f"{resolved_tickets_data_path} has {len(self.ticket_ids)} rows; rebuild the index"
                )
        else:
            self.build_index_from_csv(resolved_tickets_data_path)

    @staticmethod
    def _as_bool(value):
        """Interpret a 'Resolved' cell as a bool; missing values (None/NaN) count as False."""
        if value is None:
            return False
        if isinstance(value, str):
            return value.strip().lower() in TicketMatchingSystem._TRUE_STRINGS
        try:
            if pd.isna(value):
                return False
        except (TypeError, ValueError):
            # Non-scalar values cannot be NaN-checked; fall through to truthiness.
            pass
        return bool(value)

    def create_ticket_string(self, issue, category, description):
        """Combine ticket fields into a single string representation (missing fields become empty)."""
        if pd.isna(issue):
            issue = ""
        if pd.isna(category):
            category = ""
        if pd.isna(description):
            description = ""

        return f"{issue} {category} {description}".strip()

    def generate_embeddings(self, texts):
        """
        Generate embeddings for text(s).

        A single string yields a single vector; any other input is passed to the
        model as a batch and yields one vector per item.
        """
        if isinstance(texts, str):
            return self.model.encode([texts])[0]
        return self.model.encode(texts)

    def build_index_from_csv(self, csv_path, save_path="ticket_index.bin"):
        """
        Build search index from CSV file of tickets and optionally save it to disk.

        Args:
            csv_path (str): Path to CSV file containing ticket data. Must have a
                'Ticket ID' column; 'Issue', 'Category' and 'Description' are used when present.
            save_path (str, optional): Path to save the index. Pass None (or an empty
                string) to keep the index in memory only.

        Raises:
            ValueError: If the CSV contains no rows.
            KeyError: If the CSV has no 'Ticket ID' column.
        """
        df = pd.read_csv(csv_path)
        if df.empty:
            raise ValueError(f"No tickets found in {csv_path}; cannot build an index")

        ticket_ids = df['Ticket ID'].tolist()

        def column(name):
            # A missing column behaves like a column of empty strings.
            return df[name] if name in df.columns else [''] * len(df)

        ticket_strings = [
            self.create_ticket_string(issue, category, description)
            for issue, category, description in zip(
                column('Issue'), column('Category'), column('Description')
            )
        ]

        embeddings = np.asarray(self.generate_embeddings(ticket_strings))
        n_elements, dim = embeddings.shape

        index = hnswlib.Index(space='cosine', dim=dim)
        index.init_index(max_elements=n_elements, ef_construction=200, M=16)
        # Labels are row positions, which is what find_similar_tickets relies on.
        index.add_items(embeddings, np.arange(n_elements))
        index.set_ef(self.EF_SEARCH)  # ef influences search accuracy

        # Commit state only once everything above succeeded.
        self.resolved_tickets_data = df
        self.ticket_ids = ticket_ids
        self.dim = dim
        self.index = index

        # Save index for re-use
        if save_path:
            self.save_index(save_path)

    def save_index(self, save_path):
        """Save the index to disk. Raises ValueError if no index has been built or loaded."""
        if self.index is None:
            raise ValueError("Index has not been built yet")
        self.index.save_index(save_path)
        print(f"Index saved to {save_path}")

    def load_index(self, load_path):
        """Load the index from disk, using the embedding dimension stored in self.dim."""
        self.index = hnswlib.Index(space='cosine', dim=self.dim)
        self.index.load_index(load_path)
        self.index.set_ef(self.EF_SEARCH)  # Set ef for search
        print(f"Index loaded from {load_path}")

    def load_resolved_tickets_data(self, resolved_tickets_data_path):
        """Load the resolved tickets DataFrame from disk and cache its 'Ticket ID' column."""
        self.resolved_tickets_data = pd.read_csv(resolved_tickets_data_path)
        self.ticket_ids = self.resolved_tickets_data['Ticket ID'].tolist()
        print(f"Resolved tickets data loaded from {resolved_tickets_data_path}")

    def find_similar_tickets(self, issue, category, description, k=3, similarity_threshold=0.5):
        """
        Find similar tickets to a query ticket

        Args:
            issue (str): Issue title
            category (str): Ticket category
            description (str): Ticket description
            k (int): Number of nearest neighbors to retrieve. Values larger than the
                number of indexed tickets are clamped.
            similarity_threshold (float): Minimum similarity score threshold (default: 0.5)

        Returns:
            list[dict]: Matches with keys 'ticket_id', 'similarity_score' (float,
            1 - cosine distance), 'issue', 'category', 'description', 'resolved' (bool)
            and 'resolution'. Resolved tickets come first; within each group the
            most similar ticket comes first. Empty when nothing passes the threshold.

        Raises:
            ValueError: If the index or the ticket data has not been loaded.
        """
        if self.index is None:
            raise ValueError("Index has not been built yet")
        if self.resolved_tickets_data is None:
            raise ValueError("Resolved tickets data has not been loaded yet")

        # hnswlib raises if asked for more neighbours than it holds, so clamp k.
        k = min(int(k), self.index.get_current_count(), len(self.ticket_ids))
        if k <= 0:
            return []
        # Search-time ef must not be smaller than k.
        self.index.set_ef(max(self.EF_SEARCH, k))

        query_string = self.create_ticket_string(issue, category, description)
        query_embedding = np.asarray(self.generate_embeddings(query_string))

        labels, distances = self.index.knn_query(query_embedding.reshape(1, -1), k=k)

        results = []
        for label, dist in zip(labels[0], distances[0]):
            similarity_score = float(1 - dist)
            if similarity_score < similarity_threshold:
                continue

            # The label is the CSV row position; using it directly stays correct
            # even when several rows share the same 'Ticket ID'.
            row_pos = int(label)
            ticket_row = self.resolved_tickets_data.iloc[row_pos]

            results.append({
                'ticket_id': self.ticket_ids[row_pos],
                'similarity_score': similarity_score,
                'issue': ticket_row.get('Issue', ''),
                'category': ticket_row.get('Category', ''),
                'description': ticket_row.get('Description', ''),
                'resolved': self._as_bool(ticket_row.get('Resolved', False)),
                'resolution': ticket_row.get('Resolution', ''),
            })

        # Resolved tickets first (False sorts before True), then by descending similarity.
        results.sort(key=lambda match: (not match['resolved'], -match['similarity_score']))

        return results

In [5]:
def test_system_with_existing_index(new_ticket=None, k=4, index_path="ticket_index.bin", resolved_tickets_data_path="../data/combined_data.csv"):
    """
    Demo: load the saved index, query it with one ticket and print the matches.

    Args:
        new_ticket (dict, optional): Query ticket with 'Issue', 'Category' and
            'Description' keys. Defaults to the WiFi-printer example. Other
            queries worth trying:
              - 'Cannot open files in shared drive' / 'Network' /
                'User reports an error when trying to open files in the shared drive.'
              - 'VPN connection timeout' / 'Network' /
                'VPN connection times out frequently during use.'
              - 'New software installation request' / 'Software' /
                'A request to install new project management software.'
              - 'Virus discovered' / 'Malware' /
                'There is a malware discovered circulating in office machines. We need immediate cleanup!'
        k (int): Number of nearest neighbours to request.
        index_path (str): Path of the saved index to load.
        resolved_tickets_data_path (str): CSV the index was built from.
    """
    if new_ticket is None:
        new_ticket = {
            'Issue': 'Printer not connecting to WiFi',
            'Category': 'Hardware',
            'Description': 'WiFi printer is not connecting to any devices in the office.'
        }

    # On a fresh checkout the index file may not exist yet. Passing
    # index_path=None makes TicketMatchingSystem build the index from the CSV
    # (and save it under its default name) instead of raising FileNotFoundError.
    if not os.path.exists(index_path):
        print(f"Index file {index_path} not found; building a new index from {resolved_tickets_data_path}")
        index_path = None

    system = TicketMatchingSystem(
        index_path=index_path,
        resolved_tickets_data_path=resolved_tickets_data_path
    )

    results = system.find_similar_tickets(
        new_ticket['Issue'],
        new_ticket['Category'],
        new_ticket['Description'],
        k=k
    )

    print("Query: ", new_ticket)
    print("\nMatching tickets:")
    for i, result in enumerate(results, 1):
        print(f"\n{i}. Ticket ID: {result['ticket_id']}")
        print(f"   Issue: {result['issue']}")
        print(f"   Category: {result['category']}")
        print(f"   Similarity Score: {result['similarity_score']:.4f}")
        print(f"   Resolved: {result['resolved']}")
        # A resolution is only meaningful for tickets that were actually resolved.
        if result['resolved']:
            print(f"   Resolution: {result['resolution']}")
        print(f"   Description: {result['description']}")

if __name__ == "__main__":
    # Run the demo with the default query against the saved index.
    test_system_with_existing_index()

Index loaded from ticket_index.bin
Resolved tickets data loaded from ../data/combined_data.csv
Query:  {'Issue': 'Printer not connecting to WiFi', 'Category': 'Hardware', 'Description': 'WiFi printer is not connecting to any devices in the office.'}

Matching tickets:

1. Ticket ID: TCKT-1044
   Issue: Printer connectivity problem
   Category: Hardware
   Similarity Score: 0.8326
   Resolved: True
   Resolution: Printer driver reinstalled
   Description: Printer not connecting to any devices in the office. This is a recurring issue.

2. Ticket ID: TKT1001
   Issue: Printer not connecting
   Category: Hardware
   Similarity Score: 0.8015
   Resolved: True
   Resolution: Update software
   Description: Network printer is not connecting to the office network, making it impossible for employees to print documents.

3. Ticket ID: TCKT-1054
   Issue: Printer printing blank pages
   Category: Hardware
   Similarity Score: 0.5386
   Resolved: True
   Resolution: Printer maintenance performed
 

In [18]:
# Integrated solution that also handles the case where no similar tickets are found
from ticket_matching_system import TicketMatchingSystem

HF_API_KEY = os.getenv("HUGGINGFACE_API_KEY")

class TicketResolutionSystem:
    """
    Drafts an agent-ready reply for a new ticket from similar past tickets.

    The similar tickets are expected in the shape returned by
    TicketMatchingSystem.find_similar_tickets (keys: ticket_id, similarity_score,
    issue, category, description, resolved, resolution).
    """

    # Case-insensitive string spellings that count as a "resolved" flag.
    _TRUE_STRINGS = frozenset({"true", "yes", "y", "1", "resolved"})

    def __init__(self, model_id: str = "mistralai/Mixtral-8x7B-Instruct-v0.1", token: Optional[str] = None):
        """
        Initialize the Ticket Resolution System.

        Args:
            model_id: The HuggingFace model ID to use for inference
            token: HuggingFace API token. Defaults to the module-level HF_API_KEY
                (read from the HUGGINGFACE_API_KEY environment variable).
        """
        self.client = InferenceClient(model=model_id, token=token if token is not None else HF_API_KEY)

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True for None and NaN-like scalars (NaN is truthy, so `not value` misses it)."""
        if value is None:
            return True
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            # Non-scalar values (lists, arrays) are never treated as missing.
            return False

    @classmethod
    def _text(cls, value: Any) -> str:
        """Render a field as text, mapping missing values to an empty string instead of 'nan'/'None'."""
        return "" if cls._is_missing(value) else str(value)

    @classmethod
    def _as_bool(cls, value: Any) -> bool:
        """Interpret a 'resolved' flag; missing values and strings like 'False'/'no' count as False."""
        if cls._is_missing(value):
            return False
        if isinstance(value, str):
            return value.strip().lower() in cls._TRUE_STRINGS
        return bool(value)

    @classmethod
    def _to_serializable(cls, ticket: Dict[str, Any]) -> Dict[str, Any]:
        """Convert one similar-ticket dict to plain Python types that json.dumps accepts."""
        score = ticket.get("similarity_score")
        return {
            "ticket_id": cls._text(ticket.get("ticket_id")),
            "similarity_score": 0.0 if cls._is_missing(score) else float(score),
            "issue": cls._text(ticket.get("issue")),
            "category": cls._text(ticket.get("category")),
            "description": cls._text(ticket.get("description")),
            "resolved": cls._as_bool(ticket.get("resolved")),
            "resolution": cls._text(ticket.get("resolution")),
        }

    def _chat(self, system_prompt: str, user_content: str, **generation_kwargs: Any) -> str:
        """Send one system + user exchange to the model and return the reply text ('' if the model returned none)."""
        response = self.client.chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            **generation_kwargs,
        )
        content = response['choices'][0]['message']['content']
        return content if content is not None else ""

    def generate_response(self, new_ticket: Dict[str, Any], similar_tickets: List[Dict[str, Any]]) -> str:
        """
        Generate a coherent response for a new ticket based on similar past tickets.

        Args:
            new_ticket: Dictionary containing the new ticket information
            similar_tickets: List of dictionaries containing similar tickets from vector search

        Returns:
            A coherent response that can be used by a human agent. When
            `similar_tickets` is empty, a fixed "no matching tickets" message
            wrapping a short model suggestion is returned instead.
        """
        # default=str keeps numpy/pandas scalars (e.g. values taken from a
        # DataFrame row) from breaking JSON serialisation.
        ticket_json = json.dumps(new_ticket, indent=2, default=str)

        if not similar_tickets:
            # Nothing to ground the answer on: ask for a very short suggestion
            # with near-deterministic sampling.
            suggestion = self._chat(
                "You are a technical support assistant. Provide a very brief solution suggestion (max 15 words) for the following issue ONLY if you are highly confident. If not confident, respond with 'No immediate solution available.'",
                f"Issue: {ticket_json}",
                max_tokens=50,
                temperature=0.1,  # Very low temperature for high confidence
                top_p=0.1,
            )
            return f"No matching tickets found in the database.\n\nSuggested direction: {suggestion}\n\nBest, your Smart assistant"

        serializable_tickets = [self._to_serializable(ticket) for ticket in similar_tickets]
        resolved_tickets = [ticket for ticket in serializable_tickets if ticket["resolved"]]

        # Prefer proven resolutions; otherwise describe the open, similar tickets.
        if resolved_tickets:
            prompt = self._craft_prompt_for_resolved_tickets(new_ticket, resolved_tickets)
        else:
            prompt = self._craft_prompt_for_unresolved_tickets(new_ticket, serializable_tickets)

        return self._chat(
            prompt,
            f"New Ticket: {ticket_json}",
            max_tokens=1024,
            temperature=0.3,  # Lower temperature for more focused responses
            top_p=0.95,
        )

    def _craft_prompt_for_resolved_tickets(self, new_ticket: Dict[str, Any], resolved_tickets: List[Dict[str, Any]]) -> str:
        """
        Craft a prompt for when there are resolved similar tickets.

        `new_ticket` is not embedded here; generate_response sends it as the user message.
        """
        prompt = f"""
        You are an AI assistant that helps Human Agents respond to support tickets. 
        
        I will provide you with a new support ticket and details from {len(resolved_tickets)} similar resolved tickets from our database.
        
        Your task is to:
        1. Analyze the new ticket and the resolved similar tickets
        2. Create a coherent response that addresses the new ticket's issue
        3. Include the most relevant solution from the resolved tickets
        4. End the message by saying: Best, your Smart assistant 
        Here are the similar resolved tickets:
        {json.dumps(resolved_tickets, indent=2)}
        
        Please create a response that the agent can use to address the new ticket. Be concise but comprehensive.
        """
        return prompt

    def _craft_prompt_for_unresolved_tickets(self, new_ticket: Dict[str, Any], similar_tickets: List[Dict[str, Any]]) -> str:
        """
        Craft a prompt for when there are no resolved similar tickets.

        `new_ticket` is not embedded here; generate_response sends it as the user message.
        """
        prompt = f"""
        You are an AI assistant that helps Human Agents respond to support tickets.
        
        I will provide you with a new support ticket and details from {len(similar_tickets)} similar tickets from our database, but none of these similar tickets have been resolved.
        
        Your task is to:
        1. Analyze the new ticket and the similar unresolved tickets
        2. Create a coherent response that acknowledges the ongoing nature of this issue
        3. Share details about the similar tickets and what approaches did not work
        4. Suggest potential next steps based on the history of attempts
        5. Format your response to be ready for a human agent to review and send
        6. End the message by saying: Best, your Smart assistant
        
        Here are the similar unresolved tickets:
        {json.dumps(similar_tickets, indent=2)}
        
        Please create a response that the agent can use to address the new ticket, acknowledging that we don't have a proven solution yet.
        """
        return prompt

In [19]:

# Example usage: retrieve similar tickets, print them, then draft a reply.
if __name__ == "__main__":
    # Initialize the TicketMatchingSystem.
    # Load the saved index when it exists; otherwise pass index_path=None so the
    # index is built from the CSV (and saved as ticket_index.bin) instead of
    # failing with FileNotFoundError on a fresh checkout.
    system = TicketMatchingSystem(
        index_path="ticket_index.bin" if os.path.exists("ticket_index.bin") else None,
        resolved_tickets_data_path="../data/combined_data.csv"
    )

    # Initialize the TicketResolutionSystem
    resolution_system = TicketResolutionSystem()

    # Sample new ticket. Alternative queries to try:
    #   {'Issue': 'Virus discovered', 'Category': 'Malware',
    #    'Description': 'There is a malware discovered circulating in office machines. We need immediate cleanup!'}
    #   {'Issue': 'New software installation request', 'Category': 'Software',
    #    'Description': 'A request to install new project management software.'}
    new_ticket = {
        'Issue': 'Printer not connecting to WiFi',
        'Category': 'Hardware',
        'Description': 'WiFi printer is not connecting to any devices in the office.'
    }

    # Find similar tickets using the TicketMatchingSystem
    similar_tickets = system.find_similar_tickets(
        new_ticket["Issue"],
        new_ticket["Category"],
        new_ticket["Description"],
        k=3
    )

    print("User Query: ", new_ticket)
    print("\nMatching tickets:")
    for i, result in enumerate(similar_tickets, 1):
        print(f"\n{i}. Ticket ID: {result['ticket_id']}")
        print(f"   Issue: {result['issue']}")
        print(f"   Category: {result['category']}")
        print(f"   Similarity Score: {result['similarity_score']:.4f}")
        print(f"   Resolved: {result['resolved']}")
        # A resolution is only meaningful for tickets that were actually resolved.
        if result['resolved']:
            print(f"   Resolution: {result['resolution']}")
        print(f"   Description: {result['description']}")

    # Generate the response using the TicketResolutionSystem
    response = resolution_system.generate_response(new_ticket, similar_tickets)
    print(response)

Index loaded from ticket_index.bin
Resolved tickets data loaded from ../data/combined_data.csv
User Query:  {'Issue': 'Printer not connecting to WiFi', 'Category': 'Hardware', 'Description': 'WiFi printer is not connecting to any devices in the office.'}

Matching tickets:

1. Ticket ID: TCKT-1044
   Issue: Printer connectivity problem
   Category: Hardware
   Similarity Score: 0.8326
   Resolved: True
   Resolution: Printer driver reinstalled
   Description: Printer not connecting to any devices in the office. This is a recurring issue.

2. Ticket ID: TKT1001
   Issue: Printer not connecting
   Category: Hardware
   Similarity Score: 0.8015
   Resolved: True
   Resolution: Update software
   Description: Network printer is not connecting to the office network, making it impossible for employees to print documents.

3. Ticket ID: TCKT-1054
   Issue: Printer printing blank pages
   Category: Hardware
   Similarity Score: 0.5386
   Resolved: True
   Resolution: Printer maintenance perfor

-----
EOF