<a href="https://colab.research.google.com/github/ananyaarya02/TicketMind/blob/main/main_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install sentence-transformers faiss-cpu
!pip install google-generativeai



In [None]:
import os
import pickle
import re
import string
from getpass import getpass

import faiss
import google.generativeai as genai
import nltk
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sentence_transformers import SentenceTransformer
# Fetch the NLTK resources used by the preprocessing step.
# NOTE(review): newer NLTK releases appear to look up the POS tagger under
# 'averaged_perceptron_tagger_eng'; the legacy name is kept for older releases.
# An unknown resource id only makes nltk.download() report an error, so
# requesting both names is harmless — confirm against the installed version.
for _nltk_resource in (
    'stopwords',
    'punkt',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
):
    nltk.download(_nltk_resource)






In [None]:
import joblib

# NOTE: joblib/pickle files can execute arbitrary code while loading —
# only load artifacts that come from a trusted source.

# Vectorizers and the multi-output classifier used by predict_labels.
tfidf_subject = joblib.load("/content/tfidf_subject.pkl")
tfidf_body = joblib.load("/content/tfidf_body.pkl")
clf = joblib.load("/content/multiclassifier_model.pkl")

# FAISS index plus the metadata container looked up by the ids it returns.
index = faiss.read_index("faiss1.index")
# Use a context manager so the file handle is closed once unpickling finishes.
with open("metadata2.pkl", "rb") as metadata_file:
    metadata = pickle.load(metadata_file)

# Sentence encoder used to embed incoming tickets for the FAISS search.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
print(" All ML models loaded")


 All ML models loaded


In [None]:
# Never hardcode or commit a real key: read it from the GOOGLE_API_KEY
# environment variable, or prompt for it (input hidden) when it is not set.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY") or getpass("Gemini API key: "))

In [None]:
# Identifier of the generative model that generate_answer sends prompts to.
GEMINI_MODEL_NAME = "models/gemini-2.5-flash"
model = genai.GenerativeModel(GEMINI_MODEL_NAME)


In [None]:
# English stopword list held as a set; preprocess_text filters tokens against it.
stop_words = set(stopwords.words('english'))
# Single lemmatizer instance shared by every preprocess_text call.
lemmatizer = WordNetLemmatizer()

def get_wordnet_pos(tag):
    """Translate a POS tag into the one-letter code passed to the lemmatizer.

    The tag is matched on its leading letter: ``J`` -> ``'a'``,
    ``V`` -> ``'v'``, ``N`` -> ``'n'``, ``R`` -> ``'r'``. Any other tag
    (including an empty one) falls back to ``'n'``.

    Args:
        tag: POS tag string, e.g. the second item of a pair from ``nltk.pos_tag``.

    Returns:
        One of ``'a'``, ``'v'``, ``'n'`` or ``'r'``.
    """
    prefix_to_pos = (('J', 'a'), ('V', 'v'), ('N', 'n'), ('R', 'r'))
    for prefix, pos_code in prefix_to_pos:
        if tag.startswith(prefix):
            return pos_code
    # Unrecognised tags are lemmatized as nouns.
    return 'n'

def preprocess_text(text):
    """Normalize raw ticket text into a space-joined string of lemmas.

    Steps: strip HTML markup, lowercase, delete punctuation characters,
    split on non-word characters, drop stopwords and empty tokens, then
    POS-tag the remaining tokens and lemmatize each one with the matching
    WordNet POS code.

    Args:
        text: Raw text. Anything that is not a ``str`` (e.g. ``None`` or NaN)
            is treated as missing.

    Returns:
        The cleaned text as a single string; ``""`` for non-string input.
    """
    if not isinstance(text, str):
        return ""

    # 1. remove HTML
    text = BeautifulSoup(text, "html.parser").get_text()

    # 2. lowercase
    text = text.lower()

    # 3. remove punctuation (characters are deleted, not replaced by spaces)
    text = "".join(ch for ch in text if ch not in string.punctuation)

    # 4. tokenize — raw string so that \W is not parsed as an (invalid)
    #    string escape, which newer Python versions warn about
    tokens = re.split(r'\W+', text)

    # 5. remove stopwords & empty tokens
    tokens = [t for t in tokens if t and t not in stop_words]

    # 6. POS tagging + lemmatization
    tagged = nltk.pos_tag(tokens)
    lemmatized = [
        lemmatizer.lemmatize(word, get_wordnet_pos(tag))
        for word, tag in tagged
    ]
    return " ".join(lemmatized)


In [None]:
def predict_labels(subject, body):
    """Classify a ticket's type, queue and priority from its subject and body.

    Both texts are cleaned with ``preprocess_text``, vectorized with their own
    TF-IDF vectorizer, concatenated into one dense feature row and passed to
    the multi-output classifier.

    Args:
        subject: Ticket subject line.
        body: Ticket body text.

    Returns:
        A ``(labels, confidence)`` tuple of dicts keyed by ``"type"``,
        ``"queue"`` and ``"priority"``. ``labels`` holds the predicted class
        per target; ``confidence`` holds the highest class probability
        reported by ``predict_proba`` for that target.
    """
    subject_clean = preprocess_text(subject)
    body_clean = preprocess_text(body)

    # Vectorize the *cleaned* text. The original code computed the cleaned
    # strings but then transformed the raw inputs, discarding preprocessing.
    # Assumes the vectorizers were fitted on preprocess_text output —
    # TODO confirm against the training notebook.
    X_sub = tfidf_subject.transform([subject_clean])
    X_body = tfidf_body.transform([body_clean])

    X = np.hstack([X_sub.toarray(), X_body.toarray()])
    preds = clf.predict(X)[0]
    # One probability array per target, in the same order as `preds`.
    probs = clf.predict_proba(X)

    targets = ("type", "queue", "priority")
    labels = {name: preds[pos] for pos, name in enumerate(targets)}
    confidence = {
        name: float(np.max(probs[pos])) for pos, name in enumerate(targets)
    }

    return labels, confidence



In [None]:
def retrieve_similar(subject, body, k=3):
    """Look up the stored tickets most similar to the given one.

    The subject and body are joined with a space, embedded with the sentence
    encoder, and searched against the FAISS index.

    Args:
        subject: Ticket subject line.
        body: Ticket body text.
        k: Number of neighbours requested from the index.

    Returns:
        A DataFrame built from the metadata entries of the neighbours found.
        It has fewer than ``k`` rows (possibly none) when the index cannot
        supply ``k`` results.
    """
    text = subject + " " + body
    emb = embedder.encode([text]).astype("float32")

    # Only the neighbour ids are used; the distances are discarded.
    _, indices = index.search(emb, k)

    # FAISS fills missing results with id -1; indexing metadata with -1 would
    # silently return an unrelated entry, so those slots are skipped.
    rows = [metadata[i] for i in indices[0] if i >= 0]

    return pd.DataFrame(rows)


In [None]:
def validate_with_rag(predicted_labels, retrieved_df, threshold=0.6):
    """Cross-check predicted labels against the labels of retrieved tickets.

    For each of ``"type"``, ``"queue"`` and ``"priority"``, the predicted
    label is kept when the fraction of retrieved rows carrying the same label
    is at least ``threshold``; otherwise it is replaced by the most frequent
    label among the retrieved rows.

    Args:
        predicted_labels: Dict with keys ``"type"``, ``"queue"``, ``"priority"``.
        retrieved_df: DataFrame of retrieved tickets, expected to have one
            column per label.
        threshold: Minimum agreement fraction needed to keep a prediction.

    Returns:
        A new dict with the final label for each of the three keys.
    """
    final_labels = {}

    for label in ["type", "queue", "priority"]:
        predicted = predicted_labels[label]

        # Nothing to validate against (no rows retrieved, column missing, or
        # only null values): keep the prediction instead of raising.
        if label not in retrieved_df.columns:
            final_labels[label] = predicted
            continue
        neighbour_labels = retrieved_df[label]
        if neighbour_labels.dropna().empty:
            final_labels[label] = predicted
            continue

        agreement = (neighbour_labels == predicted).mean()

        if agreement >= threshold:
            final_labels[label] = predicted
        else:
            # Majority vote among the retrieved tickets (nulls are ignored).
            final_labels[label] = neighbour_labels.value_counts().idxmax()

    return final_labels


In [None]:
def generate_answer(subject, body, labels, retrieved):
    """Ask the generative model to draft a support reply for a ticket.

    The prompt contains the ticket text, the final labels and the non-null
    ``answer`` values of the retrieved tickets as prior solutions.

    Args:
        subject: Ticket subject line.
        body: Ticket body text.
        labels: Dict with keys ``"type"``, ``"queue"``, ``"priority"``.
        retrieved: DataFrame of similar tickets; its ``"answer"`` column, when
            present, supplies the context.

    Returns:
        The text of the model's response.
    """
    # An empty retrieval result has no "answer" column; send no context then.
    # Values are cast to str so non-string answers cannot break the join.
    if "answer" in retrieved:
        context = "\n".join(str(ans) for ans in retrieved["answer"].dropna())
    else:
        context = ""

    prompt = f"""
You are a customer support agent.

Ticket:
{subject}
{body}

Predicted labels:
Type: {labels['type']}
Queue: {labels['queue']}
Priority: {labels['priority']}

Previous solutions:
{context}

Write a helpful and professional response.
"""

    response = model.generate_content(prompt)
    return response.text


In [None]:
def predict(subject, body):
    """Run the full pipeline for one ticket: classify, retrieve, validate, answer.

    Args:
        subject: Ticket subject line.
        body: Ticket body text.

    Returns:
        Dict with the final ``"type"``, ``"queue"`` and ``"priority"`` labels,
        the retrieved tickets under ``"similar tickets response"``, the
        classifier ``"confidence"`` values and the generated ``"answer"``.
        The confidence values describe the classifier's own predictions,
        which ``validate_with_rag`` may have replaced in the final labels.
    """
    classifier_labels, confidence = predict_labels(subject, body)
    neighbours = retrieve_similar(subject, body)
    final_labels = validate_with_rag(classifier_labels, neighbours)
    reply = generate_answer(subject, body, final_labels, neighbours)

    result = {key: final_labels[key] for key in ("type", "queue", "priority")}
    result["similar tickets response"] = neighbours
    result["confidence"] = confidence
    result["answer"] = reply
    return result



In [None]:
# Run the whole pipeline once on a sample ticket.
sample_ticket = {
    "subject": "Application crashes frequently",
    "body": "The analytics platform crashes whenever I upload a PDF report.",
}
result = predict(**sample_ticket)



In [None]:
def pretty_print_response(res):
    """Print a prediction result as a plain-text report.

    Args:
        res: Dict as returned by ``predict``: final labels under ``"type"``,
            ``"queue"`` and ``"priority"``, a ``"confidence"`` dict with the
            same three keys, a ``"similar tickets response"`` object whose
            ``"answer"`` entry is iterable, and the generated ``"answer"``.
    """
    rule = "-" * 40
    conf = res['confidence']

    # Section headers use the intended emoji; the source had them mis-encoded.
    print("\n🎫 TICKET CLASSIFICATION")
    print(rule)
    print(f"Type     : {res['type']}  (confidence: {conf['type']:.2f})")
    print(f"Queue    : {res['queue']} (confidence: {conf['queue']:.2f})")
    print(f"Priority : {res['priority']} (confidence: {conf['priority']:.2f})")

    print("\n SIMILAR TICKETS EARLIER RESPONSE")
    print(rule)
    for i, ans in enumerate(res['similar tickets response']['answer'], start=1):
        print(f"\n{i}. {ans}")

    print("\n📝 GENERATED RESPONSE")
    print(rule)
    print(res["answer"])


# Render the sample prediction stored in `result` as a text report.
pretty_print_response(result)



🎫 TICKET CLASSIFICATION
----------------------------------------
Type     : Incident  (confidence: 0.96)
Queue    : Technical Support (confidence: 0.89)
Priority : medium (confidence: 1.00)

 SIMILAR TICKETS EARLIER RESPONSE
----------------------------------------

1. Please assist in troubleshooting the crash of the Data Analytics Platform. Kindly provide any error messages for further analysis.

2. Please investigate the crash issue with the analytics software and contact us at <tel_num> for further assistance.

3. Please investigate the problem and contact <tel_num> for assistance in resolving the analytics software crash.

📝 GENERATED RESPONSE
----------------------------------------
Hello,

Thank you for reaching out and reporting this issue. I understand that your Data Analytics Platform is crashing frequently, specifically when you upload PDF reports. I apologize for the inconvenience this is causing you.

To help us investigate and resolve this issue as quickly as possi