In [None]:
!pip install faiss-cpu watermark crewai -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m52.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.1/337.1 kB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.3/8.3 MB[0m [31m99.0 MB/s[0m eta [3

In [None]:
# Colab only: attach Google Drive so the prebuilt index and ticket CSV are reachable.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
import os

import pandas as pd
import numpy as np

import faiss
from crewai import Agent, Task, Crew, LLM
from textwrap import dedent
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer



In [None]:
# Project folder on the mounted Drive. Anchored to the absolute mount point
# ("/content/drive") so file loading does not depend on the kernel's current
# working directory.
ROOT_PATH = "/content/drive/MyDrive/help-desk-tickets-prototype"

In [None]:
# Sentence encoder used to embed search queries; report its output vector size.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
d = embedder.get_sentence_embedding_dimension()
print("embedding vector length:", d)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

embedding vector length: 384


In [None]:
# Load the ticket table and the prebuilt FAISS index from the project folder.
# NOTE(review): retrieval maps FAISS ids to rows by position (df.iloc) —
# presumably the CSV row order matches the order vectors were added; confirm.
df = pd.read_csv(
    os.path.join(ROOT_PATH, "sample_utterances_drop_na.csv"),
    index_col=0,
)
index = faiss.read_index(os.path.join(ROOT_PATH, "faiss_index.idx"))

In [None]:
from crewai.tools import tool

@tool("Retrieve Similar Tickets")
def retrieve_similar_tickets(query: str, k: int = 10) -> str:
    """Retrieve the top k relevant help desk tickets from FAISS index.

    Args:
        query: Free-text description of the issue to search for.
        k: Maximum number of nearest neighbours to request from the index.

    Returns:
        The distinct `actionbody` texts of the matched tickets, one per line
        (an empty string when the index returns no neighbours).
    """
    # Coerce defensively: the value may not arrive as an int when the tool is
    # invoked by an agent, and FAISS requires a positive neighbour count.
    k = max(1, int(k))
    # FAISS searches over a 2-D float32 matrix with one row per query.
    query_vector = np.asarray(embedder.encode([query]), dtype="float32")
    _, neighbour_ids = index.search(query_vector, k)
    # Take the single query's row instead of squeeze(): squeeze() collapses the
    # ids to a scalar when k == 1, which breaks the DataFrame lookup below.
    row_ids = neighbour_ids[0]
    # FAISS pads with -1 when fewer than k neighbours exist; iloc[-1] would
    # silently return the last ticket, so drop the padding first.
    row_ids = row_ids[row_ids >= 0]
    tickets = df.iloc[row_ids]["actionbody"].drop_duplicates()
    return "\n".join(tickets.tolist())


In [None]:
# Language model shared by the agents defined below.
llm = LLM(model="huggingface/HuggingFaceTB/SmolLM3-3B")



# Define Agents
# Agent 1: rewrites the raw user text into a compact query for vector search.
query_preparation_agent = Agent(
    llm=llm,
    verbose=True,
    role="Help Desk Query Optimization Assistant",
    goal=(
        "Transform user-provided context into a clean, semantically rich query that maximizes "
        "the relevance of results retrieved from a FAISS vector database of help desk tickets."
    ),
    backstory=(
        "This agent specializes in interpreting short, potentially vague or noisy user inputs and "
        "converting them into refined search queries suitable for semantic vector search. "
        "Rather than relying on deep training over IT support data, it uses structured heuristics, "
        "linguistic simplification, synonym substitution, and keyword prioritization to craft "
        "queries that align well with the embedding space of support tickets. "
        "Its goal is to express the user's core issue using terminology likely to appear in "
        "resolved help desk tickets."
    ),
)

# Agent 2: runs the prepared query against the FAISS index.
query_retrieval_agent = Agent(
    llm=llm,
    verbose=True,
    role="Help Desk Retrieval Specialist",
    goal=(
        "Use optimized queries to retrieve the most relevant help desk tickets from the "
        "FAISS vector index."
    ),
    backstory=(
        "An expert in semantic search, this agent is responsible for retrieving "
        "similar historical help desk tickets using FAISS based on optimized queries."
    ),
)


# Task 1: turn the user's issue into a short query for semantic search.
task1 = Task(
    description=(
        "Take user input describing a help desk issue and prepare a refined, semantically optimized query. "
        "This query should be short (5–15 words), free of filler language, and focus on key technical terms "
        "that are likely to appear in similar help desk tickets. Avoid vague phrasing. Prioritize precision "
        "and relevance for vector-based semantic search.\n\n"
        # "{input}" is filled from the `inputs` dict given to crew.kickoff();
        # without this placeholder the user's issue never reaches the agent.
        "User issue: {input}"
    ),
    expected_output=(
        "A concise search query string (5–15 words) that captures the essence of the user's issue, "
        "rephrased using language typical of help desk tickets. The query should be well-suited "
        "for use with a FAISS vector search engine."
    ),
    agent=query_preparation_agent,
    async_execution=False,
    name="Prepare FAISS-Compatible Help Desk Query"
)

# Task 2: feed task1's query to the retrieval tool and return the matching tickets.
task2 = Task(
    name="Retrieve Help Desk Tickets",
    agent=query_retrieval_agent,
    context=[task1],
    tools=[retrieve_similar_tickets],
    description=(
        "Take the optimized query and retrieve the top 5–10 similar help desk tickets using "
        "the FAISS vector index. Use the available tool to run the search."
    ),
    expected_output="A string of ticket texts most similar to the input issue.",
    # NOTE(review): `function_args` does not look like a documented Task field —
    # confirm it has any effect on how the tool is called.
    function_args={"query": None, "k": 5},
    async_execution=False,
)

# Assemble the pipeline: query preparation (task1) followed by retrieval (task2).
crew = Crew(
    name="Help Desk Ticket Retrieval Crew",
    description="Transforms user issues into optimized queries and retrieves matching tickets using FAISS.",
    tasks=[task1, task2],
    agents=[query_preparation_agent, query_retrieval_agent],
    verbose=True,
)

# Example issue to run through the crew.
user_input = "My VPN keeps disconnecting every 10 minutes."

results = crew.kickoff(inputs={"input": user_input})

# kickoff() returns a CrewOutput object; iterating over it yields
# (field_name, value) pairs such as ('raw', ...) and ('pydantic', None) rather
# than tickets, so read the final answer text from `.raw` and print it line by line.
print("Top Similar Help Desk Tickets:")
for ticket in results.raw.splitlines():
    if ticket.strip():
        print("-", ticket)

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

[93m Maximum iterations reached. Requesting final answer.[00m


Output()

Top Similar Help Desk Tickets:
- ('raw', 'The following is the complete content of a relevant help desk ticket:\n\n"User is experiencing DNS resolution error after reboot. Rebooted the server, but DNS resolution error persists. Check DNS settings, ensure the server\'s IP is correctly configured. Also, verify that the DNS server\'s IP address is correctly set in the server\'s network configuration. If the issue persists, it may be a DNS server issue, in which case the DNS server should be contacted for assistance."')
- ('pydantic', None)
- ('json_dict', None)
- ('tasks_output', [TaskOutput(description='Take user input describing a help desk issue and prepare a refined, semantically optimized query. This query should be short (5–15 words), free of filler language, and focus on key technical terms that are likely to appear in similar help desk tickets. Avoid vague phrasing. Prioritize precision and relevance for vector-based semantic search.', name='Prepare FAISS-Compatible Help Desk Quer

In [None]:
# Log environment details via the watermark extension so the run can be reproduced.
# %reload_ext (instead of %load_ext) keeps this cell safe to re-run.
%reload_ext watermark
%watermark
%watermark --iversions