In [1]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import FAISS # Using FAISS for efficient in-memory vector search
from langchain_core.documents import Document # To structure our data for LangChain
import torch
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --- Configuration ---
csv_file_path = 'tickets.csv'
issue_column_name = 'Issue'
resolution_column_name = 'Resolution'
embedding_model_name = 'mixedbread-ai/mxbai-embed-large-v1'
vector_db_path = "faiss_index_pool" # Directory to save/load FAISS index

In [3]:
print(f"--- Loading data from '{csv_file_path}' ---")
if not os.path.exists(csv_file_path):
    print(f"Error: The file '{csv_file_path}' was not found.")
    print("Please ensure the CSV file is in the same directory as this notebook or provide the full path.")
    # You might want to upload your CSV via VS Code's explorer if it's not there.
    # If running in Colab, use files.upload() or mount Google Drive.
    raise FileNotFoundError(f"CSV file not found: {csv_file_path}")

--- Loading data from 'tickets.csv' ---


In [None]:
# --- 1. Load Data ---
df = pd.read_csv(csv_file_path)
required_columns = [issue_column_name, resolution_column_name]

df = df[required_columns]
df.head()

Unnamed: 0,Issue,Resolution
0,Run solve having error,"HXVaf have nsmusd for 2 conversion groups, whi..."
1,OneMPS does not have RTF for 2025Q1 for all so...,RC: extra 2025Q1 quarter in weekly version du...
2,Error to pull Limiter Chart By VGG report. FVL...,The issue has been fixed. Pls help to verify -
3,"can't save "" modify conversion group'",Explaination provided: Need to save record fir...
4,,HX_CT_MB_ARROW_LAKE_H_6C+8A+GT2_N3B_HFE is co...


In [None]:
# Prepare data for LangChain Documents
# Each document will have the issue description as page_content
# And the resolution as part of its metadata.
documents = []
for index, row in df.iterrows():
    # Using the issue description as the main content for embedding
    page_content = row[issue_column_name]
    # Skip rows where issue description is missing or not a string
    if not isinstance(page_content, str) or pd.isna(page_content):
        continue
    # Storing resolution and original index in metadata
    metadata = {
        "resolution": row[resolution_column_name],
        "original_index": index,
        "issue_description": row[issue_column_name] # Also good to have original issue in metadata
    }
    documents.append(Document(page_content=page_content, metadata=metadata))

print(f"Successfully loaded {len(documents)} documents for LangChain.")

Successfully loaded 334 documents for LangChain.


In [15]:
# --- 2. Load Embedding Model ---
# SentenceTransformer directly wraps the Hugging Face model for embeddings
# LangChain's SentenceTransformerEmbeddings expects a model that can be loaded this way
from langchain_community.embeddings import SentenceTransformerEmbeddings
embed_model = SentenceTransformerEmbeddings(model_name=embedding_model_name)
print("Embedding model loaded successfully.")

Embedding model loaded successfully.


In [16]:
# --- 3. Create and Persist/Load Vector Store (FAISS) ---
# This step generates embeddings and builds the search index.
# We'll save it to disk so we don't have to re-embed every time.

if os.path.exists(vector_db_path):
    print(f"\n--- Loading existing FAISS index from '{vector_db_path}' ---")
    vectorstore = FAISS.load_local(vector_db_path, embed_model, allow_dangerous_deserialization=True)
    print("FAISS index loaded.")
else:
    print("\n--- Creating new FAISS index and generating embeddings ---")
    # This step will take some time depending on data size and GPU availability.
    # FAISS.from_documents takes care of encoding documents using the embed_model
    vectorstore = FAISS.from_documents(documents, embed_model)
    vectorstore.save_local(vector_db_path)
    print(f"FAISS index created and saved to '{vector_db_path}'.")

# Create a retriever from the vector store
retriever = vectorstore.as_retriever(search_kwargs={"k": 3}) # Retrieve top 2 relevant documents
print("Retriever configured.")


--- Creating new FAISS index and generating embeddings ---
FAISS index created and saved to 'faiss_index_pool'.
Retriever configured.


In [17]:
# --- 4. Initialize Ollama LLM ---
# Set up Ollama LLM and LangChain RAG Chain
from langchain_community.llms import Ollama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

llm = Ollama(model="gemma3:1b")

  llm = Ollama(model="gemma3:1b")


In [69]:
# --- 5. Define the Prompt Template ---
# This is for guiding the LLM to use the context or output the fallback.
prompt_template = """
You are a helpful IT support assistant. Your goal is to provide concise and accurate solutions to technical issues.
You will be provided with a user's technical issue and relevant context (Issue Description and Resolution) from our knowledge base.

If the provided context contains relevant information to address the user's input, synthesize the best possible solution from the 'Resolution' in the context.
Prioritize the provided resolution in your answer.


If the provided context contains multiple resolutions, combine them and provide bulleted list of probable solutions.
Keep the response as professional support assistant.
Correct the spelling, grammer in the solution and paraphrase always, if needed.
If the context does NOT contain any information directly relevant to the user's input, or if you cannot form a solution from the provided context,
then you MUST respond with: "Please raise a pool ticket for this issue." 
User's Input: {question}

Context:
{context}

Solution:
"""

prompt = ChatPromptTemplate.from_template(prompt_template)

In [70]:
# --- 6. Set up the RAG Chain ---
# Define a format_docs function to prepare retrieved documents for the prompt
def format_docs(docs):
    # This function extracts relevant parts from the Document objects
    # and formats them into a string for the LLM context.
    formatted_context = ""
    for i, doc in enumerate(docs):
        # We put both the 'issue_description' and 'resolution' into the context for the LLM
        # So the LLM can see both the original problem and its solution.
        formatted_context += f"Issue {i+1}: {doc.metadata['issue_description']}\n"
        formatted_context += f"Resolution {i+1}: {doc.metadata['resolution']}\n\n"
    return formatted_context.strip() # Remove any trailing newlines


# Build the RAG chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [71]:
# Test the RAG system
query1 = "My VPN is not connecting."
response1 = rag_chain.invoke(query1)
response1 = response1.strip()  # Clean up the response
print(f"\nquery: '{query1}': ---\nresponse: {response1}")


query: 'My VPN is not connecting.': ---
response: Okay, let's try to resolve this VPN connectivity issue.

Based on the provided context, here’s a possible solution:

*   **Restart the PC:** This often resolves temporary network issues.
*   **Connect GlobalProtect:**  Attempting to connect via GlobalProtect might help establish a VPN connection.
*   **Check the VPN account:** Ensure your VPN account is properly configured and active.
*   **Verify the limiter:** Double-check the limiter settings to ensure the VPN connection is allowed.
*   **Check PG8_VGG_CMT_TIU_APN490_94_97:** Verify this limiter is set to a sufficient capacity.

**Please raise a pool ticket for this issue.**


In [72]:
# Test the RAG system
query2 = "I cannot access ISA, looks like access issue."
response2 = rag_chain.invoke(query2)
response2 = response2.strip()  # Clean up the response
print(f"\nquery: '{query2}': ---\nresponse: {response2}")


query: 'I cannot access ISA, looks like access issue.': ---
response: Okay, here’s a solution to your issue:

**Possible Solutions:**

*   Check the availability of the PSICapGroupGroup Available Inventory in W41/50/51.
*   Verify that the PSICapGroup is populated for PSIGCapgroup.
*   Ensure the PSIG data is being loaded from the EMS source.


In [73]:
# Test the RAG system
query3 = "EC Solve failed"
response3 = rag_chain.invoke(query3)
response3 = response3.strip()  # Clean up the response
print(f"\nquery: '{query3}': ---\nresponse: {response3}")


query: 'EC Solve failed': ---
response: Okay, let's address the EC Solve failure.

Based on the provided context, the best solution is:

*   **Change the finish yield2 in frozen horizon to 1.**

Here’s a breakdown of potential related solutions:

*   **VGG cannot handle NST flow prod, especially in frozen.**  This suggests a potential issue with the VGG model's ability to process the NST flow in the frozen state.
*   **Further check and fix by the tech team.**  The context indicates the tech team needs to investigate and resolve this issue.
*   **NSF yield on the 2nd stage is double counted, tech team will further check and fix it.** This is a potential cause of the problem, and the fix will involve investigating and resolving it.


In [74]:
# Test the RAG system
query4 = "Missing Cap Group in PSI Cap Group Mapping"
response4 = rag_chain.invoke(query4)
response4 = response4.strip()  # Clean up the response
print(f"\nquery: '{query4}': ---\nresponse: {response4}")


query: 'Missing Cap Group in PSI Cap Group Mapping': ---
response: Okay, I understand. Here's a breakdown of the issue and a proposed solution:

**Issue:** Missing Cap Group in PSI Cap Group Mapping

**Analysis:** The user is reporting that a "Cap Group" is missing in a PSI Cap Group Mapping. This likely impacts the functionality of the mapping and could lead to data discrepancies.

**Proposed Solution:** Verify that the Cap Group assigned to the PSI Cap Group Mapping is correctly populated and that the data is being loaded as expected.

**Possible Solutions from the Provided Resolution:**

*   **Resolution 1:** Re-load the data from VG.
*   **Resolution 2:** Re-load the data with the older start date.
*   **Resolution 3:** Ensure the Cap Group is correctly selected in the filter.

**Therefore, the best solution is to re-load the data from VG, as it was the most recent update.**
