In [300]:
import json
from neo4j import GraphDatabase
from typing import List

In [301]:
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel

# Loaded (tokenizer, model) pairs keyed by model path, so the weights are read
# from disk once per kernel instead of on every embed() call.
_EMBED_MODEL_CACHE = {}


def embed(text, model_path="/Users/abhishekbairagi/Desktop/experiments/devcon/sent-transformer/all-MiniLM-L6-v2"):
    """
    Embed ``text`` by mean-pooling the transformer's last hidden state.

    Args:
        text: Input passed straight to the tokenizer (truncation and padding enabled).
        model_path (str): Directory of the pretrained model to load. Defaults to the
            local all-MiniLM-L6-v2 checkpoint this notebook has always used.

    Returns:
        numpy.ndarray: Row 0 of ``last_hidden_state`` averaged over dim 1.
    """
    if model_path not in _EMBED_MODEL_CACHE:
        # Loading is by far the slowest step; do it once per path.
        _EMBED_MODEL_CACHE[model_path] = (
            AutoTokenizer.from_pretrained(model_path),
            AutoModel.from_pretrained(model_path),
        )
    tokenizer, model = _EMBED_MODEL_CACHE[model_path]

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    outputs = model(**inputs)
    embedding = outputs.last_hidden_state.mean(dim=1).detach().numpy()
    return embedding[0]

In [None]:
import json
import os

from google import genai

# Take the Gemini API key from the environment so it never has to be committed
# with the notebook; falls back to the original empty string when neither
# variable is set.
client = genai.Client(
    api_key=os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY", "")
)

def generate_text(prompt):
    """
    Send ``prompt`` to the ``gemini-2.0-flash`` model and return the reply text.

    When the whole reply is a single fenced ```json block, the fence is removed
    and only its (whitespace-trimmed) content is returned. Any other reply is
    returned unchanged, so prose that merely contains a fenced block is no
    longer truncated by a fixed-offset slice.

    Args:
        prompt (str): Prompt forwarded as ``contents``.

    Returns:
        str: The reply text, or ``""`` when the response carries no text.
    """
    import re  # local import keeps this cell independent of the import cell

    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=prompt,
    )
    text = response.text
    if text is None:
        return ""

    fenced = re.fullmatch(r"\s*```json\s*(.*?)\s*```\s*", text, flags=re.DOTALL)
    # Reject matches that span several fenced blocks; those are not "one JSON reply".
    if fenced and "```" not in fenced.group(1):
        return fenced.group(1)
    return text

# generate_text('hi')

In [303]:
import os

# Neo4j connection settings. Each value can be overridden through an environment
# variable; the defaults are the local development values used so far.
kg_password = os.environ.get("NEO4J_PASSWORD", "strongpass123")
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
kg_driver = GraphDatabase.driver(uri, auth=(user, kg_password))

In [355]:
def universal_formatting_function(input:dict, heading = ""):
    """
    Render a flat dictionary as ``key: value`` lines.

    Args:
        input (dict): Mapping to render. ``None`` or an empty mapping yields only
            the heading line (or an empty string when there is no heading).
        heading (str): Optional title emitted as ``"<heading>:"`` on the first line.

    Returns:
        str: One line per key. A non-empty list of dicts becomes a bulleted
        sub-list; any other list is comma-joined; everything else uses ``str``.
    """
    out = f"{heading}:\n" if heading else ""
    if not input:
        # Callers pass the result of a KG lookup, which is None when nothing matched.
        return out

    for k, v in input.items():
        if isinstance(v, list):
            # `v and ...` keeps empty lists out of the bulleted branch (all([]) is True).
            if v and all(isinstance(item, dict) for item in v):
                bullets = [
                    f"  - {', '.join(f'{sub_k}: {sub_v}' for sub_k, sub_v in item.items())}"
                    for item in v
                ]
                out += f"{k}:\n" + "\n".join(bullets) + "\n"
            else:
                # str() so lists holding numbers or None do not break str.join.
                out += f"{k}: {', '.join(str(item) for item in v)}\n"
        else:
            out += f"{k}: {v}\n"
    return out

def universal_formatting_function_list(input:list, heading = ""):
    """
    Format each dictionary in ``input`` and concatenate the results.

    Args:
        input (list): Dictionaries, each rendered with ``universal_formatting_function``.
        heading (str): Optional title emitted once as ``"<heading>:"`` before the items.

    Returns:
        str: The heading line (if any) followed by every formatted item in order.
    """
    prefix = f"{heading}:\n" if heading else ""
    return prefix + "".join(universal_formatting_function(entry) for entry in input)



In [351]:
def get_relevant_issues_with_articles(product_name, subtopic_name, query_text, threshold=0.5, top_k=5):
    """
    Fetch top semantically matching issues for a given product and subtopic from the knowledge graph,
    including articles related to the issues via the 'solved_by' relationship.

    Args:
        product_name (str): Compared case-insensitively with the product's name and aliases.
        subtopic_name (str | None): Any of its space-separated words may match a word of the
            subtopic name or one of its aliases; ``None`` disables the subtopic filter.
        query_text (str): Text embedded with ``embed`` and compared with each issue embedding.
        threshold (float): Issues are kept only when their cosine score is strictly greater.
        top_k (int): Maximum number of rows returned, highest score first.

    Returns:
        list[dict]: Rows with keys ``issue_id``, ``description``, ``score``, ``product``,
        ``subtopic`` and ``related_articles``.
    """
    # Plain Python floats serialize as a Cypher list regardless of driver version.
    query_embedding = [float(value) for value in embed(query_text)]

    cypher = """
    MATCH (p:Product)
    WHERE toLower(p.name) = toLower($product_name) OR any(alias IN p.alias WHERE toLower(alias) = toLower($product_name))

    OPTIONAL MATCH (p)-[:has_subtopic]->(s:SubTopic)
    WHERE $subtopic_name IS NULL OR any(word IN split(toLower($subtopic_name), " ") WHERE word IN split(toLower(s.name), " ") OR any(alias IN s.alias WHERE toLower(alias) = word))

    OPTIONAL MATCH (s)-[:has_issue]->(i:Issue)
    WHERE i.embedding IS NOT NULL

    OPTIONAL MATCH (i)-[:solved_by]->(a:Article)

    WITH p, s, i, a, vector.similarity.cosine(i.embedding, $query_embedding) AS score
    WHERE score > $threshold

    RETURN 
        i.id AS issue_id,
        i.description AS description,
        score,
        p.name AS product,
        s.name AS subtopic,
        collect(DISTINCT {id: a.id, title: a.title, content: a.content}) AS related_articles
    ORDER BY score DESC
    LIMIT $top_k
    """

    # The driver is a context manager: leaving the block closes its connection
    # pool, which the previous version never did.
    with GraphDatabase.driver(uri, auth=(user, kg_password)) as driver, driver.session() as session:
        result = session.run(
            cypher,
            product_name=product_name,
            subtopic_name=subtopic_name,
            query_embedding=query_embedding,
            threshold=threshold,
            top_k=top_k
        )
        # Materialize before the session closes; the result is consumed lazily.
        return [record.data() for record in result]


def format_issues_with_articles(issues):
    """
    Format the list of issues with their related articles into a readable string format.

    Args:
        issues (list): Issue dictionaries with keys ``issue_id``, ``description``,
            ``product``, ``subtopic``, ``score`` and ``related_articles``.

    Returns:
        str: One block per issue, each terminated by a ``---`` line. Article
        entries whose ``id`` and ``title`` are both ``None`` are skipped.
    """
    lines = []
    for issue in issues:
        lines.append(f"Issue ID: {issue['issue_id']}")
        lines.append(f"Description: {issue['description']}")
        lines.append(f"Product: {issue['product']}")
        lines.append(f"Subtopic: {issue['subtopic']}")
        lines.append(f"Score: {issue['score']:.4f}")
        lines.append("Related Articles:")
        for article in issue['related_articles']:
            title, article_id = article.get('title'), article.get('id')
            # An issue without articles comes back from the KG query as one
            # all-null map; do not print it as "None (ID: None)".
            if title is None and article_id is None:
                continue
            lines.append(f" - {title} (ID: {article_id})")
        lines.append("---")
    return "".join(f"{line}\n" for line in lines)

# Example usage
relevant_issues_with_articles = get_relevant_issues_with_articles(
    product_name="webex",
    subtopic_name="Audio",
    query_text="I am facing audio issues in webex",
    threshold=0.3,
    top_k=10
)

# Example usage
formatted_issues = format_issues_with_articles(relevant_issues_with_articles)
print(formatted_issues)

# formatted_issues2 = universal_formatting_function_list(relevant_issues_with_articles)
# print(formatted_issues2)



Issue ID: issue3
Description: No audio during call
Product: Webex
Subtopic: Audio Issues
Score: 0.7444
Related Articles:
 - Resolving Audio Failures in Webex Calls Due to App Settings (ID: article28)
 - Fixing Audio Issues During Webex Calls on macOS (ID: article27)
 - Troubleshooting No Sound During Meetings in Windows (ID: article3)
---
Issue ID: issue4
Description: Microphone not detected
Product: Webex
Subtopic: Audio Issues
Score: 0.6814
Related Articles:
 - Fixing Microphone Detection Problems (ID: article4)
---
Issue ID: issue30
Description: Noise interference during calls
Product: Webex
Subtopic: Audio Issues
Score: 0.6284
Related Articles:
 - Dealing with Random Noise in Webex Calls (ID: article37)
---
Issue ID: issue29
Description: Background echo during calls
Product: Webex
Subtopic: Audio Issues
Score: 0.5962
Related Articles:
 - Eliminating Echo in Webex Calls (ID: article36)
---



In [356]:
def get_product_details_with_outages(product_name):
    """
    Fetch product details along with any active outages associated with the product.

    Args:
        product_name (str): Value matched exactly against the Product node's ``name``.

    Returns:
        dict: ``{"product": {"id", "name", "alias"}, "active_outages": [...]}``, or
        ``{"error": "Product not found"}`` when no product has that name.
    """
    # Opening the driver in the `with` closes it on exit; it used to be leaked.
    with GraphDatabase.driver(uri, auth=(user, kg_password)) as driver, driver.session() as session:
        # Query to fetch product details
        product_query = """
        MATCH (p:Product {name: $product_name})
        RETURN p.id AS product_id, p.name AS name, p.alias AS alias
        """
        product_result = session.run(product_query, product_name=product_name)
        product_details = product_result.single()

        if not product_details:
            return {"error": "Product not found"}

        # Query to fetch active outages related to the product
        outage_query = """
        MATCH (p:Product {name: $product_name})-[:has_outage]->(o:Outage)
        WHERE o.status = 'Active'
        RETURN o.id AS outage_id, o.title AS title, o.description AS description, 
               o.start_time AS start_time, o.expected_resolution AS expected_resolution, 
               o.impact AS impact, o.location AS location
        """
        outage_result = session.run(outage_query, product_name=product_name)
        outages = [record.data() for record in outage_result]

        # Combine product details and outages
        return {
            "product": {
                "id": product_details["product_id"],
                "name": product_details["name"],
                "alias": product_details["alias"]
            },
            "active_outages": outages
        }


def format_product_details_with_outages(details):
    """
    Format product details and active outages into a readable string format.

    Args:
        details (dict): Either ``{"product": {...}, "active_outages": [...]}`` or the
            ``{"error": ...}`` dictionary returned when the product lookup fails.

    Returns:
        str: Formatted product and outage details, or ``"Product details not found."``
        when ``details`` is empty or carries an ``error`` key.
    """
    if not details or "error" in details:
        # The lookup signals a missing product with an error dict, not an exception.
        return "Product details not found."

    product = details['product']
    # `alias` can be absent on the node (None); treat that as "no aliases".
    aliases = product['alias'] or []
    if isinstance(aliases, str):
        aliases = [aliases]

    output = "Product Info:\n"
    output += f"ID: {product['id']}\n"
    output += f"Name: {product['name']}\n"
    output += f"Aliases: {', '.join(str(alias) for alias in aliases)}\n\n"

    if details['active_outages']:
        output += "Active Outages:\n"
        for outage in details['active_outages']:
            output += f"Outage ID: {outage['outage_id']}\n"
            output += f"Title: {outage['title']}\n"
            output += f"Description: {outage['description']}\n"
            output += f"Start Time: {outage['start_time']}\n"
            output += f"Expected Resolution: {outage['expected_resolution']}\n"
            output += f"Impact: {outage['impact']}\n"
            output += f"Location: {outage['location']}\n"
            output += "---\n"
    else:
        output += "No active outages.\n"

    return output

def get_subtopic_details(product_name, subtopic_name):
    """
    Fetch subtopic details for a given product and subtopic name.

    Args:
        product_name (str): Compared case-insensitively with the product's name.
        subtopic_name (str): Any of its space-separated words may match a word of the
            subtopic's name or one of the subtopic's aliases.

    Returns:
        dict | None: Keys ``id``, ``name``, ``alias``, ``Constraints``, ``Access`` and
        ``Common_Issues`` taken from the record given by ``result.single()``, or
        ``None`` when no subtopic matches.
    """
    # Opening the driver in the `with` closes it on exit; it used to be leaked.
    with GraphDatabase.driver(uri, auth=(user, kg_password)) as driver, driver.session() as session:
        query = """
        MATCH (p:Product)-[:has_subtopic]->(s:SubTopic)
        WHERE toLower(p.name) = toLower($product_name) AND  any(word IN split(toLower($subtopic_name), " ") WHERE word IN split(toLower(s.name), " ") OR any(alias IN s.alias WHERE toLower(alias) = word))

        RETURN s.id AS id, s.name AS name, s.alias AS alias, s.Constraints AS constraints, 
               s.Access AS access, s.`Common Issues` AS common_issues
        """
        result = session.run(query, product_name=product_name, subtopic_name=subtopic_name)
        subtopic_details = result.single()
        if subtopic_details:
            return {
                "id": subtopic_details["id"],
                "name": subtopic_details["name"],
                "alias": subtopic_details["alias"],
                "Constraints": subtopic_details["constraints"],
                "Access": subtopic_details["access"],
                "Common_Issues": subtopic_details["common_issues"]
            }
        return None
    
# Example: look up the "Outlook" product and its active outages.
# Note: despite its name, `active_outages` holds the whole result dictionary
# (product details plus outages), not just the outage list.
active_outages = get_product_details_with_outages("Outlook")
# Render the product/outage dictionary as text and show it.
formatted_output_outage = format_product_details_with_outages(active_outages)
print(formatted_output_outage)




# Example: look up the Webex subtopic matching "Audio"; the lookup returns None
# when nothing matches, hence the explicit fallback message.
subtopic_details = get_subtopic_details(product_name="Webex", subtopic_name="Audio")
if subtopic_details:
    formatted_subtopic_details = universal_formatting_function(subtopic_details, heading="Subtopic Details")
    print(formatted_subtopic_details)
else:
    print("Subtopic not found.")

Product Info:
ID: prod2
Name: Outlook
Aliases: Microsoft Outlook, Outlook Email, MS Outlook

Active Outages:
Outage ID: out3
Title: Mail Delivery Delays and Failures
Description: Active outage affecting mail delivery in the India Office. Users are experiencing delays and failures in email delivery.
Start Time: 2025-06-10T09:00:00
Expected Resolution: 2025-06-10T14:00:00
Impact: Critical
Location: India Office
---

Subtopic Details:
id: sub2
name: Audio Issues
alias: No Sound, Audio Not Working, Microphone Problems
Constraints: Requires compatible audio drivers
Access: All users
Common_Issues: No sound output, Microphone not detected, Audio dropouts



In [357]:
def get_user_device_relationship(user_id):
    """
    Fetch the device associated with a specific user ID based on the 'has_device' relationship.

    Args:
        user_id (str): Value matched against the User node's ``id`` property.

    Returns:
        dict | None: User fields (``name``, ``email``, ``location``, ``band``, ``team``)
        plus a nested ``device`` dictionary, or ``None`` when the query yields no record.
    """
    # Opening the driver in the `with` closes it on exit; it used to be leaked.
    with GraphDatabase.driver(uri, auth=(user, kg_password)) as driver, driver.session() as session:
        query = """
        MATCH (u:User {id: $user_id})-[:has_device]->(d:Device)
        RETURN u.name AS name, u.email AS email, u.location AS location, u.band AS band, u.team AS team,
               d.id AS device_id, d.model AS device_model, d.os AS device_os, d.os_version AS os_version, 
               d.ram AS ram, d.storage AS storage, d.lastupdate AS last_update, d.issued_on AS issued_on, 
               d.pending_updates AS pending_updates
        """
        result = session.run(query, user_id=user_id)
        user_device_details = result.single()
        if user_device_details:
            return {
                "name": user_device_details["name"],
                "email": user_device_details["email"],
                "location": user_device_details["location"],
                "band": user_device_details["band"],
                "team": user_device_details["team"],
                "device": {
                    "model": user_device_details["device_model"],
                    "os": user_device_details["device_os"],
                    "os_version": user_device_details["os_version"],
                    "ram": user_device_details["ram"],
                    "storage": user_device_details["storage"],
                    "last_update": user_device_details["last_update"],
                    "issued_on": user_device_details["issued_on"],
                    "pending_updates": user_device_details["pending_updates"],
                    "device_id": user_device_details["device_id"]
                }
            }
        return None

def format_user_details(user_details, user_id):
    """
    Render a user's profile and device as a two-section text block.

    Args:
        user_details (dict): User fields plus a nested ``device`` dictionary.
        user_id (str): Identifier printed on the "User ID" line.

    Returns:
        str: A "User Info" section, a blank line, then a "Device Info" section;
        ``"User details not found."`` when ``user_details`` is falsy.
    """
    if not user_details:
        return "User details not found."

    device = user_details['device']
    rows = [
        "User Info:",
        f"User ID: {user_id}",
        f"Name: {user_details['name']}",
        f"Email: {user_details['email']}",
        f"Location: {user_details['location']}",
        f"Band: {user_details['band']}",
        f"Team: {user_details['team']}",
        "",  # blank separator line between the two sections
        "Device Info:",
        f"Device ID: {device['device_id']}",
        f"Model: {device['model']}",
        f"OS: {device['os']}",
        f"OS Version: {device['os_version']}",
        f"RAM: {device['ram']} GB",
        f"Storage: {device['storage']} GB",
        f"Last Update: {device['last_update']}",
        f"Issued On: {device['issued_on']}",
        f"Pending Updates: {device['pending_updates']}",
    ]
    return "\n".join(rows) + "\n"

# Example: fetch user10's profile and device from the KG, then render and print it.
# The formatter copes with a None lookup result by returning a "not found" message.
user_id = "user10"
user_details = get_user_device_relationship(user_id)
formatted_details = format_user_details(user_details, user_id)
print(formatted_details)


User Info:
User ID: user10
Name: Julia
Email: julia@example.com
Location: UK
Band: 50
Team: GSG

Device Info:
Device ID: dev10
Model: iPad Pro
OS: ios
OS Version: 16.4
RAM: 16 GB
Storage: 256 GB
Last Update: 2025-06-02T21:29:14.482241
Issued On: 2024-11-03T21:29:14.482246
Pending Updates: 



In [307]:
import requests
import json

def semantic_search(query, top_k=5, es_url="http://localhost:9200/articles_1"):
    """
    Perform a hybrid keyword + embedding search on an Elasticsearch index.

    Documents are pre-filtered by a ``match`` on ``content`` and then ranked by
    ``_score*2 + cosineSimilarity(query_vector, 'embedding') + 1.0``.

    Args:
        query (str): The search query; used both for the text match and the embedding.
        top_k (int): Number of top results to return.
        es_url (str): Base URL of the index (``/_search`` is appended).

    Returns:
        list: Dictionaries with ``id``, ``score`` and ``source`` for each hit.

    Raises:
        Exception: When Elasticsearch answers with a non-200 status code.
    """
    # Embed once (the previous version ran the model twice per search) and
    # convert to plain floats so the vector is JSON-serializable.
    query_embedding = [float(value) for value in embed(query)]

    # Construct the search query for Elasticsearch
    search_query = {
        "size": top_k,
        "query": {
            "script_score": {
                "query": {"match": {"content":query}},
                "script": {
                    "source": "_score*2 + cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                    "params": {"query_vector": query_embedding}
                }
            }
        }
    }

    # Perform the search request
    response = requests.post(
        f"{es_url}/_search",
        headers={"Content-Type": "application/json"},
        data=json.dumps(search_query)
    )

    # Check for errors
    if response.status_code != 200:
        raise Exception(f"Failed to perform search: {response.text}")

    # Parse the response
    results = response.json()["hits"]["hits"]
    return [{"id": hit["_id"], "score": hit["_score"], "source": hit["_source"]} for hit in results]


def format_search_results(results):
    """
    Turn search hits into a human-readable text block.

    Args:
        results (list): Hit dictionaries with ``id``, ``score`` and a ``source``
            mapping that provides ``title`` and ``content``.

    Returns:
        str: ID, score (four decimals), title and content per hit, each hit
        followed by a ``---`` separator line.
    """
    blocks = []
    for hit in results:
        blocks.append(
            f"ID: {hit['id']}\n"
            f"Score: {hit['score']:.4f}\n"
            f"Title: {hit['source']['title']}\n"
            f"Content: {hit['source']['content']}\n"
            "---\n"
        )
    return "".join(blocks)
# Example: run the hybrid Elasticsearch search for an audio complaint and print
# the five best hits in readable form.
query = "I am facing audio issue in webex"
top_results = semantic_search(query,  top_k=5)
formatted_results = format_search_results(top_results)
print(formatted_results)


ID: article31
Score: 14.2993
Title: Hosting Webinars in Zoom
Content: Zoom's webinar features are similar to Webex, including support for registration, Q&A, and panelists. Some users find Zoom easier to use for external-facing events. Remember that hosting rights in Zoom, like Webex, might be tied to your license or access level, which can vary depending on your team or region.
---
ID: article36
Score: 14.2790
Title: Eliminating Echo in Webex Calls
Content: Background echo during Webex calls can be caused by improper audio settings or hardware issues. Ensure that your microphone and speakers are not placed too close to each other, as this can create feedback loops. Use headphones to minimize echo. Check Webex audio settings to ensure the correct input and output devices are selected. If the issue persists, try muting participants who are not speaking, as their devices may be causing the echo. Additionally, ensure that your audio drivers are up-to-date and that your operating system is 

In [370]:
def generate_structured_kg_output(input: dict):
    """
    Concatenate named sections into one structured-context string.

    Args:
        input (dict): Section title -> section content. Dict content is rendered
            as ``key: value`` lines, list content as one ``str(item)`` per line,
            anything else via ``str``.

    Returns:
        str: Every section as ``"🔹 <title>:"``, its body, and a blank line.
    """
    sections = []
    for title, payload in input.items():
        if isinstance(payload, dict):
            body = "\n".join(f"{field}: {content}" for field, content in payload.items())
        elif isinstance(payload, list):
            body = "\n".join(map(str, payload))
        else:
            body = str(payload)
        sections.append(f"🔹 {title}:\n{body}\n\n")
    return "".join(sections)



In [376]:
# Prompt template for the KG-augmented run. Placeholders filled via str.format:
#   {user_query} - the user's question
#   {kg_input}   - structured context assembled from the knowledge graph
#   {context}    - formatted retrieved documents
kg_prompt_universal = """
You are a helpful assistant. A user asked: "{user_query}"

You are provided with two kinds of inputs:

1. **Documents** – These are relevant articles retrieved for the query.
2. **Structured Context** – This includes user details, incident reports, product rules, or known issue graphs depending on the query type.

Use both inputs to generate a helpful, personalized, and precise response. Adjust your strategy based on the structured context type:

- For **user profiles + product constraints**, evaluate whether the user's profile matches the access requirements. Explain likely blockers based on region, device, or access level, and offer the next steps or alternatives. Avoid listing every possibility—prioritize the top 1–2 most relevant issues.
- For **active outage info**, check if the outage affects the user and summarize the impact. If not affected, suggest what else could be wrong.
- For **ambiguous issue queries that do not specify what the issue is**, use the related issues to suggest a **follow-up question** with concise options if needed.

Do not be verbose. Do not repeat what’s already in the documents unless it’s especially relevant to the user profile.

Structured Context:
{kg_input}

Documents:
{context}
"""


# Prompt template for the baseline (documents-only) run. Placeholders filled
# via str.format: {user_query} - the user's question, {context} - the formatted
# retrieved documents. No knowledge-graph context is included here.
vanilla_prompt_universal = """
You are a helpful assistant. A user asked: "{user_query}"

Based only on the following documents, provide the best possible answer.

Do not rely on any external data or user information. If clarification is needed, you may offer to assist further, but avoid asking follow-up questions directly.

Documents:
{context}
"""

In [377]:

# Inputs
query = "I’m not able to host a webinar in Webex"
user_id = "user11"
product = "Webex"
subtopic = "Webinar"

# Semantic search (simulating RAG retrieval)
retrieved_articles = semantic_search(query, top_k=5)
formatted_retrieved_articles = format_search_results(retrieved_articles)
print("🔹 Retrieved Articles:")
print(formatted_retrieved_articles)

# Retrieve the user profile (with device) from the KG
user_profile = get_user_device_relationship(user_id)
formatted_user_profile = format_user_details(user_profile, user_id)

# Retrieve product details and active outages. An unknown product comes back
# as {"error": ...}, which the formatter cannot render, so handle it here.
product_details_with_outages = get_product_details_with_outages(product)
if "error" in product_details_with_outages:
    formatted_product_details = "Product not found.\n"
else:
    formatted_product_details = format_product_details_with_outages(product_details_with_outages)

# Retrieve subtopic details; the lookup returns None when nothing matches.
subtopic_details = get_subtopic_details(product_name=product, subtopic_name=subtopic)
if subtopic_details:
    formatted_subtopic_details = universal_formatting_function(subtopic_details)
else:
    formatted_subtopic_details = "Subtopic not found.\n"

# Retrieve related issues from the KG
relevant_issues_with_articles = get_relevant_issues_with_articles(
    product_name=product,
    subtopic_name=subtopic,
    query_text=query,
    threshold=0.3,
    top_k=5
)
formatted_relevant_issues = format_issues_with_articles(relevant_issues_with_articles)

# Named `kg_sections` rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
kg_sections = {
    "User Profile": formatted_user_profile,
    "Product Details": formatted_product_details,
    "Subtopic Details": formatted_subtopic_details,
    "Relevant Issues": formatted_relevant_issues,
}
# Generate structured KG output using the function
structured_kg_output = generate_structured_kg_output(kg_sections)

print("🔹 Structured KG Output:")
print(structured_kg_output)

# Vanilla RAG prompt (no KG context) and KG-RAG prompt (with structured context)
vanilla_prompt = vanilla_prompt_universal.format(user_query=query, context=formatted_retrieved_articles)
kg_prompt = kg_prompt_universal.format(user_query=query, kg_input=structured_kg_output, context=formatted_retrieved_articles)

# Run both prompts
vanilla_response = generate_text(vanilla_prompt)
kg_response = generate_text(kg_prompt)

# Show results
print("🔹 Vanilla RAG Response:\n", vanilla_response)
print("\n🔹 KG-RAG Response:\n", kg_response)


🔹 Retrieved Articles:
ID: article30
Score: 24.0633
Title: Webex Webinar Troubleshooting Guide
Content: Webex webinar issues often involve audio dropouts, invite errors, and permissions-related problems. Ensure you have a stable internet connection and your browser is up to date. Some users report not seeing the 'Host Webinar' option—this might be due to licensing or account-level permissions. In those cases, contact IT or your supervisor. Sometimes, webinar tools are only unlocked for advanced users or certain employee tiers, depending on how your access is provisioned.
---
ID: article29
Score: 21.7168
Title: How to Host a Webinar in Webex
Content: Hosting a webinar in Webex typically starts by accessing the Webex Events dashboard. You select your date, fill in the event details, and configure settings such as Q&A, polling, and attendee registration. Make sure you're signed in with your work credentials and that Webex Events is enabled for your account. Webinars are usually available t

In [378]:
# Inputs
query = "I’m not receiving mails in Outlook"
user_id = "user11"
product = "Outlook"
subtopic = "Mail"

# Semantic search (simulating RAG retrieval)
retrieved_articles = semantic_search(query, top_k=5)
formatted_retrieved_articles = format_search_results(retrieved_articles)
print("🔹 Retrieved Articles:")
print(formatted_retrieved_articles)

# Retrieve the user profile (with device) from the KG
user_profile = get_user_device_relationship(user_id)
formatted_user_profile = format_user_details(user_profile, user_id)

# Retrieve product details and active outages. An unknown product comes back
# as {"error": ...}, which the formatter cannot render, so handle it here.
product_details_with_outages = get_product_details_with_outages(product)
if "error" in product_details_with_outages:
    formatted_product_details = "Product not found.\n"
else:
    formatted_product_details = format_product_details_with_outages(product_details_with_outages)

# Retrieve subtopic details; the lookup returns None when nothing matches.
subtopic_details = get_subtopic_details(product_name=product, subtopic_name=subtopic)
if subtopic_details:
    formatted_subtopic_details = universal_formatting_function(subtopic_details)
else:
    formatted_subtopic_details = "Subtopic not found.\n"

# Retrieve related issues from the KG
relevant_issues_with_articles = get_relevant_issues_with_articles(
    product_name=product,
    subtopic_name=subtopic,
    query_text=query,
    threshold=0.3,
    top_k=5
)
formatted_relevant_issues = format_issues_with_articles(relevant_issues_with_articles)

# Named `kg_sections` rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
kg_sections = {
    "User Profile": formatted_user_profile,
    "Product Details": formatted_product_details,
    "Subtopic Details": formatted_subtopic_details,
    "Relevant Issues": formatted_relevant_issues,
}
# Generate structured KG output using the function
structured_kg_output = generate_structured_kg_output(kg_sections)

print("🔹 Structured KG Output:")
print(structured_kg_output)

# Vanilla RAG prompt (no KG context) and KG-RAG prompt (with structured context)
vanilla_prompt = vanilla_prompt_universal.format(user_query=query, context=formatted_retrieved_articles)
kg_prompt = kg_prompt_universal.format(user_query=query, kg_input=structured_kg_output, context=formatted_retrieved_articles)

# Run both prompts
vanilla_response = generate_text(vanilla_prompt)
kg_response = generate_text(kg_prompt)

# Show results
print("🔹 Vanilla RAG Response:\n", vanilla_response)
print("\n🔹 KG-RAG Response:\n", kg_response)

# # Retrieved Articles (Vanilla RAG)
# retrieved_articles_case2 = [
#     {
#         "title": "Troubleshooting Mail Delivery in Outlook",
#         "content": (
#             "If you're not receiving emails in Outlook, check your internet connection, spam folder, and mailbox storage limit. Make sure Outlook is connected to your email server. "
#             "Try restarting the app and ensure your credentials are valid. If issues persist, contact IT support for further diagnosis."
#         )
#     },
#     {
#         "title": "Common Outlook Email Issues",
#         "content": (
#             "Users often face delayed emails, sync errors, or missing emails in Outlook. Possible reasons include corrupted profiles, incorrect rules/filters, or regional settings. "
#             "If you're part of a shared mailbox, verify that permissions haven't changed recently."
#         )
#     },
#     {
#         "title": "Fixing Outlook Sync Errors",
#         "content": (
#             "Sync issues in Outlook can result in missing or delayed emails. Navigate to Account Settings and verify sync frequency. Restarting Outlook or repairing the OST file may help."
#         )
#     },
#     {
#         "title": "Mailbox Size Limits and Delivery Failures",
#         "content": (
#             "If your Outlook mailbox has exceeded its storage quota, incoming emails may be rejected. Empty trash or archive old messages to free up space."
#         )
#     }
# ]

# # User profile from KG
# user_profile_case2 = {
#     "name": "Jordan Singh",
#     "band": 35,
#     "device": "Windows",
#     "location": "NY Office"
# }

# # Outage Event from KG
# outage_event_kg = {
#     "service": "Outlook",
#     "status": "Active",
#     "impact": "Mail delivery delays and failures",
#     "location": "NY Office",
#     "start_time": "9:00 AM",
#     "date": "2025-06-10",
#     "expected_resolution": "2:00 PM"
# }

# # Vanilla RAG Prompt (no KG context)
# vanilla_context_case2 = "\n\n".join(f"{a['title']}: {a['content']}" for a in retrieved_articles_case2)

# vanilla_prompt= vanilla_prompt_universal.format(user_query=query_case2, context=vanilla_context_case2)
# # KG-RAG Prompt (includes structured KG context)
# kg_context_case2 = (
#     f"User Info: Name: {user_profile_case2['name']}, Location: {user_profile_case2['location']}, Device: {user_profile_case2['device']}\n\n"
# )
# outage_context = (
#     f"Outage Info: Service: {outage_event_kg['service']}, Location: {outage_event_kg['location']}, "
#     f"Status: {outage_event_kg['status']}, Impact: {outage_event_kg['impact']}, "
#     f"Start: {outage_event_kg['start_time']} on {outage_event_kg['date']}, "
#     f"Expected Resolution: {outage_event_kg['expected_resolution']}\n\n"
# )

# # kg_prompt_case2 = f"""
# # You are a helpful assistant. A user asked: "{query_case2}"

# # In addition to the documents below, consider the structured user data and active incident details to generate a more contextual and relevant response. Make sure the response is precise and to the point.

# # User Profile:
# # {kg_context_case2}

# # Outage Details:
# # {outage_context}

# # Documents:
# # {vanilla_context_case2}
# # """
# kg_prompt = kg_prompt_universal.format(
#     user_query=query_case2, 
#     kg_input=kg_context_case2 + outage_context, 
    
#     context=vanilla_context_case2
# )
# vanilla_prompt = vanilla_prompt_universal.format(user_query=query_case2, context=vanilla_context_case2)
# # Run both prompts
# vanilla_response_case2 = generate_text(vanilla_prompt)
# kg_response_case2 = generate_text(kg_prompt)

# # Show results
# print("🔹 Vanilla RAG Response:\n", vanilla_response_case2)
# print("\n🔹 KG-RAG Response:\n", kg_response_case2)


🔹 Retrieved Articles:
ID: article32
Score: 15.0698
Title: Troubleshooting Mail Delivery in Outlook
Content: If you're not receiving emails in Outlook, check your internet connection, spam folder, and mailbox storage limit. Make sure Outlook is connected to your email server. Try restarting the app and ensure your credentials are valid. If issues persist, contact IT support for further diagnosis.
---
ID: article10
Score: 9.3258
Title: Why You're Not Receiving Notifications and How to Fix It
Content: Missing notifications can cause users to overlook important updates and communications. If you’re not receiving notifications, first check the system settings to ensure they’re enabled for the relevant app. Make sure Do Not Disturb mode or Focus Mode isn't active. On mobile devices, battery optimization settings may prevent apps from running in the background—disable this for critical apps. In the app itself, ensure that notification permissions are granted and that notification types (e.g.,

In [379]:
# Inputs
query = "I’m facing an audio issue in Webex"
user_id = "user11"
product = "Webex"
subtopic = "Audio Issues"

# Semantic search (simulating RAG retrieval)
retrieved_articles = semantic_search(query, top_k=5)
formatted_retrieved_articles = format_search_results(retrieved_articles)
print("🔹 Retrieved Articles:")
print(formatted_retrieved_articles)

# Retrieve the user profile (with device) from the KG
user_profile = get_user_device_relationship(user_id)
formatted_user_profile = format_user_details(user_profile, user_id)

# Retrieve product details and active outages. An unknown product comes back
# as {"error": ...}, which the formatter cannot render, so handle it here.
product_details_with_outages = get_product_details_with_outages(product)
if "error" in product_details_with_outages:
    formatted_product_details = "Product not found.\n"
else:
    formatted_product_details = format_product_details_with_outages(product_details_with_outages)

# Retrieve subtopic details; the lookup returns None when nothing matches.
subtopic_details = get_subtopic_details(product_name=product, subtopic_name=subtopic)
if subtopic_details:
    formatted_subtopic_details = universal_formatting_function(subtopic_details)
else:
    formatted_subtopic_details = "Subtopic not found.\n"

# Retrieve related issues from the KG
relevant_issues_with_articles = get_relevant_issues_with_articles(
    product_name=product,
    subtopic_name=subtopic,
    query_text=query,
    threshold=0.3,
    top_k=5
)
formatted_relevant_issues = format_issues_with_articles(relevant_issues_with_articles)

# Named `kg_sections` rather than `input` so the builtin input() is not shadowed
# for the rest of the kernel session.
kg_sections = {
    "User Profile": formatted_user_profile,
    "Product Details": formatted_product_details,
    "Subtopic Details": formatted_subtopic_details,
    "Relevant Issues": formatted_relevant_issues,
}
# Generate structured KG output using the function
structured_kg_output = generate_structured_kg_output(kg_sections)

print("🔹 Structured KG Output:")
print(structured_kg_output)

# Vanilla RAG prompt (no KG context) and KG-RAG prompt (with structured context)
vanilla_prompt = vanilla_prompt_universal.format(user_query=query, context=formatted_retrieved_articles)
kg_prompt = kg_prompt_universal.format(user_query=query, kg_input=structured_kg_output, context=formatted_retrieved_articles)

# Run both prompts
vanilla_response = generate_text(vanilla_prompt)
kg_response = generate_text(kg_prompt)

# Show results
print("🔹 Vanilla RAG Response:\n", vanilla_response)
print("\n🔹 KG-RAG Response:\n", kg_response)


🔹 Retrieved Articles:
ID: article31
Score: 14.2947
Title: Hosting Webinars in Zoom
Content: Zoom's webinar features are similar to Webex, including support for registration, Q&A, and panelists. Some users find Zoom easier to use for external-facing events. Remember that hosting rights in Zoom, like Webex, might be tied to your license or access level, which can vary depending on your team or region.
---
ID: article36
Score: 14.2918
Title: Eliminating Echo in Webex Calls
Content: Background echo during Webex calls can be caused by improper audio settings or hardware issues. Ensure that your microphone and speakers are not placed too close to each other, as this can create feedback loops. Use headphones to minimize echo. Check Webex audio settings to ensure the correct input and output devices are selected. If the issue persists, try muting participants who are not speaking, as their devices may be causing the echo. Additionally, ensure that your audio drivers are up-to-date and that you

In [None]:
def process_query(query, user_id, product, subtopic, top_k=5, threshold=0.3):
    """
    Answer a query twice: with vanilla RAG and with KG-augmented RAG.

    The function runs a semantic search for supporting articles, pulls the
    user, product, subtopic and related-issue context from the knowledge
    graph, builds both prompts and sends each one to the LLM.

    Args:
        query (str): The user query.
        user_id (str): The user ID looked up in the KG.
        product (str): The product name.
        subtopic (str): The subtopic name.
        top_k (int): Maximum number of results requested from both the
            semantic article search and the KG issue lookup. Defaults to 5,
            the value that was previously hard-coded.
        threshold (float): Threshold forwarded to
            ``get_relevant_issues_with_articles``. Defaults to 0.3, the value
            that was previously hard-coded.

    Returns:
        dict: ``vanilla_response`` and ``kg_response`` (LLM outputs),
        ``retrieved_articles`` (the raw, unformatted search results) and
        ``structured_kg_output`` (the KG context given to the KG prompt).
    """
    # Article retrieval shared by both prompts.
    retrieved_articles = semantic_search(query, top_k=top_k)
    formatted_retrieved_articles = format_search_results(retrieved_articles)

    # User profile from the KG.
    user_profile = get_user_device_relationship(user_id)
    formatted_user_profile = format_user_details(user_profile, user_id)

    # Product details (with outages) from the KG.
    product_details_with_outages = get_product_details_with_outages(product)
    formatted_product_details = format_product_details_with_outages(product_details_with_outages)

    # Subtopic details from the KG.
    subtopic_details = get_subtopic_details(product_name=product, subtopic_name=subtopic)
    formatted_subtopic_details = universal_formatting_function(subtopic_details)

    # Related issues (with their articles) from the KG.
    relevant_issues_with_articles = get_relevant_issues_with_articles(
        product_name=product,
        subtopic_name=subtopic,
        query_text=query,
        threshold=threshold,
        top_k=top_k,
    )
    formatted_relevant_issues = format_issues_with_articles(relevant_issues_with_articles)

    # Bundle the formatted KG sections and condense them into one structured block.
    input_data = {
        "User Profile": formatted_user_profile,
        "Product Details": formatted_product_details,
        "Subtopic Details": formatted_subtopic_details,
        "Relevant Issues": formatted_relevant_issues,
    }
    structured_kg_output = generate_structured_kg_output(input_data)

    # Both prompts see the same articles; only the KG prompt also gets KG context.
    vanilla_prompt = vanilla_prompt_universal.format(
        user_query=query,
        context=formatted_retrieved_articles,
    )
    kg_prompt = kg_prompt_universal.format(
        user_query=query,
        kg_input=structured_kg_output,
        context=formatted_retrieved_articles,
    )

    vanilla_response = generate_text(vanilla_prompt)
    kg_response = generate_text(kg_prompt)

    return {
        "vanilla_response": vanilla_response,
        "kg_response": kg_response,
        "retrieved_articles": retrieved_articles,
        "structured_kg_output": structured_kg_output
    }


In [384]:
# Plain-text catalog of the known products and their subtopics. It is passed to
# classify_product_and_subtopic and substituted into the prompt template's
# {product_data} placeholder.
product_data = """
Product: Webex
- Subtopics:
    - Meeting Scheduling
    - Audio Issues
    - Screen Sharing
    - Calendar Sync
    - Webinar - Webex

Product: Outlook
- Subtopics:
    - Calendar Sync
    - Login Problems
    - Email Configuration
    - Notifications
    - Outlook Mail

Product: Slack
- Subtopics:
    - Message Threads
    - Workspace Management
    - App Integration
    - Notifications

Product: BYOD
- Subtopics:
    - Device Registration
    - Security Compliance
    - Network Access

Product: Windows
- Subtopics:
    - OS Updates
    - Driver Issues
    - Blue Screen Errors
    - Performance Optimization

Product: Zoom
- Subtopics:
    - Webinar - Zoom
    - Meetings - Zoom
"""   
# Classification prompt template with {product_data} and {query} placeholders.
# It instructs the model to reply only with <product>…</product> and
# <subtopic>…</subtopic> tags, left empty when nothing matches.
# NOTE(review): `prompt` is a very generic notebook-level name; any other cell
# that assigns `prompt` will change what classify_product_and_subtopic sends.
prompt = """Given a query and certain product data, classify what product and subtopic is query talking about if relevant product and subtopic is not found return put empty string. Output should be in this format.
<product>product written here</product>
<subtopic>subtopic written here</subtopic>
only return the product and subtopic tags, do not return any other text.
Do not modify the product and subtopic names, use the exact names as they are in the product data.
in case product is not found, return <product></product> and in case subtopic is not found return <subtopic></subtopic>.

Product Data: {product_data}
Query: {query}
Output: 

"""

def classify_product_and_subtopic(query, product_data):
    """
    Classify which product and subtopic a query refers to, using the LLM.

    Fills the notebook-level ``prompt`` template with ``product_data`` and
    ``query``, sends it through ``generate_text`` and extracts the contents of
    the ``<product>`` and ``<subtopic>`` tags from the reply.

    Args:
        query (str): The user query.
        product_data (str): The product/subtopic catalog text.

    Returns:
        dict: ``{"product": str, "subtopic": str}`` with surrounding
        whitespace stripped. A value is ``""`` when its tag is missing from
        the reply or is empty.
    """
    import re  # local import so this cell does not depend on another cell's imports

    full_prompt = prompt.format(product_data=product_data, query=query)
    # Fall back to "" so a None/empty reply yields empty fields instead of a TypeError.
    response = generate_text(full_prompt) or ""

    def extract_tag(tag):
        # DOTALL: the model may put the value on its own line inside the tag,
        # which a plain `.` would refuse to match. IGNORECASE: tolerate
        # <Product>/<PRODUCT> spellings of the tag itself.
        match = re.search(
            rf"<{tag}>(.*?)</{tag}>",
            response,
            flags=re.DOTALL | re.IGNORECASE,
        )
        return match.group(1).strip() if match else ""

    return {
        "product": extract_tag("product"),
        "subtopic": extract_tag("subtopic"),
    }
# Example usage with a nonsense query, to exercise the "nothing matched" path.
# NOTE(review): this rebinds the notebook-level `query` that earlier cells set,
# so re-running those cells out of order will see this value.
query = "blah blah blah"
classification_result = classify_product_and_subtopic(query, product_data)



In [385]:
# Display the classification dict; a field is "" when its tag was missing or empty.
classification_result

{'product': '', 'subtopic': ''}