In [3]:
import os
import tempfile
import random
import pandas as pd
import numpy as np
from datetime import timedelta
import gradio as gr
from openai import OpenAI
from dotenv import load_dotenv

# LangChain + Chroma
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# --- Setup ---
load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
openai = OpenAI()
embeddings = OpenAIEmbeddings()
persist_dir = "vector_db"

# Load Chroma DB
db = Chroma(persist_directory=persist_dir, embedding_function=embeddings)

# Setup Conversational RAG
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
rag_chain = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(model="gpt-4o-mini"),
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    memory=memory
)

# --- CSV Generator ---
def generate_servicenow_data(n=10000):
    np.random.seed(42)
    random.seed(42)

    priorities = ["Low", "Medium", "High", "Critical"]
    impacts = ["Low", "Medium", "High"]
    urgencies = ["Low", "Medium", "High"]
    statuses = ["Open", "In Progress", "Resolved", "Closed"]
    categories = ["Network", "Software", "Hardware", "Database", "Security"]
    subcategories = {
        "Network": ["VPN", "LAN", "WAN"],
        "Software": ["Email", "OS", "Application"],
        "Hardware": ["Laptop", "Desktop", "Printer"],
        "Database": ["Oracle", "MySQL", "SQL Server"],
        "Security": ["Phishing", "Malware", "Access"],
    }
    resolution_codes = ["Solved Remotely", "Solved via On-Site", "Workaround Provided", "Not Reproducible"]

    opened_dates = pd.date_range("2025-07-01", periods=n, freq="h")
    status_selected = np.random.choice(statuses, n, p=[0.3, 0.3, 0.2, 0.2])

    closed_dates, resolution_codes_selected, resolved_by = [], [], []
    for i, status in enumerate(status_selected):
        if status in ["Resolved", "Closed"]:
            closed_dt = opened_dates[i] + timedelta(hours=random.randint(1, 72))
            closed_dates.append(closed_dt)
            resolution_codes_selected.append(random.choice(resolution_codes))
            resolved_by.append(f"user{random.randint(401,600)}")
        else:
            closed_dates.append(pd.NaT)
            resolution_codes_selected.append(None)
            resolved_by.append(None)

    categories_selected = np.random.choice(categories, n)
    subcategories_selected = [random.choice(subcategories[cat]) for cat in categories_selected]

    data = {
        "Incident_ID": [f"INC{i:06d}" for i in range(1, n+1)],
        "Opened_At": opened_dates,
        "Closed_At": closed_dates,
        "Opened_By": [f"user{random.randint(1,200)}" for _ in range(n)],
        "Assigned_To": [f"user{random.randint(201,400)}" for _ in range(n)],
        "Priority": np.random.choice(priorities, n),
        "Impact": np.random.choice(impacts, n),
        "Urgency": np.random.choice(urgencies, n),
        "Category": categories_selected,
        "Subcategory": subcategories_selected,
        "Status": status_selected,
        "Resolution_Code": resolution_codes_selected,
        "Resolved_By": resolved_by,
        "Short_Description": [f"Issue {i} - {random.choice(categories)} related" for i in range(1, n+1)],
        "Description": [f"Detailed description of incident {i}, auto-generated for testing." for i in range(1, n+1)],
        "Updated_At": [d + timedelta(hours=random.randint(1,100)) for d in opened_dates],
    }
    return pd.DataFrame(data)

# --- Chatbot Function (CSV + RAG) ---
def chatbot_fn(message, history):
    history = history or []

    # CSV command
    if "create csv" in message.lower():
        df = generate_servicenow_data(10000)
        tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
        df.to_csv(tmp_file.name, index=False)

        preview = df.head().to_markdown()
        bot_reply = f"Here’s a preview of the dataset:\n\n{preview}"
        history.append((message, bot_reply))
        history.append(("", (tmp_file.name, "📂 Download ServiceNow CSV (10k rows)")))
        return history, ""

    # Otherwise → run RAG
    rag_result = rag_chain({"question": message})
    reply = rag_result["answer"]

    history.append((message, reply))
    return history, ""

# --- Gradio App ---
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="ServiceNow Assistant (CSV + RAG)", height=500)
    msg = gr.Textbox(
        label="Ask me something",
        placeholder="Type 'create csv' to generate a dataset, or ask a question from your documents...",
    )

    msg.submit(chatbot_fn, [msg, chatbot], [chatbot, msg])




  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
  chatbot = gr.Chatbot(label="ServiceNow Assistant (CSV + RAG)", height=500)


In [4]:
demo.launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




  rag_result = rag_chain({"question": message})
