In [2]:
import time
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.docstore.in_memory import InMemoryDocstore

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Embedding model handed to the FAISS vector stores in this notebook as their embedding_function.
EMBED = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [5]:
VECTOR_DIM = 384  # embedding size for the above model; used to size the FAISS index
MIN_SIMILARITY = 0.82  # similarity cut-off; a fixed constant, not referenced by the cells shown here
TOP_K = 5  # number of neighbours requested when searching QA_CACHE

In [6]:
# Flat inner-product FAISS index sized to the embedding dimension; passed to the
# FAISS vector-store wrapper as its backing index.
index = faiss.IndexFlatIP(VECTOR_DIM)

In [7]:
# Empty question/answer cache backed by the inner-product index created above.
# LangChain's FAISS wrapper assumes Euclidean distance unless told otherwise, so
# the strategy is declared explicitly: with an IndexFlatIP backend a HIGHER score
# means a closer match, and score thresholds / relevance scores must follow that.
from langchain_community.vectorstores.utils import DistanceStrategy

QA_CACHE = FAISS(
    embedding_function=EMBED,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [8]:
# Report when the cache's index holds no vectors yet.
if not QA_CACHE.index.ntotal:
    print("no data")

no data


In [2]:
from langchain_ollama import ChatOllama

# Chat model served through Ollama (gemma3:12b), constructed with temperature 0.0.
llm = ChatOllama(
    model="gemma3:12b",
    temperature=0.0
)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is not set. Writing ``None`` into
            ``os.environ`` would otherwise fail with an opaque ``TypeError``
            that does not say which variable is missing.
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"Required environment variable {name!r} is not set")
    return value


# The key and endpoint are already stored under the names the client reads, so
# they only need to be checked for presence (re-assigning them was a no-op).
_require_env("AZURE_OPENAI_API_KEY")
_require_env("AZURE_OPENAI_ENDPOINT")
# These two are re-exported under different names.
os.environ["OPENAI_API_VERSION"] = _require_env("AZURE_OPENAI_API_VERSION")
os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"] = _require_env("AZURE_OPENAI_DEPLOYMENT")
from langchain.chat_models import init_chat_model

# Azure OpenAI chat model bound to the deployment named in the environment.
llm_az = init_chat_model(
    "azure_openai:gpt-4.1",
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
)
# llm_az.invoke("hi how are you")

In [8]:
from pydantic import BaseModel, Field
from typing import List, Literal
from typing_extensions import Annotated, TypedDict


class topicseg(TypedDict):
    """Structured-output schema: a topic label plus an answer for one user query."""

    # LangChain builds the schema for a TypedDict from its annotations only, so a
    # description supplied as `= Field(...)` in the class body never reaches the
    # model. Field metadata for a TypedDict is given as
    # Annotated[type, default, description]; `...` marks the field as required.
    topic: Annotated[
        str,
        ...,
        "You are a topic segregation assistant. Your task is to analyze each user query and categorize it into an appropriate topic",
    ]
    answer: Annotated[str, ..., "give me a correct answer for user question"]

In [9]:
# Wrap `llm` so that its responses are structured according to the `topicseg` schema.
structured_llm = llm.with_structured_output(topicseg)

In [43]:
# Sample query; the structured result `res` is read by key ("topic", "answer") when it is cached.
question="who is chief minister?"
res=structured_llm.invoke(question)

In [44]:
# Cache the question text, carrying the model's topic and answer as metadata.
qa_metadata = {"topic": res["topic"], "answer": res["answer"]}
QA_CACHE.add_texts(texts=[question], metadatas=[qa_metadata])

['dcef3b46-a5d1-48f3-a39f-147394aa5155']

In [45]:
# Bare expression: displays the vector-store object's repr.
QA_CACHE

<langchain_community.vectorstores.faiss.FAISS at 0x1a43009d150>

In [48]:
# A more specific phrasing of the cached question, used to probe the cache.
question1="who is chief minister of Andhra Pradesh"

In [50]:
# Query the cache for up to TOP_K neighbours of question1 and take the first hit.
# NOTE(review): assumes hits come back best-first — TODO confirm. `results[0]`
# raises IndexError if the search returns no hits.
results = QA_CACHE.similarity_search_with_score(question1, k=TOP_K)
best_doc, dist = results[0]


In [51]:
# Show the metadata stored with the first hit (topic and cached answer).
best_doc.metadata
# dist

{'topic': 'Chief Minister',
 'answer': "The current Chief Minister of India varies depending on the state. Here's a breakdown as of today, May 16, 2024:\n\n*   **Andhra Pradesh:** N. Chandrababu Naidu\n*   **Arunachal Pradesh:** Pema Khandu\n*   **Assam:** Himanta Biswa Sarma\n*   **Bihar:** Nitish Kumar\n*   **Chhattisgarh:** Vishnu Deo Sai\n*   **Goa:** Pramod Sawant\n*   **Gujarat:** Bhupendra Patel\n*   **Haryana:** Manohar Lal Khattar (resigned, new CM to be appointed soon)\n*   **Himachal Pradesh:** Sukhvinder Singh Sukhu\n*   **Jharkhand:** Hemant Soren (currently under arrest, interim CM appointed)\n*   **Karnataka:** Siddaramaiah\n*   **Kerala:** Pinarayi Vijayan\n*   **Madhya Pradesh:** Mohan Yadav\n*   **Maharashtra:** Eknath Shinde\n*   **Manipur:** N. Biren Singh\n*   **Meghalaya:** Conrad K. Sangma\n*   **Mizoram:** Lal Thangwala\n*   **Nagaland:** Nephiu Rio\n*   **Odisha:** Naveen Patnaik\n*   **Punjab:** Bhagwant Mann\n*   **Rajasthan:** Bhajan Lal Sharma\n*   **Sikkim

In [54]:
# Dump every cached entry: id, question text, topic and stored answer.
separator = "-" * 50
for doc_id, doc in QA_CACHE.docstore._dict.items():
    print("ID:", doc_id)
    print("Text:", doc.page_content)
    meta = doc.metadata
    print("Metadata:", meta["topic"])
    print("answer:", meta["answer"])
    print(separator)

ID: be1b3eb5-dc41-47cc-90bf-9bc7b71cf41e
Text: tell me answer 2+2
Metadata: mathematics
answer: 2 + 2 = 4
--------------------------------------------------
ID: 63128617-f966-45c0-836f-1cb7dd82bdc6
Text: tell me about prime minister in 20 words
Metadata: Prime Minister
answer: The Prime Minister leads the government, typically the leader of the majority party, responsible for policy and representing the nation.
--------------------------------------------------
ID: 34e6408e-660e-4732-9fd1-52d447202cc6
Text: tell me about prime minister in 20 words
Metadata: Prime Minister
answer: The Prime Minister leads the government, typically the leader of the majority party, responsible for policy and representing the nation.
--------------------------------------------------
ID: dcef3b46-a5d1-48f3-a39f-147394aa5155
Text: who is chief minister?
Metadata: Chief Minister
answer: The current Chief Minister of India varies depending on the state. Here's a breakdown as of today, May 16, 2024:

*   **An

In [23]:
from pydantic import BaseModel, Field
from typing_extensions import TypedDict

# Structured-output schema: the model is asked to return these two string fields.
class TopicSeg(TypedDict):
    topic: str  # topic label for the question
    answer: str  # answer text for the question

# Rebind structured_llm: `llm_az` wrapped so that it returns TopicSeg-shaped output.
structured_llm = llm_az.with_structured_output(TopicSeg)


# The topic store gets its OWN FAISS index. Passing the shared `index` object
# (the one QA_CACHE was built on) would put both stores' vectors into a single
# index while each store keeps a separate docstore / id map, so search hits could
# not be mapped back to documents. An L2 index is used because resolve_topic
# treats the returned score as a distance (larger = more different).
TOPIC_INDEX = FAISS(
    embedding_function=EMBED,
    index=faiss.IndexFlatL2(VECTOR_DIM),
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)
def resolve_topic(raw_topic):
    """Map a raw topic label onto a stored topic, or register it as a new one.

    Args:
        raw_topic: Topic string produced for the current question.

    Returns:
        ``raw_topic`` when it was registered as a new topic, otherwise the text
        of the nearest topic already held in ``TOPIC_INDEX``.
    """
    global TOPIC_INDEX

    # 1️⃣ No topic store at all → create one seeded with this topic
    if TOPIC_INDEX is None:
        TOPIC_INDEX = FAISS.from_texts(
            texts=[raw_topic],
            embedding=EMBED,
            metadatas=[{"similarity": 1.0}]
        )
        return raw_topic

    # 2️⃣ Search for nearest topic
    topic_results = TOPIC_INDEX.similarity_search_with_score(raw_topic, k=1)

    # 3️⃣ No hit (e.g. the store exists but holds no topics yet — indexing [0]
    #    would raise IndexError) or the nearest topic is too different
    #    → add new topic to same index
    if not topic_results or topic_results[0][1] > 0.30:  # Distance threshold
        TOPIC_INDEX.add_texts(
            texts=[raw_topic],
            metadatas=[{"similarity": 1.0}]
        )
        return raw_topic

    # 4️⃣ Else → re-map to nearest existing topic
    nearest_doc = topic_results[0][0]
    return nearest_doc.page_content



def store_to_memory(question, topic, answer):
    """Add one question to QA_CACHE, with its topic and answer kept as metadata."""
    entry_metadata = {"topic": topic, "answer": answer}
    QA_CACHE.add_texts(texts=[question], metadatas=[entry_metadata])
def retrieve_memory(query, topic, k=3):
    """Return the (doc, score) hits for ``query`` whose stored topic equals ``topic``.

    The cache is searched for ``k`` neighbours first; hits recorded under a
    different topic are then dropped.
    """
    same_topic = []
    for doc, score in QA_CACHE.similarity_search_with_score(query, k=k):
        if doc.metadata.get("topic") != topic:
            continue
        same_topic.append((doc, score))
    return same_topic

def answer_question(question):
    """Answer ``question`` with topic-scoped history and cache the result.

    Args:
        question: The user's question text.

    Returns:
        Whatever ``llm_az.invoke`` returned for the final prompt (unchanged).
        Only the answer *text* is written to the cache.
    """
    # Step 1: LLM extracts topic + rough answer
    res = structured_llm.invoke(question)
    raw_topic = res["topic"]

    # Step 2: Map automatically to best existing topic
    topic = resolve_topic(raw_topic)

    # Step 3: Fetch memory from that topic
    history = retrieve_memory(question, topic)

    # str() keeps the join safe even if a cached answer is not a plain string
    context = "\n".join(str(d.metadata['answer']) for d, s in history)

    final_prompt = f"""
    Topic: {topic}
    Relevant History:
    {context}

    User Question: {question}
    """

    final_answer = llm_az.invoke(final_prompt)

    # Step 4: Store memory under topic cluster.
    # Cache the answer text rather than the response object: the history join
    # above needs strings, and a message object in metadata made the next
    # same-topic question fail with TypeError.
    answer_text = getattr(final_answer, "content", None)
    if not isinstance(answer_text, str):
        answer_text = str(final_answer)
    store_to_memory(question, topic, answer_text)

    return final_answer






In [28]:
# answer_question("Who is the Prime Minister of India?")

In [10]:
# ---------------------------------------------------------
# Imports
# ---------------------------------------------------------
from typing import List, Tuple
from typing_extensions import TypedDict
from pydantic import BaseModel, Field

from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_core.documents import Document
import faiss

# ---------------------------------------------------------
# Assumptions: you ALREADY have these defined in your code
# ---------------------------------------------------------
# llm_az : your chat LLM (e.g., Azure OpenAI ChatModel)
# EMBED  : your embedding model (must implement .embed_query and .embed_documents)

# Example (JUST for reference, don't duplicate if you already have them):
# from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
# llm_az = AzureChatOpenAI(...)
# EMBED = AzureOpenAIEmbeddings(...)


# ---------------------------------------------------------
# 1. Structured output schema for topic + answer
# ---------------------------------------------------------
# Schema for the structured LLM call: two required string fields.
class TopicSeg(TypedDict):
    topic: str  # topic label for the question
    answer: str  # answer text for the question

# LLM that returns { "topic": "...", "answer": "..." }
# (llm_az wrapped with the TopicSeg schema; llm_az itself must already be defined).
structured_llm = llm_az.with_structured_output(TopicSeg)


# ---------------------------------------------------------
# 2. QA Cache Vector Store (for questions + answers + topic)
# ---------------------------------------------------------
# Embed a throw-away string once to discover the model's vector width, then
# create an empty L2 index of that width and wrap it as the QA cache.
# Re-running this cell rebinds QA_CACHE to a fresh, empty store.
dummy_emb = EMBED.embed_query("dimension test")
embedding_dim = len(dummy_emb)
qa_faiss_index = faiss.IndexFlatL2(embedding_dim)
QA_CACHE = FAISS(
    embedding_function=EMBED,
    index=qa_faiss_index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id=dict(),
)


# ---------------------------------------------------------
# 3. Topic Index (for clustering topics themselves)
# ---------------------------------------------------------
# IMPORTANT: start as None, will be initialized on first topic
# (resolve_topic rebinds this global when it sees None).
TOPIC_INDEX = None  # type: FAISS | None


def resolve_topic(raw_topic: str, distance_threshold: float = 0.30) -> str:
    """
    Resolve a raw topic label to the topic name it should be filed under.

    The nearest stored topic is looked up in TOPIC_INDEX; its score is treated
    as a distance (smaller = closer). A topic within ``distance_threshold`` is
    re-used, anything farther is registered as a new topic. On the very first
    call TOPIC_INDEX is created and seeded with ``raw_topic``.

    Returns the stored topic text when re-using, otherwise ``raw_topic``.
    """
    global TOPIC_INDEX
    seed_metadata = [{"similarity": 1.0}]

    if TOPIC_INDEX is not None:
        # Existing topic store → compare against its single nearest entry
        nearest = TOPIC_INDEX.similarity_search_with_score(raw_topic, k=1)
        doc, distance = nearest[0]

        if distance > distance_threshold:
            # Too far from everything known → becomes its own topic
            TOPIC_INDEX.add_texts(texts=[raw_topic], metadatas=seed_metadata)
            return raw_topic

        # Close enough → file under the existing topic name
        return doc.page_content

    # No store yet → this topic is both the label and the first entry
    TOPIC_INDEX = FAISS.from_texts(
        texts=[raw_topic],
        embedding=EMBED,
        metadatas=seed_metadata,
    )
    return raw_topic


# ---------------------------------------------------------
# 4. Memory Store and Retrieval
# ---------------------------------------------------------
def store_to_memory(question: str, topic: str, answer: str) -> None:
    """
    Record one Q&A pair in QA_CACHE: the question is the indexed text,
    topic and answer travel along as metadata.
    """
    entry_metadata = {"topic": topic, "answer": answer}
    QA_CACHE.add_texts(texts=[question], metadatas=[entry_metadata])


def retrieve_memory(query: str, topic: str, k: int = 3) -> List[Tuple[Document, float]]:
    """
    Retrieve up to k cached QAs similar to the query, restricted to one topic.

    Args:
        query: Text to search the QA cache with.
        topic: Only entries whose metadata "topic" equals this value are returned.
        k: Maximum number of hits to return.

    Returns:
        List of (Document, score) pairs; empty when the cache holds nothing or
        no entry of this topic is among the candidates.
    """
    # If QA cache is empty, nothing to retrieve
    if QA_CACHE.index.ntotal == 0:
        return []

    # Hand the topic filter to the vector store instead of filtering the top-k
    # afterwards: post-filtering silently dropped same-topic entries whenever
    # other topics occupied the first k ranks. With a filter the store examines
    # `fetch_k` candidates and returns up to k that match; fetch_k is kept
    # >= k so large k values still see at least as many candidates as before.
    return QA_CACHE.similarity_search_with_score(
        query,
        k=k,
        filter={"topic": topic},
        fetch_k=max(k, 20),
    )


# ---------------------------------------------------------
# 5. Main pipeline: answer_question()
# ---------------------------------------------------------
def answer_question(question: str) -> str:
    """
    Answer a question using topic-scoped history from the QA cache.

    Pipeline:
      1) structured LLM call → {topic, rough answer}
      2) resolve the topic against the known topics (or register a new one)
      3) pull up to 5 cached Q&As filed under that topic
      4) ask the chat LLM, giving it that history as context
      5) cache the final answer under the topic

    Returns the final answer text.
    """
    # 1) Topic + rough answer (the rough answer is read but not used further)
    structured = structured_llm.invoke(question)
    raw_topic = structured["topic"]
    rough_answer = structured["answer"]

    # 2) Topic cluster for this question
    topic = resolve_topic(raw_topic)

    # 3) Prior Q&As for the topic, rendered as a bullet list
    history = retrieve_memory(question, topic, k=5)
    history_lines = [
        f"- Q: {doc.page_content}\n  A: {doc.metadata.get('answer', '')}"
        for doc, _ in history
    ]
    history_block = "\n".join(history_lines) if history_lines else "None yet."

    # 4) Final prompt: topic + history + the question itself
    final_prompt = f"""
You are a helpful assistant.

Topic: {topic}

Previous Q&A for this topic:
{history_block}

User question: {question}

Using the above history only if it is relevant, give the best possible answer.
Be accurate and don't hallucinate details you don't know.
"""

    reply = llm_az.invoke(final_prompt)

    # Prefer the reply's `.content`; fall back to its string form when absent
    _missing = object()
    content = getattr(reply, "content", _missing)
    final_answer = str(reply) if content is _missing else content

    # 5) Remember this exchange under the resolved topic
    store_to_memory(question, topic, final_answer)

    return final_answer


# ---------------------------------------------------------
# 6. Simple Testing
# ---------------------------------------------------------
# if __name__ == "__main__":
#     q1 = "Who is the Prime Minister of India?"
#     print("Q1:", q1)
#     a1 = answer_question(q1)
#     print("A1:", a1, "\n")

#     q2 = "Which party is currently ruling India?"
#     print("Q2:", q2)
#     a2 = answer_question(q2)
#     print("A2:", a2, "\n")

#     q3 = "Who is the Chief Minister of Tamil Nadu?"
#     print("Q3:", q3)
#     a3 = answer_question(q3)
#     print("A3:", a3, "\n")

#     # You can also inspect what's in QA_CACHE:
#     print("---- Stored QA CACHE metadata ----")
    


In [44]:
# Run the pipeline on a question that refers back to an earlier exchange.
answer_question("the code u gave previously is not working check and crt")

"It seems that there is no prior code provided in this discussion thread. Could you please share the code you'd like me to check and correct? Once I have the code, I'll help you debug and fix any issues."

In [21]:
# Probe the topic store: fetch the single nearest stored topic for a sample
# label and print the matched document and its score.
raw_topic="Leadership"
results = TOPIC_INDEX.similarity_search_with_score(raw_topic, k=1)
doc,distance = results[0]
print(doc)
print(distance)

page_content='Political Leadership in India' metadata={'similarity': 1.0}
0.77778465


In [43]:
# Print every entry currently held in the QA cache (id, question, metadata).
rule = "-" * 40
for _id, doc in QA_CACHE.docstore._dict.items():
    print("ID:", _id)
    print("Text:", doc.page_content)
    print("Metadata:", doc.metadata)
    print(rule)

ID: 744fcda4-1598-47cc-8fab-d22fe383882f
Text: Which party is ruling India?
Metadata: {'topic': 'Political Leadership in India', 'answer': "The Bharatiya Janata Party (BJP) is currently the ruling party in India. The party has been in power at the central government since 2014, with Prime Minister Narendra Modi serving as the leader. The BJP won consecutive elections in 2014 and 2019, securing its majority in the Lok Sabha, India's lower house of Parliament."}
----------------------------------------
ID: 9350e344-4b25-4936-a7a1-5fbb28201e6e
Text: How to unlock laptop without password?
Metadata: {'topic': 'Laptop Password Recovery', 'answer': 'If you need to unlock a laptop without a password, the best approach depends on the specific situation and the operating system being used. Below are general methods:\n\n### For Windows:\n1. **Use a Password Reset Disk or Security Questions**: If you set up a password reset disk or security questions when creating your account, you can use those t

In [None]:
# for doc_id, doc in QA_CACHE.docstore._dict.items():
#     print("ID:", doc_id)
#     print("Text:", doc.page_content)
#     print("Metadata:", doc.metadata["topic"])
#     print("answer:", doc.metadata["answer"])
#     print("-" * 50)