In [None]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
load_dotenv()

########################################## model (use groq) #####################################################
# Look up the Groq API key in the process environment (load_dotenv() has
# already been called above) and build the chat model shared by every chain.
groq_api_key = os.environ.get("GROQ_API_KEY")
model = ChatGroq(
    model_name="Llama3-8b-8192",
    groq_api_key=groq_api_key,
)
print("MODEL ---->", model)


######################################### embedding model ######################################################
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings 

# Forward the Hugging Face token under the `HF_TOKEN` name, but only when it
# is actually configured: assigning None into os.environ raises TypeError,
# which previously aborted the script whenever "huggin_face_token" was unset.
hf_token = os.getenv("huggin_face_token")
if hf_token is not None:
    os.environ['HF_TOKEN'] = hf_token

# Sentence-embedding model used to vectorise the document chunks.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
print("embeddings ---->", embeddings)

####################################### load documents from the web ###############################################
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
import bs4 
# Only parse the HTML elements whose class is one of the post's title,
# header or content classes; everything else on the page is skipped.
post_sections = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_sections},
)

# Fetch the page and turn it into LangChain documents.
docs = loader.load()
print("docs --->", docs)

######################################### chunking ###############################################################
# Split the loaded documents into overlapping chunks (size 1000, overlap 200)
# so each piece is small enough to embed and to fit into a prompt.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
splits = text_splitter.split_documents(docs)

########################################## convert docs into embeddings and store into vectorstore #################
# Embed every chunk and index the vectors in a Chroma store.
vectorstore = Chroma.from_documents(
    embedding=embeddings,
    documents=splits,
)
print("vectorstore ----->", vectorstore)

# Expose the Chroma store as a retriever so it can be plugged into chains.
retriever = vectorstore.as_retriever()
print("retriever ------>", retriever)

######################################### prompt template ##############################################
# System instructions for the answering model; {context} is the slot that
# receives the retrieved document text.
system_prompt = """ You are an assistant for question-answering tasks. 
                 Use the following pieces of retrieved context to answer the question. 
                 If you don't know the answer, say that you don't know. 
                 Use three sentences maximum and keep the answer concise.\n\n
                 {context}
                 """

# Two-message chat template: the system instructions followed by the user's
# question, supplied through the {input} variable.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
######################################## create retrieval chain (text summarization - stuff type) ###############################################
# "Stuff" strategy: the retrieved documents are combined into the prompt and
# answered by the model in one call.
question_answer_chain = create_stuff_documents_chain(model, prompt)

# Full pipeline: retrieve documents for the input, then run the QA chain.
reg_chain = create_retrieval_chain(retriever, question_answer_chain)

# Single-shot question without any chat history.
response = reg_chain.invoke({"input": "what is self-reflection?"})
print("response ---->", response)



################################### Adding chat History along with prompt template ####################################################################
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import MessagesPlaceholder 

## Prompt 1 - rewrites the latest user query into a standalone question.
## Inputs: chat_history + input. Consumed by create_history_aware_retriever().
contextualize_q_system_prompt = """ Given a chat history and the latest user question which might reference context in the history, formulate a standalone question which can be understood without the chat history. 
                                 DO NOT answer the question, just reformulate it if needed and otherwise return as is.
                                 """

# Message order: system instructions, then the prior turns, then the new question.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
## Prompt 2 - Answer question based on retrieved chunks, input is context + chat_history + input, will be used by create_stuff_documents_chain()
qa_system_prompt = """
You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, say that you don't know. 
Use three sentences maximum and keep the answer concise.

{context}
"""
# The chat_history placeholder was missing, so the answering model never saw
# earlier turns even though the chain is always invoked with "chat_history".
# Including it lets follow-up questions be answered in conversational context.
System_Prompt = ChatPromptTemplate.from_messages([
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")
    ]
)

# Retriever that uses the model and the contextualize prompt together with
# the base retriever, so retrieval can take the chat history into account.
history_aware_retriever = create_history_aware_retriever(
    model,
    retriever,
    contextualize_q_prompt,
)
print("history_aware_retriever ------->", history_aware_retriever)

# QA chain that answers from the retrieved documents.
question_answer_chain_with_history = create_stuff_documents_chain(
    model,
    System_Prompt,
)

# Complete history-aware pipeline: retrieval followed by answering.
rag_chain_with_history = create_retrieval_chain(
    history_aware_retriever,
    question_answer_chain_with_history,
)

from langchain_core.messages import AIMessage, HumanMessage
# Manually maintained conversation log: alternating Human/AI messages that is
# passed to the chain on every call.
chat_history = []

# --- Turn 1: no prior history yet ---
question = "what is task decomposition?"
response1 = rag_chain_with_history.invoke({"input":question,"chat_history":chat_history})
print("type of response1",type(response1))
print("response1 --->",response1["answer"])

# Record turn 1 so the follow-up question can refer back to it.
chat_history.extend([
    HumanMessage(content = question),
    AIMessage(content = response1["answer"])
    ]
)

# --- Turn 2: a follow-up that only makes sense given turn 1 ---
question2 = "provide detail explaination on the subject?"
response2 =  rag_chain_with_history.invoke({"input":question2,"chat_history":chat_history})
print("response2 ---->",response2["answer"])

# Record turn 2. The human message must be question2: the original stored
# `question` again, pairing the second answer with the wrong user message.
chat_history.extend([
    HumanMessage(content = question2),
    AIMessage(content = response2["answer"])
    ]
)

###################################################### history with session ids ######################################
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
# Per-session chat histories, keyed by session id.
store = {}


def get_session_history(session_id)->BaseChatMessageHistory:
    """Return the chat history for *session_id*, creating it on first use.

    Each session id maps to its own ChatMessageHistory object held in the
    module-level ``store`` dict, so separate sessions do not share messages.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: start it with an empty history.
        history = store[session_id] = ChatMessageHistory()
        return history

# ------- two independent sessions -------
# NOTE: the top-level key is the lowercase string "configurable".
config1 = {"configurable": {"session_id": "Chat_1"}}
config2 = {"configurable": {"session_id": "Chat_2"}}

# Wrap the history-aware RAG chain, passing get_session_history as the
# history lookup and naming the input, history and output keys.
conversation_rag_chain = RunnableWithMessageHistory(
    rag_chain_with_history,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

# First question in each session.
response1_session1 = conversation_rag_chain.invoke(
    {"input": "what is chain of Thought?"},
    config=config1,
)["answer"]
print("response1_session1  ->", response1_session1)

response1_session2 = conversation_rag_chain.invoke(
    {"input": "what is Tree of Thoughts?"},
    config=config2,
)["answer"]
print("response1_session2 -> ", response1_session2)

# Same follow-up text in both sessions, each invoked with its own config.
response2_session1 = conversation_rag_chain.invoke(
    {"input": "Please elaborate on the topic?"},
    config=config1,
)["answer"]
print("response2_session1 ->", response2_session1)

response2_session2 = conversation_rag_chain.invoke(
    {"input": "Please elaborate on the topic?"},
    config=config2,
)["answer"]
print("response2_session2 ->", response2_session2)

# Text Summarization 
There are 3 types of text summarization, i.e. 3 common ways to combine retrieved documents before passing them to the LLM:
    1. Stuff -> All documents are concatenated and sent in a single prompt.
    2. Map-Reduce -> Each document is processed individually and then results are combined.
    3. Refine -> An answer is built iteratively, updating with each new doc.

## Explanation of the above code
We set up a sentence-transformer model (all-MiniLM-L6-v2) for converting text into vector embeddings, and use WebBaseLoader to load a blog post on autonomous agents from Lilian Weng’s site. The documents are too big to embed directly or to pass to an LLM, so we split them into smaller chunks. The chunks are stored in Chroma, a fast and simple in-memory vector database, which is then converted into a retriever.
The prompt defines a "system" message that gives the assistant its instructions (what to do with the context) and a "human" message that carries the actual user query ({input}). {context} will be populated with the retrieved document chunks.

"create_stuff_documents_chain" builds a simple RAG chain using the “stuff” method:
    All retrieved documents are stuffed into one input along with the user’s question.
    Best for shorter documents or small k (e.g., 2–5 retrieved chunks)

create_retrieval_chain connects:
    Your retriever (searches Chroma)
    Your LLM + prompt chain

invoke step triggers the full process:
    Query → Retriever → Stuffed prompt → LLM → Answer

#### About {Context}
The {context} placeholder in your prompt template is populated automatically by LangChain, based on the document chunks retrieved by your retriever.
Prompt Template -> 
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),  # system_prompt includes {context}
    ("human", "{input}")
])
{context} is expected in the system message
{input} is expected in the human message (i.e., the question)

question_answer_chain = create_stuff_documents_chain(model, prompt)
reg_chain = create_retrieval_chain(retriever, question_answer_chain)

create_retrieval_chain() wires together:
    A retriever (searches for relevant docs)
    A document-processing chain (i.e., question_answer_chain)

when invoked, response = reg_chain.invoke({"input": "what is self-reflection?"}) 
The chain uses "input" ("what is self-reflection?") as the question and passes it to the retriever; the retriever returns a list of relevant Document objects.
LangChain automatically:
    Extracts their page_content, Concatenates them into one string (e.g., separated by newlines), Substitutes that string into the {context} variable in your prompt.

# Explanation of the chat-history code
We need to provide 2 prompts when history is involved: one for folding the chat history into the current query, and one for question answering.

Prompt 1 — Query Reformulation (require -> chat_history + input)
Reformulates a follow-up question using previous chat history.
Used in create_history_aware_retriever().

Prompt 2 — Final Answer Prompt (require -> chat_history + input + context)
Uses retrieved context to generate the final response.
Used in create_stuff_documents_chain().

History-Aware Retriever
    history_aware_retriever = create_history_aware_retriever(model, retriever, contextualize_q_prompt)
QA Chain with Retrieved Chunks
    question_answer_chain_with_history = create_stuff_documents_chain(model, System_Prompt)
Final RAG Chain
    rag_chain_with_history = create_retrieval_chain(history_aware_retriever, question_answer_chain_with_history)


# Explanation of the history-with-sessions code

# BaseChatMessageHistory
An abstract base class that defines the interface for chat history storage.

## Common implementations
1-> ChatMessageHistory (in-memory)
2-> FileChatMessageHistory (local storage)
3-> RedisChatMessageHistory (Redis-backed) etc etc 

# ChatMessageHistory
A concrete in-memory implementation of BaseChatMessageHistory. It stores messages as a list of LangChain BaseMessage objects.

# RunnableWithMessageHistory
A wrapper that lets you add memory (chat history) to any LangChain Runnable — e.g., a prompt → LLM chain.
It's typically used to make a stateless LLM chain behave like a stateful chatbot.

get_session_history acts as a session manager for chat history: each session (Chat_1, Chat_2, etc.) gets its own independent message history stored in store.

conversation_rag_chain = RunnableWithMessageHistory(
    rag_chain_with_history,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer"
)
This wraps your rag_chain_with_history so that:
    It automatically manages message history per session.
        Adds: "input" from user, "chat_history" for context, "answer" from model, All wrapped neatly into one Runnable.

response1_session1 = conversation_rag_chain.invoke(
    {"input": "what is chain of Thought?"}, config=config1
)["answer"]
First call uses session "Chat_1" → stores that exchange in store["Chat_1"]

response1_session2 = conversation_rag_chain.invoke(
    {"input": "what is Tree of Thoughts?"}, config=config2
)["answer"]
Second call uses "Chat_2" → separate memory


## History is automatically retrieved and updated by LangChain behind the scenes.
