In [1]:
!pip install -qU langchain_community pypdf langchain-groq sentence_transformers faiss-gpu langgraph pydantic

In [2]:
# LIBRARY
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from typing import List
from operator import itemgetter
from langchain_groq import ChatGroq
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
import os

In [3]:
# LangSmith tracing settings (not secret, safe to keep in the notebook).
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_PROJECT'] = 'advanced-rag'

# API keys must never be stored in the notebook itself. Each key is taken from
# the environment when it is already set there; otherwise it is requested
# interactively without echoing, so the value never lands in the saved file.
# SECURITY: the keys that used to be hardcoded in this cell have been exposed
# and should be revoked and regenerated.
from getpass import getpass

for _secret_name in ('LANGCHAIN_API_KEY', 'GROQ_API_KEY', 'TAVILY_API_KEY'):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass(f"Enter {_secret_name}: ")

In [4]:
# Load the cookbook PDF; `documents` ends up holding whatever the loader's
# load() call returns.
pdf_path = '/kaggle/input/kitchen-dataset/cook_book.pdf'
documents = PyPDFLoader(pdf_path).load()

In [6]:
# Cut the loaded documents into overlapping chunks (500 characters each,
# 150 characters of overlap, measured with len).
txt_spliter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=150,
    length_function=len,
)
splits = txt_spliter.split_documents(documents)
chunk_count = len(splits)
print(f"split the documents into {chunk_count} chunks")

split the documents into 134 chunks


In [7]:
# Embedding model configuration
import torch  # local import: only needed here to probe for a GPU

model_name = 'BAAI/bge-small-en'
# Use the GPU when one is visible, otherwise fall back to the CPU so the
# notebook still runs on a kernel without CUDA.
model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}
encode_kwargs = {"normalize_embeddings": True}

In [9]:
# Build the BGE embedding wrapper from the model name and keyword settings.
embedding_settings = {
    "model_name": model_name,
    "model_kwargs": model_kwargs,
    "encode_kwargs": encode_kwargs,
}
HF_embedding = HuggingFaceBgeEmbeddings(**embedding_settings)

In [10]:
# Embed the chunks into a FAISS index, then expose that index as a retriever.
vectorstore = FAISS.from_documents(
    documents=splits,
    embedding=HF_embedding,
)
retriever = vectorstore.as_retriever()

In [11]:
def docs2str(documents):
    """Concatenate the ``page_content`` of every document into one string.

    Consecutive documents are separated by a blank line; an empty input
    yields an empty string.
    """
    contents = [document.page_content for document in documents]
    return "\n\n".join(contents)

In [12]:
# Basic RAG pipeline: retrieve -> fill the prompt -> LLM -> plain string
template = """Answer the following question based on this context:
{context}
Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
llm = ChatGroq(temperature=0)

# The retriever output is flattened to text for {context}; the chain input is
# forwarded unchanged as {question}.
rag_inputs = {
    "context": retriever | docs2str,
    "question": RunnablePassthrough(),
}
final_rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [13]:
# Run the basic chain on a sample question and show the exchange.
question = "how to cook Haleem?"
response = final_rag_chain.invoke(question)
print(f"Question: {question}", f"Answer: {response}", sep="\n")


Question: how to cook Haleem?
Answer: To cook Haleem, follow these steps:

1. Heat remaining oil in a pan and stir-fry onions until they are golden brown. Keep them aside in the pan until required. Just before serving, heat the onions again in the pan.
2. Add boneless beef to the pan and stir-fry until it changes color.
3. Add all the other ingredients except ladyfinger, salt, and chopped coriander. Stir well and cook on low heat for 2-3 hours, or until the beef is tender and the mixture is thick.
4. Add ladyfinger, salt, and stir well. Cook for another 5-10 minutes.
5. Pour the mixture into a serving dish.
6. Garnish with chopped coriander.
7. Serve hot and crisp with loli, rice, or parathas.


**---------------------------------------------------------------------------A more advanced RAG system---------------------------------------------------------------------------------**

In [14]:
# Prompt that turns a follow-up question into a standalone question.
contextualize_q_system_prompt = """
Given a chat history and the latest user question
which might reference context in the chat history,
formulate a standalone question which can be understood
without the chat history. Do NOT answer the question,
just reformulate it if needed and otherwise return it as is.
"""

# System instructions, then the prior turns, then the new user input.
contextualize_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_messages)

contextualize_chain = contextualize_q_prompt | llm | StrOutputParser()

# Quick check with an empty history.
standalone_question = contextualize_chain.invoke(
    {"input": "How to cook Haleem?", "chat_history": []}
)
print(standalone_question)

You're asking for instructions on how to prepare Haleem. Here's the standalone question: "What are the steps to cook Haleem?"


In [15]:
# Retriever that receives the contextualizing prompt together with the LLM.
history_aware_retriever = create_history_aware_retriever(
    llm,
    retriever,
    contextualize_q_prompt,
)

# Answering prompt: instructions, retrieved context, prior turns, new input.
qa_messages = [
    ("system", "You are a helpful AI assistant. Use the following context to answer the user's question."),
    ("system", "Context: {context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(
    history_aware_retriever,
    question_answer_chain,
)

In [16]:
from langchain_core.messages import HumanMessage, AIMessage

# First turn: the conversation starts with an empty history.
chat_history = []
question1 = "Ingradient for cooking Haleem?"
result1 = rag_chain.invoke({"input": question1, "chat_history": chat_history})
answer1 = result1['answer']
chat_history.append(HumanMessage(content=question1))
chat_history.append(AIMessage(content=answer1))

print(f"Human: {question1}")
print(f"AI: {answer1}\n")

# Second turn: the first exchange is now part of the history.
question2 = "how to cook Haleem?"
result2 = rag_chain.invoke({"input": question2, "chat_history": chat_history})
answer2 = result2['answer']
chat_history.append(HumanMessage(content=question2))
chat_history.append(AIMessage(content=answer2))

print(f"Human: {question2}")
print(f"AI: {answer2}")


Human: Ingradient for cooking Haleem?
AI: The ingredients required for cooking Haleem are:
1 kg boneless beef, fried onions, raw onions, garlic paste, kheema, chopped coriander, and chopped green chilies.

Human: how to cook Haleem?
AI: To cook Haleem, follow these steps:

1. Heat some oil in a pan and fry the onions until they are golden brown. Keep them aside in the pan for later use.
2. In the same pan, add the boneless beef and cook it until it is browned.
3. Add the fried onions, raw onions, garlic paste, kheema, chopped coriander, and chopped green chilies to the pan. Mix well and cook for a few minutes.
4. Add water to the pan and bring the mixture to a boil. Reduce the heat to low, cover the pan, and simmer for several hours until the beef is tender and the flavors are well combined.
5. Just before serving, heat the onions in the pan and add all the seasoning. Stir and cook for a few seconds.
6. Add ladyfinger and salt, stir and pour into a serving dish.
7. Garnish with chopped

**-------------------------Multi-user chatbot using SQLite-----------------------------**

In [17]:
# Settings and helpers for the SQLite database layer
import sqlite3
from datetime import datetime
import uuid

# SQLite database file; a relative path, so it resolves against the working directory.
DB_NAME = "rag_app.db"

def get_db_connection():
    """Open a connection to ``DB_NAME`` whose rows can be read by column name.

    The caller is responsible for closing the returned connection.
    """
    connection = sqlite3.connect(DB_NAME)
    connection.row_factory = sqlite3.Row
    return connection

def create_application_logs():
    """Create the ``application_logs`` table if it does not exist yet.

    The table stores one row per question/answer exchange: an auto-increment
    ``id``, the ``session_id``, the ``user_query``, the ``gpt_response``, the
    ``model`` name and a ``created_at`` timestamp that defaults to the
    insertion time. Because of ``IF NOT EXISTS`` the call can be repeated.

    The connection is closed even when the statement fails.
    """
    conn = get_db_connection()
    try:
        # "with conn" commits on success and rolls back if execute raises.
        with conn:
            conn.execute('''CREATE TABLE IF NOT EXISTS application_logs
    (id INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id TEXT,
    user_query TEXT,
    gpt_response TEXT,
    model TEXT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)''')
    finally:
        conn.close()

def insert_application_logs(session_id, user_query, gpt_response, model):
    """Append one question/answer exchange to ``application_logs``.

    Args:
        session_id: Identifier of the conversation the exchange belongs to.
        user_query: The question that was asked.
        gpt_response: The answer that was produced.
        model: Name of the model recorded alongside the exchange.

    The insert is committed on success and rolled back on failure; the
    connection is closed in both cases.
    """
    conn = get_db_connection()
    try:
        # Values are bound as parameters, never interpolated into the SQL.
        with conn:
            conn.execute(
                'INSERT INTO application_logs (session_id, user_query, gpt_response, model) VALUES (?, ?, ?, ?)',
                (session_id, user_query, gpt_response, model),
            )
    finally:
        conn.close()

def get_chat_history(session_id):
    """Return the stored conversation of ``session_id`` as a message list.

    Every logged exchange contributes two dictionaries, in this order:
    ``{"role": "human", "content": <user_query>}`` followed by
    ``{"role": "ai", "content": <gpt_response>}``. A session without any
    logged exchange yields an empty list.

    Rows are ordered by ``created_at`` and then by ``id``: the timestamp comes
    from CURRENT_TIMESTAMP, so exchanges logged within the same second would
    otherwise have no defined order.

    The connection is closed even when the query fails.
    """
    conn = get_db_connection()
    try:
        rows = conn.execute(
            'SELECT user_query, gpt_response FROM application_logs '
            'WHERE session_id = ? ORDER BY created_at, id',
            (session_id,),
        ).fetchall()
    finally:
        conn.close()

    messages = []
    for row in rows:
        messages.append({"role": "human", "content": row['user_query']})
        messages.append({"role": "ai", "content": row['gpt_response']})
    return messages

# Initialize the database: make sure the application_logs table exists
# before any exchange is logged or read back.
create_application_logs()


In [19]:
# Name recorded in the logs for each exchange. It is read from the LLM object
# the chain was built with, so the log matches the model that actually
# answered instead of a hardcoded, unrelated model name.
LLM_MODEL_NAME = getattr(llm, "model_name", "unknown")


def ask_in_session(session_id, question):
    """Answer ``question`` inside a session and persist the exchange.

    The stored history of ``session_id`` is loaded, passed to ``rag_chain``
    together with the question, and the resulting question/answer pair is
    written to the application log.

    Returns:
        The ``'answer'`` entry of the chain result.
    """
    history = get_chat_history(session_id)
    answer = rag_chain.invoke({"input": question, "chat_history": history})['answer']
    insert_application_logs(session_id, question, answer, LLM_MODEL_NAME)
    return answer


# Example usage for a new user: a fresh session id means an empty history.
session_id = str(uuid.uuid4())
question = "ingradient for cooking Haleem?"
answer = ask_in_session(session_id, question)
print(f"Human: {question}")
print(f"AI: {answer}\n")

# Example of a follow-up question: the first exchange is read back from SQLite.
question2 = "How to cook haleem?"
answer2 = ask_in_session(session_id, question2)
print(f"Human: {question2}")
print(f"AI: {answer2}")


Human: ingradient for cooking Haleem?
AI: The ingredient for cooking Haleem that comes next in the recipe after adding the green chutney, tamarind pulp, and lemon juice is eggs.

Human: How to cook haleem?
AI: Here is a simplified version of how to cook Haleem using the ingredients and instructions provided in the context:

Ingredients:

* 1 pound (500 grams) of lamb or beef, cut into small pieces
* 1 cup of split chickpeas (chana dal)
* 1 cup of wheat berries (broken wheat)
* 1 large onion, finely chopped
* 1 tablespoon of ginger paste
* 1 tablespoon of garlic paste
* 4 large tomatoes, finely chopped
* 1 teaspoon of salt
* 1 tablespoon of grated coriander
* 1/2 teaspoon of red chili powder
* 2 teaspoons of garlic paste for "bhooning"
* 2 tablespoons of chopped coriander for decoration
* 2 tablespoons of chopped green chilies for decoration
* 2 eggs

Instructions:

1. Soak the chickpeas and wheat berries in water overnight.
2. Drain the chickpeas and wheat berries and put them in a lar