**Load Libraries**

In [None]:
!pip install -U langchain langchain-openai openai  chromadb gradio dotenv  langchain-community
print("loaded successfully")

In [None]:
!pip install --upgrade openai
import os
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
openai_api_key=os.getenv("OPENAI_API_KEY")
openai_client=OpenAI(api_key=openai_api_key,base_url="https://openrouter.ai/api/v1")
print("first 5 chars ",{openai_api_key[:5]})

In [22]:
#load langchain components
from langchain_openai import OpenAIEmbeddings,OpenAI
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQAWithSourcesChain 

In [23]:
# Text file that TextLoader reads later in the notebook.
DATA_FILE_PATH = "eleven_madison_park_data.txt"
print("file path-> ", DATA_FILE_PATH, sep="")

file path-> eleven_madison_park_data.txt


In [24]:
# Read the source file as UTF-8 text and report how many documents came back.
loader = TextLoader(
    DATA_FILE_PATH,
    encoding="utf-8",
)
raw_document = loader.load()
print(f"loaded successfully {len(raw_document)} document")

loaded successfully 1 document


In [None]:
# Preview the last 750 characters of the loaded text.
# Plain concatenation replaces the f-string: reusing double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
print(raw_document[0].page_content[-750:] + "...")

In [26]:
# Break the loaded document into overlapping chunks:
# up to 1000 characters each, with 150 characters shared between neighbours.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=150,
)
documents = text_splitter.split_documents(raw_document)

# Nothing downstream can work without chunks, so stop immediately if there are none.
if not documents:
    raise ValueError(
        "Error: Splitting resulted in zero documents. Check the input file and splitter settings."
    )

print(f"document split int {len(documents)} chunks")

document split int 38 chunks


In [27]:
# Show the text and metadata of the first chunk, one item per line.
print(
    "documet content..",
    documents[0].page_content,
    "metadata...",
    documents[0].metadata,
    sep="\n",
)


documet content..
Source: https://www.elevenmadisonpark.com/
Title: Eleven Madison Park
Content:
Book on Resy
---END OF SOURCE---
metadata...
{'source': 'eleven_madison_park_data.txt'}


In [None]:
!pip install -U sentence-transformers langchain-huggingface

from langchain_community.embeddings import HuggingFaceEmbeddings

# pick one of the models above
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vector_store = Chroma.from_documents(documents=documents, embedding=embeddings)

print("Vector store created with", vector_store._collection.count(), "items")

In [None]:
# Fetch a single stored record, including its text and its embedding vector.
stored_data = vector_store._collection.get(
    include=["embeddings", "documents"],
    limit=1,
)

# Print the chunk text, its embedding, and the embedding's dimensionality.
print("First chunk text:\n", stored_data["documents"][0])
print("\nEmbedding vector:\n", stored_data["embeddings"][0])
print(f"\nFull embedding has {len(stored_data['embeddings'][0])} dimensions.")

In [31]:
# Smoke-test the vector store with one hand-written question.
test_query = "Who is Daniel Humm?"
print(f"Searching for documents similar to: '{test_query}'")

try:
    # k=1: fetch only the single closest chunk.
    similar_docs = vector_store.similarity_search(test_query, k=1)
    print(f"\nFound {len(similar_docs)} similar documents:")

    for doc_number, doc in enumerate(similar_docs, start=1):
        print(f"\n--- Document {doc_number} ---")
        # Cap the preview at 700 characters and trim surrounding whitespace.
        snippet = doc.page_content[:700].strip()
        origin = doc.metadata.get("source", "Unknown Source")
        print(f"content:{snippet}")
        print(f"source:{origin}")
except Exception as err:
    print(f"An error occurred during similarity search: {err}")

        
        


    

Searching for documents similar to: 'Who is Daniel Humm?'

Found 1 similar documents:

--- Document 1 ---
content:Source: https://www.elevenmadisonpark.com/press-and-accolades
Title: Press and Accolades — Eleven Madison Park
Content:
Accolades World’s 50 Best Best of the Best The New York Times Four Stars Michelin Guide Three Stars Wine Spectator Grand Award The World of Fine Wine Best Overall Wine List James Beard Foundation Outstanding Chef, Outstanding Service, Outstanding Pastry Chef, Outstanding Restaurant, Best Chef: NYC, Outstanding Wine Service, Rising Star Chef Recent Press Time: “Is Cooking for the 1% for a Reason” Washington Post: “The Joy of Plant-Based Eating” Interview: “Daniel Humm Is Begging You to Eat More Plants” Financial Times: "Epicurean escapes: Alain Ducasse and Daniel Humm’s tran
source:eleven_madison_park_data.txt


In [32]:
from langchain_openai import ChatOpenAI
# Retriever that returns the three closest chunks for each question.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# Chat model reached through the OpenRouter base URL, with temperature set to 0.
llm = ChatOpenAI(
    model="mistralai/mistral-small-3.2-24b-instruct:free",
    temperature=0,
    openai_api_key=openai_api_key,
    openai_api_base="https://openrouter.ai/api/v1",
)

qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    retriever=retriever,
    # "stuff" puts all retrieved text directly into the prompt context;
    # suitable if the total text fits within the LLM's context limit.
    chain_type="stuff",
    return_source_documents=True,
    verbose=True,
)

print("RetrievalQAWithSourcesChain created")

RetrievalQAWithSourcesChain created


In [None]:
# End-to-end check: run one question through retrieval + LLM and show the result.
print("\n Testing Full RAG Chain")
chain_test_query = "what the name of chef"
try:
    # The query is passed to the chain under the "question" key.
    result = qa_chain.invoke({"question": chain_test_query})
    print("answering")
    print(result.get("answer", "No answer generated."))
    print("\n--- Sources ---")
    print(result.get("sources", "No sources identified."))
except Exception as e:
    # Include the exception type and message so the failure can be diagnosed;
    # printing a fixed string alone hides what actually went wrong.
    print(f"there is an error: {type(e).__name__}: {e}")

In [None]:
import gradio as gr
def rag_query(user_query):
    """Answer a question with the RAG chain and format the result as Markdown.

    Args:
        user_query: Question text entered by the user.

    Returns:
        A Markdown string with "Answer" and "Sources" sections; a short prompt
        when the query is empty or whitespace-only; or a message starting with
        "⚠️ Error:" when the chain raises.
    """
    # Skip the retrieval + LLM round trip when there is nothing to ask.
    if not user_query or not str(user_query).strip():
        return "⚠️ Please enter a question."

    try:
        result = qa_chain.invoke({"question": user_query})
        answer = result.get("answer", "No answer generated.")
        sources = result.get("sources", "No sources identified.")

        return f"### Answer\n{answer}\n\n### Sources\n{sources}"
    except Exception as e:
        # Return the failure as text so the caller can display it.
        return f"⚠️ Error: {e}"

# -----------------------------
# Gradio Interface
# -----------------------------
# Build the UI: a title, then one row holding two columns.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📚 RAG Q&A Assistant")
    with gr.Row():
        # First column (scale=2): question input and the submit button.
        with gr.Column(scale=2):
            query_box = gr.Textbox(
                label="Ask a question:",
                placeholder="Type your question here...",
                lines=2,
            )
            submit_btn = gr.Button("🔍 Search & Answer")
        # Second column (scale=3): Markdown area that displays the formatted answer.
        with gr.Column(scale=3):
            output_box = gr.Markdown(label="Answer")

    # Clicking the button calls rag_query with the textbox value and
    # writes its return value into output_box.
    submit_btn.click(rag_query, inputs=query_box, outputs=output_box)

# Start the Gradio app.
demo.launch()