### Creating a RAG system
In this notebook I:
* Create a collection of food data (ingredients, nutrition facts, preparation), with a YouTube link
* Chunk the data and load it into ChromaDB
* Create a retrieval system

In [1]:
import os
import sys
from pathlib import Path

# Put the parent directory on the import path (presumably so modules that live
# one level above this notebook can be imported — confirm against the repo).
# The membership check keeps the cell idempotent: re-running it no longer
# appends the same entry to sys.path over and over.
_parent_dir = os.path.abspath("..")
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [2]:
import os
import sys
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from langchain_community.tools.tavily_search import TavilySearchResults

class LLMSettings:
    """Factory for the chat models and the search tool used in this notebook.

    On construction the ``GOOGLE_API_KEY`` and ``TAVILY_API_KEY`` environment
    variables are read and stored on the instance (``None`` when unset).
    """

    # Models served by the local LM Studio endpoint, keyed by ``model_id``.
    LOCAL_MODELS = {
        0: "google/gemma-3-4b",
        1: "microsoft/phi-4-mini-reasoning",
        2: "llama-3.2-1b-instruct",
    }

    def __init__(self) -> None:
        # Add the parent directory to the import path only once; creating
        # several LLMSettings objects must not pile up duplicate entries.
        parent_dir = os.path.abspath("..")
        if parent_dir not in sys.path:
            sys.path.append(parent_dir)
        self.gemini_api = os.environ.get("GOOGLE_API_KEY")
        self.tavily_api_key = os.environ.get("TAVILY_API_KEY")

    def load_gemini(self, temp: float = 0.5) -> ChatGoogleGenerativeAI:
        """Build the ``gemini-2.5-flash`` chat model.

        Args:
            temp: Sampling temperature passed to the model.

        Returns:
            A ``ChatGoogleGenerativeAI`` configured with the stored API key.
            A confirmation line is printed once the object is created.
        """
        llm = ChatGoogleGenerativeAI(
            model="gemini-2.5-flash",
            api_key=self.gemini_api,
            temperature=temp,
        )
        print("LLM ready:", type(llm).__name__)
        return llm

    def load_local(self, model_id: int = 0, temp: float = 0.5) -> ChatOpenAI:
        """
        This method returns the local model hosted by LM Studio.
        0 - google/gemma-3-4b
        1 - microsoft/phi-4-mini-reasoning
        2 - llama-3.2-1b-instruct

        Args:
            model_id: Key into ``LOCAL_MODELS`` selecting the model.
            temp: Sampling temperature passed to the model.

        Returns:
            A ``ChatOpenAI`` client pointed at ``http://localhost:1234/v1``.

        Raises:
            KeyError: If ``model_id`` is not one of the known ids; the message
                lists the valid choices.
        """
        try:
            model_name = self.LOCAL_MODELS[model_id]
        except KeyError:
            choices = ", ".join(
                f"{key} ({name})" for key, name in self.LOCAL_MODELS.items()
            )
            # Same exception type as the original dict lookup, but with a
            # message that tells the caller what is actually accepted.
            raise KeyError(
                f"Unknown model_id {model_id!r}; expected one of: {choices}"
            ) from None

        llm = ChatOpenAI(
            model=model_name,
            openai_api_key='lm-studio', # type: ignore
            openai_api_base="http://localhost:1234/v1", # type: ignore
            temperature=temp,
        )
        return llm

    def load_tavily_search(self, max_results: int = 2) -> TavilySearchResults:
        """Build the Tavily web-search tool.

        Args:
            max_results: Maximum number of search results the tool returns.

        Returns:
            A ``TavilySearchResults`` tool.
        """
        # NOTE(review): self.tavily_api_key is not passed here; presumably the
        # tool reads TAVILY_API_KEY from the environment itself — confirm.
        return TavilySearchResults(max_results=max_results)
        

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Build the settings helper, then both chat models with their defaults spelled
# out: `llm` is the local LM Studio model (id 0), `llm2` is the Gemini model.
settings = LLMSettings()
llm = settings.load_local(model_id=0, temp=0.5)
llm2 = settings.load_gemini(temp=0.5)

# Show the local model's configuration as the cell output.
llm

LLM ready: ChatGoogleGenerativeAI


ChatOpenAI(profile={}, client=<openai.resources.chat.completions.completions.Completions object at 0x000001D63BA8B810>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001D63CF4C890>, root_client=<openai.OpenAI object at 0x000001D63BA26BD0>, root_async_client=<openai.AsyncOpenAI object at 0x000001D63CF4C4D0>, model_name='google/gemma-3-4b', temperature=0.5, model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='http://localhost:1234/v1')

### Import all necessary modules

In [5]:
from langchain_community.document_loaders import DirectoryLoader, UnstructuredMarkdownLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate

# --- CHANGED SECTION ---
# In v1.2.0, these moved to langchain_classic
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
# -----------------------

### Load and Split the Markdown Files
This step reads your `.md` files from the `food` folder and cuts them into small "chunks" (200 characters long, with a 20-character overlap) so the AI can process them easily.

In [7]:
# Collect every Markdown file under ./food; the "**/*.md" pattern also matches
# files that sit in subfolders. Each file is read with UnstructuredMarkdownLoader.
loader = DirectoryLoader(
    './food',
    glob="**/*.md",
    loader_cls=UnstructuredMarkdownLoader,
)
docs = loader.load()
print(f"Loaded {len(docs)} document(s).")

# Cut the loaded documents into chunks (chunk_size=200, chunk_overlap=20).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
)
splits = text_splitter.split_documents(docs)
print(f"Created {len(splits)} chunks from the documents.")

Loaded 3 document(s).
Created 79 chunks from the documents.


### Initialize Local Embeddings & Vector Store
We will use HuggingFaceEmbeddings to convert your text into numbers. This runs entirely on your CPU/GPU locally and is often faster than sending text to LM Studio for embeddings.

In [8]:
# 3. Embeddings
# "all-MiniLM-L6-v2" is a small, fast, and effective model for English text.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# 4. Vector store
# No persist_directory is given, so the store is not written to disk.
# NOTE(review): re-running this cell in the same kernel may add the chunks to
# the same default collection a second time — verify, and restart the kernel
# if retrieval starts returning duplicates.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model,
)

# Expose the store as a retriever returning the 10 closest chunks per query.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=10))

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [9]:
# --- CHANGED IMPORTS ---
from pydantic import BaseModel, Field  # Import directly from Pydantic
from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser
# -----------------------

from typing import List

# 1. Define the desired structure
# FoodData lists the fields the LLM is asked to return. Each Field description
# is handed to the parser below together with the model.
# NOTE(review): documented with comments rather than a class docstring on
# purpose — a docstring would presumably be emitted as the schema description
# and so change the format instructions sent to the model; confirm before
# adding one.
class FoodData(BaseModel):
    # Name of the dish.
    food: str = Field(description="The name of the food item")
    # Short description of the dish.
    info: str = Field(description="A brief description of the food item")
    # Historical / cultural background of the dish.
    background: str = Field(description="A brief explanation of background of the food item")
    # Ingredients, restricted to those mentioned in the retrieved document.
    ingredients: List[str] = Field(description="A list of ingredients used, mentioned in document")
    # Ordered preparation steps taken from the retrieved document.
    preparation: List[str] = Field(description="Step-by-step preparation process given in the document")

# 2. Create the parser
# The parser is used twice later on: for its format instructions (added to the
# system prompt) and to parse the model's answer. The parsed result is a plain
# dict (it is indexed as data['food'] further down), not a FoodData instance.
parser = JsonOutputParser(pydantic_object=FoodData)

print("Data structure defined.")

Data structure defined.


In [10]:
# 3. Ask the parser for the formatting instructions the model must follow.
format_instructions = parser.get_format_instructions()

# 4. System prompt. It carries two placeholders: {format_instructions}, bound
#    right below, and {context}, left open for the documents chain.
system_prompt = "".join([
    "You are a culinary assistant. ",
    "Answer the user's question based ONLY on the context provided. ",
    "If you cannot find any context, add some",
    "\n\n",
    "{format_instructions}",
    "\n\n",
    "Context:\n{context}",
])

# 5. Build the chat template and bind the parser instructions as a partial
#    variable, so only {context} and {input} remain to be filled at run time.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
]).partial(format_instructions=format_instructions)

# LLM + prompt, then retriever + that chain.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


In [27]:
# Free-text variant of the RAG chain (no JSON format instructions).
# NOTE: this cell rebinds `system_prompt`, `prompt` and `question_answer_chain`,
# names the structured-output cell also assigns. `rag_chain` keeps the objects
# it was built with, but the bare names now refer to the free-text versions.

# 1. System prompt; {context} is the only placeholder besides the user input.
system_prompt = "".join([
    "You are a helpful culinary assistant. ",
    "Use the following pieces of retrieved context to answer the question. ",
    "If the answer is not in the context, say you don't know.",
    "\n\n",
    "{context}",
])

# 2. Chat template: system instructions followed by the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# 3. LLM + prompt.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# 4. Retriever + the chain from step 3.
rag_chain2 = create_retrieval_chain(retriever, question_answer_chain)

print("RAG Chain constructed successfully.")

RAG Chain constructed successfully.


In [20]:
# Dish to look up; it is interpolated into the question sent to the chain.
food = "sushi"
query = f"Tell me about the brief information and historical background on {food}. Explain the ingredients and step by step preparation details."

print(f"Asking: {query}...\n")

# Send the question through the structured-output chain.
response = rag_chain.invoke(dict(input=query))

# Header and answer on separate lines.
print("--- AI ANSWER ---", response["answer"], sep="\n")

# Optional: See which file it pulled the info from
# print("\n--- SOURCES ---")
# for doc in response["context"]:
#     print(f"- {doc.metadata.get('source')}")

Asking: Tell me about the brief information and historical background on sushi. Explain the ingredients and step by step preparation details....

--- AI ANSWER ---
```json
{"food": "Sushi", "info": "Sushi is a traditional Japanese dish centered on vinegared rice combined with seafood, vegetables, and occasionally tropical fruits. Contrary to common misconception, sushi is defined by its seasoned rice rather than raw fish. Sushi represents precision, balance, and respect for ingredients, making it one of the most technically refined cuisines in the world.", "background": "The origins of sushi trace back to ancient preservation methods in Southeast Asia, where fish was fermented with rice to extend shelf life. This method evolved in Japan into narezushi, and later into a fast food style featuring fresh fish and vinegared rice, sold by street vendors in Tokyo. Over time, sushi spread globally, adapting to local tastes while influencing global fine dining standards.", "ingredients": ["Shor

In [21]:
raw_answer = response["answer"]


def _strip_code_fence(text: str) -> str:
    """Return ``text`` without a Markdown code fence wrapped around it.

    Only a fence that opens the (whitespace-stripped) text, and the fence that
    closes it, are removed. Backticks inside the payload are left untouched,
    and text that does not start with a fence is returned unchanged.
    """
    body = text.strip()
    if not body.startswith("```"):
        return text
    body = body[3:]
    # Drop an optional language tag such as "json" right after the fence.
    if body[:4].lower() == "json":
        body = body[4:]
    trimmed = body.rstrip()
    if trimmed.endswith("```"):
        body = trimmed[:-3]
    return body.strip()


# Local models often wrap JSON in markdown blocks; remove only that wrapper.
cleaned_answer = _strip_code_fence(raw_answer)

# Now parse the CLEANED string
try:
    data = parser.parse(cleaned_answer)

    print(f"--- DISH: {data['food']} ---")
    print(data)

    # ... (Your YouTube code here)

except Exception as e:
    print("Error parsing. Here is what the model sent:")
    print(cleaned_answer)
    # Show the real cause: without this a missing 'food' key (KeyError) looks
    # exactly like a JSON parsing failure.
    print(f"Reason: {type(e).__name__}: {e}")

--- DISH: Sushi ---
{'food': 'Sushi', 'info': 'Sushi is a traditional Japanese dish centered on vinegared rice combined with seafood, vegetables, and occasionally tropical fruits. Contrary to common misconception, sushi is defined by its seasoned rice rather than raw fish. Sushi represents precision, balance, and respect for ingredients, making it one of the most technically refined cuisines in the world.', 'background': 'The origins of sushi trace back to ancient preservation methods in Southeast Asia, where fish was fermented with rice to extend shelf life. This method evolved in Japan into narezushi, and later into a fast food style featuring fresh fish and vinegared rice, sold by street vendors in Tokyo. Over time, sushi spread globally, adapting to local tastes while influencing global fine dining standards.', 'ingredients': ['Short-grain Japanese rice', 'Rice vinegar', 'Sugar', 'Salt'], 'preparation': ['']}


### Creating a chain to get a YouTube link

In [17]:
from langchain_community.tools import YouTubeSearchTool

# Initialize the YouTube search tool. It is run further down with a plain-text
# query, and its result is printed as the recommended video links.
youtube_tool = YouTubeSearchTool()

In [22]:
# Parse the raw model answer directly, without the manual fence stripping done
# in the previous cell. The output below matches the dict obtained there.
# This rebinds `data`, which the YouTube query cell reads next.
data = parser.parse(response["answer"])
data

{'food': 'Sushi',
 'info': 'Sushi is a traditional Japanese dish centered on vinegared rice combined with seafood, vegetables, and occasionally tropical fruits. Contrary to common misconception, sushi is defined by its seasoned rice rather than raw fish. Sushi represents precision, balance, and respect for ingredients, making it one of the most technically refined cuisines in the world.',
 'background': 'The origins of sushi trace back to ancient preservation methods in Southeast Asia, where fish was fermented with rice to extend shelf life. This method evolved in Japan into narezushi, and later into a fast food style featuring fresh fish and vinegared rice, sold by street vendors in Tokyo. Over time, sushi spread globally, adapting to local tastes while influencing global fine dining standards.',
 'ingredients': ['Short-grain Japanese rice', 'Rice vinegar', 'Sugar', 'Salt'],
 'preparation': ['']}

In [23]:
# Build the search phrase from the dish name the model returned (data['food']).
youtube_search_query = f"How to cook {data['food']} in detail"

# Display the query as the cell output.
youtube_search_query

'How to cook Sushi in detail'

In [24]:
# Run the YouTube search tool with the query built above.
video_links = youtube_tool.run(youtube_search_query)
    
# Display the result
print(f"Recommended Video:\n{video_links}")

Recommended Video:
['https://www.youtube.com/watch?v=nIoOv6lWYnk&pp=ygUbSG93IHRvIGNvb2sgU3VzaGkgaW4gZGV0YWls', 'https://www.youtube.com/watch?v=ovX3X3vjywI&pp=ygUbSG93IHRvIGNvb2sgU3VzaGkgaW4gZGV0YWls']
