In [None]:
! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain youtube-transcript-api pytube

In [2]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into os.environ.
load_dotenv()

# LangSmith tracing configuration.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# The API keys must already be present in the environment (e.g. via .env).
# Re-assigning os.environ[k] = os.environ.get(k) does nothing when the key is
# set and fails with an opaque "TypeError: str expected, not NoneType" when it
# is not, so check explicitly and name the missing variables instead.
_missing_env_vars = [
    name for name in ('LANGCHAIN_API_KEY', 'GEMINI_API_KEY')
    if not os.environ.get(name)
]
if _missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in your shell or in a .env file before running this notebook."
    )

In [3]:
# Gemini key shared by the chat model and the embedding model below
# (None when the variable is not set).
api_key = os.getenv('GEMINI_API_KEY')

## 2. Routing
#### After query transformation, it is important to send the query to the most relevant datasource/prompt. This is done through routing.

## 2.1 Logical routing
### <i> Letting the LLM decide which datasource out of the given options to point to / load depending on the question</i>

In [5]:
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
# `langchain_core.pydantic_v1` is a deprecated compatibility shim (LangChain
# uses Pydantic 2 internally since langchain-core 0.3), so the schema
# primitives are imported from pydantic directly.
from pydantic import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI

class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    # NOTE(review): the docstring above and the description below are presumably
    # passed to the model as part of the structured-output schema, so treat
    # their wording as prompt text rather than as ordinary documentation.
    # The Literal limits the routing decision to exactly these three targets.
    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="""Given a user question, choose which datasource would be most relevant for answering their question"""
    )

# Gemini chat model, configured with temperature 0.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    api_key=api_key,
    temperature=0,
)

# Same model, but with its output constrained to the RouteQuery schema.
structured_llm = llm.with_structured_output(RouteQuery)

# Prompt
# System instruction for the router. Typos in the original wording
# ("datsrource", "os referring") are corrected so the model receives a clean
# instruction.
system = """You are an expert at routing a user question to the relevant datasource.
Based on the programming language the question is referring to, route it to the relevant data source."""

# Two-message chat prompt: the fixed system instruction plus the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Prompt piped into the structured-output model; invoke with {"question": ...}.
router = prompt | structured_llm

In [6]:
# Sample user question: a Python/LangChain snippet used to exercise the router.
question = """Why doesn't the following code work:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
prompt.invoke("french")
"""

# The router's prompt has a single {question} placeholder.
routing_input = {"question": question}
result = router.invoke(routing_input)

In [8]:
# Show the full structured result, then just the chosen datasource, one per line.
print(result, result.datasource, sep="\n")

datasource='python_docs'
python_docs


Once the router is built, it is important to complete the process by defining a branch that acts on `result.datasource`.

In [9]:
def choose_route(result):
    """Map a routing decision to the chain that should handle it.

    Args:
        result: Object exposing a ``datasource`` attribute (the router's output).

    Returns:
        A placeholder string naming the chain for the selected datasource.
        Any value other than "python_docs" or "js_docs" falls through to the
        golang chain.
    """
    target = result.datasource
    if target == "python_docs":
        return "chain for python docs"
    if target == "js_docs":
        return "chain for js docs"
    return "chain for golang docs"

from langchain_core.runnables import RunnableLambda
# Append the branching step to the router: routing decision in, chain label out.
full_chain = router.pipe(RunnableLambda(choose_route))

In [10]:
# Run routing and branching end to end; the cell output is the selected chain label.
full_chain.invoke(dict(question=question))

'chain for python docs'

## 2.2 Semantic Routing
### <i> Semantic Routing in LangChain refers to the process of intelligently directing user queries to the most appropriate chain, tool, or prompt based on the meaning or intent of the input, rather than relying on explicit keywords or rule-based logic.</i>

In [14]:
from langchain.utils.math import cosine_similarity
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI,GoogleGenerativeAIEmbeddings

# Two prompts
# Each template ends with a {query} placeholder for the user's question.
physics_template = (
    "You are a very smart physics professor. "
    "You are great at answering questions about physics in a concise and easy to understand manner. "
    "When you don't know the answer to a question you admit that you don't know.\n"
    "\n"
    "Here is a question:\n"
    "{query}"
)

math_template = (
    "You are a very good mathematician. You are great at answering math questions. "
    "You are so good because you are able to break down hard problems into their component parts, "
    "answer the component parts, and then put them together to answer the broader question.\n"
    "\n"
    "Here is a question:\n"
    "{query}"
)

# Embeddings
# Candidate prompts; the router selects from this list by index.
prompt_templates = [physics_template, math_template]

embeddings = GoogleGenerativeAIEmbeddings(
    model="text-embedding-004",
    google_api_key=api_key,
)

# Embed the templates once up front so each routed query only needs one
# additional embedding call.
prompt_embeddings = embeddings.embed_documents(prompt_templates)

In [16]:
# Route question to prompt
def prompt_router(input):
    """Pick the prompt template whose embedding is closest to the query.

    Args:
        input: Mapping with a "query" key holding the user's question.

    Returns:
        A PromptTemplate built from the most similar entry in
        ``prompt_templates``. Also prints which prompt was selected.
    """
    question_vector = embeddings.embed_query(input["query"])
    # Row 0 holds the similarity of the single query against every template.
    scores = cosine_similarity([question_vector], prompt_embeddings)[0]
    best_index = scores.argmax()
    chosen_template = prompt_templates[best_index]

    if chosen_template == math_template:
        print("Using MATH")
    else:
        print("Using PHYSICS")
    return PromptTemplate.from_template(chosen_template)

# Step 1: wrap the raw input string as {"query": <input>}.
wrap_query = {"query": RunnablePassthrough()}
# Step 2: choose the prompt template that best matches the query.
select_prompt = RunnableLambda(prompt_router)

# Wrap -> select prompt -> LLM -> plain-string output.
chain = wrap_query | select_prompt | llm | StrOutputParser()

answer = chain.invoke("What is a black hole.")
print(answer)

Using PHYSICS
A black hole is a region of spacetime where gravity is so incredibly strong that nothing, not even light, can escape from it.

This extreme gravity arises from a huge amount of matter being compressed into an extremely small space. Imagine taking something many times more massive than our Sun and squeezing it down to the size of a city – that's the kind of density we're talking about.

The "point of no return" around a black hole is called the **event horizon**. Once anything crosses this boundary, it's trapped forever. At the very center, all that mass is thought to be concentrated into an infinitely dense point called a **singularity**.


## 3. Query Construction
Query construction converts a natural-language question into a domain-specific query language (for example, a text search combined with metadata filters for a vector store).


# Query Construction in RAG

1. **Collect docs & metadata** — load transcripts and metadata (title, views, dates, length).  
2. **Define schema** — design a structured query model (text fields + filters).  
3. **Prompt LLM** — instruct the LLM to convert user questions into schema fields.  
4. **LLM → structured output** — parse queries into text search + metadata filters.  
5. **Normalize values** — convert to standard formats (dates, counts, durations).  
6. **Map fields → vectorstore** — match content queries to embeddings, filters to metadata.  
7. **Run filtered search** — execute similarity + filter queries in the vectorstore.  
8. **Aggregate & rank** — combine and re-rank retrieved chunks.  
9. **Generate final answer** — feed results + question to the LLM for response.  
10. **Log & iterate** — record queries and outputs for refinement.
