In [6]:
"""
PROJECT: NeuralTranscript: Semantic Search & Q&A for YouTube Content
MODULE: 04_RAG_QUERY_ENGINE
-------------------------------------------------------------------------
DESCRIPTION:
Final stage of the pipeline. It takes a user query, retrieves
relevant context from the FAISS vector store, and uses Google Gemini to 
generate a precise, context-aware answer based on the video transcript.

AUTHOR: Engr. Inam Ullah Khan
-------------------------------------------------------------------------
"""

import os
from dotenv import load_dotenv

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser


# --------------------------------------------------
# 1. CONFIGURATION & ENVIRONMENT SETUP
# --------------------------------------------------

# Read the local .env file when this module is executed, before any
# client that needs credentials is constructed.
load_dotenv()  # Make sure GOOGLE_API_KEY is inside your .env file
# Folder of the saved FAISS index; load_vector_store() hands this path to
# FAISS.load_local().
INDEX_PATH = "data/faiss_index"


# --------------------------------------------------
# 2. CORE FUNCTIONS
# --------------------------------------------------

def load_vector_store(index_path=None):
    """
    Load the saved FAISS index from disk and return the vector store.

    IMPORTANT: The embedding model must match the one used during indexing.

    Args:
        index_path: Directory that holds the saved index. When omitted,
            the module-level ``INDEX_PATH`` is used.

    Returns:
        The vector store object produced by ``FAISS.load_local``.

    Raises:
        FileNotFoundError: If ``index_path`` is not an existing directory.
    """
    print("üìÇ Loading Vector Database...")

    if index_path is None:
        index_path = INDEX_PATH

    # Fail early with an actionable message instead of letting the index
    # loader raise a low-level error for a missing folder.
    if not os.path.isdir(index_path):
        raise FileNotFoundError(
            f"FAISS index directory not found: {index_path!r}. "
            "Build the index first or pass the correct index_path."
        )

    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    # SECURITY: allow_dangerous_deserialization=True opts in to unpickling
    # the stored index data. Unpickling can execute arbitrary code, so only
    # load index folders that this project created itself.
    vector_db = FAISS.load_local(
        index_path,
        embeddings,
        allow_dangerous_deserialization=True,
    )

    return vector_db


def _format_docs(docs):
    """Join the text of the retrieved documents into one context string."""
    return "\n\n".join(doc.page_content for doc in docs)


def build_rag_chain(vector_db, k=3):
    """
    Builds modern LangChain v1 RAG pipeline using LCEL.

    The chain takes the user's question as a plain string, retrieves the
    ``k`` most similar chunks from ``vector_db``, inserts their text into
    the prompt and returns the model's answer as a string.

    Args:
        vector_db: Vector store to retrieve from; must provide
            ``as_retriever``.
        k: Number of chunks to retrieve per question (default 3).

    Returns:
        A runnable chain; call ``.invoke(question)`` to get the answer text.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k!r}")

    # NOTE(review): the banner says "Gemini Pro" while the configured model
    # below is gemini-2.5-flash; the text is left unchanged on purpose.
    print("ü§ñ Initializing Google Gemini Pro...")

    # 1. Initialize Gemini LLM
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        temperature=0.2,
        top_p=0.9
    )

    # 2. Prompt Template (LCEL style)
    prompt = ChatPromptTemplate.from_template("""
You are an AI Assistant specialized in analyzing video content.
Use the following transcript context to answer the question.
If the answer is not contained in the context, say you don't know.
Keep the answer concise and professional.

CONTEXT:
{context}

QUESTION:
{question}

ANSWER:
""")

    # 3. Create Retriever
    retriever = vector_db.as_retriever(search_kwargs={"k": k})

    # 4. Build RAG Chain using LCEL
    # The retriever yields Document objects. Piping them through
    # _format_docs puts only the transcript text into {context}; without it
    # the prompt would receive the Python repr of a list of Documents.
    rag_chain = (
        {
            "context": retriever | _format_docs,
            "question": RunnablePassthrough()
        }
        | prompt
        | llm
        | StrOutputParser()
    )

    return rag_chain


# --------------------------------------------------
# 3. EXECUTION PIPELINE
# --------------------------------------------------

if __name__ == "__main__":
    # The question to ask; it does not depend on the index or the model,
    # so it is fixed before any loading starts.
    user_query = "What is the main topic of this video??"

    print("\n--- Starting NeuralTranscript Query Engine ---\n")

    # Prepare retrieval and generation: vector store first, then the chain
    # that is built on top of it.
    db = load_vector_store()
    neural_qa = build_rag_chain(db)

    # Echo the question so the transcript of the run is self-explanatory.
    print(f"\n‚ùì User Query:\n{user_query}")
    print("\n‚è≥ Processing answer...\n")

    # One call runs retrieval, prompting and generation end to end.
    response = neural_qa.invoke(user_query)

    # Show the answer, followed by the closing banner.
    print("‚ú® AI RESPONSE:\n")
    print(response)
    print("\n--- Query Completed Successfully ---\n")



--- Starting NeuralTranscript Query Engine ---

üìÇ Loading Vector Database...
ü§ñ Initializing Google Gemini Pro...

‚ùì User Query:
What is the main topic of this video??

‚è≥ Processing answer...

‚ú® AI RESPONSE:

The main topic of this video is a conversation with Demas about solving fundamental mysteries of the universe, including consciousness, life, and gravity, and the search for deeper explanations beyond the standard model of physics, potentially through the application of intelligence and reinforcement learning.

--- Query Completed Successfully ---



## 📊 Observations & Technical Analysis

* **System Synergy (RAG Validation)**: The execution demonstrates a successful integration of the **FAISS** vector store with the **Gemini 2.5 Flash** model. The "say you don't know" instruction in the prompt discourages hallucination by telling the model to stay within the provided transcript context rather than answer from general knowledge.
* **LCEL Pipeline Efficiency**: The use of **LangChain Expression Language (LCEL)** provides a transparent and efficient data flow. `RunnablePassthrough` forwards the user's question unchanged into the prompt and `StrOutputParser` reduces the model's reply to a plain string, so very little glue code sits between context retrieval and answer generation.
* **Prompt Precision**: The human-centered prompt template successfully guides the LLM to maintain a professional and concise tone. This is critical for applications where the user requires factual summaries rather than creative interpretations.
* **Contextual Retrieval Performance**: With `k=3`, the retriever provides up to roughly 3,000 characters of context (three chunks at our 1,000-character chunk size). This occupies only a small part of the LLM's context window, but it is high-density information that allows the model to "reason" across different segments of the video.

---

## 🏁 Summary: Module 04 — RAG Query Engine

This module represents the completion of the **NeuralTranscript** pipeline. We have successfully transformed a raw YouTube transcript into an intelligent, queryable research tool.

### 🛠️ Key Technical Deliverables:

1. **Neural Retrieval Integration**: Connected the local FAISS index to the LangChain retrieval chain, allowing for semantic-based context fetching.
2. **Gemini 2.5 Implementation**: Leveraged Google's latest generative model to interpret retrieved video segments and synthesize natural language answers.
3. **Modern Chain Architecture**: Developed the engine using **LCEL**, moving away from legacy chains to a more modular, future-proof codebase.
4. **Zero-Shot Reliability**: Implemented strict grounding instructions in the prompt to ensure the system only answers based on the transcript, maintaining high factual integrity.

---

### 🚀 Project Conclusion

The **NeuralTranscript** project is now a fully functional end-to-end RAG system. It demonstrates proficiency in **Data Engineering** (Ingestion/Chunking), **Vector Mathematics** (Indexing), and **Generative AI** (RAG Orchestration).

---