In [28]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
import os 
from dotenv import load_dotenv
from langchain_community.llms import Ollama
from langchain.schema.runnable import RunnableSequence, RunnableParallel, RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Load variables from a local .env file into the process environment.
load_dotenv()

# Mirror selected variables under the names the Hugging Face libraries read.
# os.environ only accepts str values, so assigning os.getenv(...) directly
# raises TypeError when the variable is missing. Copy a value only when it
# is actually defined.
for target_name, source_name in (
    ("HUGGINGFACEHUB_API_TOKEN", "HUGGINGFACE_TOKEN"),
    ("HF_HOME", "HF_HOME"),
):
    env_value = os.getenv(source_name)
    if env_value is not None:
        os.environ[target_name] = env_value

### Step 1: Indexing

In [29]:
# 1a. Document ingestion
# Fetch the English transcript for the video and flatten it into one string.
video_id = "oyBy9YXLlkQ"
try:
    yt_api = YouTubeTranscriptApi()
    transcript_list = yt_api.fetch(video_id, languages=["en"])
except TranscriptsDisabled as exc:
    # Stop here: continuing with an empty transcript produces no chunks, and
    # building the FAISS index from an empty list fails with an opaque error.
    raise RuntimeError(f"Captions are not available for video {video_id!r}") from exc

transcript = ' '.join(snippet.text for snippet in transcript_list.snippets)
if not transcript.strip():
    raise ValueError(f"Transcript for video {video_id!r} is empty; nothing to index")

print(f"Transcript:\n{transcript}")

# 1b. Text splitting
# 1000-character chunks with a 100-character overlap between neighbours.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.create_documents([transcript])
# Report the count instead of dumping every Document object into the output.
print(f"Split transcript into {len(chunks)} chunks")

# 1c. Embedding generation
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# 1d. Store embedding in Vector DB
vectorstore = FAISS.from_documents(chunks, embeddings)

Transcript:
It's about Tim Ciphert now and it is about two to win from three deliveries. So another wicket now. Makes you sit forward, doesn't it? Field is in close. I've got to stop the single. I've got to hunt here. Get in close. Cipher to win the match. Misses. Oh no. Misses. Oh no. It's two from two. Well, the commentator in the middle here. He's He's not a betting coach anymore, Mac. Absolutely. Liberty hasn't got bad on ball there. Wow. It's good from Shami, though. He's not trying to hit the hole, which is far easier for Ciphford to get bat on ball. He's trying to create a dot. Trying to create a wicket taking opportunity. Good battle. Two off two. Tied match. Super overhead. Tied match. Super over. A few too many in 2019. Two off two. Ciphered again on strike. He's missed again. Taylor runs. Oh, the scores are tied. Ross Taylor very, very clever here. I said earlier, you can run a by to Rahul and he's done it. He's picked up on it. Ross Taylor. Wonderful thinking under pressure

### Step 2: Retrieval

In [30]:
# Similarity-search retriever over the FAISS index, returning the top 4 chunks.
# The keyword must be spelled `search_kwargs`; the previous `searc_kwargs`
# spelling meant the requested k never reached the retriever.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 4})
# retriever.invoke("What is Deep Learning?")

### Step 3: Augmentation

In [31]:
# Prompt for the augmentation step. The template is assembled from explicit
# line fragments so its exact whitespace (the indentation, the trailing space
# after "context.", the blank line before the question) is visible in the code.
rag_template = (
    "\n"
    "    You are a helpful assistant.\n"
    "    Answer only from the provided context. \n"
    "    If the context is insufficient, just say that you don't know.\n"
    "    {context}\n"
    "\n"
    "    Question: {question}\n"
    "    "
)
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=rag_template,
)

# Code for testing
# question = "How much run did each team score?"    
# retrieved_docs = retriever.invoke(question)
# context_text = " ".join(doc.page_content for doc in retrieved_docs)

# final_prompt = prompt.invoke({
#     "context": context_text,
#     "question": question    
# })

### Step 4: Generation

In [32]:
# llm = HuggingFaceEndpoint(
#     repo_id="google/gemma-3-270m",
#     task="text-generation"
# )
# model = ChatHuggingFace(llm = llm)

# Generation model: the model tag handed to the Ollama wrapper.
ollama_model_name = "gemma3:1b"
llm = Ollama(model=ollama_model_name)

# Code for testing
# answer = llm.invoke(final_prompt)
# print(answer)

In [None]:
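# Build the RAG chain.
# A RunnableParallel fans the incoming question out into two branches:
#   - "question": RunnablePassthrough forwards the question unchanged
#   - "context":  the retriever fetches chunks and a RunnableLambda joins their text
# TODO: return both the question and the answer (the chain currently returns only the answer string).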

def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable gives an empty string.
    """
    passages = [document.page_content for document in retrieved_docs]
    separator = "\n\n"
    return separator.join(passages)

# Context branch: retrieve the matching chunks, then flatten them to plain text.
context_branch = retriever | RunnableLambda(format_docs)

# Fan-out stage: builds the {"question", "context"} mapping the prompt expects.
parallel_chain = RunnableParallel(
    question=RunnablePassthrough(),
    context=context_branch,
)

# Full pipeline: inputs -> prompt -> LLM -> plain string.
parser = StrOutputParser()
main_chain = parallel_chain | prompt | llm | parser

main_chain.invoke("can you summarize the video?")



'According to the text, the following players played well:\n\n*   Coley\n*   Sharma\n*   Ross Taylor\n*   K Rahul\n*   Shami\n*   Noah\n*   Bumra'