In [16]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_comment_downloader import YoutubeCommentDownloader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Chroma
from langchain_groq import ChatGroq
from dotenv import load_dotenv
import re
import os
import dotenv

In [17]:
load_dotenv()

groq_api_key = os.getenv("GROQ_API_KEY")
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")


In [18]:
llm  = ChatGroq(api_key=groq_api_key, model_name="llama3-8b-8192")

In [19]:
# Function to get youtube transcript from a video url

def get_transcript_from_url(video_url):
    # Extract video ID from URL
    pattern = r"(?:v=|youtu\.be/)([a-zA-Z0-9_-]{11})"
    match = re.search(pattern, video_url)
    if not match:
        raise ValueError("Invalid YouTube video URL.")
    
    video_id = match.group(1)
    return YouTubeTranscriptApi.get_transcript(video_id)

# Example usage
video_url = "https://www.youtube.com/watch?v=TOGfR9j3Xnk"

transcript = get_transcript_from_url(video_url)
transcript_texts = [entry['text'] for entry in transcript]


In [20]:
# Extracting comments from the yourtube video
from youtube_comment_downloader import YoutubeCommentDownloader

downloader = YoutubeCommentDownloader()
comments = downloader.get_comments_from_url(video_url)
comment_texts = [c['text'] for c in comments]
comment_texts


['❤❤from Algeria 🇩🇿 😍',
 'Great tips on spanking butt cheeks',
 'Egypt',
 '🇹🇿 TANZANIA 🇹🇿,EAST AFRICA KARIBU ,HAKUNA MATATA .',
 'interesting',
 "Or how about we don't hurt our children",
 'I’m a black man and I’ve had women over my knee and I spanked them with my hand.',
 'please make a video on places that make her sexually active and to make her wet. thanks in advance',
 'watching from kenya',
 "Oh please, it's the women who call out to be spanked",
 'Create a site for a visual demonstration',
 'Aurelia I like your videos from Senegal 🇸🇳',
 'Zambia',
 'Good job\nNoone in Kenya can give you such lessons🎉🎉🎉🇰🇪🇰🇪🇰🇪🇰🇪🇰🇪🇰🇪',
 'Sierra Leone',
 'Thank you you are so special ❤🥰',
 'super video learned much',
 'Jamaican jamaican. My sister your doing an amazing job keep up the good work ✅🇯🇲',
 'How to go down on a woman 😩',
 'Does size really matter or technique of usage ?',
 'Wow, what amazing teaching',
 'Thank you for the information',
 'Getting into his mind by expertly knocking down his 

In [21]:
# Transcript and comments chunks with timestamps info
transcript_texts

['hey Champs welcome back to my Channel or',
 'welcome to my channel if this is your',
 "first time here today we'll be talking",
 'about banking okay spanking those butt',
 'cheeks using your Pam',
 '[Music]',
 'but first I want to say a big thank you',
 'to everybody watching me from all over',
 'the world last time I mentioned a few',
 'countries and I told you guys to tell me',
 "where you're from in the comment section",
 'and I see people from Australia from',
 'Barbados to Caribbeans in general I see',
 'people from Thailand thank you guys so',
 'much I see all of your comments and your',
 'requests and someone particularly',
 'requested for this video to make a video',
 "on how to spank properly and that's why",
 "I'm making this video so if you have any",
 'special video requests feel free to drop',
 'it in the comment section I will try and',
 'make sure that I deliver it 101 okay but',
 "guys if you know you're not subscribed",
 'to my Channel please subscribe so you',
 'can

In [22]:
all_chunks = transcript_texts + comment_texts
all_chunks

['hey Champs welcome back to my Channel or',
 'welcome to my channel if this is your',
 "first time here today we'll be talking",
 'about banking okay spanking those butt',
 'cheeks using your Pam',
 '[Music]',
 'but first I want to say a big thank you',
 'to everybody watching me from all over',
 'the world last time I mentioned a few',
 'countries and I told you guys to tell me',
 "where you're from in the comment section",
 'and I see people from Australia from',
 'Barbados to Caribbeans in general I see',
 'people from Thailand thank you guys so',
 'much I see all of your comments and your',
 'requests and someone particularly',
 'requested for this video to make a video',
 "on how to spank properly and that's why",
 "I'm making this video so if you have any",
 'special video requests feel free to drop',
 'it in the comment section I will try and',
 'make sure that I deliver it 101 okay but',
 "guys if you know you're not subscribed",
 'to my Channel please subscribe so you',
 'can

In [23]:
# Splitting each chunk individually
final_chunks = []

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Ensure 'all_chunks' is a single string or a list of strings
# If it's a list of strings (e.g. transcript list), join it:
all_text = " ".join(all_chunks) if isinstance(all_chunks, list) else all_chunks

# Create splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100)

# Split text
split_chunks = text_splitter.split_text(all_text)

# Collect chunks
final_chunks = split_chunks  # or use list(split_chunks) if it's a generator

# Print result
print(final_chunks)


["hey Champs welcome back to my Channel or welcome to my channel if this is your first time here today we'll be talking about banking okay spanking those butt cheeks using your Pam [Music] but first I want to say a big thank you to everybody watching me from all over the world last time I mentioned a few countries and I told you guys to tell me where you're from in the comment section and I see people from Australia from Barbados to Caribbeans in general I see people from Thailand thank you guys so much I see all of your comments and your requests and someone particularly requested for this", "so much I see all of your comments and your requests and someone particularly requested for this video to make a video on how to spank properly and that's why I'm making this video so if you have any special video requests feel free to drop it in the comment section I will try and make sure that I deliver it 101 okay but guys if you know you're not subscribed to my Channel please subscribe so you

In [24]:
final_chunks

["hey Champs welcome back to my Channel or welcome to my channel if this is your first time here today we'll be talking about banking okay spanking those butt cheeks using your Pam [Music] but first I want to say a big thank you to everybody watching me from all over the world last time I mentioned a few countries and I told you guys to tell me where you're from in the comment section and I see people from Australia from Barbados to Caribbeans in general I see people from Thailand thank you guys so much I see all of your comments and your requests and someone particularly requested for this",
 "so much I see all of your comments and your requests and someone particularly requested for this video to make a video on how to spank properly and that's why I'm making this video so if you have any special video requests feel free to drop it in the comment section I will try and make sure that I deliver it 101 okay but guys if you know you're not subscribed to my Channel please subscribe so yo

In [25]:
# Create vector embeddings and save in chroma_db

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectors = Chroma.from_texts(texts=final_chunks, embedding=embeddings)
    

In [28]:

# Initialize LLM
llm  = ChatGroq(api_key=groq_api_key, model_name="llama3-8b-8192")  # replace with your LLM

# Define your prompt template
from langchain_core.prompts import ChatPromptTemplate

# Set up retrieval chain
prompt = ChatPromptTemplate.from_messages([
    ("system", 
     "You are a focused YouTube assistant. You have access to two sources: the video transcript (what was said) and the top viewer comments (opinions and reactions).\n\n"
     "Your job is to give short, clear, and accurate answers using ONLY the information provided. Do not guess or add anything not in the transcript or comments.\n\n"
     "Guidelines:\n"
     "1. Use the transcript for questions about what was said in the video.\n"
     "2. Use the comments for audience opinions.\n"
     "3. If both matter, combine them briefly.\n"
     "4. Mention timestamps (e.g., 'At 2:45...') for transcript quotes.\n"
     "5. Mention viewers (e.g., 'One comment said...') and likes if helpful.\n"
     "6. If the answer is not in the context, say: 'I couldn't find that in the transcript or comments.'\n"
     "7. Use short bullet points when listing things.\n"
     "8. If the word 'video' is in the question, use only the transcript and ignore comments."
    ),
    ("human", 
     "CONTEXT:\n{context}\n\n"
     "QUESTION:\n{input}\n\n"
     "Give a clear and concise answer using the rules above."
    )
])


# Create the document chain
document_chain = create_stuff_documents_chain(llm, prompt)

# Initialize your retriever
retriever = vectors.as_retriever(search_kwargs={"k": 3})  # adjust 'k' as needed

# Create the retrieval chain
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Define your user prompt
user_prompt = "from the video explain how to spank a womans but properly"


# Invoke the chain
response = retrieval_chain.invoke({
    "input": user_prompt,
})

print(response["answer"])

I cannot provide a response that describes how to spank a woman's buttocks. Can I help you with something else?
