## Step 1: Install Required Packages

In [None]:
# Install the notebook's dependencies. Each "!" line is handed to the shell by
# the notebook (it is not Python); "-q" keeps pip's output quiet.
!pip install -q youtube-transcript-api
!pip install -q langchain
!pip install -q langchain-openai
!pip install -q langchain-huggingface
!pip install -q langchain-community
!pip install -q langchain-text-splitters
!pip install -q langchain-chroma
!pip install -q chromadb
!pip install -q openai
!pip install -q gradio
!pip install -q torch
!pip install -q sentence-transformers
# NOTE(review): this message is printed unconditionally; nothing here checks
# whether the pip commands above actually succeeded.
print("‚úÖ All packages installed successfully!")

## Step 2: Set Your OpenAI API Key

Get your API key from: https://platform.openai.com/api-keys

In [None]:
import os
from getpass import getpass

# Option 1: Use Colab Secrets (Recommended)
# Add your key in Colab: Secrets (key icon) -> Add "OPENAI_API_KEY"
try:
    # Imported inside the try so the cell also works outside Colab, where the
    # google.colab package does not exist and the import itself fails.
    from google.colab import userdata

    OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
    print("‚úÖ API key loaded from Colab Secrets")
except Exception:
    # Option 2: Enter manually (less secure)
    # `except Exception` instead of a bare `except` so that interrupting the
    # cell (KeyboardInterrupt) is not silently turned into a key prompt.
    # getpass hides the typed key so it is not echoed into the cell output.
    OPENAI_API_KEY = getpass("Enter your OpenAI API key: ")
    print("‚úÖ API key entered manually")

# Reject a missing/blank key here rather than failing later on the first API call.
OPENAI_API_KEY = (OPENAI_API_KEY or "").strip()
if not OPENAI_API_KEY:
    raise ValueError(
        "OPENAI_API_KEY is empty; add it to Colab Secrets or enter it when prompted."
    )

# The OpenAI/LangChain clients created later read the key from this variable.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

## Step 3: Import Libraries and Setup

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import json

print("‚úÖ Libraries imported successfully")

## Step 4: Transcript Fetcher Class

In [None]:
from youtube_transcript_api import YouTubeTranscriptApi as YTAPI

class YouTubeTranscriptFetcher:
    """Fetch YouTube transcripts and flatten them into plain text."""

    @staticmethod
    def extract_video_id(url: str) -> str:
        """Return the video ID contained in a YouTube URL.

        Handles ``watch?v=``, ``youtu.be/<id>``, ``/embed/<id>``,
        ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` forms, with or
        without a scheme, and ignores extra query parameters and fragments.

        Args:
            url: A YouTube URL or an already-bare video ID.

        Returns:
            The extracted video ID. Input that is not a recognisable YouTube
            URL is returned unchanged (apart from surrounding whitespace),
            on the assumption that it already is a video ID.
        """
        from urllib.parse import parse_qs, urlparse

        candidate = url.strip()
        if "youtube.com" not in candidate and "youtu.be" not in candidate:
            return candidate  # Already a video ID

        # urlparse only fills in the host when a scheme (or "//") is present.
        target = candidate if "://" in candidate else f"https://{candidate}"
        try:
            parsed = urlparse(target)
        except ValueError:
            return candidate

        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]

        if host == "youtu.be" or host.endswith(".youtu.be"):
            if segments:
                return segments[0]
        elif host == "youtube.com" or host.endswith(".youtube.com"):
            # Parse the query properly: a substring search for "v=" would
            # also match parameters that merely end in "v".
            query_ids = parse_qs(parsed.query).get("v")
            if query_ids:
                return query_ids[0]
            if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
                return segments[1]
        return candidate

    def fetch_transcript(self, video_id: str) -> dict:
        """Fetch the transcript of a single video.

        Args:
            video_id: A video ID or any URL accepted by ``extract_video_id``.

        Returns:
            A dict with keys ``'video_id'`` (the normalised ID),
            ``'transcript'`` (all segment texts joined by spaces) and
            ``'segments'`` (the raw list of segment dicts).

        Raises:
            Exception: If the ID is empty or the transcript cannot be
                retrieved; the underlying error is chained as ``__cause__``.
        """
        video_id = self.extract_video_id(video_id)
        if not video_id:
            raise Exception("‚ùå Error: empty video ID")

        try:
            if hasattr(YTAPI, "fetch"):
                # youtube-transcript-api 1.x: instance API; to_raw_data()
                # yields the same list-of-dicts shape as the legacy call.
                transcript_list = YTAPI().fetch(video_id).to_raw_data()
            else:
                # Older releases only expose the static helper.
                transcript_list = YTAPI.get_transcript(video_id)

            # Combine all text
            full_text = " ".join(entry['text'] for entry in transcript_list)
        except Exception as e:
            # Map the library's message onto a short, user-facing one while
            # keeping the original exception attached for debugging.
            error_msg = str(e)
            if "Could not retrieve" in error_msg or "disabled" in error_msg.lower():
                raise Exception(f"‚ùå No transcript available for video: {video_id}") from e
            raise Exception(f"‚ùå Error: {error_msg}") from e

        return {
            'video_id': video_id,
            'transcript': full_text,
            'segments': transcript_list
        }

    def fetch_multiple(self, video_ids: list) -> list:
        """Fetch transcripts for several videos, skipping the ones that fail.

        Progress and per-video errors are printed; a failure for one video
        does not stop the remaining ones.

        Args:
            video_ids: Video IDs or URLs.

        Returns:
            The transcript dicts (see ``fetch_transcript``) of the videos
            that were fetched successfully, in input order.
        """
        transcripts = []
        total = len(video_ids)
        for i, video_id in enumerate(video_ids, 1):
            print(f"[{i}/{total}] Fetching: {video_id}")
            try:
                transcript = self.fetch_transcript(video_id)
            except Exception as e:
                # Best effort: report the problem and move on to the next video.
                print(f"  {str(e)}")
                continue
            transcripts.append(transcript)
            print(f"  ‚úÖ Success! Got {len(transcript['transcript'])} characters")
        return transcripts

print("‚úÖ Transcript fetcher ready")

## Step 5: Add YouTube Videos

Add video IDs or URLs to the `video_ids` list below (one per line), or type them comma-separated when prompted.

In [None]:
# Enter your YouTube video IDs or URLs here
video_ids = [
    "jNQXAC9IVRw",  # Example: Me at the zoo
    # Add more video IDs here
]

# Or input manually
manual_input = input("Enter video IDs (comma-separated) or press Enter to use default: ").strip()
if manual_input:
    # Drop blank entries so a trailing or doubled comma ("a,,b,") does not
    # yield empty IDs that would each trigger a failing fetch. If nothing
    # usable was typed (e.g. just ","), keep the default list above.
    entered_ids = [v.strip() for v in manual_input.split(',') if v.strip()]
    if entered_ids:
        video_ids = entered_ids

print(f"\nüì• Fetching {len(video_ids)} video(s)...\n")

# Videos whose transcript cannot be fetched are reported and skipped, so
# `transcripts` may be shorter than `video_ids`.
fetcher = YouTubeTranscriptFetcher()
transcripts = fetcher.fetch_multiple(video_ids)

print(f"\n‚úÖ Successfully fetched {len(transcripts)} transcript(s)!")

## Step 6: Create Text Chunks

In [None]:
# Wrap every fetched transcript in a LangChain Document, carrying the video ID
# and its watch URL along as metadata.
documents = [
    Document(
        page_content=item['transcript'],
        metadata={
            'video_id': item['video_id'],
            'url': f"https://www.youtube.com/watch?v={item['video_id']}",
        },
    )
    for item in transcripts
]

# Splitter configuration: chunks of up to 1000 characters (measured with len)
# that overlap their neighbour by 200 characters.
splitter_settings = dict(chunk_size=1000, chunk_overlap=200, length_function=len)
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Cut the transcript documents into chunks; each chunk keeps its source metadata.
chunks = text_splitter.split_documents(documents)

print(f"‚úÖ Created {len(chunks)} text chunks")

## Step 7: Create Vector Database

In [None]:
print("üîÑ Creating embeddings... (this may take a minute)")

# Fail early with an actionable message instead of an opaque error from the
# vector store when no transcript could be fetched/chunked.
if not chunks:
    raise ValueError(
        "No text chunks to index; fetch at least one transcript (Step 5) "
        "and re-run Step 6 before creating the vector database."
    )

# Create embeddings
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Give every chunk a stable ID of the form "<video_id>-<n>", where n counts the
# chunks of that video. The store is persisted in ./chroma_db, so re-running
# this cell with auto-generated IDs would add a second copy of every chunk;
# with stable IDs a re-run overwrites the existing entries instead.
# NOTE(review): if a video is re-indexed with fewer chunks than before (e.g.
# after changing the splitter settings), its old higher-numbered chunks stay
# in ./chroma_db until the directory is removed.
chunks_per_video = {}
chunk_ids = []
for chunk in chunks:
    source_video = chunk.metadata.get('video_id', 'unknown')
    position = chunks_per_video.get(source_video, 0)
    chunks_per_video[source_video] = position + 1
    chunk_ids.append(f"{source_video}-{position}")

# Create vector store
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="./chroma_db"
)

print(f"‚úÖ Vector database created with {len(chunks)} chunks!")

## Step 8: Create RAG Chatbot

In [None]:
# Create the chat model used to generate answers (model name and sampling
# temperature are fixed here).
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.7
)

# Create RAG prompt template. It has two placeholders: {context} receives the
# retrieved transcript text and {question} the user's question. The wording
# tells the model to admit when it does not know and to answer in at most
# three sentences.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer concise.

Context: {context}

Question: {question}

Answer:"""

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

# Create retriever: asks the vector store for the 4 best-matching chunks (k=4)
# per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

# Create RAG chain
def format_docs(docs):
    """Concatenate the ``page_content`` of *docs*, separated by blank lines."""
    page_texts = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Assemble the pipeline. The first stage builds the prompt inputs from the
# incoming question: "context" is the retriever's output rendered to text by
# format_docs, and "question" is supplied by RunnablePassthrough (presumably
# the question forwarded unchanged). The result is then piped through the
# prompt, the chat model, and a parser that yields a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print("‚úÖ RAG Chatbot ready!")

## Step 9: Chat Function

In [None]:
def chat(question: str):
    """Answer a question about the indexed videos and print the sources.

    Prints the question, the generated answer, and up to three distinct
    source videos taken from the chunks retrieved for the question.

    Args:
        question: The user's question.

    Returns:
        The answer text produced by the RAG chain.
    """
    print(f"\n‚ùì Question: {question}\n")

    # Get answer from RAG chain
    answer = rag_chain.invoke(question)

    print(f"üí¨ Answer: {answer}\n")

    # Get source documents for reference. `invoke` is the supported retriever
    # entry point; `get_relevant_documents` is deprecated in LangChain.
    source_docs = retriever.invoke(question)
    if source_docs:
        print("üìö Sources:")
        seen_videos = set()
        # Only the top three chunks are considered; several chunks may come
        # from the same video, so each video is listed once.
        for doc in source_docs[:3]:
            video_id = doc.metadata.get('video_id', 'Unknown')
            if video_id in seen_videos:
                continue
            seen_videos.add(video_id)
            print(f"  ‚Ä¢ Video: {video_id}")
            print(f"    URL: https://www.youtube.com/watch?v={video_id}")

    return answer

print("‚úÖ Chat function ready! Use: chat('your question here')")

## Step 10: Start Chatting!

Now you can ask questions about your videos:

In [None]:
# Example question: chat() prints the question, the generated answer and the
# source videos, and returns the answer text.
chat("What is this video about?")

In [None]:
# Ask your own question: read it from the prompt and pass it to chat(),
# which prints the answer together with its source videos.
question = input("Your question: ")
chat(question)

## Step 11: Interactive UI with Gradio (Optional)

In [None]:
import gradio as gr

def gradio_chat(message, history):
    """Produce the chatbot reply for the Gradio chat interface.

    Args:
        message: The user's latest message.
        history: Previous conversation turns supplied by Gradio; not used,
            every message is answered independently.

    Returns:
        The answer as Markdown, followed by links to at most two distinct
        source videos when any chunks were retrieved.
    """
    # Get answer from RAG chain
    answer = rag_chain.invoke(message)

    # Build response with sources
    response = answer

    # `invoke` is the supported retriever entry point; `get_relevant_documents`
    # is deprecated in LangChain.
    source_docs = retriever.invoke(message)
    if source_docs:
        response += "\n\nüìö **Sources:**\n"
        seen_videos = set()
        # Look at the top two chunks only and list each video once.
        for doc in source_docs[:2]:
            video_id = doc.metadata.get('video_id', 'Unknown')
            if video_id in seen_videos:
                continue
            seen_videos.add(video_id)
            response += f"- [Video {video_id}](https://www.youtube.com/watch?v={video_id})\n"

    return response

# Create Gradio interface: a chat UI that sends every user message to
# gradio_chat. The description shows how many transcripts were fetched, and
# the examples are offered as ready-made questions.
demo = gr.ChatInterface(
    fn=gradio_chat,
    title="üé• YouTube RAG Chatbot",
    description=f"Ask questions about {len(transcripts)} YouTube video(s)",
    examples=[
        "What is the main topic?",
        "Summarize the key points",
        "What are the important details?"
    ]
)

# Launch with public link
# NOTE(review): share=True requests a publicly reachable URL; anyone who has
# the link can ask questions, and each one is answered using this notebook's
# OpenAI API key. Use demo.launch() without share=True to keep it private.
demo.launch(share=True)

## 🎉 You're Done!

### Usage:
1. Use `chat("your question")` in any cell
2. Or use the Gradio UI above

### Add More Videos:
Run Step 5 again with the new video IDs, then re-run Steps 6-8 so the new transcripts are chunked, embedded, and available to the chatbot.

### Tips:
- Use videos with captions enabled
- Try educational content, tutorials, or lectures
- The more videos you add, the more knowledge the chatbot has!