In [None]:
# STEP 1: Install dependencies
# NOTE: the leading "!" is IPython/Jupyter shell-escape syntax, so this line
# only runs inside a notebook cell; it is a syntax error in a plain .py script.
!pip install langchain youtube-transcript-api sentence-transformers faiss-cpu

# STEP 2: Imports
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.text_splitter import CharacterTextSplitter
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# STEP 3: Get YouTube transcript
def get_transcript(video_id):
    """Return the transcript of a YouTube video as one space-joined string.

    Args:
        video_id: Video identifier passed to
            ``YouTubeTranscriptApi.get_transcript``.

    Returns:
        The ``'text'`` field of every transcript entry, joined with single
        spaces. If anything goes wrong, no exception is raised; instead the
        string ``"Error fetching transcript: <exception>"`` is returned, so
        callers must inspect the result to detect a failure.
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        pieces = (segment['text'] for segment in segments)
        return " ".join(pieces)
    except Exception as err:
        # Best-effort by design: the failure is reported as text, not raised.
        return f"Error fetching transcript: {err}"

# Try with this video (change to your own if needed)
video_id = "ATlila3e9dM"  # <-- replace with actual video ID
raw_text = get_transcript(video_id)

# get_transcript reports failures by returning an
# "Error fetching transcript: ..." string instead of raising. Stop here so the
# error message is not chunked, embedded and "summarized" as if it were the
# video's transcript.
if raw_text.startswith("Error fetching transcript:"):
    raise RuntimeError(raw_text)

# An empty transcript would produce no chunks and fail later with a far less
# obvious error when the embedding dimension is read.
if not raw_text.strip():
    raise RuntimeError(f"Transcript for video {video_id!r} is empty")

# STEP 4: Chunk the transcript
# CharacterTextSplitter splits on "\n\n" by default, but the transcript was
# joined with single spaces, so that separator never matches and the whole
# transcript would come back as one oversized chunk (chunk_size is only
# enforced when merging pieces produced by the separator). Splitting on
# spaces lets the 500-character limit and 100-character overlap take effect.
splitter = CharacterTextSplitter(separator=" ", chunk_size=500, chunk_overlap=100)
docs = splitter.create_documents([raw_text])

# STEP 5: Create embeddings using SentenceTransformers
model = SentenceTransformer("all-MiniLM-L6-v2")

# Each chunk is a Document; the encoder is given its raw text (.page_content).
chunk_texts = [doc.page_content for doc in docs]
embeddings = model.encode(chunk_texts, show_progress_bar=True)

# STEP 6: Create FAISS index
dim = embeddings.shape[1]  # width of a single embedding vector
index = faiss.IndexFlatL2(dim)  # flat index compared by L2 distance
index.add(np.array(embeddings))  # store every chunk embedding in the index

# STEP 7: Create a function for querying the FAISS index
def query_faiss(query, k=5):
    """Return the transcript chunks closest to ``query`` in the FAISS index.

    Relies on the module-level ``model`` (encoder), ``index`` (FAISS index)
    and ``docs`` (chunk Documents, in the order they were added to ``index``).

    Args:
        query: Text to embed and search for.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A list of ``(chunk_text, distance)`` tuples in the order FAISS
        returned them. It holds fewer than ``k`` items when the index
        contains fewer than ``k`` vectors.
    """
    query_embedding = model.encode([query])
    distances, indices = index.search(query_embedding, k)
    # When fewer than k vectors are indexed, FAISS pads the result with the
    # label -1. Using that as a list index would silently return docs[-1]
    # (the last chunk) as a bogus hit, so those padding slots are dropped.
    return [
        (docs[i].page_content, distance)
        for i, distance in zip(indices[0], distances[0])
        if i != -1
    ]

# STEP 8: Query the system for a summary
query = "Summarize this video in a short paragraph."

# Retrieve the 3 chunks nearest to the query; each result is (text, distance).
results = query_faiss(query, k=3)

# The "summary" is simply the retrieved chunk texts joined with spaces.
summary = " ".join(text for text, _ in results)
print("🔍 Summary:\n", summary)




Batches:   0%|          | 0/1 [00:00<?, ?it/s]

🔍 Summary:
 Look at this baby. So gentle, so innocent. You'd think this child would grow up to become a good man, an honest man. Well, think again. *alarm sounds* Hitler was born Adolphus Hitler in 1889 in a small town in Austria-Hungary. His father, Alois Schicklgruber, was born out of wedlock, but eventually changed his name to that of his stepfather, becoming Alois Hitler. Alois was a mid-level Austrian customs officer-- not really rolling in cash, but certainly rolling in women. He married a rich, older lady, but then immediately started having affairs, including one with a much younger house servant. A few years later, he left his sick wife to be with his mistress, but since the Catholic Church didn't allow divorce at the time, he couldn't marry her. So he waited for his old wife to die and had a child in the meantime. Then his wife died, so he married his mistress and had another child, but then his new wife got sick, so he employed his much, much younger cousin Clara to take car

In [None]:
# Upgrade langchain and install/upgrade langchain-community (-U = --upgrade).
# The leading "!" is a notebook shell escape, not Python syntax.
!pip install -U langchain langchain-community


Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB