Copyright 2024 Google LLC.

In [96]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


#Gemini API: GeminiVideoReader -- Batch Prediction with Long Context and Context Caching Code Sample 🚀🧠

This notebook serves as a step-by-step guide for building an AI-powered video analysis application.

The app uses Streamlit for the web interface, Vertex AI for generating content from video transcripts, and the YouTube Transcript API to fetch transcripts. We also cache summaries to save API calls.


In [97]:
!pip install -U -q google-genai youtube_transcript_api

# Configure your API key
To run the following cell, your API key must be stored in a Colab Secret named GOOGLE_API_KEY. If you don't already have an API key, or you're not sure how to create a Colab Secret, see Authentication for an example.

In [98]:
from google import genai
from google.colab import userdata

API_KEY = userdata.get('GOOGLE_API_KEY')
client = genai.Client(api_key=API_KEY)

## Import Libraries and Initialize Google Gemini Client
Import required libraries and initialize the Google Gemini API client using Colab’s userdata.

In [99]:
import asyncio
import logging
import json
import os
import time
from hashlib import sha256
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
from google import genai
from google.colab import userdata

MODEL_ID = "gemini-2.0-flash"
SYSTEM_PROMPT = "Answer the question based on the provided transcript context. Provide a concise answer."
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("GeminiVideoReader")


## Define Define Helper Functions
Define functions to extract the YouTube video ID, fetch its transcript, and call the Gemini API asynchronously.

In [100]:
def extract_video_id(url):
    if "v=" in url:
        return url.split("v=")[1].split("&")[0]
    elif "youtu.be/" in url:
        return url.split("youtu.be/")[1].split("?")[0]
    return None

def fetch_youtube_transcript(video_id, lang_code="en"):
    try:
        return YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_code])
    except NoTranscriptFound:
        try:
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            generated_transcript = transcript_list.find_generated_transcript([lang_code])
            return generated_transcript.fetch()
        except Exception as e:
            logger.error(f"Fallback transcript fetch failed: {e}")
            return None
    except Exception as e:
        logger.error(f"Error fetching transcript: {e}")
        return None

def segment_text(text, max_chars=3000):
    sentences = text.split('. ')
    segments = []
    current_segment = ""
    for sentence in sentences:
        if len(current_segment) + len(sentence) + 1 < max_chars:
            current_segment += sentence + ". "
        else:
            segments.append(current_segment.strip())
            current_segment = sentence + ". "
    if current_segment:
        segments.append(current_segment.strip())
    return segments

class GeminiCache:
    CACHE_FILE = "context_cache.json"
    def __init__(self):
        if os.path.exists(self.CACHE_FILE):
            with open(self.CACHE_FILE, "r") as f:
                self.cache = json.load(f)
        else:
            self.cache = {}
    def save_cache(self):
        with open(self.CACHE_FILE, "w") as f:
            json.dump(self.cache, f)
    def generate_cache_key(self, text):
        return sha256(text.encode()).hexdigest()
    def get_or_summarize(self, text, summarization_prompt):
        key = self.generate_cache_key(text)
        if key in self.cache:
            return self.cache[key]
        response = client.models.generate_content(
            model=f"models/{MODEL_ID}",
            contents=[f"{summarization_prompt}\n{text}"],
            config=genai.types.GenerateContentConfig(system_instruction="Summarize the text concisely.")
        )
        summary = response.text
        self.cache[key] = summary
        self.save_cache()
        return summary

gemini_cache = GeminiCache()

def process_long_transcript(transcript_context, max_chars=3000):
    if len(transcript_context) <= max_chars:
        return transcript_context
    segments = segment_text(transcript_context, max_chars)
    summarized_segments = []
    for seg in segments:
        summary = gemini_cache.get_or_summarize(seg, "Summarize this transcript segment:")
        summarized_segments.append(summary)
    return "\n".join(summarized_segments)

def call_gemini_api_sync(prompt, cached_context, history, max_retries=3):
    history_text = "\n".join([f"Q: {q['question']} A: {q['answer']}" for q in history])
    full_prompt = f"Use this context:\n{cached_context}\n\n{history_text}\n\n{prompt}"
    retries = 0
    while retries < max_retries:
        try:
            response = client.models.generate_content(
                model=f"models/{MODEL_ID}",
                contents=[full_prompt],
                config=genai.types.GenerateContentConfig(system_instruction=SYSTEM_PROMPT)
            )
            return {"question": prompt, "answer": response.text}
        except Exception as e:
            logger.error(f"Error calling Gemini API: {e}. Retrying in {2 ** retries} seconds...")
            time.sleep(2 ** retries)
            retries += 1
    return {"question": prompt, "answer": "Error: Unable to get response after retries."}

async def call_gemini_api(prompt, cached_context, history):
    return await asyncio.to_thread(call_gemini_api_sync, prompt, cached_context, history)

async def batch_predict_async(prompts, cached_context, history, max_concurrent=5):
    semaphore = asyncio.Semaphore(max_concurrent)
    results = []
    async def sem_call(prompt):
        async with semaphore:
            result = await call_gemini_api(prompt, cached_context, history)
            history.append(result)
            return result
    tasks = [asyncio.create_task(sem_call(p)) for p in prompts]
    for task in asyncio.as_completed(tasks):
        results.append(await task)
    return results


## Input YouTube Link, Language, and Fetch Transcript

Let's use [GSOC 2025 Complete Roadmap: Step by Step Guide as a example](https://www.youtube.com/watch?v=5JYJlQpni6o)

In [101]:
import nest_asyncio
nest_asyncio.apply()

youtube_url = input("Enter the YouTube video URL: ")
video_id = extract_video_id(youtube_url)
lang_code = input("Enter transcript language code (default 'en'): ") or "en"
if not video_id:
    print("Invalid YouTube URL!")
else:
    print("Extracted video ID:", video_id)
transcript_data = fetch_youtube_transcript(video_id, lang_code)
if transcript_data is None:
    print("No transcript available for this video!")
else:
    if isinstance(transcript_data, list):
        transcript_context = "\n".join([item["text"] for item in transcript_data])
    else:
        transcript_context = "\n".join(transcript_data)
    print("Transcript fetched successfully.")
    print("\n--- Transcript Preview (first 500 characters) ---\n")
    print(transcript_context[:500] + "\n...")
    transcript_context = process_long_transcript(transcript_context)
    print("\nTranscript context processed.")


Enter the YouTube video URL: https://www.youtube.com/watch?v=5JYJlQpni6o
Enter transcript language code (default 'en'): 
Extracted video ID: 5JYJlQpni6o
Transcript fetched successfully.

--- Transcript Preview (first 500 characters) ---

20 people got into gck last year from
the 100ex cohort I've personally done
gck twice it's November right now gso
application start in March I think it's
the perfect time to get a high level
road map of gck in this video I'll take
you through how I would prepare for G if
I had to do it all over again today G is
open not just to students but also to
working professionals so irrespective of
where you're looking at this video from
this video is a high level guide to G
2025 without any further Ado l
...

Transcript context processed.


# Input Questions and Run Batch Prediction

In [102]:
print("Enter your questions (one per line). Press Enter on an empty line when done:")
questions_list = []
while True:
    line = input()
    if not line:
        break
    questions_list.append(line)
if not questions_list:
    print("No questions were entered!")
else:
    print("Questions received:", questions_list)
if transcript_data is not None and questions_list:
    conversation_history = []
    results = asyncio.run(batch_predict_async(questions_list, transcript_context, conversation_history))
    print("\n--- AI-Generated Answers ---\n")
    for i, res in enumerate(results, start=1):
        print(f"Q{i}: {res['question']}")
        print(f"A{i}: {res['answer']}\n")
    print("\n--- Conversation History ---\n")
    for entry in conversation_history:
        print(f"Q: {entry['question']}\nA: {entry['answer']}\n")

start_time = time.time()
if transcript_data is not None and questions_list:
    conversation_history = []
    results = asyncio.run(batch_predict_async(questions_list, transcript_context, conversation_history))
end_time = time.time()
print(f"\nTotal batch prediction time: {end_time - start_time:.2f} seconds")


Enter your questions (one per line). Press Enter on an empty line when done:
How to prepaer for Gsoc
How to get into Gsoc

Questions received: ['How to prepaer for Gsoc', 'How to get into Gsoc']

--- AI-Generated Answers ---

Q1: How to get into Gsoc
A1: To get into GSoC, focus on contributing to open-source projects early and often, especially targeting newer or less popular organizations, and communicate with maintainers.


Q2: How to prepaer for Gsoc
A2: To prepare for GSoC, target "luck-based" organizations if you're a beginner, focus on contributions, talk to maintainers, start early, and choose a stack like JavaScript or Python to become proficient in. Contributions are the most important factor for selection.



--- Conversation History ---

Q: How to get into Gsoc
A: To get into GSoC, focus on contributing to open-source projects early and often, especially targeting newer or less popular organizations, and communicate with maintainers.


Q: How to prepaer for Gsoc
A: To prepar