Copyright 2024 Google LLC.

In [3]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


#Gemini API: GeminiVideoReader -- Batch Prediction with Long Context and Context Caching Code Sample 🚀🧠

This notebook serves as a step-by-step guide for building an AI-powered video analysis Batch Prediction with Long Context and Context Caching Code Sample.

In [4]:
!pip install -U -q google-genai youtube_transcript_api

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/144.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m144.7/144.7 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.9 MB[0m [31m31.7 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.9/1.9 MB[0m [31m33.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[?25h

# Configure your API key
To run the following cell, your API key must be stored in a Colab Secret named GOOGLE_API_KEY. If you don't already have an API key, or you're not sure how to create a Colab Secret, see Authentication for an example.

In [35]:
from google import genai
from google.colab import userdata

API_KEY = userdata.get('GOOGLE_API_KEY')
client = genai.Client(api_key=API_KEY)

## Import Libraries and Initialize Google Gemini Client
Import required libraries and initialize the Google Gemini API client using Colab’s userdata.

In [37]:
import asyncio
import logging
import json
import os,re
import time
from hashlib import sha256
from google import genai
from google.colab import userdata

MODEL_ID = "gemini-2.0-flash"
SYSTEM_PROMPT = "Answer the question based on the provided transcript context. Provide a concise answer."
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("GeminiVideoReader")


## Define Define Helper Functions
Define functions to extract the YouTube video ID, fetch its transcript, and call the Gemini API asynchronously.

In [38]:
def extract_video_id(url):
    if "v=" in url:
        return url.split("v=")[1].split("&")[0]
    elif "youtu.be/" in url:
        return url.split("youtu.be/")[1].split("?")[0]
    return None

def get_youtube_transcript(video_url):
    response = client.models.generate_content(
        model=MODEL_ID,
        contents=genai.types.Content(
            parts=[
                genai.types.Part(text="Extract the full transcript from this video."),
                genai.types.Part(
                    file_data=genai.types.FileData(file_uri=video_url)
                )
            ]
        )
    )
    return response.text

def segment_text(text, max_chars=3000):
    sentences = text.split('. ')
    segments = []
    current_segment = ""
    for sentence in sentences:
        if len(current_segment) + len(sentence) + 1 < max_chars:
            current_segment += sentence + ". "
        else:
            segments.append(current_segment.strip())
            current_segment = sentence + ". "
    if current_segment:
        segments.append(current_segment.strip())
    return segments

class GeminiCache:
    CACHE_FILE = "context_cache.json"
    def __init__(self):
        if os.path.exists(self.CACHE_FILE):
            with open(self.CACHE_FILE, "r") as f:
                self.cache = json.load(f)
        else:
            self.cache = {}
    def save_cache(self):
        with open(self.CACHE_FILE, "w") as f:
            json.dump(self.cache, f)
    def generate_cache_key(self, text):
        return sha256(text.encode()).hexdigest()
    def get_or_summarize(self, text, summarization_prompt):
        key = self.generate_cache_key(text)
        if key in self.cache:
            return self.cache[key]
        response = client.models.generate_content(
            model=f"models/{MODEL_ID}",
            contents=[f"{summarization_prompt}\n{text}"],
            config=genai.types.GenerateContentConfig(system_instruction="Summarize the text concisely.")
        )
        summary = response.text
        self.cache[key] = summary
        self.save_cache()
        return summary

gemini_cache = GeminiCache()

def process_long_transcript(transcript_context, max_chars=3000):
    if len(transcript_context) <= max_chars:
        return transcript_context
    segments = segment_text(transcript_context, max_chars)
    summarized_segments = []
    for seg in segments:
        summary = gemini_cache.get_or_summarize(seg, "Summarize this transcript segment:")
        summarized_segments.append(summary)
    return "\n".join(summarized_segments)

def call_gemini_api_sync(prompt, cached_context, history, max_retries=3):
    history_text = "\n".join([f"Q: {q['question']} A: {q['answer']}" for q in history])
    full_prompt = f"Use this context:\n{cached_context}\n\n{history_text}\n\n{prompt}"
    retries = 0
    while retries < max_retries:
        try:
            response = client.models.generate_content(
                model=f"models/{MODEL_ID}",
                contents=[full_prompt],
                config=genai.types.GenerateContentConfig(system_instruction=SYSTEM_PROMPT)
            )
            return {"question": prompt, "answer": response.text}
        except Exception as e:
            logger.error(f"Error calling Gemini API: {e}. Retrying in {2 ** retries} seconds...")
            time.sleep(2 ** retries)
            retries += 1
    return {"question": prompt, "answer": "Error: Unable to get response after retries."}

async def call_gemini_api(prompt, cached_context, history):
    return await asyncio.to_thread(call_gemini_api_sync, prompt, cached_context, history)

async def batch_predict_async(prompts, cached_context, history, max_concurrent=5):
    semaphore = asyncio.Semaphore(max_concurrent)
    results = []
    async def sem_call(prompt):
        async with semaphore:
            result = await call_gemini_api(prompt, cached_context, history)
            history.append(result)
            return result
    tasks = [asyncio.create_task(sem_call(p)) for p in prompts]
    for task in asyncio.as_completed(tasks):
        results.append(await task)
    return results


## Input YouTube Link, and Fetch Transcript

Let's use [Why was the Rosetta Stone so important? - Franziska Naether](https://www.youtube.com/watch?v=Z8dZSySRX_g) as an example

In [39]:
import nest_asyncio
nest_asyncio.apply()

youtube_url = input("Enter the YouTube video URL: ")
video_id = extract_video_id(youtube_url)
if not video_id:
    print("Invalid YouTube URL!")
else:
    print("Extracted video ID:", video_id)
transcript_data = get_youtube_transcript(youtube_url)
if transcript_data is None:
    print("No transcript available for this video!")
print("Transcript fetched successfully.")
print("\n--- Transcript Preview (first 500 characters) ---\n")
print(transcript_data[:500])
transcript_context = process_long_transcript(transcript_data)
print("\nTranscript context processed.")


Enter the YouTube video URL: https://www.youtube.com/watch?v=Z8dZSySRX_g
Extracted video ID: Z8dZSySRX_g
Transcript fetched successfully.

--- Transcript Preview (first 500 characters) ---

Okay, here's the full transcript from the video:

[00:00:06] For centuries, scholars puzzled over the hieroglyphs they found carved onto ancient Egyptian ruins, tablets, and papyri.

[00:00:14] But a unique discovery would finally help unlock their meaning.

[00:00:17] In 1799, as the French military invaded Egypt, an officer encountered a curious stone on the outskirts of Rashid or Rosetta.

[00:00:27] It was inscribed with three different portions of text: Egyptian hieroglyphs, which is the ol

Transcript context processed.


# Input Questions and Run Batch Prediction

In [41]:
print("Enter your questions (one per line). Press Enter on an empty line when done:")
questions_list = []
while True:
    line = input()
    if not line:
        break
    questions_list.append(line)
if not questions_list:
    print("No questions were entered!")
else:
    print("Questions received:", questions_list)
if transcript_data is not None and questions_list:
    conversation_history = []
    results = asyncio.run(batch_predict_async(questions_list, transcript_context, conversation_history))
    print("\n--- AI-Generated Answers ---\n")
    for i, res in enumerate(results, start=1):
        print(f"Q{i}: {res['question']}")
        print(f"A{i}: {res['answer']}\n")
    print("\n--- Conversation History ---\n")
    for entry in conversation_history:
        print(f"Q: {entry['question']}\nA: {entry['answer']}\n")

start_time = time.time()
if transcript_data is not None and questions_list:
    conversation_history = []
    results = asyncio.run(batch_predict_async(questions_list, transcript_context, conversation_history))
end_time = time.time()
print(f"\nTotal batch prediction time: {end_time - start_time:.2f} seconds")


Enter your questions (one per line). Press Enter on an empty line when done:
What is the ancient message of the Rosetta Stone?
Who are the scholars decoded the ancient message?

Questions received: ['What is the ancient message of the Rosetta Stone?', 'Who are the scholars decoded the ancient message?']

--- AI-Generated Answers ---

Q1: Who are the scholars decoded the ancient message?
A1: Åkerblad, Young, and Champollion are the scholars who worked on decoding the Rosetta Stone.


Q2: What is the ancient message of the Rosetta Stone?
A2: The Rosetta Stone contains a decree honoring Pharaoh Ptolemy V's coronation anniversary, outlining benefits for the priesthood and temple maintenance.



--- Conversation History ---

Q: Who are the scholars decoded the ancient message?
A: Åkerblad, Young, and Champollion are the scholars who worked on decoding the Rosetta Stone.


Q: What is the ancient message of the Rosetta Stone?
A: The Rosetta Stone contains a decree honoring Pharaoh Ptolemy V's