In [1]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# YouTube Video Analysis with Gemini

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/video-analysis/youtube_video_analysis.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fuse-cases%2Fvideo-analysis%2Fyoutube_video_analysis.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/use-cases/video-analysis/youtube_video_analysis.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/video-analysis/youtube_video_analysis.ipynb">
      <img width="32px" src="https://www.svgrepo.com/download/217753/github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/video-analysis/youtube_video_analysis.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/video-analysis/youtube_video_analysis.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/video-analysis/youtube_video_analysis.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/video-analysis/youtube_video_analysis.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/use-cases/video-analysis/youtube_video_analysis.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>            

| | |
|-|-|
| Author(s) | [Alok Pattani](https://github.com/alokpattani/) |

## Overview

In this notebook, you'll explore how to do direct analysis of publicly available [YouTube](https://www.youtube.com/) videos with Gemini.

You will complete the following tasks:

- Summarizing a single YouTube video using Gemini 2.5 Flash
- Extracting a specific set of structured outputs from a longer YouTube video using Gemini 2.5 Pro and controlled generation
- Creating insights from analyzing multiple YouTube videos together using asynchronous generation with Gemini

## Get started

### Install Google Gen AI SDK and other required packages


In [1]:
# Install or upgrade the Google Gen AI SDK (google-genai) and itables in the kernel's environment
%pip install --upgrade --quiet google-genai itables

Note: you may need to restart the kernel to use updated packages.


### Restart runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.

The restart might take a minute or longer. After it's restarted, continue to the next step.

In [3]:
import IPython

# Look up the running IPython application and ask its kernel to shut down;
# passing True requests a restart rather than a plain shutdown.
IPython.Application.instance().kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Set Google Cloud project information and create client

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [1]:
import os
from google import genai

# Google Cloud project used for the Vertex AI calls. A GOOGLE_CLOUD_PROJECT
# environment variable takes precedence; otherwise the project ID this notebook
# was last run with is used. Replace the fallback with your own project ID when
# running somewhere else.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT") or "qwiklabs-gcp-03-b230f75cd320"

# Vertex AI location. The lookup key has to be the *name* of an environment
# variable (GOOGLE_CLOUD_REGION), not a region value; "global" is used when the
# variable is unset or empty.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION") or "global"

# Gen AI SDK client routed through Vertex AI; reused by every request below
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

## Set up libraries, options, and models

### Import libraries

In [2]:
import json

from IPython.display import HTML, Markdown, display
from google.genai.types import GenerateContentConfig, Part
from itables import show
import itables.options as itable_opts
import pandas as pd
from tenacity import retry, stop_after_attempt, wait_random_exponential

### Configure some notebook options

In [3]:
# Configure some options related to interactive tables

# Size limits for displayed tables. maxBytes is written as an integer byte count
# (same value as the previous float literal 1e9, but an int).
# NOTE(review): the float is presumably what triggered the "is not an instance
# of int" message seen in the table outputs — confirm against the itables docs.
itable_opts.maxBytes = 1_000_000_000
itable_opts.maxColumns = 50

# Empty initial ordering, and per-column filters placed in the table header
itable_opts.order = []
itable_opts.column_filters = "header"

### Create a helper function

In [4]:
def display_youtube_video(url: str) -> None:
    """Show an embedded YouTube player for ``url`` in the notebook output.

    The link is converted to its ``/embed/<video id>`` form, placed in an
    ``<iframe>`` and rendered with ``display(HTML(...))``.

    Args:
        url: Link to a YouTube video, normally
            ``https://www.youtube.com/watch?v=<id>``. Watch links carrying extra
            query parameters (``&t=30s``, ``&list=...``) and
            ``https://youtu.be/<id>`` short links are handled as well. Anything
            else falls back to a plain ``/watch?v=`` -> ``/embed/`` substitution.

    Returns:
        None. The player is displayed as a side effect.
    """
    # Standard-library helpers, imported locally so the function is self-contained
    from html import escape
    from urllib.parse import parse_qs, urlsplit

    parsed_url = urlsplit(url)
    video_ids = parse_qs(parsed_url.query).get("v", [])
    short_link_id = parsed_url.path.strip("/")

    if parsed_url.path == "/watch" and video_ids:
        # Rebuild from the video ID alone so other query parameters do not end
        # up inside the embed path
        youtube_video_embed_url = (
            f"{parsed_url.scheme}://{parsed_url.netloc}/embed/{video_ids[0]}"
        )
    elif parsed_url.netloc.lower() == "youtu.be" and short_link_id:
        youtube_video_embed_url = f"https://www.youtube.com/embed/{short_link_id}"
    else:
        youtube_video_embed_url = url.replace("/watch?v=", "/embed/")

    # Escape before interpolating so quotes or angle brackets in the link cannot
    # break out of the src attribute
    safe_embed_url = escape(youtube_video_embed_url, quote=True)

    # Create HTML code to directly embed video
    youtube_video_embed_html_code = f"""
    <iframe width="560" height="315" src="{safe_embed_url}"
    title="YouTube video player" frameborder="0" allow="accelerometer; autoplay;
    clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen>
    </iframe>
    """

    # Display embedded YouTube video
    display(HTML(youtube_video_embed_html_code))

### Load models

In [5]:
# Set Gemini Flash and Pro models to be used in this notebook.
# Flash handles the single-video summary; Pro handles the structured extraction
# and the asynchronous multi-video calls.
GEMINI_FLASH_MODEL_ID = "gemini-2.5-flash"
GEMINI_PRO_MODEL_ID = "gemini-2.5-pro"

## Summarize a YouTube video

Provide a link to a public YouTube video that you'd like to summarize. Ensure that the video is less than an hour long to make sure it fits in the context window.

The default content to be summarized is [this 6.5-minute video showing how Major League Baseball (MLB) analyzes data using Google Cloud](https://www.youtube.com/watch?v=O_W_VGUeHVI).

In [6]:
# Public YouTube video to summarize (the trailing annotation exposes it as a form field)
YOUTUBE_VIDEO_URL = "https://www.youtube.com/watch?v=O_W_VGUeHVI"  # @param {type:"string"}

# Show the chosen video inline before sending it to Gemini
display_youtube_video(YOUTUBE_VIDEO_URL)

In [7]:
# Ask the Gemini Flash model for a detailed summary of the video
video_summary_prompt = "Give a detailed summary of this video."

video_summary_response = client.models.generate_content(
    contents=[
        # The video is referenced by its URL and listed ahead of the text prompt
        Part.from_uri(mime_type="video/webm", file_uri=YOUTUBE_VIDEO_URL),
        video_summary_prompt,
    ],
    model=GEMINI_FLASH_MODEL_ID,
)

# Render the model's text answer as Markdown
display(Markdown(video_summary_response.text))

This video explores how Major League Baseball (MLB) leverages data and Google Cloud to enhance the game for fans, teams, and analysts. Priyanka Vergadia, Lead Developer Advocate at Google, guides the viewer through this journey.

**0:00 - 0:39: Introduction - Baseball and the Power of Data**
The video opens by highlighting the long-standing relationship between baseball and statistics, noting that historical almanacs are filled with game stories and data. However, intuitively answering deep questions from this data has always been a significant challenge. The MLB has partnered with Google Cloud to push the boundaries of what's possible with data, both on and off the field.

**0:39 - 1:14: The Big Leagues of Data - Unveiling the Scale**
Priyanka introduces Josh Frost, Vice President of Product Management at MLB, who explains the sheer volume of data involved. In a single game, MLB captures an astonishing **25 million unique data points**. With 2,430 regular-season games each year, this presents immense data management challenges. The video shows clips of dynamic baseball plays, emphasizing the intricate details being captured.

**1:14 - 2:04: Decoding the Game with MLB Film Room**
Alok Pattani, Data Science Developer Advocate at Google, discusses how MLB has successfully "married the data to the video" through MLB Film Room, powered by Google Cloud. This tool allows users to search for specific plays (e.g., "longest home runs of 2021" or "Ohtani home runs in the 9th inning") and filter by season, hit result, team, and other metrics. This shows how granular data can be linked to visual replays.

Rob Engel, Senior Director of Software Engineering at MLB, then explains the technical stack used to process this data. Data from in-ballpark tools, particularly the Hawkeye cameras, is sent to:
*   **Anthos (in-ballpark tools):** A deployed Kubernetes solution inside each ballpark for initial processing.
*   **Kubernetes Engine Gameday Engine:** The data then flows to a Kubernetes cluster running in Google Cloud for further processing.
*   **Cloud SQL PostgreSQL Database:** Where the processed data is stored, accessible by the MLB Stats API to serve millions of fans globally in real-time.

**2:04 - 3:00: Baseball's Megabrain - Deep Dive into Data Collection and Processing**
The video highlights the sophistication of data collection. As soon as a pitch is released, **12 Hawkeye cameras** around the stadium track every aspect of the pitch at **30-100 frames per second (FPS)**, capturing around **60 data points per pitch**. This includes pose and player tracking. All this raw data, amounting to **over 25 terabytes per season**, is collected and processed within the ballpark's data center.

To handle this massive influx and subsequent analysis, MLB utilizes a robust Google Cloud pipeline:
*   **Anthos (In-ballpark Tools):** Collects raw data from Hawkeye cameras.
*   **Kubernetes Engine Gameday Engine:** Processes real-time data streams.
*   **Bigtable:** Used for highly scalable, low-latency storage, particularly for real-time tracking data like pose tracking.
*   **Dataflow & Cloud Composer:** For batch jobs that move data out of Bigtable and Cloud SQL into Cloud Storage.
*   **Cloud Storage:** Stores the vast amount of raw and processed data.
*   **BigQuery:** Where the data ultimately lands for advanced analytics, allowing analysts to query and share insights with all 30 clubs.
*   **Cloud SQL PostgreSQL Database & MLB Stats API:** Continues to serve data for real-time statistics and fan-facing applications.

**3:00 - 3:48: FieldVision and Real-time Analytics**
A new project called **FieldVision** is introduced, which generates a constant stream of pose tracking data throughout the game. This 3D animated visualization is visible to fans on their desktops or mobile phones, offering an immersive perspective. Bigtable is crucial for powering FieldVision due to its low-latency requirements. The video demonstrates how the system can analyze pitch trajectory, exit velocity, and batted ball spray charts in real-time.

**3:48 - 4:45: Home-field Advantage - How Teams Use Data**
John Krazit, Director of Baseball Systems at Arizona Diamondbacks, explains how teams leverage this data. They pull data from various stats API resources, including regular Gameday feeds and pose data. This information is then aggregated and filtered into actionable insights for player performance reviews, player acquisitions, and running proprietary models. John emphasizes the competitive advantage gained through these insights, hinting at deeper analysis without revealing specific tools or strategies.

**4:45 - 5:58: Play by Play - Empowering Analysts and Broadcasters**
Sarah Langs, Reporter/Researcher at MLB Advanced Media, details how she uses the cloud-powered data to tell compelling stories. She relies on websites like Baseball Savant, Fangraphs, and Baseball-Reference. **Statcast**, in particular, allows her to measure virtually everything on the field – pitch speed, home run distance, player movements, and more. This data helps her identify extreme instances (e.g., longest career home run, fastest pitch) and broader career trends. Brian Kenny, Sportscaster and Host at MLB Now, echoes this, stating that finding answers in baseball now frequently involves asking questions of the data.

**5:58 - 6:30: Conclusion - The Future of Sports Data Analytics**
Priyanka summarizes the journey: a massive amount of data is captured from the field, processed, and published via APIs. This powers tools like Savant, Film Room, and internal/external research, benefiting fans, teams, and analysts. She expresses excitement for the future of sports data analytics, highlighting the powerful tools provided by Google Cloud.

## Extract structured output from a YouTube video

Next, we'll show how to extract structured outputs using [controlled generation](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/control-generated-output), in this case from a video that covers multiple topics.

We're going to see how Gemini 2.5 Pro's long context window (roughly 1 million tokens) can help analyze [the full opening keynote](https://www.youtube.com/watch?v=V6DJYGn2SFk) from the Google Cloud Next '24 conference (April 2024) - all 1 hour and 41 minutes of it!

In [8]:
# Link to full Cloud Next '24 Opening Keynote video (1 hour 41 minutes).
# To analyze it, uncomment the next line and comment out the summary-video line below.
# cloud_next_keynote_video_url = "https://www.youtube.com/watch?v=V6DJYGn2SFk"

# Currently active: the 14-min keynote summary video (faster)
cloud_next_keynote_video_url = "https://www.youtube.com/watch?v=M-CzbTUVykg"

display_youtube_video(cloud_next_keynote_video_url)

Below is a prompt to extract the biggest product announcements that were made during this keynote. We use the response schema to show that we want valid JSON output in a particular form, including a constraint specifying that the "product status" field should be either GA, Preview, or Coming Soon.

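The following cell may take several minutes to run if you switch to the full keynote, as Gemini 2.5 Pro then analyzes all 101 minutes of the video and audio to produce comprehensive results. With the 14-minute summary video that is selected by default above, it finishes faster.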

In [9]:
# Set up pieces (prompt, response schema, config) for the keynote video extraction

# Instructions sent together with the video
video_extraction_prompt = (
    "Provide a summary of the biggest product announcements "
    "that were made in this Google Cloud Next keynote video including:\n"
    "  - name\n"
    '  - product status: "GA" (Generally Available), "Preview", or "Coming Soon"\n'
    "  - key quote from the presenter about the product, 20 words or fewer per product\n\n"
    "Make sure to look through and listen to the whole video, start to finish, to find "
    "the top product announcements. Only reference information in the video itself in "
    "your response."
)

# Response schema: a JSON array with one object per announcement.
# "product_status" is restricted to the three values named in the prompt, and all
# three fields are listed as required so every returned object carries the same
# keys (schema fields are otherwise optional and may be left out by the model).
video_extraction_response_schema = {
    "type": "ARRAY",
    "items": {
        "type": "OBJECT",
        "properties": {
            "name": {"type": "STRING"},
            "product_status": {
                "type": "STRING",
                "enum": ["GA", "Preview", "Coming Soon"],
            },
            "quote_from_presenter": {"type": "STRING"},
        },
        "required": ["name", "product_status", "quote_from_presenter"],
    },
}

# Generation settings: JSON output that follows the schema above, temperature 0
video_extraction_json_generation_config = GenerateContentConfig(
    temperature=0.0,
    max_output_tokens=8192,
    response_mime_type="application/json",
    response_schema=video_extraction_response_schema,
)

# Run the extraction with the Gemini Pro model: text prompt first, then the
# video referenced by its URL
video_extraction_response = client.models.generate_content(
    config=video_extraction_json_generation_config,
    contents=[
        video_extraction_prompt,
        Part.from_uri(mime_type="video/webm", file_uri=cloud_next_keynote_video_url),
    ],
    model=GEMINI_PRO_MODEL_ID,
)

# Print the raw response text
print(video_extraction_response.text)

[
  {
    "name": "Gemini 1.5 Pro",
    "product_status": "Preview",
    "quote_from_presenter": "Bringing the world's largest context window to developers."
  },
  {
    "name": "Google Axion Processors",
    "product_status": "Preview",
    "quote_from_presenter": "Our first custom Arm-based CPU designed for the data center."
  },
  {
    "name": "NVIDIA GB200 NVL72",
    "product_status": "Coming Soon",
    "quote_from_presenter": "NVIDIA's newest Grace Blackwell generation of GPUs, coming to Google Cloud early in 2025."
  },
  {
    "name": "Hyperdisk ML",
    "product_status": "Preview",
    "quote_from_presenter": "Our next-generation block storage service optimized for AI inference and serving workloads."
  },
  {
    "name": "Vertex AI Agent Builder",
    "product_status": "Preview",
    "quote_from_presenter": "Create customer agents that are amazingly powerful in just three key steps."
  },
  {
    "name": "Google Vids",
    "product_status": "Coming Soon",
    "quote_from_pr

In [10]:
# Convert structured output from response to data frame for display and/or further analysis.
# NOTE(review): `.parsed` is presumably None when the returned text is not valid
# JSON (e.g. a cut-off response), which would give an empty data frame here — confirm.
video_extraction_response_df = pd.DataFrame(video_extraction_response.parsed)

# Render the data frame with itables
show(video_extraction_response_df)

  int: is not an instance of int


0
Loading ITables v2.6.1 from the internet...  (need help?)


## Creating insights from analyzing multiple YouTube videos together

### Google "Year in Search" videos
Now, consider expanding the problem to a more common enterprise use case: extracting information from _multiple_ YouTube videos at once.

This time, we'll use [Google's "Year in Search" videos](https://about.google/intl/ALL_us/stories/year-in-search/), which summarize the questions, people, and moments that captured the world's attention in each year. As of fall 2024, there are 14 of these videos, each 2-4 minutes in length, from [2010](https://www.youtube.com/watch?v=F0QXB5pw2qE) through [2023](https://www.youtube.com/watch?v=3KtWfp0UopM).

We start by reading in a CSV file that has links to all the videos.

In [11]:
# Read in table of Year in Search video links from public CSV file.
# The location is a gs:// URI, so pandas needs a Cloud Storage filesystem
# backend available in the runtime (presumably gcsfs — TODO confirm).
GOOGLE_YEAR_IN_SEARCH_VIDEO_LINKS_CSV_GCS_URI = (
    "gs://github-repo/video/google_year_in_search_video_links.csv"
)

year_in_search_yt_links = pd.read_csv(GOOGLE_YEAR_IN_SEARCH_VIDEO_LINKS_CSV_GCS_URI)

# Last expression of the cell: displays the links table (columns: year, yt_link)
year_in_search_yt_links

Unnamed: 0,year,yt_link
0,2023,https://www.youtube.com/watch?v=3KtWfp0UopM
1,2022,https://www.youtube.com/watch?v=4WXs3sKu41I
2,2021,https://www.youtube.com/watch?v=EqboAI-Vk-U
3,2020,https://www.youtube.com/watch?v=rokGy0huYEA
4,2019,https://www.youtube.com/watch?v=ZRCdORJiUgU
5,2018,https://www.youtube.com/watch?v=6aFdEhEZQjE
6,2017,https://www.youtube.com/watch?v=vI4LHl4yFuo
7,2016,https://www.youtube.com/watch?v=KIViy7L_lo8
8,2015,https://www.youtube.com/watch?v=q7o7R5BgWDY
9,2014,https://www.youtube.com/watch?v=DVwHCGAr_OE


### Set up for analyzing multiple video files

Let's say we are a sports agency that wants to see which athletes or teams appear most often in these videos as a measure of cultural relevance. Instead of watching and manually counting, we can use Gemini's multimodal capabilities and world knowledge to extract each appearance of an athlete or team into a structured output that we can use for further analysis.

The system instructions, prompt, and response schema that will apply to all 14 videos are each created in the cell below.

In [12]:
# Set up pieces (prompt, response schema, config) for Google Year in Search videos

# System instruction text describing the analyst role
multiple_video_extraction_system_instruction_text = (
    "You are a video analyst that "
    "carefully looks through all frames of provided videos, extracting out the "
    "pieces necessary to respond to user prompts."
)

# Prompt applied to every video
multiple_video_extraction_prompt = (
    "Which sports athletes or teams are mentioned or "
    "shown in this video? Please look through each frame carefully, and respond "
    "with a complete list that includes the athlete or team's name (1 row per "
    "athlete or team), whether they are an athlete or team, the sport they play, "
    "and the timestamp into the video at which they appear (in mm:ss format, "
    "do not give extra precision) for each one."
)

# Response schema: a JSON array with one object per athlete/team appearance.
# All four fields are listed as required so every returned object carries the
# columns that the later analysis cells select by name (schema fields are
# otherwise optional and may be left out by the model).
multiple_video_extraction_response_schema = {
    "type": "ARRAY",
    "items": {
        "type": "OBJECT",
        "properties": {
            "name": {"type": "STRING"},
            "athlete_or_team": {"type": "STRING", "enum": ["athlete", "team"]},
            "sport": {"type": "STRING"},
            "video_timestamp": {"type": "STRING"},
        },
        "required": ["name", "athlete_or_team", "sport", "video_timestamp"],
    },
}

# Generation settings: JSON output that follows the schema above, temperature 0
multiple_video_extraction_json_generation_config = GenerateContentConfig(
    temperature=0.0,
    max_output_tokens=8192,
    response_mime_type="application/json",
    response_schema=multiple_video_extraction_response_schema,
)

Next, we'll set up to run each of these prompt/video pairs through the Gemini API _asynchronously_. This allows us to send all the requests to Gemini at once, then wait for all the answers to come back - a more efficient process than sending them synchronously (one-by-one). See more details in [this Google Cloud Community Medium blog post](https://medium.com/google-cloud/how-to-prompt-gemini-asynchronously-using-python-on-google-cloud-986ca45d9f1b).


In [13]:
# Function for asynchronous generation


@retry(wait=wait_random_exponential(multiplier=1, max=120), stop=stop_after_attempt(2))
async def async_generate(prompt: str, yt_link: str) -> dict:
    """Send one prompt/video pair to the Gemini Pro model asynchronously.

    The call is wrapped by tenacity's ``@retry``: a failed attempt is retried
    after a random exponential wait, for at most two attempts in total.

    Args:
        prompt: Text prompt sent alongside the video.
        yt_link: URL of the YouTube video to analyze.

    Returns:
        The response converted with ``to_json_dict()``.

    Raises:
        Exception: any error from the request is logged and re-raised so the
            ``@retry`` wrapper can decide whether to try again; once the
            attempts are used up, the wrapper's own error reaches the caller.
    """
    try:
        response = await client.aio.models.generate_content(
            model=GEMINI_PRO_MODEL_ID,
            contents=[prompt, Part.from_uri(file_uri=yt_link, mime_type="video/webm")],
            config=multiple_video_extraction_json_generation_config,
        )

        return response.to_json_dict()
    except Exception as e:
        # Log the failure, then hand the original exception back to tenacity.
        # Attempt counting is done by the decorator's stop condition, so no
        # manual bookkeeping is needed here.
        print("Something failed, retrying")
        print(e)
        raise

### Run asynchronous Gemini calls to do video extraction

In [14]:
# Perform asynchronous calls across all videos, gather responses
import asyncio

# Timestamp from the event loop's clock, taken before any request is awaited
start_time = asyncio.get_event_loop().time()

# One coroutine per video link; nothing is awaited at this point
get_responses = [
    async_generate(multiple_video_extraction_prompt, yt_link)
    for yt_link in year_in_search_yt_links["yt_link"]
]

# Await all of them together (top-level await, as supported in notebooks);
# gather returns the results in the same order as get_responses
multiple_video_extraction_responses = await asyncio.gather(*get_responses)

end_time = asyncio.get_event_loop().time()

elapsed_time = end_time - start_time

print(f"Elapsed time: {elapsed_time:.2f} seconds")

Elapsed time: 46.45 seconds


### Extract and analyze video results across years

Once we have the results from Gemini, we can process them and get a table of every athlete or team appearance across all 14 "Year in Search" videos.

In [15]:
# Work on a copy of the links table so the original frame stays as loaded
year_in_search_responses = year_in_search_yt_links.copy()

# Keep each raw response next to its year/link, serialized as a JSON string
year_in_search_responses["gemini_response"] = list(
    map(json.dumps, multiple_video_extraction_responses)
)


def extract_result_df_from_gemini_response(
    year: int, gemini_response: str
) -> pd.DataFrame:
    """Turn one serialized Gemini response into a data frame of extracted rows.

    Args:
        year: Value written to the ``year`` column of every returned row.
        gemini_response: JSON string of a response dict; the structured output
            is read from the text of the first candidate's content parts.

    Returns:
        A data frame built from the JSON in the response text, plus a ``year``
        column. If the response is missing, has no candidate/content/parts, or
        its text is not valid JSON, a message is printed and an empty frame
        (with only the ``year`` column) is returned, so one bad response does
        not stop the processing of the others.
    """
    try:
        parts = json.loads(gemini_response)["candidates"][0]["content"]["parts"]

        # Join the text of all parts (identical to reading parts[0] when there
        # is a single text part); parts flagged as "thought" are skipped.
        extract_response_text = "".join(
            part.get("text") or "" for part in parts if not part.get("thought")
        )

        extract_result_df = pd.DataFrame(json.loads(extract_response_text))
    except (TypeError, KeyError, IndexError, AttributeError, ValueError) as e:
        # ValueError also covers json.JSONDecodeError (empty or cut-off text)
        print(f"Could not extract results for {year}: {e!r}")
        extract_result_df = pd.DataFrame()

    extract_result_df["year"] = year

    return extract_result_df


# Build one extraction data frame per row (year) from its stored JSON response
year_in_search_responses["extract_result_df"] = year_in_search_responses.apply(
    lambda row: extract_result_df_from_gemini_response(
        row["year"], row["gemini_response"]
    ),
    axis=1,
)

# Stack the per-year frames into one table and fix the column order
all_year_in_search_extractions = pd.concat(
    year_in_search_responses["extract_result_df"].tolist(), ignore_index=True
)[["year", "name", "athlete_or_team", "sport", "video_timestamp"]]

# Render the combined table with itables
show(all_year_in_search_extractions)

  int: is not an instance of int


0
Loading ITables v2.6.1 from the internet...  (need help?)


Finally, we can count the number of years in which each athlete or team appeared in these videos, and return results for those who appeared more than once.

In [16]:
# Analyze results to show athletes/teams showing up most often in Year in Search videos
multiple_year_in_search_app = (
    all_year_in_search_extractions.assign(
        # Convert 'name' to uppercase to handle e.g. "LeBron" vs "Lebron"
        name=all_year_in_search_extractions["name"].str.upper(),
        # Convert 'athlete_or_team' to lowercase for consistency
        athlete_or_team=all_year_in_search_extractions["athlete_or_team"].str.lower(),
    )
    # Named aggregation instead of groupby().apply(): the grouping keys stay as
    # regular columns and only the listed columns are aggregated
    .groupby(["name", "athlete_or_team"], as_index=False)
    .agg(
        # Aggregate 'sport' across type and name (handling different cases);
        # missing values are dropped so the sort only sees strings
        sport=(
            "sport",
            lambda sports: ", ".join(sorted(sports.dropna().str.lower().unique())),
        ),
        # Count # of diff years in which each athlete/team appears in video
        num_years=("year", "nunique"),
    )
    # Filter to only those appearing multiple times
    .query("num_years >= 2")
    .sort_values(["num_years", "name"], ascending=[False, True])
    .reset_index(drop=True)
)

# Display results (heading with a properly closed bold tag, then the table)
display(Markdown("<b>Athletes/Teams Appearing in Multiple Year in Search Videos</b>"))
display(multiple_year_in_search_app)

  .apply(


<b>Athletes/Teams Appearing in Multiple Year in Search Videos<b>

Unnamed: 0,name,athlete_or_team,sport,num_years
0,USAIN BOLT,athlete,"athletics, track and field",4
1,LEBRON JAMES,athlete,basketball,3
2,NAOMI OSAKA,athlete,tennis,3
3,SERENA WILLIAMS,athlete,tennis,3
4,SIMONE BILES,athlete,gymnastics,3
5,CLEVELAND CAVALIERS,team,basketball,2
6,COCO GAUFF,athlete,tennis,2
7,GOLDEN STATE WARRIORS,team,basketball,2
8,HOUSTON ASTROS,team,baseball,2
9,LEICESTER CITY FC,team,soccer,2
