In [None]:
# Install moviepy; a later cell imports VideoFileClip / ImageSequenceClip from it
# to resample the uploaded video.
!pip install moviepy

In [None]:
from google import genai
from google.colab import userdata

# Read the API key stored under the name "GEMINI_API_KEY" through google.colab's
# userdata helper, so the key itself never appears in the notebook source.
gemini_api_key = userdata.get("GEMINI_API_KEY")
# Shared client object; the request cell further down calls client.models.generate_content.
client = genai.Client(api_key=gemini_api_key)

In [None]:
import ipywidgets as widgets
from IPython.display import display
from google.colab import files

# Open the upload dialog; the result maps each uploaded file name to its bytes.
uploaded = files.upload()

# A cancelled dialog yields an empty mapping. Fail loudly here instead of
# leaving video_file_name / video_bytes undefined (fresh kernel) or pointing
# at a previous upload (stale kernel).
if not uploaded:
  raise ValueError("No file was uploaded; re-run this cell and choose a video.")

# Only one video is analysed downstream. Keep the last uploaded entry (the same
# choice the notebook always made), but say so when several files were given.
if len(uploaded) > 1:
  print(f"{len(uploaded)} files uploaded; only the last one will be used.")

video_file_name, video_bytes = list(uploaded.items())[-1]

print(f"Uploaded video: {video_file_name}")

In [None]:
# Print the installed google-genai package metadata (useful for checking the SDK version).
!pip show google-genai

In [None]:
# Sampling rate shared by the local frame-extraction preview and by the
# video_metadata fps value sent with the model request.
FRAMES_PER_SECOND = 8

In [None]:
# MoviePy 1.x exposes these classes under moviepy.editor; MoviePy 2.x removed
# that module and exports them from the package root. Support both layouts so
# the unpinned install cannot break this cell.
try:
    from moviepy.editor import VideoFileClip, ImageSequenceClip
except ImportError:
    from moviepy import VideoFileClip, ImageSequenceClip
import os
from IPython.display import display, Video
import tempfile
from PIL import Image
import numpy as np

# Set desired resolution (e.g., (640, 480) for 640 pixels wide by 480 pixels tall), or None for original resolution
TARGET_RESOLUTION = None # (640, 480)

# Frames and the re-encoded preview live in a temporary directory that is
# removed when the block exits; the preview is embedded in the output first.
with tempfile.TemporaryDirectory() as tmpdir:
    frame_paths = []

    # Sample the source clip at FRAMES_PER_SECOND. The try/finally guarantees
    # the clip is closed even if decoding or saving a frame fails.
    clip = VideoFileClip(video_file_name)
    try:
        frame_count = int(clip.duration * FRAMES_PER_SECOND)
        for i in range(frame_count):
            frame = Image.fromarray(clip.get_frame(i / FRAMES_PER_SECOND))

            # Resize frame if a target resolution is specified
            if TARGET_RESOLUTION:
                frame = frame.resize(TARGET_RESOLUTION)

            frame_path = os.path.join(tmpdir, f"frame_{i:04d}.png")
            frame.save(frame_path)
            frame_paths.append(frame_path)
    finally:
        clip.close()

    # A clip shorter than one sampling interval produces no frames; report that
    # clearly instead of letting ImageSequenceClip fail on an empty list.
    if not frame_paths:
        raise ValueError(
            f"No frames extracted from {video_file_name!r} at {FRAMES_PER_SECOND} FPS."
        )

    # Create a video clip from the extracted frames
    output_video_path = os.path.join(tmpdir, "output_video.mp4")
    video_clip = ImageSequenceClip(frame_paths, fps=FRAMES_PER_SECOND)
    try:
        video_clip.write_videofile(output_video_path, codec='libx264')

        # Display the created video
        print(f"Displaying video at {FRAMES_PER_SECOND} FPS:")
        if TARGET_RESOLUTION:
            print(f"Resized to {TARGET_RESOLUTION}")
        # embed=True inlines the video data, so the preview survives tmpdir cleanup.
        display(Video(output_video_path, embed=True))
    finally:
        video_clip.close()

print("Finished creating and displaying video.")

In [None]:
from IPython.display import Video

# Embed the originally uploaded file for side-by-side comparison with the
# resampled preview. video_file_name only exists once the upload cell has run,
# so a NameError here is turned into a hint rather than a traceback.
try:
  display(Video(video_file_name, embed=True))
except NameError:
  print("Please upload a video first in the cell above.")

In [None]:
# Instruction text sent to the model together with the video. It asks for a
# single JSON object with the keys "make_miss", "range" and "confidence"; the
# parsing cell at the end of the notebook loads that object with json.loads.
PROMPT='''
You are classifying a single basketball shot attempt from a short video clip.

Return ONLY a single JSON object per the schema below—no extra text, no explanations.

TASKS
1) Decide if the shot is a MAKE or MISS.
2) Decide the range category: LAY_UP, IN_PAINT, MID_RANGE, THREE_POINTER, or FREE_THROW.

DEFINITIONS (strict)
- MAKE: Ball passes completely through the hoop from above during the clip (counts even if there’s a visible and-1).
- MISS: Any shot that does not go in (including blocked after release, airball, rim-out). If no clear attempt occurs, set "make_miss": null.

Range category is based on shooter position at RELEASE and the context of the play:
- LAY_UP: At/under the rim (layups, finger rolls, scoop shots, tip-ins, putbacks, and dunks all count as LAY_UP).
- IN_PAINT: Non-layup two-point attempts with both feet inside the painted lane at release (floaters/runners/hooks/post moves inside paint).
- MID_RANGE: Two-point attempts outside the paint but inside the 3-pt line (pull-ups, long twos, post fades outside paint).
- THREE_POINTER: Both feet clearly behind the 3-pt arc at release. If any part of either foot is on the line, classify as MID_RANGE.
- FREE_THROW: A stationary, unguarded shot taken from the free-throw line during a stoppage (typical cues: shooter at stripe, lane players lined up outside the lane, official handing ball, no defenders contesting the shot).

TIE-BREAKS / EDGE CASES
- Tip-ins/putbacks: Treat as LAY_UP.
- Floaters/Runners: Inside paint → IN_PAINT; outside paint → MID_RANGE.
- Bank vs swish: Ignore; use location only.
- Corner threes: Toe on line → MID_RANGE; clearly behind → THREE_POINTER.
- Blocked shots: If release happened → MISS with appropriate range. If blocked before release, output the first completed shot attempt in the clip.
- Free throws: If free-throw context is clear, use FREE_THROW regardless of whether the shooter’s feet appear slightly ahead/behind the painted line due to angle. Do NOT label normal live-play mid-range shots near the stripe as FREE_THROW.
- Poor camera/occlusion: Use best cues (paint edges, arc line, rim, lane lineup for free throws). When uncertain between two non–free-throw ranges, choose the closer-in one.

CONSTRAINTS
- Classify the first complete attempt in the clip; ignore later attempts.
- Output must be valid JSON only. Do not include reasoning or extra fields.

OUTPUT SCHEMA (use exactly these keys)
{
  "make_miss": "MAKE | MISS | null",
  "range": "LAY_UP | IN_PAINT | MID_RANGE | THREE_POINTER | FREE_THROW | null",
  "confidence": 0.0
}
'''


In [None]:
# Sends the uploaded video inline with the request. This path is intended only
# for small videos (under ~20 MB); larger files should be uploaded separately
# and referenced instead of being embedded in the request body.

import mimetypes
import time

from google import genai
from google.genai import types

MAX_INLINE_VIDEO_BYTES = 20 * 1024 * 1024

# Surface the size precondition instead of leaving it as a comment: warn, but
# still attempt the request so the service remains the final arbiter.
if len(video_bytes) >= MAX_INLINE_VIDEO_BYTES:
    print(
        f"Warning: video is {len(video_bytes) / (1024 * 1024):.1f} MB; "
        "inline upload is intended for videos under 20 MB and the request may be rejected."
    )

# Derive the MIME type from the uploaded file's extension (e.g. .mov, .webm)
# rather than always claiming MP4; fall back to MP4 when it cannot be inferred.
guessed_mime_type = mimetypes.guess_type(video_file_name)[0]
if guessed_mime_type and guessed_mime_type.startswith('video/'):
    video_mime_type = guessed_mime_type
else:
    video_mime_type = 'video/mp4'

# perf_counter is monotonic, so the elapsed time is immune to wall-clock adjustments.
start_time = time.perf_counter()
response = client.models.generate_content(
    model='models/gemini-2.5-flash',
    contents=types.Content(
        parts=[
            types.Part(
                inline_data=types.Blob(data=video_bytes, mime_type=video_mime_type),
                # Ask the service to sample the video at the notebook's configured rate.
                video_metadata=types.VideoMetadata(fps=FRAMES_PER_SECOND)
            ),
            types.Part(text=PROMPT)
        ]
    ),
    config=types.GenerateContentConfig(
        media_resolution=types.MediaResolution.MEDIA_RESOLUTION_MEDIUM
    ),
)
end_time = time.perf_counter()
print(f"Time taken: {end_time - start_time} seconds")

In [None]:
# Show the raw response object returned by generate_content for inspection.
response

In [None]:
import json

# response.text is the SDK's accessor for the text of the first candidate; it is
# empty/None when the model returned no text (e.g. a blocked or empty response),
# which is reported explicitly instead of failing on candidates[0]...parts[0].
output_string = response.text
if not output_string:
    raise ValueError("The model response contains no text to parse.")

# Extract the JSON object from the output string. Taking the span from the
# first '{' to the last '}' tolerates Markdown code fences with any language
# tag or line ending, as well as stray text around the object.
json_start = output_string.find('{')
json_end = output_string.rfind('}')
if json_start == -1 or json_end < json_start:
    raise ValueError(f"No JSON object found in model output: {output_string!r}")
json_string = output_string[json_start:json_end + 1]

# Parse the JSON string into a Python dictionary
json_object = json.loads(json_string)

display(json_object)