In [4]:
import pandas as pd

splits = {'train': 'data/train.json', 'validation': 'data/val.json', 'test': 'data/test.json'}
df = pd.read_json("hf://datasets/hbfreed/Picklebot-2M/" + splits["train"])

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
#Do EDA, get number of unique pitcher, pitch types, strikes, balls
print("Number of unique pitchers:", df['pitcher'].nunique())
print("Count of strikes:", df[df['pitch_result'] != 'Ball'].shape[0])
print("Count of balls:", df[df['pitch_result'] == 'Ball'].shape[0])
# Get the number of unique pitches
print("Number of unique pitches:", df['pitch'].nunique())
# Get the video_link that has the string "a726eb60-fc99-45b1-b57f-485d4ff95ce5"
print("Video link with specific ID:", df[df['video_link'].str.contains("a726eb60-fc99-45b1-b57f-485d4ff95ce5")]['video_link'].values[0])

Number of unique pitchers: 1881
Count of strikes: 699313
Count of balls: 1416961
Number of unique pitches: 17
Video link with specific ID: https://baseballsavant.mlb.com/sporty-videos?playId=a726eb60-fc99-45b1-b57f-485d4ff95ce5


In [6]:
df.head()

Unnamed: 0,date,pitch,mph,spin_rate,pitcher,batter,zone,count,inning,pitch_result,video_link
0,2021-05-01,FF,96.6,2478,"Scott, Tanner(L)","Piscotty, Stephen(R)",12,1-2,Bot 8,Ball,https://baseballsavant.mlb.com/sporty-videos?p...
1,2021-06-03,KC,80.7,2663,"Workman, Brandon(R)","Alvarez, Yordan(L)",13,0-2,Bot 9,Ball,https://baseballsavant.mlb.com/sporty-videos?p...
2,2023-04-25,SI,91.0,2055,"Pérez, Martín(L)","Maile, Luke(R)",14,0-0,Bot 3,Ball,https://baseballsavant.mlb.com/sporty-videos?p...
3,2020-09-27,FF,93.5,1715,"Rodriguez, Nivaldo(R)","Odor, Rougned(L)",11,0-2,Bot 5,Ball,https://baseballsavant.mlb.com/sporty-videos?p...
4,2017-09-23,FF,99.2,2554,"Ellington, Brian(R)","Lamb, Jake(L)",11,2-0,Bot 6,Ball,https://baseballsavant.mlb.com/sporty-videos?p...


In [7]:
#get pitcher names who have thrown between 1200 and 1300 pitches
pitcher_counts = df['pitcher'].value_counts()
pitchers_1200_1300 = pitcher_counts[(pitcher_counts >= 1200) & (pitcher_counts <= 1300)].index.tolist()
print("Pitchers who have thrown between 1200 and 1300 pitches:", pitchers_1200_1300)

Pitchers who have thrown between 1200 and 1300 pitches: ['Thompson, Zach(R)', 'Blevins, Jerry(L)', 'Chargois, JT(R)', 'Phillips, Evan(R)', 'Hutchison, Drew(R)', 'Drake, Oliver(R)', 'Ventura, Yordano(R)', 'Staumont, Josh(R)', 'Hardy, Blaine(L)', 'Crawford, Kutter(R)', 'Bowman, Matt(R)', 'Suero, Wander(R)', 'Abreu, Bryan(R)', 'Moronta, Reyes(R)', 'Wilson, Alex(R)', 'Lambert, Peter(R)', 'Brown, Hunter(R)', 'Mize, Casey(R)', 'Jurado, Ariel(R)', 'Raley, Brooks(L)', 'Bielak, Brandon(R)', 'Osuna, Roberto(R)', 'Osich, Josh(L)', 'Adleman, Tim(R)', 'Tropeano, Nick(R)', 'Kela, Keone(R)', 'Kinley, Tyler(R)', 'May, Dustin(R)', 'Erlin, Robbie(L)', 'Sparkman, Glenn(R)', 'Johnson, Brian(L)', 'Underwood Jr., Duane(R)', 'Neverauskas, Dovydas(R)', 'Otero, Dan(R)', 'Middleton, Keynan(R)', 'Adams, Austin(R)', 'Bickford, Phil(R)', 'Falter, Bailey(L)', 'Payamps, Joel(R)', 'Oswalt, Corey(R)', 'Domínguez, Seranthony(R)']


In [8]:
#get all unique pitch types
pitch_types = df['pitch'].unique()
print("Unique pitch types:", pitch_types)
#filter for only pither Blevins, Jerry(L)
df = df[df['pitcher'] == 'May, Dustin(R)']
#Unique pitch types: ['FF' 'KC' 'SI' 'SL' 'CU' 'CH' 'FC' 'FA' 'ST' 'FS' '' 'SV' 'KN' 'EP' 'SC'

Unique pitch types: ['FF' 'KC' 'SI' 'SL' 'CU' 'CH' 'FC' 'FA' 'ST' 'FS' '' 'SV' 'KN' 'EP' 'SC'
 'FO' 'CS']


In [9]:
#get size
print(df.shape)
#get the data type of mph
print(df['mph'].dtype)
#convert mph to a float from a string
df['mph'] = pd.to_numeric(df['mph'], errors='coerce')
df = df.dropna(subset=['mph'])
print(df.shape)
print(df['mph'].mean())

(1233, 11)
object
(1233, 11)
94.00145985401458


In [10]:
#get counts of pitch_result
df['pitch_result'].value_counts()

pitch_result
Ball             796
Called Strike    437
Name: count, dtype: int64

In [11]:
#print the min, 25th, 50th, 75th and max percentiles of mph
print(df['mph'].describe())

count    1233.000000
mean       94.001460
std         4.565858
min        81.100000
25%        91.300000
50%        95.700000
75%        97.600000
max       100.500000
Name: mph, dtype: float64


In [12]:
import cv2
import numpy as np
import requests
from bs4 import BeautifulSoup

def get_video_url(play_id):
    url = f"https://baseballsavant.mlb.com/sporty-videos?playId={play_id}"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    video_tag = soup.find("video")
    if video_tag:
        source_tag = video_tag.find("source")
        if source_tag and source_tag.get("src"):
            return source_tag["src"]
    return None

def get_video(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")

    video_tag = soup.find("video")
    if video_tag:
        source_tag = video_tag.find("source")
        if source_tag and source_tag.get("src"):
            return source_tag["src"]
    return None

def download_video(video_url, output_file="temp.mp4"):
    r = requests.get(video_url, stream=True)
    with open(output_file, "wb") as f:
        for chunk in r.iter_content(chunk_size=1024*1024):
            if chunk:
                f.write(chunk)

def video_to_numpy(video_path, resize=None, max_frames=None):
    cap = cv2.VideoCapture(video_path)
    frames = []
    count = 0

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if resize:
            frame = cv2.resize(frame, resize)
        frames.append(frame)
        count += 1
        if max_frames and count >= max_frames:
            break

    cap.release()
    return np.array(frames)

def save_numpy_array(array, filename="video_frames.npy"):
    np.save(filename, array)


Prompt:
The seconds when the pitcher is possibly in the middle, and the location of the pitchers change depending on the video. We may have to use ML for this.



ChatGPT said:
You're absolutely right — since the pitcher’s timing and location vary from video to video, a fixed crop won't work well. This is a classic use case for computer vision with machine learning. Here’s a structured way to approach it:

✅ Goal:
Automatically detect and crop the pitcher across different videos with varying camera angles and timings.

🧠 Solution Options:
Option 1: Pose Estimation (Recommended)
Use a pre-trained pose estimation model like:

OpenPose

MediaPipe Pose (Google)

YOLO + Keypoint detection

✅ Pros:

Accurate for identifying human bodies.

Can isolate the pitcher based on pose and movement patterns.

Option 2: Object Detection
Train or use a model (like YOLOv5, YOLOv8, or Detectron2) to detect “pitcher” as a custom class.

✅ Pros:

Works on frame-by-frame basis.

More precise than pose estimation in cluttered scenes.

❌ Cons:

Requires labeled data (you may need to annotate a small dataset of frames showing pitchers).

🔧 Workflow Example (with MediaPipe Pose in Colab):
python
Copy
Edit
import cv2
import mediapipe as mp

mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

def detect_pitcher_bbox(video_path, max_frames=100):
    cap = cv2.VideoCapture(video_path)
    bboxes = []

    for i in range(max_frames):
        ret, frame = cap.read()
        if not ret:
            break
        h, w, _ = frame.shape

        # Run pose estimation
        results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if results.pose_landmarks:
            # Get min/max of landmark coordinates for bounding box
            x_coords = [lmk.x for lmk in results.pose_landmarks.landmark]
            y_coords = [lmk.y for lmk in results.pose_landmarks.landmark]
            xmin, xmax = min(x_coords), max(x_coords)
            ymin, ymax = min(y_coords), max(y_coords)

            # Convert to pixel coordinates
            x, y = int(xmin * w), int(ymin * h)
            width, height = int((xmax - xmin) * w), int((ymax - ymin) * h)
            bboxes.append((x, y, width, height))

    cap.release()
    return bboxes
This gives you approximate bounding boxes of the pitcher from the first max_frames frames.

🧪 Next Steps:
You can either:

Crop each frame using the detected bounding box.

Track the pitcher across frames.

Save the cropped regions into a new video or a NumPy array.

Let me know if you want help setting up pose-based cropping, YOLO training with pitcher labels, or a pipeline to auto-crop and save video segments.

In [13]:
from ultralytics import YOLO
import cv2

# Load YOLOv8 model once globally, force to use GPU if available
model = YOLO('yolo11n.pt')
if hasattr(model, 'to'):
    model.to('cuda')

def detect_and_draw_all_humans(frame):
    """
    Detects all humans in the frame using YOLOv8.
    Crops 20% off each horizontal side before detection for focus.
    Returns:
        - The original frame with bounding boxes drawn.
        - A list of bounding boxes: (x1, y1, x2, y2, 'person') in original frame coordinates.
    """
    h, w, _ = frame.shape
    left_crop = int(w * 0.2)
    right_crop = int(w * 0.8)

    # Crop the horizontal sides
    cropped_frame = frame[:, left_crop:right_crop]
    rgb_frame = cv2.cvtColor(cropped_frame, cv2.COLOR_BGR2RGB)

    # Run YOLOv8 detection
    results = model.predict(rgb_frame, device=0, verbose=False)[0]

    detected_boxes = []
    for box in results.boxes:
        cls_id = int(box.cls[0])
        cls_name = model.names[cls_id]
        if cls_name == 'person':
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            # Adjust x-coordinates back to original frame
            x1 += left_crop
            x2 += left_crop

            #Draw bounding box on the original frame
            #cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            
            
            detected_boxes.append((x1, y1, x2, y2, cls_name))

    return frame, detected_boxes





In [14]:
# prompt: pull each video from the video output folder, and go through each one in a while loop, converting it to a numpy array

import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

from skimage.metrics import structural_similarity as ssim

def ssim_diff(frame1, frame2):
    grayA = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
    grayB = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
    score, _ = ssim(grayA, grayB, full=True)
    return score

#create a function to go through the video numpy array frame by frame and identify the gradient between each frame by a percentage
def gradient_analysis(frames_array):
    grad_arr = []
    for i in range(1, len(frames_array)):
        #calculate the gradient between the current frame and the previous frame
        grad = ssim_diff(cv2.resize(frames_array[i], (100,100)), cv2.resize(frames_array[i-1], (100,100)))
        grad_arr.append(grad)
        #plot the grad_arr
    # plt.plot(grad_arr)
    # plt.show()
    return grad_arr
#given the grad_arr, we want to crop out the parts in frames array before or after the value in grad_arr goes below 0.4, keep the middle of the video
def crop_video(frames_array, grad_arr):
    if frames_array is None or len(frames_array) <= 10:
        return [frames_array]
    for i in range(int(len(grad_arr))):
        if grad_arr[i] < 0.4:
            #return the frames array before i and run crop_video on the frames array after i+2
            return [frames_array[:i]] + crop_video(frames_array[i+2:], grad_arr[i+2:])
    #if we reach here, it means we didn't find any frames to crop, so we return the original frames_array
    return [frames_array]
    
            
    return [frames_array]
#after we crop, convert video to mp4 and save
def convert_video(frames_array, output_path):
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, 20.0, (frames_array.shape[2], frames_array.shape[1]))
    for frame in frames_array:
        out.write(frame)
    out.release()
    return output_path

#create a function to use MediaPipe Pose from google to identify the bounding box of all the players
def detect_pitcher_bbox(video_path, max_frames=100):
    cap = cv2.VideoCapture(video_path)
    bboxes = []
    mp_pose = mp.solutions.pose
    pose = mp_pose.Pose()
    for i in range(max_frames):
        ret, frame = cap.read()
        if not ret:
            break
        h, w, _ = frame.shape



import os
import numpy as np
import cv2

def process_videos_in_folder(input_folder, output_folder):
    """
    Iterates through each MP4 video in the input folder, processes it to find the lowest human,
    crops the video around that human, and saves the result in the output folder.

    Args:
        input_folder (str): The path to the folder containing input videos.
        output_folder (str): The path to the folder where processed videos will be saved.
    """
    if not os.path.isdir(input_folder):
        print(f"Error: Input folder not found at {input_folder}")
        return

    os.makedirs(output_folder, exist_ok=True)

    video_files = [f for f in os.listdir(input_folder) if f.endswith('.mp4')]

    if not video_files:
        print(f"No MP4 videos found in {input_folder}")
        return

    for video_file in video_files:
        video_path = os.path.join(input_folder, video_file)
        print(f"Processing video: {video_path}")

        frames_array = video_to_numpy(video_path)

        gradient_array = gradient_analysis(frames_array)
        print(f"Gradient array shape: {np.array(gradient_array).shape}")
        print(f"Gradient array: {gradient_array}")

        cropped_frames = crop_video(frames_array, gradient_array)

        tracked_boxes = []

    for i in range(len(cropped_frames)):
        print(f"Cropped frames array shape: {np.array(cropped_frames[i]).shape}")

        frames_segment = cropped_frames[i]
        num_frames = len(frames_segment)
        if num_frames == 0:
            continue

        bounding_boxes = []
        annotated_frames = []

        # Use the middle frame to choose which person to track
        mid_index = num_frames // 2
        mid_frame = frames_segment[mid_index]

        # Detect all humans in the middle frame
        mid_annotated_frame, mid_detected_boxes = detect_and_draw_all_humans(mid_frame)
        candidates = [
            (int(x1), int(y1), int(x2), int(y2))
            for (x1, y1, x2, y2, cls) in mid_detected_boxes
            if cls == 'person'
        ]

        if not candidates:
            print("No humans detected in the middle frame. Skipping segment.")
            continue

        # Pick the tallest person in the middle frame (as an example heuristic)
        def height(b): return b[3] - b[1]
        selected_box = max(candidates, key=height)

        # Apply IoU to track this person across the segment
        def iou(b1, b2):
            xa, ya = max(b1[0], b2[0]), max(b1[1], b2[1])
            xb, yb = min(b1[2], b2[2]), min(b1[3], b2[3])
            inter_area = max(0, xb - xa) * max(0, yb - ya)
            area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
            area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
            union_area = area1 + area2 - inter_area
            return inter_area / union_area if union_area > 0 else 0

        for frame in frames_segment:
            annotated_frame, detected_boxes = detect_and_draw_all_humans(frame)
            candidates = [
                (int(x1), int(y1), int(x2), int(y2))
                for (x1, y1, x2, y2, cls) in detected_boxes
                if cls == 'person'
            ]

            best_box = max(candidates, key=lambda b: iou(b, selected_box), default=None) if candidates else None

            # Check if match is good enough
            if best_box and iou(best_box, selected_box) >= 0.5:
                bounding_boxes.append(best_box)
            else:
                bounding_boxes.append(selected_box)  # fallback to selected
            annotated_frames.append(annotated_frame)

        if not bounding_boxes:
            print("No valid humans detected in this segment. Skipping.")
            continue

        x1s, y1s, x2s, y2s = zip(*bounding_boxes)
        crop_x1, crop_y1 = max(0, min(x1s)), max(0, min(y1s))
        crop_x2, crop_y2 = min(frame.shape[1], max(x2s)), min(frame.shape[0], max(y2s))

        if crop_y2 - crop_y1 > 550:
            print("Cropped region too tall. Skipping segment.")
            continue

        final_frames = [
            frame[crop_y1:crop_y2, crop_x1:crop_x2] for frame in annotated_frames
        ]

        output_filename = f"cropped_{i}_{video_file}"
        output_path = os.path.join(output_folder, output_filename)
        result_path = convert_video(np.array(final_frames), output_path)

        print(f"Cropped video saved to: {result_path}")



        # Cleanup
        try:
            del frames_array
        except NameError as e:
            print(f"NameError: {e}")
        try:
            del gradient_array
        except NameError as e:
            print(f"NameError: {e}")
        try:
            del cropped_frames
        except NameError as e:
            print(f"NameError: {e}")
        try:
            del final_frames
        except NameError as e:
            print(f"NameError: {e}")
        try:
            del annotated_frames
        except NameError as e:
            print(f"NameError: {e}")
        try:
            del bounding_boxes
        except NameError as e:
            print(f"NameError: {e}")
        try:
            del result_path
        except NameError as e:
            print(f"NameError: {e}")

In [15]:
import numpy as np
import cv2

def average_sampled_frames_from_array(frame_array, num_samples=10):
    """
    Averages sampled frames from a NumPy array of frames.

    Args:
        frame_array (np.ndarray): A NumPy array of shape (num_frames, height, width, channels).
        num_samples (int): Number of frames to sample and average.

    Returns:
        np.ndarray: The averaged frame resized to (100, 100).
    """
    total_frames = frame_array.shape[0]
    if total_frames < num_samples:
        raise ValueError("Array has fewer frames than the number of samples requested.")

    indices = np.linspace(0, total_frames - 1, num_samples, dtype=int)
    sampled_frames = frame_array[indices].astype(np.float32)

    avg_frame = np.mean(sampled_frames, axis=0)
    avg_frame = avg_frame.astype(np.uint8)

    # Resize to 100x100
    avg_frame = cv2.resize(avg_frame, (100, 100), interpolation=cv2.INTER_AREA)
    return avg_frame


In [16]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.fc1 = nn.Linear(32 * 25 * 25, 64)
        self.fc2 = nn.Linear(64, 2)  # 2 classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))  # -> (B, 16, 50, 50)
        x = self.pool(F.relu(self.conv2(x)))  # -> (B, 32, 25, 25)
        x = x.view(-1, 32 * 25 * 25)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Load the trained model
device = torch.device("cuda")

# Re-create the model instance
class_model = SimpleCNN()

# Load state dict
class_model.load_state_dict(torch.load("cnn_model.pth", map_location=device))

# Move model to GPU (if available)
class_model = class_model.to(device)

# Set model to evaluation mode if you're doing inference
class_model.eval()

SimpleCNN(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=20000, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=2, bias=True)
)

In [17]:
import torch
import os
import cv2
import shutil

def process_numpy_video_for_inference(frame_array, class_model, video_name, num_samples=10, device='cuda'):
    """
    Processes a NumPy array of video frames for inference using the given model.

    Args:
        frame_array (np.ndarray): NumPy array of shape (num_frames, height, width, channels), in RGB format.
        model (torch.nn.Module): Trained PyTorch model for inference.
        video_name (str): The name of the original video (used for saving output).
        output_folder (str): Path to the output folder where results will be saved.
        num_samples (int): Number of frames to sample and average.
        device (str): 'cuda' or 'cpu'

    Returns:
        str: Predicted label ('pitching' or 'other').
    """
    try:

        # Compute average frame
        avg_frame = average_sampled_frames_from_array(frame_array, num_samples)
        avg_frame_tensor = torch.tensor(avg_frame).permute(2, 0, 1).unsqueeze(0).float() / 255.0
        avg_frame_tensor = avg_frame_tensor.to(device)

        # Predict
        class_model.eval()
        with torch.no_grad():
            output = class_model(avg_frame_tensor)
            _, predicted = torch.max(output, 1)

        label = 'pitching' if predicted.item() == 1 else 'other'


        print(f"Processed {video_name}: {label}")
        return label

    except Exception as e:
        print(f"Error processing {video_name}: {e}")
        return None


In [18]:
import os
import re

# Update output folder path
output_folder = "Video Outputs/"


import tempfile
import os

def crop_video_size(frames_array, detect_and_draw_fn):
    """
    Processes a video in numpy array format, detects the lowest human in each frame,
    crops all frames to the bounding box covering all detected humans, and returns
    the cropped video as a numpy array.

    Args:
        frames_array (np.ndarray): Input video as a numpy array (frames, H, W, C).
        detect_and_draw_fn (function): Function to detect and return (frame, bbox) for each frame.

    Returns:
        np.ndarray: Cropped video as a numpy array.
    """
    bounding_boxes = []
    annotated_frames = []

    for frame in frames_array:
        annotated_frame, bbox = detect_and_draw_fn(frame)
        if bbox:
            bounding_boxes.append(bbox)
            annotated_frames.append(annotated_frame)

    if not bounding_boxes:
        print("No humans detected in this video. Returning original frames.")
        return frames_array

    x1s, y1s, x2s, y2s = zip(*bounding_boxes)
    crop_x1, crop_y1 = max(0, min(x1s)), max(0, min(y1s))
    crop_x2, crop_y2 = min(frames_array.shape[2], max(x2s)), min(frames_array.shape[1], max(y2s))

    final_frames = [
        frame[crop_y1:crop_y2, crop_x1:crop_x2] for frame in annotated_frames
    ]

    return np.array(final_frames)

import os
import re
import tempfile
import numpy as np
import pandas as pd

#Function to flip the video frames horizontally
def flip_video_frames(frames_array):
    """
    Flips the video frames horizontally.

    Args:
        frames_array (np.ndarray): Input video as a numpy array (frames, H, W, C).

    Returns:
        np.ndarray: Flipped video as a numpy array.
    """
    return np.flip(frames_array, axis=2)  # Flip along the width dimension

def process_videos_from_df_numpy(df, starting_row, n_rows, output_folder):
    """
    Processes the first n_rows of the DataFrame by downloading videos from their URLs,
    detecting the main human subject, cropping the video accordingly, and saving the result.

    Args:
        df (pd.DataFrame): The input DataFrame with a 'video_link' column.
        n_rows (int): The number of rows to process.
        output_folder (str): Where to save cropped videos.
    """
    os.makedirs(output_folder, exist_ok=True)

    for index, row in df.head(n_rows).iterrows():
        pitcher_name = row.get('pitcher')
        #remove all non-alphanumeric characters from pitcher name
        if pitcher_name:
            pitcher_name = re.sub(r'\W+', '', pitcher_name)
        else:
            pitcher_name = "UnknownPitcher"


        # Create a subfolder for each pitcher
        pitcher_folder = os.path.join(output_folder, pitcher_name)
        os.makedirs(pitcher_folder, exist_ok=True)
        if index < starting_row:
            continue
        # Extract video link
        video_page_url = row.get('video_link')
        if pd.isna(video_page_url) or not video_page_url:
            print(f"Skipping row {index} due to missing video_link.")
            continue

        match = re.search(r"playId=([a-z0-9\-]+)", video_page_url)
        if not match:
            print(f"Could not extract playId from URL: {video_page_url}")
            continue

        play_id = match.group(1)
        video_url = get_video_url(play_id)

        if not video_url:
            print(f"No video URL found for playId: {play_id}")
            continue

        print(f"Processing row index: {index}, play_id: {play_id}")

        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            download_video(video_url, tmp.name)
            temp_video_path = tmp.name

        try:
            frames_array = video_to_numpy(temp_video_path)
            gradient_array = gradient_analysis(frames_array)
            cropped_segments = crop_video(frames_array, gradient_array)



            for k, segment in enumerate(cropped_segments):
                try:
                    #process numpy video for inference
                    label = process_numpy_video_for_inference(segment, class_model, play_id, num_samples=10, device='cuda')
                    if label != 'pitching':
                        print(f"Skipping segment {k} for play_id {play_id} as it is not a pitching video.")
                        continue
                    # Choose the middle frame of the full video to identify the person
                    mid_index = len(segment) // 2
                    mid_frame = segment[mid_index]

                    mid_annotated, mid_boxes = detect_and_draw_all_humans(mid_frame)
                    candidates = [
                        (int(x1), int(y1), int(x2), int(y2))
                        for (x1, y1, x2, y2, cls) in mid_boxes if cls == 'person'
                    ]
                    #remove all candidates where the height, width is less than 75 pixels
                    candidates = [b for b in candidates if (b[3] - b[1]) > 75]
                    candidates = [b for b in candidates if (b[2] - b[0]) > 75]


                    if not candidates:
                        print("No humans found in the middle frame. Skipping video.")
                        continue

                    # Select most central person based on midpoint
                    def midpoint(b):
                        return (b[0] + b[2]) / 2, (b[1] + b[3]) / 2
                    mid_points = [midpoint(b) for b in candidates]
                    # Set mid_y as 40% of the height of the middle frame
                    mid_y = int(mid_frame.shape[0] * 0.65)
                    # Select the box whose midpoint y is closest to mid_y
                    selected_box = min(candidates, key=lambda b: abs(midpoint(b)[1] - mid_y))
                    print(f"Selected box for tracking: {selected_box}")
                    selected_box = (int(selected_box[0]), int(selected_box[1]), int(selected_box[2]), int(selected_box[3]))

                    # Track the selected person across the entire video using IoU
                    def iou(b1, b2):
                        xa, ya = max(b1[0], b2[0]), max(b1[1], b2[1])
                        xb, yb = min(b1[2], b2[2]), min(b1[3], b2[3])
                        inter_area = max(0, xb - xa) * max(0, yb - ya)
                        area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
                        area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
                        union_area = area1 + area2 - inter_area
                        return inter_area / union_area if union_area > 0 else 0

                    tracked_boxes = [None] * len(segment)
                    tracked_boxes[mid_index] = selected_box
                    annotated_frames = [None] * len(segment)

                    # Track forward from middle
                    prev_box = selected_box
                    for i in range(mid_index, len(segment)):
                        frame = segment[i]
                        annotated, detected_boxes = detect_and_draw_all_humans(frame)
                        annotated_frames[i] = annotated

                        candidates = [
                            (int(x1), int(y1), int(x2), int(y2))
                            for (x1, y1, x2, y2, cls) in detected_boxes if cls == 'person'
                        ]

                        best_box = max(candidates, key=lambda b: iou(b, prev_box), default=None) if candidates else None
                        if best_box and iou(best_box, prev_box) >= 0.75:
                            tracked_boxes[i] = best_box
                            prev_box = best_box
                        else:
                            tracked_boxes[i] = prev_box  # fallback

                    # Track backward from middle
                    prev_box = selected_box
                    for i in range(mid_index - 1, -1, -1):
                        frame = segment[i]
                        annotated, detected_boxes = detect_and_draw_all_humans(frame)
                        annotated_frames[i] = annotated

                        candidates = [
                            (int(x1), int(y1), int(x2), int(y2))
                            for (x1, y1, x2, y2, cls) in detected_boxes if cls == 'person'
                        ]

                        best_box = max(candidates, key=lambda b: iou(b, prev_box), default=None) if candidates else None
                        if best_box and iou(best_box, prev_box) >= 0.75:
                            tracked_boxes[i] = best_box
                            prev_box = best_box
                        else:
                            tracked_boxes[i] = prev_box

                    # Compute crop box from all tracked positions
                    x1s, y1s, x2s, y2s = zip(*tracked_boxes)
                    crop_x1, crop_y1 = max(0, min(x1s)), max(0, min(y1s))
                    crop_x2, crop_y2 = min(frame.shape[1], max(x2s)), min(frame.shape[0], max(y2s))

                    # Add 10% padding
                    padding_x = int(0.1 * (crop_x2 - crop_x1))
                    padding_y = int(0.05 * (crop_y2 - crop_y1))
                    crop_x1 = max(0, crop_x1 - padding_x)
                    crop_y1 = max(0, crop_y1 - padding_y)
                    crop_x2 = min(frame.shape[1], crop_x2 + padding_x)
                    crop_y2 = min(frame.shape[0], crop_y2 + padding_y)
                    #update the crop box to make it a square
                    max_dim = max(crop_y2 - crop_y1, crop_x2 - crop_x1)
                    if (crop_y2 - crop_y1) < max_dim:
                        crop_y1 = (crop_y2 + crop_y1 - max_dim)//2
                        crop_y2 = (crop_y1 + max_dim + crop_y2)//2

                    if (crop_x2 - crop_x1) < max_dim:
                        crop_x1 = (crop_x2 + crop_x1 - max_dim)//2
                        crop_x2 = (crop_x1 + max_dim + crop_x2)//2

                

                    #if height or width of the crop is less than 75 pixels, skip this segment
                    if (crop_y2 - crop_y1) < 100 or (crop_x2 - crop_x1) < 100:
                        print(f"Skipping segment {k} for play_id {play_id} due to small crop size.")
                        continue

                    # Final crop
                    final_frames = [
                        frame[crop_y1:crop_y2, crop_x1:crop_x2] for frame in annotated_frames
                    ]

                    # If the last character is 'L', flip the video frames horizontally
                    if pitcher_name.endswith('L'):
                        print(f"Flipping video frames for left-handed pitcher: {pitcher_name}")
                        final_frames = flip_video_frames(final_frames)

                    #Add square padding to the final frames
                    # max_dim = max(crop_y2 - crop_y1, crop_x2 - crop_x1)
                    # padded_frames = []
                    # for frame in final_frames:
                    #     h, w, _ = frame.shape
                    #     if h < max_dim:
                    #         pad_h = (max_dim - h) // 2
                    #         frame = cv2.copyMakeBorder(frame, pad_h, pad_h, 0, 0, cv2.BORDER_CONSTANT)
                    #     if w < max_dim:
                    #         pad_w = (max_dim - w) // 2
                    #         frame = cv2.copyMakeBorder(frame, 0, 0, pad_w, pad_w, cv2.BORDER_CONSTANT)
                    #     padded_frames.append(frame)





                    output_filename = f"cropped_{play_id}_{k}.mp4"
                    output_path = os.path.join(pitcher_folder, output_filename)
                    result_path = convert_video(np.array(final_frames), output_path)

                    print(f"Saved cropped video to: {result_path}")
                except Exception as e:
                    print(f"Error processing segment {k} for play_id {play_id}: {e}")
                    continue

        finally:
            os.remove(temp_video_path)


# Example usage: Process the first 5 rows and save to 'Video Outputs' folder
process_videos_from_df_numpy(df, 0, 1300, output_folder+f"cropping_videos/")

Processing row index: 121, play_id: f5d533d9-0dec-44cf-9e44-1763025eeeb2
Processed f5d533d9-0dec-44cf-9e44-1763025eeeb2: other
Skipping segment 0 for play_id f5d533d9-0dec-44cf-9e44-1763025eeeb2 as it is not a pitching video.
Processed f5d533d9-0dec-44cf-9e44-1763025eeeb2: pitching
Selected box for tracking: (344, 284, 503, 607)


KeyboardInterrupt: 

In [30]:
import os
import re
import tempfile
import queue
import threading
import concurrent.futures
import pandas as pd
import numpy as np

def process_videos_from_df_numpy(df, starting_row, n_rows, output_folder):
    os.makedirs(output_folder, exist_ok=True)

    task_queue = queue.Queue(maxsize=1)
    stop_signal = object()

    def producer():
        print(f"Starting producer thread for rows {starting_row} to {starting_row + n_rows}")
        for index, row in df.iterrows():
            print(f"Processing row index: {index}, pitcher: {row.get('pitcher')}, video_link: {row.get('video_link')}")
            if index < starting_row:
                continue

            pitcher_name = row.get('pitcher')
            pitcher_name = re.sub(r'\W+', '', pitcher_name) if pitcher_name else "UnknownPitcher"
            pitcher_folder = os.path.join(output_folder, pitcher_name)
            os.makedirs(pitcher_folder, exist_ok=True)

            video_page_url = row.get('video_link')
            if pd.isna(video_page_url) or not video_page_url:
                print(f"Skipping row {index} due to missing video_link.")
                continue

            match = re.search(r"playId=([a-z0-9\-]+)", video_page_url)
            if not match:
                print(f"Could not extract playId from URL: {video_page_url}")
                continue

            play_id = match.group(1)
            video_url = get_video_url(play_id)

            if not video_url:
                print(f"No video URL found for playId: {play_id}")
                continue

            try:
                with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
                    download_video(video_url, tmp.name)
                    temp_video_path = tmp.name

                frames_array = video_to_numpy(temp_video_path)
                os.remove(temp_video_path)
                task_queue.put((index, row, frames_array, play_id, pitcher_folder, pitcher_name))

            except Exception as e:
                print(f"Error in producer for play_id {play_id}: {e}")
                continue

        task_queue.put(stop_signal)

    def consumer():
        while True:
            item = task_queue.get()
            if item is stop_signal:
                break

            index, row, frames_array, play_id, pitcher_folder, pitcher_name = item
            try:
                gradient_array = gradient_analysis(frames_array)
                cropped_segments = crop_video(frames_array, gradient_array)

                for k, segment in enumerate(cropped_segments):
                    try:
                        label = process_numpy_video_for_inference(segment, class_model, play_id, num_samples=10, device='cuda')
                        if label != 'pitching':
                            print(f"Skipping segment {k} for play_id {play_id} as it is not a pitching video.")
                            continue

                        mid_index = len(segment) // 2
                        mid_frame = segment[mid_index]
                        mid_annotated, mid_boxes = detect_and_draw_all_humans(mid_frame)
                        candidates = [(int(x1), int(y1), int(x2), int(y2)) for (x1, y1, x2, y2, cls) in mid_boxes if cls == 'person']
                        candidates = [b for b in candidates if (b[3] - b[1]) > 75 and (b[2] - b[0]) > 75]

                        if not candidates:
                            print("No humans found in the middle frame. Skipping video.")
                            continue

                        def midpoint(b): return (b[0] + b[2]) / 2, (b[1] + b[3]) / 2
                        mid_y = int(mid_frame.shape[0] * 0.65)
                        selected_box = min(candidates, key=lambda b: abs(midpoint(b)[1] - mid_y))
                        selected_box = tuple(map(int, selected_box))
                        print(f"Selected box for tracking: {selected_box}")

                        def iou(b1, b2):
                            xa, ya = max(b1[0], b2[0]), max(b1[1], b2[1])
                            xb, yb = min(b1[2], b2[2]), min(b1[3], b2[3])
                            inter_area = max(0, xb - xa) * max(0, yb - ya)
                            area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
                            area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
                            union_area = area1 + area2 - inter_area
                            return inter_area / union_area if union_area > 0 else 0

                        tracked_boxes = [None] * len(segment)
                        tracked_boxes[mid_index] = selected_box
                        annotated_frames = [None] * len(segment)

                        prev_box = selected_box
                        for i in range(mid_index, len(segment)):
                            frame = segment[i]
                            annotated, detected_boxes = detect_and_draw_all_humans(frame)
                            annotated_frames[i] = annotated
                            candidates = [(int(x1), int(y1), int(x2), int(y2)) for (x1, y1, x2, y2, cls) in detected_boxes if cls == 'person']
                            best_box = max(candidates, key=lambda b: iou(b, prev_box), default=None) if candidates else None
                            tracked_boxes[i] = best_box if best_box and iou(best_box, prev_box) >= 0.75 else prev_box
                            prev_box = tracked_boxes[i]

                        prev_box = selected_box
                        for i in range(mid_index - 1, -1, -1):
                            frame = segment[i]
                            annotated, detected_boxes = detect_and_draw_all_humans(frame)
                            annotated_frames[i] = annotated
                            candidates = [(int(x1), int(y1), int(x2), int(y2)) for (x1, y1, x2, y2, cls) in detected_boxes if cls == 'person']
                            best_box = max(candidates, key=lambda b: iou(b, prev_box), default=None) if candidates else None
                            tracked_boxes[i] = best_box if best_box and iou(best_box, prev_box) >= 0.75 else prev_box
                            prev_box = tracked_boxes[i]

                        x1s, y1s, x2s, y2s = zip(*tracked_boxes)
                        crop_x1, crop_y1 = max(0, min(x1s)), max(0, min(y1s))
                        crop_x2, crop_y2 = min(segment[0].shape[1], max(x2s)), min(segment[0].shape[0], max(y2s))
                        padding_x = int(0.1 * (crop_x2 - crop_x1))
                        padding_y = int(0.05 * (crop_y2 - crop_y1))
                        crop_x1 = max(0, crop_x1 - padding_x)
                        crop_y1 = max(0, crop_y1 - padding_y)
                        crop_x2 = min(segment[0].shape[1], crop_x2 + padding_x)
                        crop_y2 = min(segment[0].shape[0], crop_y2 + padding_y)

                        max_dim = max(crop_y2 - crop_y1, crop_x2 - crop_x1)
                        if (crop_y2 - crop_y1) < max_dim:
                            crop_y1 = (crop_y2 + crop_y1 - max_dim)//2
                            crop_y2 = (crop_y1 + max_dim + crop_y2)//2
                        if (crop_x2 - crop_x1) < max_dim:
                            crop_x1 = (crop_x2 + crop_x1 - max_dim)//2
                            crop_x2 = (crop_x1 + max_dim + crop_x2)//2

                        if (crop_y2 - crop_y1) < 100 or (crop_x2 - crop_x1) < 100:
                            print(f"Skipping segment {k} for play_id {play_id} due to small crop size.")
                            continue

                        final_frames = [frame[crop_y1:crop_y2, crop_x1:crop_x2] for frame in annotated_frames]

                        if pitcher_name.endswith('L'):
                            print(f"Flipping video frames for left-handed pitcher: {pitcher_name}")
                            final_frames = flip_video_frames(final_frames)

                        output_filename = f"cropped_{play_id}_{k}.mp4"
                        output_path = os.path.join(pitcher_folder, output_filename)
                        result_path = convert_video(np.array(final_frames), output_path)
                        print(f"Saved cropped video to: {result_path}")

                    except Exception as e:
                        print(f"Error processing segment {k} for play_id {play_id}: {e}")
                        continue

            except Exception as e:
                print(f"Error in consumer for play_id {play_id}: {e}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        executor.submit(producer)
        executor.submit(consumer)


In [31]:
process_videos_from_df_numpy(df, 0, 1300, output_folder+f"cropping_videos/")

Starting producer thread for rows 0 to 1300
Processing row index: 121, pitcher: May, Dustin(R), video_link: https://baseballsavant.mlb.com/sporty-videos?playId=f5d533d9-0dec-44cf-9e44-1763025eeeb2
Processing row index: 903, pitcher: May, Dustin(R), video_link: https://baseballsavant.mlb.com/sporty-videos?playId=e8ffa775-fbb7-49be-99cd-b92484b1d18b
Processed f5d533d9-0dec-44cf-9e44-1763025eeeb2: other
Skipping segment 0 for play_id f5d533d9-0dec-44cf-9e44-1763025eeeb2 as it is not a pitching video.
Processed f5d533d9-0dec-44cf-9e44-1763025eeeb2: pitching
Selected box for tracking: (344, 284, 503, 607)
Processing row index: 1188, pitcher: May, Dustin(R), video_link: https://baseballsavant.mlb.com/sporty-videos?playId=e3058963-3dc0-4eaa-92d2-0ccb043ac574
Saved cropped video to: Video Outputs/cropping_videos/MayDustinR\cropped_f5d533d9-0dec-44cf-9e44-1763025eeeb2_1.mp4
Processing row index: 7240, pitcher: May, Dustin(R), video_link: https://baseballsavant.mlb.com/sporty-videos?playId=7cc90

In [32]:
import os
import re
import tempfile
import numpy as np
import pandas as pd

def process_video_from_url(video_page_url, output_folder, play_id_prefix="manual"):
    """
    Processes a single video URL: downloads, extracts, crops, and saves it.

    Args:
        video_page_url (str): URL to the video page.
        output_folder (str): Directory to save the cropped videos.
        play_id_prefix (str): Prefix for the play ID if not extracted from URL.
    """
    if not video_page_url:
        print("No video URL provided.")
        return

    match = re.search(r"playId=([a-z0-9\-]+)", video_page_url)
    play_id = match.group(1) if match else play_id_prefix

    print(f"Processing video with play_id: {play_id}")
    video_url = get_video_url(play_id)
    if not video_url:
        print(f"Could not retrieve video URL for play_id: {play_id}")
        return

    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        download_video(video_url, tmp.name)
        temp_video_path = tmp.name

    frames_array = video_to_numpy(temp_video_path)
    print(f"Frames array shape: {np.array(frames_array).shape}")

    gradient_array = gradient_analysis(frames_array)
    cropped_frames = crop_video(frames_array, gradient_array)

    for i, frames in enumerate(cropped_frames):
        output_filename = os.path.join(output_folder, f"cropped_{play_id}_{i}.mp4")
        output_vid = crop_video_size(np.array(frames), detect_and_draw_lowest_human)
        convert_video(output_vid, output_filename)
        print(f"Cropped video saved to: {output_filename}")

    os.remove(temp_video_path)

# Example usage: Process a single video URL
video_page_url = "https://baseballsavant.mlb.com/sporty-videos?playId=a726eb60-fc99-45b1-b57f-485d4ff95ce5"
output_folder = "Video Outputs/"

process_video_from_url(video_page_url, output_folder)


Processing video with play_id: a726eb60-fc99-45b1-b57f-485d4ff95ce5
Frames array shape: (1080, 720, 1280, 3)


NameError: name 'detect_and_draw_lowest_human' is not defined

In [None]:
import torch
import numpy as np

x = np.array([1, 2, 3])
t = torch.from_numpy(x)  # This should not raise an error


In [None]:
!pip install mediapipe opencv-python



In [32]:
import cv2
import numpy as np
import mediapipe as mp
import matplotlib.pyplot as plt

mp_pose = mp.solutions.pose
mp_drawing = mp.solutions.drawing_utils
pose_model = mp_pose.Pose(static_image_mode=False, model_complexity=1)

def draw_pose(frame):
    results = pose_model.process(frame)
    if results.pose_landmarks:
        mp_drawing.draw_landmarks(
            frame,
            results.pose_landmarks,
            mp_pose.POSE_CONNECTIONS,
            mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2),
            mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2)
        )
    return frame

def save_video(output_path, frames, fps):
    if not frames:
        print("No frames to save.")
        return

    h, w = frames[0].shape[:2]
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))

    for frame in frames:
        bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        out.write(bgr)

    out.release()
    print(f"✅ Saved pose-overlay video to: {output_path}")

def overlay_pose_and_save(video_path, output_path):
    video_array = video_to_numpy(video_path)
    posed_frames = []

    for frame in video_array:
        posed_frame = draw_pose(frame.copy())
        posed_frames.append(posed_frame)

    convert_video(np.array(posed_frames), output_path)
    print(f"Pose overlay video saved to: {output_path}")


ModuleNotFoundError: No module named 'mediapipe'

In [None]:
# prompt: create a function that lists all the mp4 videos in a given directory that start with the word cropped
import os
from IPython.display import HTML, display
from base64 import b64encode

def list_cropped_mp4_videos(directory):
  """
  Lists all MP4 video files in a given directory that start with 'cropped'.

  Args:
    directory (str): The path to the directory to search.

  Returns:
    list: A list of filenames (strings) that match the criteria.
  """
  cropped_videos = []
  if not os.path.isdir(directory):
    print(f"Error: Directory not found at {directory}")
    return cropped_videos

  for filename in os.listdir(directory):
    if filename.startswith('cropped') and filename.endswith('.mp4'):
      cropped_videos.append(filename)

  return cropped_videos

import cv2
from google.colab.patches import cv2_imshow
import time

def play_video_cv2(video_path, delay=30):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error opening video file")
        return

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        cv2_imshow(frame)
        time.sleep(delay / 10000.0)  # delay in seconds
    cap.release()

# Example usage:
cropped_video_files = list_cropped_mp4_videos(output_folder + "cropped_videos/")
print("Cropped MP4 videos found:")
for video_file in cropped_video_files:
  print(video_file)
  # === USAGE ===
  video_path = os.path.join(output_folder+"cropped_videos/", video_file)
  video_array = video_to_numpy(video_path)
  overlay_pose_and_save(video_path, os.path.join(output_folder+"posed_videos/", f"posed_{video_file}"))
  #display(play_video_cv2(os.path.join(output_folder+"posed_videos/", f"posed_{video_file}")))


ModuleNotFoundError: No module named 'google.colab'

In [None]:
import torch
print(torch.cuda.is_available())  # True if CUDA is available
print(torch.cuda.current_device())
print(torch.cuda.get_device_name(0))
#put a torch tensor on the GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Example: Create a tensor and move it to the GPU
tensor = torch.tensor([1.0, 2.0, 3.0]).to(device)
# Check if the tensor is on the GPU
print(f"Tensor is on GPU: {tensor.is_cuda}")
# Check if the tensor is on the GPU




True
0
NVIDIA GeForce RTX 5060 Ti
Using device: cuda
Tensor is on GPU: True
