Creating a Class for the Baseball Savant Game Feed API

In [2]:
import requests
import polars as pl
import numpy as np
import pandas as pd

# Notebook display settings for pandas, applied as (option, value) pairs:
# passing None/False lifts the column-count limit, turns off wrapping of wide
# frames across several blocks, and lifts the per-cell text width limit.
for _option_name, _option_value in (
    ('display.max_columns', None),
    ('display.expand_frame_repr', False),
    ('max_colwidth', None),
):
    pd.set_option(_option_name, _option_value)

class baseball_savant_game_feed_scraper:
    """Fetch a Baseball Savant game feed and derive per-pitch attributes.

    Typical use is ``construct_game(game_pk)``, which downloads the feed with
    ``fetch_game_feed`` and then enriches it with ``add_attributes``. All
    DataFrames handled by this class are Polars DataFrames.
    """

    # Default number of seconds to wait on the HTTP request before giving up.
    DEFAULT_TIMEOUT = 30

    # Constants used by the release formulas. Units are presumably feet and
    # seconds (Statcast convention) -- TODO confirm against the feed docs.
    _RELEASE_REFERENCE_Y = 60.5   # release_pos_y / yR = 60.5 - extension
    _MEASUREMENT_Y = 50           # y at which vy0 is taken in the tR formula
    _PLATE_FRONT_Y = 17 / 12      # y where the flight to the plate ends (tf)
    _GRAVITY = 32.174             # added back to az to remove gravity

    # result_code values that is_swing() counts as a swing.
    _SWING_RESULT_CODES = ('X', 'F', 'S', 'D', 'E', 'T', 'W')

    def __init__(self, timeout=DEFAULT_TIMEOUT):
        """
        Parameters:
        timeout (float | tuple | None): Timeout forwarded to ``requests.get``
            when downloading a feed. Defaults to ``DEFAULT_TIMEOUT`` seconds.
        """
        self.base_url = "https://baseballsavant.mlb.com/gf"
        self.timeout = timeout

    def fix_for_lefties(self, p_throws, value):
        """
        Mirror a horizontal value for left-handed pitchers.

        Parameters:
        p_throws (str): Pitcher handedness; only 'L' triggers the flip.
        value (float | None): The value to normalize.

        Returns:
        float | None: ``-value`` when p_throws is 'L', ``value`` otherwise,
        and None when value is None.
        """
        if value is None:
            return None
        return value * -1 if p_throws == 'L' else value

    def fix_plate_z(self, plate_z, sz_bot, sz_top):
        """
        Rescale the pitch height relative to the batter's strike zone.

        The result is 1.5 at sz_bot, 2.5 at the middle of the zone and 3.5 at
        sz_top, rounded to two decimals.

        Parameters:
        plate_z (float | None): Height of the pitch.
        sz_bot (float | None): Bottom of the strike zone.
        sz_top (float | None): Top of the strike zone.

        Returns:
        float | None: The normalized height, or None when any input is
        missing, non-numeric, or the strike zone has zero height.
        """
        if plate_z is None or sz_bot is None or sz_top is None:
            return None

        try:
            zone_middle = (sz_bot + sz_top) / 2
            half_height = sz_top - zone_middle
            return round(((plate_z - zone_middle) / half_height) + 2.5, 2)
        except (TypeError, ValueError, ZeroDivisionError, OverflowError):
            # Bad input for a single pitch should not abort the whole game;
            # the caller stores None (null) for that row instead.
            return None

    def is_whiff(self, is_strike_swinging):
        """
        Determine if the pitch resulted in a whiff.

        Parameters:
        is_strike_swinging (bool | None): The feed's swinging-strike flag.

        Returns:
        int: 1 if the flag equals True, 0 otherwise (including None).
        """
        if is_strike_swinging is None:
            return 0
        return 1 if is_strike_swinging == True else 0  # noqa: E712 (keeps 1 == True)

    def is_swing(self, result_code):
        """
        Determine if the pitch resulted in a swing.

        Parameters:
        result_code (str | None): The result_code of the pitch.

        Returns:
        int: 1 if result_code is one of 'X', 'F', 'S', 'D', 'E', 'T', 'W',
        0 otherwise (including None).
        """
        if result_code is None:
            return 0
        return 1 if result_code in self._SWING_RESULT_CODES else 0

    def fetch_game_feed(self, game_pk):
        """
        Fetch game feed data from Baseball Savant API and return as a Polars DataFrame.

        The rows are the feed's "team_home" entries followed by its
        "team_away" entries.

        Parameters:
        game_pk (int | str): Game identifier placed in the query string.

        Returns:
        pl.DataFrame: One row per feed entry. An empty DataFrame is returned
        (after printing the error) when the request fails, times out, returns
        an error status, or the body is not valid JSON.
        """
        api_url = f"{self.base_url}?game_pk={game_pk}"
        try:
            # A timeout keeps a stalled connection from hanging forever.
            response = requests.get(api_url, timeout=self.timeout)
            response.raise_for_status()  # Raise an error for bad responses
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers invalid JSON on requests versions where the
            # decode error is not a RequestException subclass.
            print(f"Error fetching data: {e}")
            return pl.DataFrame([])

        # `or []` also covers keys that are present but null in the payload.
        plays = (data.get("team_home") or []) + (data.get("team_away") or [])

        return pl.DataFrame(plays)

    def backcalculate_release_position(self, df):
        """
        Back-calculate release_pos_x and release_pos_z using x0, z0, and velocity data.

        Adds three columns, in this order: release_pos_y (60.5 - extension),
        release_pos_x and release_pos_z. The time offset to the release point
        is estimated as (release_pos_y - y0) / vy0, i.e. ay is not used here.

        Parameters:
        df (pl.DataFrame): Must contain extension, x0, y0, z0, vx0, vy0, vz0,
            ax and az.

        Returns:
        pl.DataFrame: df with the three release position columns appended.
        """
        release_y = self._RELEASE_REFERENCE_Y - pl.col("extension")

        # Delta time (Δt) from the y0 measurement point to the release point.
        # NOTE(review): this is a constant-velocity estimate; tR computed in
        # add_release_metrics_polars solves the same offset including ay.
        delta_t = (release_y - pl.col("y0")) / pl.col("vy0")

        return df.with_columns([
            release_y.alias("release_pos_y"),
            (pl.col("x0") + pl.col("vx0") * delta_t + 0.5 * pl.col("ax") * delta_t ** 2).alias("release_pos_x"),
            (pl.col("z0") + pl.col("vz0") * delta_t + 0.5 * pl.col("az") * delta_t ** 2).alias("release_pos_z"),
        ])

    def add_release_metrics_polars(self, df):
        """
        Add release-point kinematics and acceleration components.

        Appends, in this order: yR, tR, vxR, vyR, vzR, tf, vxbar, vybar,
        vzbar, vbar, adrag, amagx, amagy, amagz, amag.

        Parameters:
        df (pl.DataFrame): Must contain extension, vx0, vy0, vz0, ay, az and
            ax_flipped (add_attributes creates ax_flipped before calling this).

        Returns:
        pl.DataFrame: df with the columns listed above appended.
        """
        # NOTE(review): the x components combine the raw vx0 with the
        # sign-flipped ax_flipped -- confirm this mix of conventions is
        # intended before relying on vxR / vxbar / adrag / amagx.
        ax = pl.col("ax_flipped")
        ay = pl.col("ay")
        az = pl.col("az")
        gravity = self._GRAVITY

        # yR: y coordinate of the release point.
        df = df.with_columns(
            (self._RELEASE_REFERENCE_Y - pl.col("extension")).alias("yR")
        )

        # tR: time offset from the measurement point to release (needs yR).
        df = df.with_columns(
            (
                (-pl.col("vy0") - (pl.col("vy0") ** 2 - 2 * ay * (self._MEASUREMENT_Y - pl.col("yR"))).sqrt())
                / ay
            ).alias("tR")
        )

        # Velocity components at release (need tR).
        df = df.with_columns([
            (pl.col("vx0") + ax * pl.col("tR")).alias("vxR"),
            (pl.col("vy0") + ay * pl.col("tR")).alias("vyR"),
            (pl.col("vz0") + az * pl.col("tR")).alias("vzR"),
        ])

        # tf: flight time from release to y = 17/12 (needs vyR and yR).
        df = df.with_columns(
            (
                (-pl.col("vyR") - (pl.col("vyR") ** 2 - 2 * ay * (pl.col("yR") - self._PLATE_FRONT_Y)).sqrt())
                / ay
            ).alias("tf")
        )

        # Average velocity components over the flight (need tf).
        df = df.with_columns([
            ((2 * pl.col("vxR") + ax * pl.col("tf")) / 2).alias("vxbar"),
            ((2 * pl.col("vyR") + ay * pl.col("tf")) / 2).alias("vybar"),
            ((2 * pl.col("vzR") + az * pl.col("tf")) / 2).alias("vzbar"),
        ])
        df = df.with_columns(
            (
                (pl.col("vxbar") ** 2 + pl.col("vybar") ** 2 + pl.col("vzbar") ** 2).sqrt()
            ).alias("vbar")
        )

        # adrag: gravity-free acceleration projected onto the average
        # velocity direction, with the sign reversed.
        df = df.with_columns(
            (
                -(
                    ax * pl.col("vxbar")
                    + ay * pl.col("vybar")
                    + (az + gravity) * pl.col("vzbar")
                ) / pl.col("vbar")
            ).alias("adrag")
        )

        # amagx/y/z: what is left of the gravity-free acceleration once the
        # adrag component along the velocity direction is added back.
        df = df.with_columns([
            (ax + pl.col("adrag") * pl.col("vxbar") / pl.col("vbar")).alias("amagx"),
            (ay + pl.col("adrag") * pl.col("vybar") / pl.col("vbar")).alias("amagy"),
            (az + pl.col("adrag") * pl.col("vzbar") / pl.col("vbar") + gravity).alias("amagz"),
        ])
        df = df.with_columns(
            (
                (pl.col("amagx") ** 2 + pl.col("amagy") ** 2 + pl.col("amagz") ** 2).sqrt()
            ).alias("amag")
        )

        return df

    def _lefty_normalized(self, column, alias):
        """Build the expression applying fix_for_lefties to *column* as *alias*."""
        return (
            pl.struct(['p_throws', column])
            .map_elements(
                lambda row: self.fix_for_lefties(row['p_throws'], row[column]),
                return_dtype=pl.Float64,
            )
            .alias(alias)
        )

    def add_attributes(self, df):
        """
        Add important attributes to dataset

        Appends the release position columns, handedness-normalized
        horizontal columns, the strike-zone-normalized height, the release
        metrics, hand_split ("SHH" when p_throws equals stand, otherwise
        "OHH"), and the is_whiff / is_swing indicator columns (Float64).

        Parameters:
        df (pl.DataFrame): A non-empty game feed containing the pitch
            columns read by this method and the helpers it calls
            (p_throws, stand, px, pz, sz_bot, sz_top, is_strike_swinging,
            result_code and the kinematic columns).

        Returns:
        pl.DataFrame: df with all derived columns appended.
        """
        df = self.backcalculate_release_position(df)

        # Flip/Normalize all horizontal columns, and rescale the height.
        df = df.with_columns([
            self._lefty_normalized('release_pos_x', 'release_pos_x_normalized'),
            self._lefty_normalized('px', 'plate_x_normalized'),
            pl.struct(['pz', 'sz_bot', 'sz_top'])
            .map_elements(
                lambda row: self.fix_plate_z(row['pz'], row['sz_bot'], row['sz_top']),
                return_dtype=pl.Float64,
            )
            .alias('plate_z_normalized'),
        ])

        # ax_flipped must exist before ax_normalized and the release metrics.
        df = df.with_columns(
            (pl.col('ax') * -1).alias('ax_flipped')
        )
        df = df.with_columns(self._lefty_normalized('ax_flipped', 'ax_normalized'))

        # Add more release metrics
        df = self.add_release_metrics_polars(df)

        # Add hand split plus whiff and swing summation columns
        df = df.with_columns([
            pl.when(pl.col("p_throws") == pl.col("stand"))
            .then(pl.lit("SHH"))
            .otherwise(pl.lit("OHH"))
            .alias("hand_split"),
            pl.struct(['is_strike_swinging'])
            .map_elements(
                lambda row: self.is_whiff(row['is_strike_swinging']),
                return_dtype=pl.Float64,
            )
            .alias('is_whiff'),
            pl.struct(['result_code'])
            .map_elements(
                lambda row: self.is_swing(row['result_code']),
                return_dtype=pl.Float64,
            )
            .alias('is_swing'),
        ])

        return df

    def construct_game(self, game_pk):
        """
        Download one game and return it with all derived attributes.

        Parameters:
        game_pk (int | str): Game identifier passed to fetch_game_feed.

        Returns:
        pl.DataFrame: The enriched feed, or the empty DataFrame from
        fetch_game_feed when nothing could be downloaded.
        """
        df = self.fetch_game_feed(game_pk)

        # An empty frame has none of the columns add_attributes reads, so
        # hand it back as-is instead of failing on a missing column.
        if df.height == 0:
            return df

        return self.add_attributes(df)

Example Usage

In [4]:
# Example usage: build the enriched pitch-level DataFrame for a single game.
# NOTE: construct_game() performs a live HTTP request to Baseball Savant.
scraper = baseball_savant_game_feed_scraper()

# game_pk is the game identifier sent in the feed URL's query string.
df = scraper.construct_game(game_pk=745691)

# Preview the first rows; as the last expression in the cell it is rendered below.
df.head()

play_id,inning,ab_number,cap_index,outs,batter,stand,batter_name,pitcher,p_throws,pitcher_name,team_batting,team_fielding,team_batting_id,team_fielding_id,result,des,events,contextMetrics,strikes,balls,pre_strikes,pre_balls,call,call_name,pitch_type,pitch_name,description,result_code,pitch_call,is_strike_swinging,balls_and_strikes,start_speed,end_speed,sz_top,sz_bot,extension,…,hit_speed_round,hit_speed,hit_distance,xba,hit_angle,is_barrel,hc_x,hc_x_ft,hc_y,hc_y_ft,runnerOn1B,release_pos_y,release_pos_x,release_pos_z,release_pos_x_normalized,plate_x_normalized,plate_z_normalized,ax_flipped,ax_normalized,yR,tR,vxR,vyR,vzR,tf,vxbar,vybar,vzbar,vbar,adrag,amagx,amagy,amagz,amag,hand_split,is_whiff,is_swing
str,i64,i64,i64,i64,i64,str,str,i64,str,str,str,str,i64,i64,str,str,str,struct[1],i64,i64,i64,i64,str,str,str,str,str,str,str,bool,str,f64,f64,f64,f64,f64,…,str,str,str,str,str,i64,f64,f64,f64,f64,bool,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,f64,f64
"""2940db95-bfb1-4e8b-8df5-500605…",1,1,0,1,683002,"""L""","""Gunnar Henderson""",543037,"""R""","""Gerrit Cole""","""BAL""","""NYY""",110,147,"""Flyout""","""Gunnar Henderson flies out sha…","""Flyout""",{null},0,0,0,0,"""S""","""Strike""","""FF""","""4-Seam Fastball""","""Foul""","""F""","""foul""",False,"""00""",95.5,86.7,3.697097,1.74748,6.275149,…,,,,,,,,,,,,54.224851,-2.206763,6.28288,-2.206763,0.838763,2.84,6.468288,6.468288,54.224851,-0.030364,8.565384,-139.640526,-5.869046,0.39666,9.84824,-133.132128,-8.14336,133.74403,33.450354,8.931404,-0.481309,18.669965,20.701914,"""OHH""",0.0,1.0
"""10d8770c-43d4-490b-9103-8495d9…",1,1,0,1,683002,"""L""","""Gunnar Henderson""",543037,"""R""","""Gerrit Cole""","""BAL""","""NYY""",110,147,"""Flyout""","""Gunnar Henderson flies out sha…","""Flyout""",{null},1,0,1,0,"""B""","""Ball""","""SL""","""Slider""","""Ball""","""B""","""ball""",False,"""01""",87.8,81.5,3.628416,1.753242,6.086659,…,,,,,,,,,,,,54.413341,-2.056032,6.214661,-2.056032,1.123536,0.27,-3.572436,-3.572436,54.413341,-0.034467,6.858302,-128.478345,-6.507674,0.430656,6.089056,-123.060201,-13.038055,123.89867,25.361847,-2.326016,-0.027957,-0.822429,2.46729,"""OHH""",0.0,0.0
"""2dd391a1-e2ae-491b-8f08-dca206…",1,1,0,1,683002,"""L""","""Gunnar Henderson""",543037,"""R""","""Gerrit Cole""","""BAL""","""NYY""",110,147,"""Flyout""","""Gunnar Henderson flies out sha…","""Flyout""",{null},1,1,1,1,"""B""","""Ball""","""FF""","""4-Seam Fastball""","""Ball""","""B""","""ball""",False,"""11""",95.1,86.8,3.653792,1.778718,6.406042,…,,,,,,,,,,,,54.093958,-2.212036,6.180485,-2.212036,1.669389,3.38,6.25226,6.25226,54.093958,-0.029557,10.655177,-138.969759,-4.043972,0.396553,11.894853,-132.837935,-6.651568,133.535193,31.154753,9.027418,-0.066461,17.470832,19.665418,"""OHH""",0.0,0.0
"""458a6ca0-9833-481f-a76e-977221…",1,1,0,1,683002,"""L""","""Gunnar Henderson""",543037,"""R""","""Gerrit Cole""","""BAL""","""NYY""",110,147,"""Flyout""","""Gunnar Henderson flies out sha…","""Flyout""",{null},1,2,1,2,"""S""","""Strike""","""KC""","""Knuckle Curve""","""Foul""","""F""","""foul""",False,"""21""",82.4,75.2,3.7,1.75,6.029231,…,,,,,,,,,,,,54.470769,-2.265121,6.163486,-2.265121,-0.625646,2.32,-4.983155,-4.983155,54.470769,-0.037141,2.764697,-120.860056,2.614529,0.462213,1.613057,-114.782753,-7.827881,115.060671,25.417751,-4.626819,0.940182,-14.739623,15.477335,"""OHH""",0.0,1.0
"""d413e6c4-1d22-418f-a616-5c2622…",1,1,0,1,683002,"""L""","""Gunnar Henderson""",543037,"""R""","""Gerrit Cole""","""BAL""","""NYY""",110,147,"""Flyout""","""Gunnar Henderson flies out sha…","""Flyout""",{4},2,2,2,2,"""X""","""In Play""","""FF""","""4-Seam Fastball""","""In play, out(s)""","""X""","""hit_into_play""",False,"""22""",95.0,86.5,3.7,1.75,6.191392,…,"""102.0""","""102.1""","""383""",""".670""","""25""",1.0,75.19,-120.542032,52.92,362.710749,,54.308608,-2.319894,6.28869,-2.319894,-0.549938,2.31,8.238381,8.238381,54.308608,-0.031084,5.568687,-139.111549,-6.762845,0.39857,7.210472,-132.704306,-9.415564,133.233167,32.910718,10.019483,-0.628918,16.537017,19.345762,"""OHH""",0.0,1.0
