In [17]:
import regex as re
import ast
import math
import os

import pandas as pd
import numpy as np
from pandasql import sqldf
import warnings
warnings.simplefilter(action="ignore")
baseball_path = r"C:\Users\james\Documents\MLB\Data"


In [2]:
# At bat events
def create_events(df):
    df['eventsModel'] = np.where(df['event'] == 'Strikeout', "so", "")
    df['eventsModel'] = np.where(df['event'] == 'Strikeout Double Play', "so", df['eventsModel'])

    df['eventsModel'] = np.where(df['event'] == 'Groundout', "go", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Fielders Choice', "go", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Double Play', "go", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Grounded Into DP', "go", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Triple Play', "go", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Field Error', "go", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Forceout', "go", df['eventsModel'])

    df['eventsModel'] = np.where(df['event'] == 'Lineout', "lo", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Bunt Lineout', "lo", df['eventsModel'])

    df['eventsModel'] = np.where(df['event'] == 'Flyout', "fo", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Sac Fly', "fo", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Sac Fly Double Play', "fo", df['eventsModel'])

    df['eventsModel'] = np.where(df['event'] == 'Pop Out', "po", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Bunt Pop Out', "po", df['eventsModel'])

    df['eventsModel'] = np.where(df['event'] == 'Hit By Pitch', "hbp", df['eventsModel'])

    df['eventsModel'] = np.where(df['event'] == 'Walk', "bb", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Intent Walk', "bb", df['eventsModel'])

    df['eventsModel'] = np.where(df['event'] == 'Single', "b1", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Double', "b2", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Triple', "b3", df['eventsModel'])
    df['eventsModel'] = np.where(df['event'] == 'Home Run', "hr", df['eventsModel'])

    df['eventsModel'] = np.where(df['eventsModel'] == "", "Cut", df['eventsModel'])
    
    return df

In [3]:
# Calculate wind vectors
# Note: 2 is to centerfield, 6 is from centerfield, clockwise
# Note: y vector is positive to centerfield, negative from centerfield
# Note: x vector is positive from left to right, negatives from right to left
# Assumption is wind is blowing in 8 cardinal directions, so we can use simple right isosceles triangles
def y_vect(df):
    wind_speed = df['windSpeed']
    angled = df['windSpeed'] / 2 * math.sqrt(2)
    
    if df['windDirection'] == "Out To CF": 
        y_vect = wind_speed
    elif df['windDirection'] == "Out To RF": 
        y_vect = angled
    elif df['windDirection'] == "L To R": 
        y_vect = 0
    elif df['windDirection'] == "In From LF": 
        y_vect = angled * -1
    elif df['windDirection'] == "In From CF": 
        y_vect = wind_speed * - 1
    elif df['windDirection'] == "In From RF": 
        y_vect = angled * -1
    elif df['windDirection'] == "R To L": 
        y_vect = 0
    elif df['windDirection'] == "Out To LF": 
        y_vect = angled
    else:
        y_vect = 0
        
    return y_vect

def x_vect(df):
    wind_speed = df['windSpeed']
    angled = df['windSpeed'] / 2 * math.sqrt(2)
    
    if df['windDirection'] == "L To R": 
        x_vect = wind_speed
    elif df['windDirection'] == "In From LF": 
        x_vect = angled
    elif df['windDirection'] == "In From CF": 
        x_vect = 0
    elif df['windDirection'] == "In From RF": 
        x_vect = angled * -1
    elif df['windDirection'] == "R To L": 
        x_vect = wind_speed * - 1
    elif df['windDirection'] == "Out To LF": 
        x_vect = angled * -1
    elif df['windDirection'] == "Out To CF": 
        x_vect = 0
    elif df['windDirection'] == "Out To RF": 
        x_vect = angled
    else:
        x_vect = 0
        
    return x_vect

In [4]:
# Creates weather variables
def clean_weather(df):
    # Separate weather into temperature and weather type
    df[['temperature', 'weather']] = df['weather'].str.split(", ", expand=True)
    df['temperature'] = df['temperature'].str.replace(" degrees", "").astype('int')
    # Separate wind into speed and direction
    df[['windSpeed', 'windDirection']] = df['wind'].str.split(", ", expand=True)
    df['windSpeed'].fillna("0 mph", inplace=True)
    df['windSpeed'] = df['windSpeed'].str.replace(" mph", "")
    df['windSpeed'] = pd.to_numeric(df['windSpeed'], errors='coerce')
    df['windSpeed'].fillna(0, inplace=True)
    df['windDirection'].fillna('L to R', inplace=True)
    df['windSpeed'].unique()
    df['windDirection'] = df['windDirection'].str.replace(".", "")
    # Calculate vectors
    df['x_vect'] = df.apply(x_vect, axis=1)
    df['y_vect'] = df.apply(y_vect, axis=1)
    
    return df

In [5]:
# This turns several variables, including events, venues, hands, and bases into dummies
def create_dummies(df):
    event_dummies = pd.get_dummies(df['eventsModel'])
    venue_dummies = pd.get_dummies(df['venue_id'], prefix='venue')
    pitcher_dummies = pd.get_dummies(df['pitchHand'], prefix='p')
    batter_dummies = pd.get_dummies(df['batSide'], prefix='b')
    year_dummies = pd.get_dummies(df['year'], prefix='year')
    
    venue_list = venue_dummies.columns.tolist()
    year_list = year_dummies.columns.tolist()
    dummy_list = venue_list + year_list
    
    df = pd.concat([df, event_dummies, venue_dummies, pitcher_dummies, batter_dummies, year_dummies], axis=1)
    
    df['preOnFirst'] = df.groupby(['gamePk', 'inning', 'halfInning'])['postOnFirst'].shift(1)
    df['preOnSecond'] = df.groupby(['gamePk', 'inning', 'halfInning'])['postOnSecond'].shift(1)
    df['preOnThird'] = df.groupby(['gamePk', 'inning', 'halfInning'])['postOnThird'].shift(1)
    
    df['onFirst'] = df['preOnFirst'].notnull().astype('int')
    df['onSecond'] = df['preOnSecond'].notnull().astype('int')
    df['onThird'] = df['preOnThird'].notnull().astype('int')
    
    df['top'] = np.where(df['halfInning'] == "top", 1, 0)
    
    df['pa'] = np.where(df['eventsModel'] != "Cut", 1, 0)
    df['ab'] = df['pa'] - df['hbp']
    
    return df, dummy_list

In [6]:
def statcast(df):
    statcast = df.copy()
    # Hard hit dummy
    statcast['hard_hit'] = (statcast['hardness'].str.contains('hard')).astype('int')
    
    def find_max(lst):
        if lst:
            return max(lst)
        else:
            return 0
    
    # Max pitch speed
    statcast['startSpeeds'] = statcast['startSpeeds'].apply(lambda x: ast.literal_eval(x))
    statcast['maxSpeed'] = statcast['startSpeeds'].apply(find_max)
    # Have to drop, can't take lists
    statcast.drop(columns={'startSpeeds'}, inplace=True)
    
    # Max spin rate
    statcast['spinRates'] = statcast['spinRates'].apply(lambda x: ast.literal_eval(x))
    statcast['maxSpin'] = statcast['spinRates'].apply(find_max)
    # Have to drop, can't take lists
    statcast.drop(columns={'spinRates'}, inplace=True)
    
    # Launch speeds
    statcast['launchSpeeds'] = statcast['launchSpeeds'].str.replace("[", "")
    statcast['launchSpeeds'] = statcast['launchSpeeds'].str.replace("]", "")
    statcast['launchSpeed'] = (statcast['launchSpeeds']).astype('float', errors='ignore')
    statcast['launchSpeed'] = pd.to_numeric(statcast['launchSpeed'])
    
    # Launch angle
    statcast['launchAngles'] = statcast['launchAngles'].str.replace("[", "")
    statcast['launchAngles'] = statcast['launchAngles'].str.replace("]", "")
    statcast['launchAngle'] = (statcast['launchAngles']).astype('float', errors='ignore')
    statcast['launchAngle'] = pd.to_numeric(statcast['launchAngle'])
        
    # Total distances
    statcast['totalDistances'] = statcast['totalDistances'].str.replace("[", "")
    statcast['totalDistances'] = statcast['totalDistances'].str.replace("]", "")
    statcast['totalDistance'] = (statcast['totalDistances']).astype('float', errors='ignore')
    statcast['totalDistance'] = pd.to_numeric(statcast['totalDistance'])
    
    # Coordinates of batted ball
    statcast['coord'] = statcast['coord'].str.replace("[", "")
    statcast['coord'] = statcast['coord'].str.replace("]", "")    
    statcast[['x', 'y']] = statcast['coord'].str.split(",", expand=True)
    statcast['x'] = pd.to_numeric(statcast['x'])
    statcast['y'] = pd.to_numeric(statcast['y'])
    
    statcast['spray_angle'] = np.arctan((statcast['x']-125.42)/(198.27-statcast['y'])) * 180/np.pi * 0.75
    statcast['to_left'] = (statcast['spray_angle'] < -15).astype('int')
    statcast['to_middle'] = ((statcast['spray_angle'] >= -15) & (statcast['spray_angle'] <= 15)).astype('int')
    statcast['to_right'] = (statcast['spray_angle'] > 15).astype('int')

    
    return statcast

In [57]:
# This will return a dataframe that can eventually be used as the model input. Has pitcher vs hitter stats, specific to hand
def rolling_pas(df, pa_num):
    stat_list = ['so', 'b1', 'b2', 'b3', 'hr', 'bb', 'hbp', 'lo', 'po', 'go', 'fo', 'hard_hit', 'to_left', 'to_middle', 'to_right', 'pa', 'ab']
    max_list = ['totalDistance', 'maxSpeed', 'maxSpin']
        
    df['pa_num'] = df.index
    
    batter_stats = []
    pitcher_stats = []
    batter_stats2 = []
    pitcher_stats2 = []

    for stat in stat_list:
        batter_stat = stat + "_b"
        pitcher_stat = stat + "_p"
        batter_stats.append(batter_stat)
        pitcher_stats.append(pitcher_stat)
        
    for stat in max_list:
        batter_stat = stat + "_b"
        pitcher_stat = stat + "_p"
        batter_stats2.append(batter_stat)
        pitcher_stats2.append(pitcher_stat)

    df[batter_stats] = df.groupby(['batter', 'pitchHand'])[stat_list].transform(lambda x: x.shift().rolling(pa_num, min_periods=1).sum())
    df[batter_stats2] = df.groupby(['batter', 'pitchHand'])[max_list].transform(lambda x: x.shift().rolling(pa_num, min_periods=1).max())
      
    df[pitcher_stats] = df.groupby(['pitcher', 'batSide'])[stat_list].transform(lambda x: x.shift().rolling(pa_num, min_periods=1).sum())
    df[pitcher_stats2] = df.groupby(['pitcher', 'batSide'])[max_list].transform(lambda x: x.shift().rolling(pa_num, min_periods=1).max())

    
    df.sort_values(['pa_num'], axis=0, ascending=True, inplace=True)

    # wOBA - using 2022 values throughout
    df['woba_b'] = (0.690 * df['bb_b']) + (0.721 * df['hbp_b']) + (0.885 * df['b1_b']) + (1.262 * df['b2_b']) + (1.601 * df['b3_b']) + (2.070 * df['hr_b'])
    df['woba_p'] = (0.690 * df['bb_p']) + (0.721 * df['hbp_p']) + (0.885 * df['b1_p']) + (1.262 * df['b2_p']) + (1.601 * df['b3_p']) + (2.070 * df['hr_p'])
    
    # Slugging
    df['slg_b'] = (1 * df['b1_b']) + (2 * df['b2_b']) + (3 * df['b3_b']) + (4 * df['hr_b'])
    df['slg_b'] = df['slg_b'] / df['ab_b']
    df['slg_p'] = (1 * df['b1_p']) + (2 * df['b2_p']) + (3 * df['b3_p']) + (4 * df['hr_p'])
    df['slg_p'] = df['slg_p'] / df['ab_p']

    # OBP    
    df['obp_b'] = df[['b1_b', 'b2_b', 'b3_b', 'hr_b', 'bb_b', 'hbp_b']].sum(axis=1)
    df['obp_p'] = df[['b1_p', 'b2_p', 'b3_p', 'hr_p', 'bb_p', 'hbp_p']].sum(axis=1)

    # Calculate rates
    stat_short = ['so', 'b1', 'b2', 'b3', 'hr', 'bb', 'hbp', 'lo', 'po', 'go', 'fo', 'woba', 'obp', 'hard_hit', 'to_left', 'to_middle', 'to_right']
    for stat in stat_short:
        batter_stat = stat + "_b"
        pitcher_stat = stat + "_p"  
        df[batter_stat] = df[batter_stat] / df['pa_b']
        df[pitcher_stat] = df[pitcher_stat] / df['pa_p']
        
    batter_stats = batter_stats + batter_stats2
    pitcher_stats = pitcher_stats + pitcher_stats2
        
    df.sort_values('pa_num', inplace=True)
    
                 
    return df, batter_stats, pitcher_stats

In [42]:
df_list = []
for year in range(2023, 2024):
    filename = "Play" + str(year) + ".csv"
    df = pd.read_csv(os.path.join(baseball_path, "A3. Raw API", filename))
    
    df['year'] = year
    # Only keep one observation per PA (don't keep each runner)
    df.drop_duplicates(['gamePk', 'atBatIndex'], keep='first', inplace=True, ignore_index=True)

    df = create_events(df)
    df = clean_weather(df)
    df, dummy_list = create_dummies(df)
    df = statcast(df)
    df['game_date'] = df['game_date'].str.replace("-", "")
    df['pitcher_outs'] = (df['inning'] - 1) * 3 + (df['outs'])
    df['start'] = (df['inning'] == 1).astype('int')

    df['pitcher_outs'] = df.groupby(['pitcher', 'gamePk'])['pitcher_outs'].transform('max')
    df['start'] = df.groupby(['pitcher', 'gamePk'])['start'].transform('max')
    
    df_list.append(df)
    
all_years = pd.concat(df_list, axis=0)
all_years.reset_index(inplace=True)
all_years.drop(columns={'Unnamed: 0', 'index'}, inplace=True)
all_years.rename(columns={'level_0':'index'}, inplace=True)
all_years

Unnamed: 0,index,atBatIndex,inning,halfInning,outs,type,id,event,eventType,description,...,launchSpeed,launchAngle,totalDistance,x,y,spray_angle,to_left,to_middle,to_right,pitcher_outs
0,0,0,1,top,0,atBat,660670,Single,single,Ronald Acuna Jr. singles on a sharp line driv...,...,107.9,11.0,227.0,201.3,110.5,30.633355,0,0,1,9
1,1,1,1,top,1,atBat,660670,Strikeout,strikeout,Matt Olson strikes out swinging.,...,,,,,,,0,0,0,9
2,2,2,1,top,1,atBat,663586,Walk,walk,Austin Riley walks.,...,,,,,,,0,0,0,9
3,3,3,1,top,2,atBat,645277,Flyout,field_out,Ozzie Albies flies out sharply to right fielde...,...,103.7,23.0,362.0,184.7,65.2,18.009006,0,0,1,9
4,4,4,1,top,3,atBat,669221,Strikeout,strikeout,Sean Murphy strikes out swinging.,...,,,,,,,0,0,0,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9383,9383,72,9,top,1,atBat,543807,Strikeout,strikeout,George Springer strikes out swinging.,...,,,,,,,0,0,0,27
9384,9384,73,9,top,2,atBat,666182,Lineout,field_out,Bo Bichette lines out sharply to second basema...,...,104.0,9.0,181.0,132.2,129.1,4.198660,0,1,0,27
9385,9385,74,9,top,2,atBat,665489,Single,single,Vladimir Guerrero Jr. singles on a sharp line...,...,102.9,8.0,176.0,175.2,92.0,18.824832,0,0,1,27
9386,9386,75,9,top,2,atBat,665489,Walk,walk,Matt Chapman walks.,...,,,,,,,0,0,0,27


In [169]:
df = all_years.copy()
df['pa']

# Calculate advanced stats here
# ISO
df['iso'] = df['b2'] * 1 + df['b3'] * 2 + df['hr'] * 3
df['iso'] = np.where(df['ab'] == 0, np.nan, df['iso']) # denominator is ab
# SLG
df['slg'] = df['b1'] * 1 + df['b2'] * 2 + df['b3'] * 3 + df['hr'] * 4 
df['slg'] = np.where(df['ab'] == 0, np.nan, df['slg']) # denominator is ab
# wOBA
df['woba'] = df['bb'] * 0.690 + df['hbp'] * 0.721 + df['b1'] * 0.885 + df['b2'] * 1.262 + df['b3'] * 1.601 + df['hr'] * 2.070 
# denominator is PA - IBB, but I think I'm ignoring IBBs for now


df

Unnamed: 0,index,atBatIndex,inning,halfInning,outs,type,id,event,eventType,description,...,x,y,spray_angle,to_left,to_middle,to_right,pitcher_outs,iso,slg,woba
0,0,0,1,top,0,atBat,660670,Single,single,Ronald Acuna Jr. singles on a sharp line driv...,...,201.3,110.5,30.633355,0,0,1,9,0.0,1.0,0.885
1,1,1,1,top,1,atBat,660670,Strikeout,strikeout,Matt Olson strikes out swinging.,...,,,,0,0,0,9,0.0,0.0,0.000
2,2,2,1,top,1,atBat,663586,Walk,walk,Austin Riley walks.,...,,,,0,0,0,9,0.0,0.0,0.690
3,3,3,1,top,2,atBat,645277,Flyout,field_out,Ozzie Albies flies out sharply to right fielde...,...,184.7,65.2,18.009006,0,0,1,9,0.0,0.0,0.000
4,4,4,1,top,3,atBat,669221,Strikeout,strikeout,Sean Murphy strikes out swinging.,...,,,,0,0,0,9,0.0,0.0,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9383,9383,72,9,top,1,atBat,543807,Strikeout,strikeout,George Springer strikes out swinging.,...,,,,0,0,0,27,0.0,0.0,0.000
9384,9384,73,9,top,2,atBat,666182,Lineout,field_out,Bo Bichette lines out sharply to second basema...,...,132.2,129.1,4.198660,0,1,0,27,0.0,0.0,0.000
9385,9385,74,9,top,2,atBat,665489,Single,single,Vladimir Guerrero Jr. singles on a sharp line...,...,175.2,92.0,18.824832,0,0,1,27,0.0,1.0,0.885
9386,9386,75,9,top,2,atBat,665489,Walk,walk,Matt Chapman walks.,...,,,,0,0,0,27,0.0,0.0,0.690


In [170]:
# Define the dictionary of statistics and aggregation functions
stat_dict = {
    'b1': 'mean',
    'b2': 'mean',
    'b3': 'mean',
    'hr': 'mean',
    'hbp': 'mean',
    'bb': 'mean',
    'so': 'mean',
    'fo': 'mean',
    'go': 'mean',
    'lo': 'mean',
    'po': 'mean',
    'iso': 'mean',
    'slg': 'mean',
    'woba': 'mean',
    'to_left': 'mean',
    'to_middle': 'mean',
    'to_right': 'mean',
    'hard_hit': 'mean',
    'totalDistance': 'max',
    'launchSpeed': 'max',
    'maxSpeed': 'max',
    'maxSpin': 'max',
    'ab': 'sum',
    'pa': 'sum'
}



def rolling_stats(stat_dict, window, position, suffix, all=False):
    if position == "batter":
        opp_hand = "pitchHand"
    else:
        opp_hand = "batSide"
    
    if all == False:
        # Use the rolling method to perform the aggregations
        vl_df = df.query(f'{opp_hand}== "L"').groupby(position).shift(1).rolling(window=window, min_periods=1).agg(stat_dict)

        # Add the suffix "_b" to the column names
        vl_df.columns = [col + suffix for col in vl_df.columns]

        vr_df = df.query(f'{opp_hand} == "R"').groupby(position).shift(1).rolling(window=window, min_periods=1).agg(stat_dict)

        # Add the suffix "_b" to the column names
        vr_df.columns = [col + suffix for col in vr_df.columns]

        pa_df = pd.concat([vl_df, vr_df], axis=0)
    else:
        # Use the rolling method to perform the aggregations
        pa_df = df.groupby(position).shift(1).rolling(window=window, min_periods=1).agg(stat_dict)

        # Add the suffix "_b" to the column names
        pa_df.columns = [col + suffix for col in pa_df.columns]


    
    return pa_df

batter_short = rolling_stats(stat_dict, 30, "batter", "_b")
batter_long = rolling_stats(stat_dict, 250, "batter", "_b_long")
pitcher_short = rolling_stats(stat_dict, 30, "pitcher", "_p")
pitcher_long = rolling_stats(stat_dict, 250, "pitcher", "_p_long")

batter_all = rolling_stats(stat_dict, 250, "batter", "_b_all", all=True)
pitcher_all = rolling_stats(stat_dict, 250, "pitcher", "_p_all", all=True)


merged_df = pd.concat([df, batter_short, batter_long, batter_all, pitcher_short, pitcher_long, pitcher_all], axis=1)

merged_df



Unnamed: 0,index,atBatIndex,inning,halfInning,outs,type,id,event,eventType,description,...,to_left_p_all,to_middle_p_all,to_right_p_all,hard_hit_p_all,totalDistance_p_all,launchSpeed_p_all,maxSpeed_p_all,maxSpin_p_all,ab_p_all,pa_p_all
0,0,0,1,top,0,atBat,660670,Single,single,Ronald Acuna Jr. singles on a sharp line driv...,...,,,,,,,,,,
1,1,1,1,top,1,atBat,660670,Strikeout,strikeout,Matt Olson strikes out swinging.,...,0.000000,0.000000,1.000000,1.000000,227.0,107.9,92.5,2148.0,1.0,1.0
2,2,2,1,top,1,atBat,663586,Walk,walk,Austin Riley walks.,...,0.000000,0.000000,0.500000,0.500000,227.0,107.9,93.0,2265.0,2.0,2.0
3,3,3,1,top,2,atBat,645277,Flyout,field_out,Ozzie Albies flies out sharply to right fielde...,...,0.000000,0.000000,0.333333,0.333333,227.0,107.9,93.0,2265.0,3.0,3.0
4,4,4,1,top,3,atBat,669221,Strikeout,strikeout,Sean Murphy strikes out swinging.,...,0.000000,0.000000,0.500000,0.500000,362.0,107.9,93.0,2265.0,4.0,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9383,9383,72,9,top,1,atBat,543807,Strikeout,strikeout,George Springer strikes out swinging.,...,0.232932,0.253012,0.220884,0.104418,439.0,111.1,98.9,2872.0,241.0,245.0
9384,9384,73,9,top,2,atBat,666182,Lineout,field_out,Bo Bichette lines out sharply to second basema...,...,0.232932,0.248996,0.220884,0.104418,439.0,111.1,98.9,2872.0,241.0,245.0
9385,9385,74,9,top,2,atBat,665489,Single,single,Vladimir Guerrero Jr. singles on a sharp line...,...,0.228916,0.253012,0.220884,0.108434,439.0,111.1,98.9,2872.0,241.0,245.0
9386,9386,75,9,top,2,atBat,665489,Walk,walk,Matt Chapman walks.,...,0.228916,0.248996,0.224900,0.112450,439.0,111.1,98.9,2872.0,241.0,245.0


In [171]:
for col in merged_df.columns:
    print(col)

index
atBatIndex
inning
halfInning
outs
type
id
event
eventType
description
rbi
awayScore
homeScore
batter
batterName
batSide
pitcher
pitcherName
pitchHand
postOnFirst
postOnSecond
postOnThird
start
end
movementReason
code
launchSpeeds
launchAngles
totalDistances
trajectories
hardness
coord
gamePk
weather
wind
venue
date
away_name
home_name
game_date
venue_id
game_type
year
eventsModel
temperature
windSpeed
windDirection
x_vect
y_vect
Cut
b1
b2
b3
bb
fo
go
hbp
hr
lo
po
so
venue_1
venue_2
venue_3
venue_4
venue_5
venue_7
venue_10
venue_12
venue_15
venue_17
venue_19
venue_22
venue_31
venue_32
venue_680
venue_2392
venue_2394
venue_2395
venue_2602
venue_2680
venue_2681
venue_2889
venue_3289
venue_3309
venue_3312
venue_3313
venue_4169
venue_4705
venue_5325
p_L
p_R
b_L
b_R
year_2023
preOnFirst
preOnSecond
preOnThird
onFirst
onSecond
onThird
top
pa
ab
hard_hit
maxSpeed
maxSpin
launchSpeed
launchAngle
totalDistance
x
y
spray_angle
to_left
to_middle
to_right
pitcher_outs
iso
slg
woba
b1_b
b2_b
b

In [None]:
# SQL stuff I don't need for now