In [1]:
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from pybaseball import statcast, cache
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
import joblib
import math
from pybaseball import statcast
import scipy.stats as stats
from catboost import Pool
import optuna
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score, precision_score
import sqlite3
cache.enable()

In [2]:
with sqlite3.connect("../../../Desktop/MLB Statcast.db") as conn:
    sc_23 = pd.read_sql_query("SELECT * FROM statcast_data_2023", conn)
    sc_22 = pd.read_sql_query("SELECT * FROM statcast_data_2022", conn)
    sc_21 = pd.read_sql_query("SELECT * FROM statcast_data_2021", conn)
    sc_20 = pd.read_sql_query("SELECT * FROM statcast_data_2020", conn)
conn.close()

In [3]:
total_sc = pd.concat([sc_23, sc_22, sc_21, sc_20])

In [4]:
features = ['balls', 'strikes', 'plate_x', 'plate_z', 'stand']
total_sc = total_sc[total_sc[features].notnull().all(axis=1)]

In [5]:
total_sc = total_sc[(total_sc['balls'] < 4) & (total_sc['strikes'] < 3)]
total_sc['stand'] = total_sc['stand'].replace({'R': 0, 'L': 1})

In [6]:
total_sc = total_sc[~total_sc['pitch_name'].isin(['Pitch Out', 'Eephus', 'Knuckleball'])]

In [7]:
fastballs = ['4-Seam Fastball', 'Sinker']
offspeed = ['Split-Finger', 'Changeup', 'Forkball']
breaking = ['Curveball', 'Slider', 'Cutter', 'Knuckle Curve', 'Sweeper', 'Screwball', 'Slow Curve']

In [8]:
total_sc['description'] = np.where(total_sc['description'] == 'hit_into_play', total_sc['events'], total_sc['description'])
field_outs = ['force_out', 'grounded_into_double_play', 'fielders_choice_out', 'fielders_choice', 'field_out', 'double_play', 'sac_fly', 'field_error', 'sac_fly_double_play', 'triple_play']
total_sc['description'] = total_sc['description'].replace(field_outs, 'field_out')

In [9]:
total_sc['whiff'] = total_sc['description'].isin(['swinging_strike', 'swinging_strike_blocked'])
total_sc['foul'] = total_sc['description'].isin(['foul', 'foul_tip'])
total_sc['in_play'] = total_sc['description'].isin(['single', 'double', 'triple', 'home_run', 'field_out'])
total_sc['swing'] = (total_sc['whiff'] | total_sc['foul'] | total_sc['in_play'])

total_sc['take'] = (~total_sc['swing'] & (total_sc['description'].isin(['hit_by_pitch', 'ball', 'called_strike', 'blocked_ball'])))
total_sc['hbp'] = total_sc['description'] == 'hit_by_pitch'
total_sc['ball'] = total_sc['description'].isin(['blocked_ball', 'ball'])
total_sc['strike'] = total_sc['description'] == 'called_strike'

total_sc['single'] = total_sc['description'] == 'single'
total_sc['double'] = total_sc['description'] == 'double'
total_sc['triple'] = total_sc['description'] == 'triple'
total_sc['home_run'] = total_sc['description'] == 'home_run'
total_sc['field_out'] = total_sc['description'] == 'field_out'

In [10]:
total_sc.loc[total_sc['swing'] & total_sc['foul'], 'type_swing'] = 'foul'
total_sc.loc[total_sc['swing'] & total_sc['in_play'], 'type_swing'] = 'in_play'
total_sc.loc[total_sc['swing'] & total_sc['whiff'], 'type_swing'] = 'whiff'

total_sc.loc[total_sc['take'] & total_sc['hbp'], 'type_take'] = 'hbp'
total_sc.loc[total_sc['take'] & total_sc['ball'], 'type_take'] = 'ball'
total_sc.loc[total_sc['take'] & total_sc['strike'], 'type_take'] = 'strike'

total_sc.loc[total_sc['in_play'] & total_sc['single'], 'type_in_play'] = 'single'
total_sc.loc[total_sc['in_play'] & total_sc['double'], 'type_in_play'] = 'double'
total_sc.loc[total_sc['in_play'] & total_sc['triple'], 'type_in_play'] = 'triple'
total_sc.loc[total_sc['in_play'] & total_sc['home_run'], 'type_in_play'] = 'home_run'
total_sc.loc[total_sc['in_play'] & total_sc['field_out'], 'type_in_play'] = 'field_out'

In [11]:
total_sc = total_sc.dropna(subset=['swing', 'take'])
total_sc = total_sc[total_sc['swing'] != total_sc['take']]

In [12]:
from sklearn.preprocessing import LabelEncoder
from hyperopt import hp, fmin, tpe

def objective(space, X_train, X_test, y_train, y_test):
    model = XGBClassifier(
        max_depth=int(space['max_depth']),
        gamma=space['gamma'],
        reg_alpha=int(space['reg_alpha']),
        reg_lambda=space['reg_lambda'],
        colsample_bytree=space['colsample_bytree'],
        min_child_weight=int(space['min_child_weight']),
        n_estimators=int(space['n_estimators']))
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return -accuracy

le_swing = LabelEncoder()

swing_df = total_sc[(total_sc['swing']) & total_sc['pitch_name'].isin(fastballs)]
swing_X = swing_df[features]
swing_y = le_swing.fit_transform(swing_df['type_swing'])

X_train, X_test, y_train, y_test = train_test_split(swing_X, swing_y, test_size=0.2, random_state=np.random.seed())

space = {
    'max_depth': hp.quniform("max_depth", 3, 18, 1),
    'gamma': hp.uniform('gamma', 1, 9),
    'reg_alpha': hp.quniform('reg_alpha', 40, 180, 1),
    'reg_lambda': hp.uniform('reg_lambda', 0, 1),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
    'min_child_weight': hp.quniform('min_child_weight', 0, 10, 1),
    'n_estimators': hp.quniform('n_estimators', 50, 200, 1),
    'seed': 12
}

best_params = fmin(fn=lambda params: objective(params, X_train, X_test, y_train, y_test),
                   space=space,
                   algo=tpe.suggest,
                   max_evals=10)
best_params = {
    'max_depth': int(best_params['max_depth']),
    'gamma': float(best_params['gamma']),
    'reg_alpha': int(best_params['reg_alpha']),
    'reg_lambda': float(best_params['reg_lambda']),
    'colsample_bytree': float(best_params['colsample_bytree']),
    'min_child_weight': int(best_params['min_child_weight']),
    'n_estimators': int(best_params['n_estimators'])
}
print("Best parameters:", best_params)

swing_model = XGBClassifier(**best_params)
swing_model.fit(X_train, y_train)

100%|███████████████████████████████████████████████| 10/10 [04:01<00:00, 24.16s/trial, best loss: -0.4912836486558236]
Best parameters: {'max_depth': 15, 'gamma': 5.424563622698727, 'reg_alpha': 107, 'reg_lambda': 0.7452469022445155, 'colsample_bytree': 0.7071394270850866, 'min_child_weight': 2, 'n_estimators': 108}


In [13]:
swing_df = total_sc[(total_sc['swing']) & total_sc['pitch_name'].isin(breaking)]
swing_X = swing_df[features]
swing_y = le_swing.fit_transform(swing_df['type_swing'])

X_train, X_test, y_train, y_test = train_test_split(swing_X, swing_y, test_size=0.2, random_state=np.random.seed())

best_params = fmin(fn=lambda params: objective(params, X_train, X_test, y_train, y_test),
                   space=space,
                   algo=tpe.suggest,
                   max_evals=10)
best_params = {
    'max_depth': int(best_params['max_depth']),
    'gamma': float(best_params['gamma']),
    'reg_alpha': int(best_params['reg_alpha']),
    'reg_lambda': float(best_params['reg_lambda']),
    'colsample_bytree': float(best_params['colsample_bytree']),
    'min_child_weight': int(best_params['min_child_weight']),
    'n_estimators': int(best_params['n_estimators'])
}
print(best_params)

swing_bb_model = XGBClassifier(**best_params)
swing_bb_model.fit(X_train, y_train)

100%|███████████████████████████████████████████████| 10/10 [02:16<00:00, 13.67s/trial, best loss: -0.5192806909897578]
{'max_depth': 11, 'gamma': 6.5705969024540565, 'reg_alpha': 80, 'reg_lambda': 0.24316664504211993, 'colsample_bytree': 0.9192580228305194, 'min_child_weight': 0, 'n_estimators': 106}


In [14]:
swing_df = total_sc[(total_sc['swing']) & total_sc['pitch_name'].isin(offspeed)]
swing_X = swing_df[features]
swing_y = le_swing.fit_transform(swing_df['type_swing'])

X_train, X_test, y_train, y_test = train_test_split(swing_X, swing_y, test_size=0.2, random_state=np.random.seed())
best_params = fmin(fn=lambda params: objective(params, X_train, X_test, y_train, y_test),
                   space=space,
                   algo=tpe.suggest,
                   max_evals=10)
best_params = {
    'max_depth': int(best_params['max_depth']),
    'gamma': float(best_params['gamma']),
    'reg_alpha': int(best_params['reg_alpha']),
    'reg_lambda': float(best_params['reg_lambda']),
    'colsample_bytree': float(best_params['colsample_bytree']),
    'min_child_weight': int(best_params['min_child_weight']),
    'n_estimators': int(best_params['n_estimators'])
}
print(best_params)

swing_offs_model = XGBClassifier(**best_params)
swing_offs_model.fit(X_train, y_train)

100%|██████████████████████████████████████████████| 10/10 [01:06<00:00,  6.62s/trial, best loss: -0.49374147973726606]
{'max_depth': 14, 'gamma': 5.721431230064632, 'reg_alpha': 41, 'reg_lambda': 0.22318413890833544, 'colsample_bytree': 0.788048803788258, 'min_child_weight': 9, 'n_estimators': 59}


In [15]:
le_take = LabelEncoder()

take_total_sc = total_sc[(total_sc['take']) & total_sc['pitch_name'].isin(fastballs)]
take_X = take_total_sc[features]
take_y = le_take.fit_transform(take_total_sc['type_take'])

X_train, X_test, y_train, y_test = train_test_split(take_X, take_y, test_size=0.2, random_state=np.random.seed())
best_params = fmin(fn=lambda params: objective(params, X_train, X_test, y_train, y_test),
                   space=space,
                   algo=tpe.suggest,
                   max_evals=10)
best_params = {
    'max_depth': int(best_params['max_depth']),
    'gamma': float(best_params['gamma']),
    'reg_alpha': int(best_params['reg_alpha']),
    'reg_lambda': float(best_params['reg_lambda']),
    'colsample_bytree': float(best_params['colsample_bytree']),
    'min_child_weight': int(best_params['min_child_weight']),
    'n_estimators': int(best_params['n_estimators'])
}
print(best_params)

take_model = XGBClassifier(**best_params)
take_model.fit(X_train, y_train)

100%|███████████████████████████████████████████████| 10/10 [04:03<00:00, 24.31s/trial, best loss: -0.9203182547927095]
{'max_depth': 6, 'gamma': 6.912494862635312, 'reg_alpha': 147, 'reg_lambda': 0.34762776386011984, 'colsample_bytree': 0.8214588263884712, 'min_child_weight': 9, 'n_estimators': 137}


In [16]:
take_total_sc = total_sc[(total_sc['take']) & total_sc['pitch_name'].isin(breaking)]
take_X = take_total_sc[features]
take_y = le_take.fit_transform(take_total_sc['type_take'])

X_train, X_test, y_train, y_test = train_test_split(take_X, take_y, test_size=0.2, random_state=np.random.seed())
best_params = fmin(fn=lambda params: objective(params, X_train, X_test, y_train, y_test),
                   space=space,
                   algo=tpe.suggest,
                   max_evals=10)
best_params = {
    'max_depth': int(best_params['max_depth']),
    'gamma': float(best_params['gamma']),
    'reg_alpha': int(best_params['reg_alpha']),
    'reg_lambda': float(best_params['reg_lambda']),
    'colsample_bytree': float(best_params['colsample_bytree']),
    'min_child_weight': int(best_params['min_child_weight']),
    'n_estimators': int(best_params['n_estimators'])
}
print(best_params)

take_bb_model = XGBClassifier(**best_params)
take_bb_model.fit(X_train, y_train)

100%|███████████████████████████████████████████████| 10/10 [02:53<00:00, 17.34s/trial, best loss: -0.9342219854304772]
{'max_depth': 8, 'gamma': 1.5492659079086417, 'reg_alpha': 96, 'reg_lambda': 0.7230987087406487, 'colsample_bytree': 0.5422964518276985, 'min_child_weight': 8, 'n_estimators': 151}


In [17]:
take_total_sc = total_sc[(total_sc['take']) & total_sc['pitch_name'].isin(offspeed)]
take_X = take_total_sc[features]
take_y = le_take.fit_transform(take_total_sc['type_take'])

X_train, X_test, y_train, y_test = train_test_split(take_X, take_y, test_size=0.2, random_state=np.random.seed())
best_params = fmin(fn=lambda params: objective(params, X_train, X_test, y_train, y_test),
                   space=space,
                   algo=tpe.suggest,
                   max_evals=10)
best_params = {
    'max_depth': int(best_params['max_depth']),
    'gamma': float(best_params['gamma']),
    'reg_alpha': int(best_params['reg_alpha']),
    'reg_lambda': float(best_params['reg_lambda']),
    'colsample_bytree': float(best_params['colsample_bytree']),
    'min_child_weight': int(best_params['min_child_weight']),
    'n_estimators': int(best_params['n_estimators'])
}
print(best_params)

take_offs_model = XGBClassifier(**best_params)
take_offs_model.fit(X_train, y_train)

100%|███████████████████████████████████████████████| 10/10 [00:30<00:00,  3.02s/trial, best loss: -0.9498587570621468]
{'max_depth': 8, 'gamma': 5.890926774193238, 'reg_alpha': 81, 'reg_lambda': 0.06258933384993315, 'colsample_bytree': 0.5105303544633344, 'min_child_weight': 4, 'n_estimators': 150}


In [18]:
le_woba = LabelEncoder()

woba_total_sc = total_sc[(total_sc['in_play']) & total_sc['pitch_name'].isin(fastballs)]
woba_X = woba_total_sc[features]
woba_y = le_woba.fit_transform(woba_total_sc['type_in_play'])

X_train, X_test, y_train, y_test = train_test_split(woba_X, woba_y, test_size=0.2, random_state=np.random.seed())
best_params = fmin(fn=lambda params: objective(params, X_train, X_test, y_train, y_test),
                   space=space,
                   algo=tpe.suggest,
                   max_evals=10)
best_params = {
    'max_depth': int(best_params['max_depth']),
    'gamma': float(best_params['gamma']),
    'reg_alpha': int(best_params['reg_alpha']),
    'reg_lambda': float(best_params['reg_lambda']),
    'colsample_bytree': float(best_params['colsample_bytree']),
    'min_child_weight': int(best_params['min_child_weight']),
    'n_estimators': int(best_params['n_estimators'])
}
print(best_params)

woba_model = XGBClassifier(**best_params)
woba_model.fit(X_train, y_train)

100%|███████████████████████████████████████████████| 10/10 [01:56<00:00, 11.62s/trial, best loss: -0.6631721580373698]
{'max_depth': 17, 'gamma': 2.7242826556954567, 'reg_alpha': 60, 'reg_lambda': 0.6857435506140654, 'colsample_bytree': 0.7052463238209812, 'min_child_weight': 8, 'n_estimators': 141}


In [19]:
woba_total_sc = total_sc[(total_sc['in_play']) & total_sc['pitch_name'].isin(breaking)]
woba_X = woba_total_sc[features]
woba_y = le_woba.fit_transform(woba_total_sc['type_in_play'])

X_train, X_test, y_train, y_test = train_test_split(woba_X, woba_y, test_size=0.2, random_state=np.random.seed())
best_params = fmin(fn=lambda params: objective(params, X_train, X_test, y_train, y_test),
                   space=space,
                   algo=tpe.suggest,
                   max_evals=10)
best_params = {
    'max_depth': int(best_params['max_depth']),
    'gamma': float(best_params['gamma']),
    'reg_alpha': int(best_params['reg_alpha']),
    'reg_lambda': float(best_params['reg_lambda']),
    'colsample_bytree': float(best_params['colsample_bytree']),
    'min_child_weight': int(best_params['min_child_weight']),
    'n_estimators': int(best_params['n_estimators'])
}
print(best_params)

woba_bb_model = XGBClassifier(**best_params)
woba_bb_model.fit(X_train, y_train)

100%|███████████████████████████████████████████████| 10/10 [01:08<00:00,  6.81s/trial, best loss: -0.6833696688922196]
{'max_depth': 18, 'gamma': 7.550591687778578, 'reg_alpha': 118, 'reg_lambda': 0.7417655014150599, 'colsample_bytree': 0.8382330485997989, 'min_child_weight': 0, 'n_estimators': 92}


In [20]:
woba_total_sc = total_sc[(total_sc['in_play']) & total_sc['pitch_name'].isin(offspeed)]
woba_X = woba_total_sc[features]
woba_y = le_woba.fit_transform(woba_total_sc['type_in_play'])

X_train, X_test, y_train, y_test = train_test_split(woba_X, woba_y, test_size=0.2, random_state=np.random.seed())
best_params = fmin(fn=lambda params: objective(params, X_train, X_test, y_train, y_test),
                   space=space,
                   algo=tpe.suggest,
                   max_evals=10)
best_params = {
    'max_depth': int(best_params['max_depth']),
    'gamma': float(best_params['gamma']),
    'reg_alpha': int(best_params['reg_alpha']),
    'reg_lambda': float(best_params['reg_lambda']),
    'colsample_bytree': float(best_params['colsample_bytree']),
    'min_child_weight': int(best_params['min_child_weight']),
    'n_estimators': int(best_params['n_estimators'])
}
print(best_params)

woba_offs_model = XGBClassifier(**best_params)
woba_offs_model.fit(X_train, y_train)

100%|███████████████████████████████████████████████| 10/10 [00:23<00:00,  2.32s/trial, best loss: -0.7054473749075671]
{'max_depth': 7, 'gamma': 4.895435253174544, 'reg_alpha': 142, 'reg_lambda': 0.23821374885760582, 'colsample_bytree': 0.8483208812277623, 'min_child_weight': 1, 'n_estimators': 125}


In [42]:
def will_swing_objective(trial, will_swing_X, will_swing_y):
    X_train, X_test, y_train, y_test = train_test_split(will_swing_X, will_swing_y, test_size=0.2, random_state=np.random.seed())
    
    params = {
        "iterations": trial.suggest_int("iterations", 1000, 2000),
        "learning_rate": trial.suggest_float("learning_rate", 0.001, 0.1, log=True),
        "depth": trial.suggest_int("depth", 1, 10),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", .05, 1.0),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100)
    }

    model = CatBoostClassifier(**params, silent=True, thread_count=-1)
    model.fit(X_train, y_train)
    y_pred = model.predict_proba(X_test)[:, 1]  # Probability of class 1 (swing)
    logloss = log_loss(y_test, y_pred)
    return logloss

will_swing_total_sc = total_sc[total_sc['pitch_name'].isin(fastballs)]
will_swing_X = will_swing_total_sc[features]
will_swing_y = will_swing_total_sc['swing']

study = optuna.create_study(direction='minimize')
study.optimize(lambda trial: will_swing_objective(trial, will_swing_X, will_swing_y), n_trials=5)
best_params = study.best_params
best_ll = study.best_value

print('Best hyperparameters:', best_params)
print('Best logloss:', best_ll)
X_train, X_test, y_train, y_test = train_test_split(will_swing_X, will_swing_y, test_size=0.2, random_state=np.random.seed())

will_swing_model = CatBoostClassifier(**best_params, silent=True)
will_swing_model.fit(X_train, y_train)

[I 2023-12-01 20:19:36,519] A new study created in memory with name: no-name-e237afa0-ee73-43d6-8a75-c6a4f8646c63
[I 2023-12-01 20:20:20,417] Trial 0 finished with value: 0.5031226852127366 and parameters: {'iterations': 1685, 'learning_rate': 0.047036812963948134, 'depth': 5, 'colsample_bylevel': 0.993397299242294, 'min_data_in_leaf': 88}. Best is trial 0 with value: 0.5031226852127366.
[I 2023-12-01 20:21:04,399] Trial 1 finished with value: 0.5033653685645395 and parameters: {'iterations': 1374, 'learning_rate': 0.08862424278845682, 'depth': 7, 'colsample_bylevel': 0.6962916683186798, 'min_data_in_leaf': 53}. Best is trial 0 with value: 0.5031226852127366.
[I 2023-12-01 20:21:33,168] Trial 2 finished with value: 0.511623433285111 and parameters: {'iterations': 1340, 'learning_rate': 0.008140142789361504, 'depth': 3, 'colsample_bylevel': 0.7131280430909304, 'min_data_in_leaf': 35}. Best is trial 0 with value: 0.5031226852127366.
[I 2023-12-01 20:22:17,025] Trial 3 finished with value

Best hyperparameters: {'iterations': 1685, 'learning_rate': 0.047036812963948134, 'depth': 5, 'colsample_bylevel': 0.993397299242294, 'min_data_in_leaf': 88}
Best logloss: 0.5031226852127366


<catboost.core.CatBoostClassifier at 0x1f9c8be0310>

In [43]:
will_swing_total_sc = total_sc[total_sc['pitch_name'].isin(breaking)]
will_swing_X = will_swing_total_sc[features]
will_swing_y = will_swing_total_sc['swing']

study = optuna.create_study(direction='minimize')
study.optimize(lambda trial: will_swing_objective(trial, will_swing_X, will_swing_y), n_trials=5)
best_params = study.best_params
best_ll = study.best_value

print('Best hyperparameters:', best_params)
print('Best logloss:', best_ll)
X_train, X_test, y_train, y_test = train_test_split(will_swing_X, will_swing_y, test_size=0.2, random_state=np.random.seed())

will_swing_bb_model = CatBoostClassifier(**best_params, silent=True)
will_swing_bb_model.fit(X_train, y_train)

[I 2023-12-01 20:23:26,642] A new study created in memory with name: no-name-758a1553-906f-43cc-b1be-794dd03056b0
[I 2023-12-01 20:25:15,952] Trial 0 finished with value: 0.5091861890075141 and parameters: {'iterations': 1762, 'learning_rate': 0.0031742086132378385, 'depth': 7, 'colsample_bylevel': 0.5034174378807437, 'min_data_in_leaf': 100}. Best is trial 0 with value: 0.5091861890075141.
[I 2023-12-01 20:26:54,926] Trial 1 finished with value: 0.5036150453948778 and parameters: {'iterations': 1614, 'learning_rate': 0.028081711849282594, 'depth': 7, 'colsample_bylevel': 0.9152329795743777, 'min_data_in_leaf': 55}. Best is trial 1 with value: 0.5036150453948778.
[I 2023-12-01 20:29:10,018] Trial 2 finished with value: 0.5030371683387838 and parameters: {'iterations': 1706, 'learning_rate': 0.008188192147175841, 'depth': 10, 'colsample_bylevel': 0.6927768619717879, 'min_data_in_leaf': 87}. Best is trial 2 with value: 0.5030371683387838.
[I 2023-12-01 20:30:04,310] Trial 3 finished with

Best hyperparameters: {'iterations': 1706, 'learning_rate': 0.008188192147175841, 'depth': 10, 'colsample_bylevel': 0.6927768619717879, 'min_data_in_leaf': 87}
Best logloss: 0.5030371683387838


<catboost.core.CatBoostClassifier at 0x1f9ce34a880>

In [44]:
will_swing_total_sc = total_sc[total_sc['pitch_name'].isin(offspeed)]
will_swing_X = will_swing_total_sc[features]
will_swing_y = will_swing_total_sc['swing']

study = optuna.create_study(direction='minimize')
study.optimize(lambda trial: will_swing_objective(trial, will_swing_X, will_swing_y), n_trials=5)
best_params = study.best_params
best_ll = study.best_value

print('Best hyperparameters:', best_params)
print('Best logloss:', best_ll)
X_train, X_test, y_train, y_test = train_test_split(will_swing_X, will_swing_y, test_size=0.2, random_state=np.random.seed())

will_swing_offs_model = CatBoostClassifier(**best_params, silent=True)
will_swing_offs_model.fit(X_train, y_train)

[I 2023-12-01 20:33:54,698] A new study created in memory with name: no-name-ece91655-28e4-42b6-8560-808435673ea6
[I 2023-12-01 20:34:17,226] Trial 0 finished with value: 0.46917725694132306 and parameters: {'iterations': 1336, 'learning_rate': 0.02421773490077291, 'depth': 3, 'colsample_bylevel': 0.3676030223978894, 'min_data_in_leaf': 86}. Best is trial 0 with value: 0.46917725694132306.
[I 2023-12-01 20:34:46,800] Trial 1 finished with value: 0.4675277909334208 and parameters: {'iterations': 1562, 'learning_rate': 0.019461056258408386, 'depth': 4, 'colsample_bylevel': 0.5802627563038034, 'min_data_in_leaf': 77}. Best is trial 1 with value: 0.4675277909334208.
[I 2023-12-01 20:35:36,511] Trial 2 finished with value: 0.4837997395680802 and parameters: {'iterations': 1986, 'learning_rate': 0.001296133070148001, 'depth': 7, 'colsample_bylevel': 0.5834673385000408, 'min_data_in_leaf': 19}. Best is trial 1 with value: 0.4675277909334208.
[I 2023-12-01 20:36:07,244] Trial 3 finished with v

Best hyperparameters: {'iterations': 1562, 'learning_rate': 0.019461056258408386, 'depth': 4, 'colsample_bylevel': 0.5802627563038034, 'min_data_in_leaf': 77}
Best logloss: 0.4675277909334208


<catboost.core.CatBoostClassifier at 0x1f9d4eccb20>

In [45]:
fl = pd.read_csv('../AutomatedPitchTagging/AutoTaggedCSVs/2023 Frontier League Autotagged.csv')

In [46]:
fl = fl.rename(columns={
    'PitcherThrows': 'p_throws',
    'HorzBreak': 'pfx_x',
    'InducedVertBreak': 'pfx_z',
    'yt_Efficiency': 'active_spin',
    'RelSpeed': 'release_speed',
    'Extension': 'release_extension',
    'SpinAxis': 'spin_axis',
    'RelSide': 'release_pos_x',
    'RelHeight': 'release_pos_z',
    'Pitcher': 'player_name',
    'AutoPitchType': 'pitch_name',
    'Balls': 'balls', 
    'Strikes': 'strikes',
    'PlateLocSide': 'plate_x',
    'PlateLocHeight': 'plate_z',
    'BatterSide': 'stand',
})

fl = fl[(fl['balls'] < 4) & (fl['strikes'] < 3)]
fl['stand'] = fl['stand'].replace({'Right': 0, 'Left': 1, 'Switch': np.where(('p_throws' == 'Right'), 1, 0)})

In [47]:
fastballs = ['Fastball', 'Sinker']
breaking = ['Curveball', 'Slider', 'Cutter']
offspeed = ['Splitter', 'Changeup']

In [48]:
fastball_df = fl[fl['pitch_name'].isin(fastballs)]
bb_df = fl[fl['pitch_name'].isin(breaking)]
offs_df = fl[fl['pitch_name'].isin(offspeed)]

In [49]:
values = {
    'home_run': 1.374328827219,
    'triple': 1.05755624961515,
    'double': 0.766083123898271,
    'single': 0.467292970729251,
    'ball': 0.0636883289483747,
    'hit_by_pitch': 0.0636883289483747,
    'blocked_ball': 0.0636883289483747,
    'foul': -0.0380502742575014,
    'foul_tip': -0.0380502742575014,
    'bunt_foul': -0.0380502742575014,
    'bunt_foul_tip': -0.0380502742575014,
    'called_strike': -0.065092516089806,
    'swinging_strike': -0.118124935770601,
    'swinging_strike_blocked': -0.118124935770601,
    'force_out': -0.1955687665555,
    'grounded_into_double_play': -0.1955687665555,
    'fielders_choice_out': -0.1955687665555,
    'fielders_choice': -0.1955687665555,
    'field_out': -0.1955687665555,
    'double_play': -0.1955687665555,
    'sac_fly': -0.236889645519856,
    'field_error': -0.236889645519856,
    'catcher_interf': -0.789788814378052,
    'sac_fly_double_play': -0.789788814378052,
    'triple_play': -0.789788814378052
}

In [50]:
swing_probs = swing_model.predict_proba(fastball_df[features])
take_probs = take_model.predict_proba(fastball_df[features])
ws_probs = will_swing_model.predict_proba(fastball_df[features])
woba_probs = woba_model.predict_proba(fastball_df[features])

fastball_df['whiff_prob'] = swing_probs[:, list(le_swing.inverse_transform(swing_model.classes_)).index('whiff')]
fastball_df['in_play_prob'] = swing_probs[:, list(le_swing.inverse_transform(swing_model.classes_)).index('in_play')]
fastball_df['foul_prob'] = swing_probs[:, list(le_swing.inverse_transform(swing_model.classes_)).index('foul')]

fastball_df['strike_prob'] = take_probs[:, list(le_take.inverse_transform(take_model.classes_)).index('strike')]
fastball_df['ball_prob'] = take_probs[:, list(le_take.inverse_transform(take_model.classes_)).index('ball')]
fastball_df['hbp_prob'] = take_probs[:, list(le_take.inverse_transform(take_model.classes_)).index('hbp')]

fastball_df['single_prob'] = woba_probs[:, list(le_woba.inverse_transform(woba_model.classes_)).index('single')]
fastball_df['double_prob'] = woba_probs[:, list(le_woba.inverse_transform(woba_model.classes_)).index('double')]
fastball_df['triple_prob'] = woba_probs[:, list(le_woba.inverse_transform(woba_model.classes_)).index('triple')]
fastball_df['hr_prob'] = woba_probs[:, list(le_woba.inverse_transform(woba_model.classes_)).index('home_run')]
fastball_df['fo_prob'] = woba_probs[:, list(le_woba.inverse_transform(woba_model.classes_)).index('field_out')]
fastball_df['xwOBAcon'] = values['single'] * fastball_df['single_prob'] + values['double'] * fastball_df['double_prob'] + values['triple'] * fastball_df['triple_prob'] + values['home_run'] * fastball_df['hr_prob'] + values['field_out'] * fastball_df['fo_prob']

fastball_df['swing_prob'] = ws_probs[:, list(will_swing_model.classes_).index('True')]
fastball_df['take_prob'] = 1 - fastball_df['swing_prob'] 

fastball_df['val_swing'] = values['swinging_strike'] * fastball_df['whiff_prob'] + values['foul'] * fastball_df['foul_prob']
+ fastball_df['xwOBAcon'] * fastball_df['in_play_prob']
fastball_df['val_take'] = values['called_strike'] * fastball_df['strike_prob'] + values['ball'] * fastball_df['ball_prob'] + values['hit_by_pitch'] * fastball_df['hbp_prob']
fastball_df['xRV'] = fastball_df['val_swing'] * fastball_df['swing_prob'] + fastball_df['val_take'] * fastball_df['take_prob']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fastball_df['whiff_prob'] = swing_probs[:, list(le_swing.inverse_transform(swing_model.classes_)).index('whiff')]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fastball_df['in_play_prob'] = swing_probs[:, list(le_swing.inverse_transform(swing_model.classes_)).index('in_play')]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

In [51]:
swing_bb_probs = swing_bb_model.predict_proba(bb_df[features])
take_bb_probs = take_bb_model.predict_proba(bb_df[features])
ws_bb_probs = will_swing_bb_model.predict_proba(bb_df[features])
woba_bb_probs = woba_bb_model.predict_proba(bb_df[features])

bb_df['whiff_prob'] = swing_bb_probs[:, list(le_swing.inverse_transform(swing_bb_model.classes_)).index('whiff')]
bb_df['in_play_prob'] = swing_bb_probs[:, list(le_swing.inverse_transform(swing_bb_model.classes_)).index('in_play')]
bb_df['foul_prob'] = swing_bb_probs[:, list(le_swing.inverse_transform(swing_bb_model.classes_)).index('foul')]

bb_df['strike_prob'] = take_bb_probs[:, list(le_take.inverse_transform(take_bb_model.classes_)).index('strike')]
bb_df['ball_prob'] = take_bb_probs[:, list(le_take.inverse_transform(take_bb_model.classes_)).index('ball')]
bb_df['hbp_prob'] = take_bb_probs[:, list(le_take.inverse_transform(take_bb_model.classes_)).index('hbp')]

bb_df['single_prob'] = woba_bb_probs[:, list(le_woba.inverse_transform(woba_bb_model.classes_)).index('single')]
bb_df['double_prob'] = woba_bb_probs[:, list(le_woba.inverse_transform(woba_bb_model.classes_)).index('double')]
bb_df['triple_prob'] = woba_bb_probs[:, list(le_woba.inverse_transform(woba_bb_model.classes_)).index('triple')]
bb_df['hr_prob'] = woba_bb_probs[:, list(le_woba.inverse_transform(woba_bb_model.classes_)).index('home_run')]
bb_df['fo_prob'] = woba_bb_probs[:, list(le_woba.inverse_transform(woba_bb_model.classes_)).index('field_out')]
bb_df['xwOBAcon'] = values['single'] * bb_df['single_prob'] + values['double'] * bb_df['double_prob'] + values['triple'] * bb_df['triple_prob'] + values['home_run'] * bb_df['hr_prob'] + values['field_out'] * bb_df['fo_prob']

bb_df['swing_prob'] = ws_bb_probs[:, list(will_swing_bb_model.classes_).index('True')]
bb_df['take_prob'] = 1 - bb_df['swing_prob'] 

bb_df['val_swing'] = values['swinging_strike'] * bb_df['whiff_prob'] + values['foul'] * bb_df['foul_prob']
+ bb_df['xwOBAcon'] * bb_df['in_play_prob']
bb_df['val_take'] = values['called_strike'] * bb_df['strike_prob'] + values['ball'] * bb_df['ball_prob'] + values['hit_by_pitch'] * bb_df['hbp_prob']
bb_df['xRV'] = bb_df['val_swing'] * bb_df['swing_prob'] + bb_df['val_take'] * bb_df['take_prob']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bb_df['whiff_prob'] = swing_bb_probs[:, list(le_swing.inverse_transform(swing_bb_model.classes_)).index('whiff')]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bb_df['in_play_prob'] = swing_bb_probs[:, list(le_swing.inverse_transform(swing_bb_model.classes_)).index('in_play')]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

In [52]:
swing_offs_probs = swing_offs_model.predict_proba(offs_df[features])
take_offs_probs = take_offs_model.predict_proba(offs_df[features])
ws_offs_probs = will_swing_offs_model.predict_proba(offs_df[features])
woba_offs_probs = woba_offs_model.predict_proba(offs_df[features])

offs_df['whiff_prob'] = swing_offs_probs[:, list(le_swing.inverse_transform(swing_offs_model.classes_)).index('whiff')]
offs_df['in_play_prob'] = swing_offs_probs[:, list(le_swing.inverse_transform(swing_offs_model.classes_)).index('in_play')]
offs_df['foul_prob'] = swing_offs_probs[:, list(le_swing.inverse_transform(swing_offs_model.classes_)).index('foul')]

offs_df['strike_prob'] = take_offs_probs[:, list(le_take.inverse_transform(take_offs_model.classes_)).index('strike')]
offs_df['ball_prob'] = take_offs_probs[:, list(le_take.inverse_transform(take_offs_model.classes_)).index('ball')]
offs_df['hbp_prob'] = take_offs_probs[:, list(le_take.inverse_transform(take_offs_model.classes_)).index('hbp')]

offs_df['single_prob'] = woba_offs_probs[:, list(le_woba.inverse_transform(woba_offs_model.classes_)).index('single')]
offs_df['double_prob'] = woba_offs_probs[:, list(le_woba.inverse_transform(woba_offs_model.classes_)).index('double')]
offs_df['triple_prob'] = woba_offs_probs[:, list(le_woba.inverse_transform(woba_offs_model.classes_)).index('triple')]
offs_df['hr_prob'] = woba_offs_probs[:, list(le_woba.inverse_transform(woba_offs_model.classes_)).index('home_run')]
offs_df['fo_prob'] = woba_offs_probs[:, list(le_woba.inverse_transform(woba_offs_model.classes_)).index('field_out')]
offs_df['xwOBAcon'] = values['single'] * offs_df['single_prob'] + values['double'] * offs_df['double_prob'] + values['triple'] * offs_df['triple_prob'] + values['home_run'] * offs_df['hr_prob'] + values['field_out'] * offs_df['fo_prob']

offs_df['swing_prob'] = ws_offs_probs[:, list(will_swing_offs_model.classes_).index('True')]
offs_df['take_prob'] = 1 - offs_df['swing_prob'] 

offs_df['val_swing'] = values['swinging_strike'] * offs_df['whiff_prob'] + values['foul'] * offs_df['foul_prob']
+ offs_df['xwOBAcon'] * offs_df['in_play_prob']
offs_df['val_take'] = values['called_strike'] * offs_df['strike_prob'] + values['ball'] * offs_df['ball_prob'] + values['hit_by_pitch'] * offs_df['hbp_prob']
offs_df['xRV'] = offs_df['val_swing'] * offs_df['swing_prob'] + offs_df['val_take'] * offs_df['take_prob']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  offs_df['whiff_prob'] = swing_offs_probs[:, list(le_swing.inverse_transform(swing_offs_model.classes_)).index('whiff')]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  offs_df['in_play_prob'] = swing_offs_probs[:, list(le_swing.inverse_transform(swing_offs_model.classes_)).index('in_play')]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing

In [53]:
total_df = pd.concat([fastball_df, bb_df, offs_df])

In [54]:
def calculate_location_plus(pitch_sc, min_num_pitches=100):
    agg_location_plus = pitch_sc.groupby(['player_name', 'pitch_name']).agg(
        mean_xrv=('xRV', 'mean'),
        num_pitches=('player_name', 'count'),
        velo=('release_speed', 'mean'),
        vert_break=('pfx_z', 'mean'),
        horz_break=('pfx_x', 'mean')
    ).reset_index()
    
    agg_location_plus['xrv_100'] = agg_location_plus['mean_xrv'] * 100
    agg_location_plus['xrv_100_adj'] = abs(agg_location_plus['xrv_100'] - agg_location_plus['xrv_100'].max())
    agg_location_plus['location_plus'] = (agg_location_plus['xrv_100_adj'] / agg_location_plus['xrv_100_adj'].mean()) * 100
    agg_location_plus = agg_location_plus.sort_values('location_plus', ascending=False)

    agg_location_plus = agg_location_plus[agg_location_plus['num_pitches'] > min_num_pitches]

    return agg_location_plus

def calculate_agg_location_plus(pitch_sc, min_num_pitches=100):
    agg_location_plus = pitch_sc.groupby(['player_name']).agg(
        mean_xrv=('xRV', 'mean'),  
        swing_prob=('swing_prob', 'mean'),
        whiff_prob=('whiff_prob', 'mean'),
        take_prob=('take_prob', 'mean'),
        xwobacon=('xwOBAcon', 'mean'),
        fo_prob=('fo_prob', 'mean'),
        num_pitches=('player_name', 'count'),
        velo=('release_speed', 'mean'),
        vert_break=('pfx_z', 'mean'),
        horz_break=('pfx_x', 'mean')
    ).reset_index()

    agg_location_plus['xrv_100'] = agg_location_plus['mean_xrv'] * 100
    agg_location_plus['xrv_100_adj'] = abs(agg_location_plus['xrv_100'] - agg_location_plus['xrv_100'].max())
    agg_location_plus['location_plus'] = (agg_location_plus['xrv_100_adj'] / agg_location_plus['xrv_100_adj'].mean()) * 100
    agg_location_plus = agg_location_plus.sort_values('location_plus', ascending=False)

    agg_location_plus = agg_location_plus[agg_location_plus['num_pitches'] > min_num_pitches]

    return agg_location_plus

agg_total = calculate_agg_location_plus(total_df, 500)
agg_fb = calculate_location_plus(fastball_df, 50)
agg_bb = calculate_location_plus(bb_df, 50)
agg_offs = calculate_location_plus(offs_df, 50)

In [55]:
agg_total

Unnamed: 0,player_name,mean_xrv,swing_prob,whiff_prob,take_prob,xwobacon,fo_prob,num_pitches,velo,vert_break,horz_break,xrv_100,xrv_100_adj,location_plus
270,Kenny Mathews,-0.010960,0.503924,0.250508,0.496076,0.077370,0.681668,575,79.074236,10.711522,-3.908572,-1.095999,7.283319,116.396228
277,Kobe Foster,-0.009914,0.494634,0.270904,0.505366,0.077127,0.682981,1074,82.343265,8.873294,-2.095920,-0.991365,7.178685,114.724044
104,Chris Burica,-0.009132,0.478539,0.298235,0.521461,0.078110,0.683926,994,79.314711,7.997518,0.056050,-0.913186,7.100506,113.474657
374,Ruben Ramirez,-0.008922,0.476404,0.296594,0.523596,0.079062,0.680238,1330,85.815992,6.800940,2.295719,-0.892177,7.079497,113.138911
190,Grant Larson,-0.008295,0.472386,0.316713,0.527614,0.074650,0.682985,1510,79.880474,4.405372,-7.364732,-0.829465,7.016785,112.136692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
457,Zach Smith,0.006605,0.373454,0.385353,0.626546,0.071677,0.684487,1562,86.032628,5.088700,4.892457,0.660538,5.526782,88.324651
252,Jose Ledesma jr.,0.008603,0.366669,0.339165,0.633331,0.074474,0.683403,726,89.646329,7.588948,5.407151,0.860276,5.327044,85.132587
303,Mark Moclair,0.009293,0.347442,0.328949,0.652558,0.073520,0.686457,1630,89.236215,7.995880,0.250436,0.929330,5.257990,84.029027
205,Jack Dellinger,0.010174,0.339700,0.332669,0.660300,0.075061,0.682895,1012,84.196424,8.698755,5.484433,1.017415,5.169905,82.621320


In [56]:
agg_offs

Unnamed: 0,player_name,pitch_name,mean_xrv,num_pitches,velo,vert_break,horz_break,xrv_100,xrv_100_adj,location_plus
88,Brooks Walton,Changeup,-0.015664,122,77.497429,8.622382,10.881350,-1.566444,7.905446,132.932120
483,Zac Westcott,Changeup,-0.013212,460,75.610705,6.744719,15.237170,-1.321219,7.660221,128.808599
205,Harley Gollert,Changeup,-0.011043,290,77.786211,7.405897,-12.899763,-1.104268,7.443270,125.160517
118,Christian Cosby,Changeup,-0.010504,54,84.734505,10.121728,6.512684,-1.050418,7.389421,124.255024
261,Jonaiker Villalobos,Changeup,-0.009752,92,82.962870,11.275917,-9.358845,-0.975169,7.314171,122.989684
...,...,...,...,...,...,...,...,...,...,...
219,Jack Dellinger,Changeup,0.017052,147,79.895832,5.396713,10.293358,1.705164,4.633838,77.919184
427,Taylor Sugg,Changeup,0.017689,61,82.783427,7.832408,12.753259,1.768922,4.570080,76.847084
386,Ricky Castro,Changeup,0.019312,138,79.849449,4.753976,13.701835,1.931207,4.407795,74.118214
257,Joey Gonzalez,Changeup,0.019964,291,82.031492,5.878463,14.390047,1.996434,4.342568,73.021410


In [57]:
import pandas as pd

leaderboard = pd.DataFrame(columns=['Pitcher', 'FB+', 'CB+', 'SI+', 'CUT+', 'SPL+', 'CH+', 'SL+', 'Location+'])

for pitcher in agg_total.player_name.unique():
    p = pitcher
    location = agg_total.loc[agg_total['player_name'] == pitcher, 'location_plus'].values[0]
    
    fb_d = agg_fb[agg_fb['player_name'] == pitcher]
    bb_d = agg_bb[agg_bb['player_name'] == pitcher]
    offs_d = agg_offs[agg_offs['player_name'] == pitcher]
    
    fb_sp = np.nan
    si_sp = np.nan
    
    sl_sp = np.nan
    cb_sp = np.nan
    cut_sp = np.nan
    
    ch_sp = np.nan
    spl_sp = np.nan
    
    if 'Fastball' in fb_d.pitch_name.unique():
        fb_sp = fb_d[fb_d['pitch_name'] == 'Fastball']['location_plus'].values[0]
    if 'Sinker' in fb_d.pitch_name.unique():
        si_sp = fb_d[fb_d['pitch_name'] == 'Sinker']['location_plus'].values[0]
    
    if 'Curveball' in bb_d.pitch_name.unique():
        cb_sp = bb_d[bb_d['pitch_name'] == 'Curveball']['location_plus'].values[0]
    if 'Cutter' in bb_d.pitch_name.unique():
        cut_sp = bb_d[bb_d['pitch_name'] == 'Cutter']['location_plus'].values[0]
    if 'Slider' in bb_d.pitch_name.unique():
        sl_sp = bb_d[bb_d['pitch_name'] == 'Slider']['location_plus'].values[0]
    
    if 'Changeup' in offs_d.pitch_name.unique():
        ch_sp = offs_d[offs_d['pitch_name'] == 'Changeup']['location_plus'].values[0]
    if 'Splitter' in offs_d.pitch_name.unique():
        spl_sp = offs_d[offs_d['pitch_name'] == 'Splitter']['location_plus'].values[0]
    
    leaderboard = leaderboard.append({
        'Pitcher': p,
        'FB+': fb_sp,
        'CB+': cb_sp,
        'SI+': si_sp,
        'CUT+': cut_sp,
        'SPL+': spl_sp,
        'CH+': ch_sp,
        'SL+': sl_sp,
        'Location+': location
    }, ignore_index=True)

  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
 

  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
 

  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
 

  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({
  leaderboard = leaderboard.append({


In [58]:
leaderboard.to_csv('../../../Documents/GitHub/Frontier-League-Savant/csvs/location+.csv')

In [60]:
leaderboard[leaderboard['Pitcher'] == 'Cole Cook']

Unnamed: 0,Pitcher,FB+,CB+,SI+,CUT+,SPL+,CH+,SL+,Location+
13,Cole Cook,108.637194,118.016857,,116.260916,,93.236634,105.338891,109.647465
