In [70]:
import pandas as pd
import numpy as np

filepath = r"C:\Users\brend\OneDrive - Stonehill College\All Cape League Trackman.xlsx"
df = pd.read_excel(filepath)

In [179]:
for col in ['PlateLocHeight', 'PlateLocSide', 'Balls', 'Strikes']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

ZONE_TOP = 3.5
ZONE_BOTTOM = 1.5
ZONE_LEFT = -0.85
ZONE_RIGHT = 0.85

In [180]:
pitch_type_mapping = {
    "Fastball": "Four-Seam",
    "Fourseamfastball": "Four-Seam",
    "Twoseamfastball": "Two-Seam",
    "Twoseamfour-Seam": "Two-Seam",
    "Sweeper": "Slider",
}

df['TaggedPitchType'] = (
    df['TaggedPitchType']
    .astype(str)
    .str.strip()
    .str.title()
    .replace(pitch_type_mapping)
)

In [181]:
df['InZone'] = (
    (df['PlateLocSide'].between(ZONE_LEFT, ZONE_RIGHT)) &
    (df['PlateLocHeight'].between(ZONE_BOTTOM, ZONE_TOP))
)

df['IsFastball'] = df['TaggedPitchType'].str.contains(
    'Fastball|Four-Seam|Two-Seam|Sinker', case=False, na=False
)

swing_calls = ['StrikeSwinging', 'FoulBallFieldable', 'FoulBallNotFieldable', 'InPlay']
df['IsSwing'] = df['PitchCall'].isin(swing_calls)
df['IsOutsideZone'] = ~df['InZone']

df['AheadInCount'] = ((df['Balls'] > df['Strikes']) & (df['Balls'] >= 2)) | (df['Balls'] == 3)

In [182]:
first_pitch_df = df[(df['Balls'] == 0) & (df['Strikes'] == 0)]
two_strike_df = df[df['Strikes'] == 2]

In [183]:
in_zone_ahead_df = df[df['AheadInCount']].groupby('Batter')['InZone'].mean().rename('in_zone_ahead')

batter_stats = (
    df.groupby('Batter')
      .agg(
          pitches=('TaggedPitchType', 'count'),
          in_zone=('InZone', 'mean'),
          fastball_rate=('IsFastball', 'mean')
      )
      .reset_index()
      .merge(in_zone_ahead_df, on='Batter', how='left')
)

first_pitch_zone = first_pitch_df.groupby('Batter')['InZone'].mean().rename('first_pitch_in_zone_rate')
two_strike_zone = two_strike_df.groupby('Batter')['InZone'].mean().rename('two_strike_zone_rate')

batter_stats = (
    batter_stats
    .merge(first_pitch_zone, on='Batter', how='left')
    .merge(two_strike_zone, on='Batter', how='left')
)

batter_stats = batter_stats[batter_stats['pitches'] >= 100]
valid_first_pitch = first_pitch_df.groupby('Batter')['InZone'].count()
valid_two_strike = two_strike_df.groupby('Batter')['InZone'].count()

batter_stats = batter_stats[
    (batter_stats['Batter'].isin(valid_first_pitch[valid_first_pitch >= 10].index)) &
    (batter_stats['Batter'].isin(valid_two_strike[valid_two_strike >= 10].index))
]

In [184]:
chase_rate = (
    df[df['IsOutsideZone']]
    .groupby('Batter')['IsSwing']
    .mean()
    .rename('chase_rate')
)
batter_stats = batter_stats.merge(chase_rate, on='Batter', how='left')

In [185]:
metrics = {
    'in_zone': -1,
    'in_zone_ahead': -1,
    'fastball_rate': -1,
    'chase_rate': -1,
    'first_pitch_in_zone_rate': -1,
    'two_strike_zone_rate': -1
}

batter_stats = batter_stats.replace([np.inf, -np.inf], np.nan)
batter_stats = batter_stats.fillna(batter_stats.mean(numeric_only=True))

for metric, direction in metrics.items():
    std = batter_stats[metric].std(ddof=0)
    if std == 0 or pd.isna(std):
        batter_stats[f'{metric}_z'] = 0
    else:
        z = (batter_stats[metric] - batter_stats[metric].mean()) / std
        batter_stats[f'{metric}_z'] = (z * direction)

In [186]:
corrs = batter_stats[
    list(metrics.keys())
].corrwith(batter_stats['in_zone']).abs()

corrs = corrs[corrs > 0.05]
weights = (corrs / corrs.sum()).to_dict()

batter_stats['RespectScore'] = sum(batter_stats[f"{metric}_z"] * weights[metric] for metric in metrics.keys())

In [187]:
batter_stats['Respected+'] = 100 + 25 * (
    (batter_stats['RespectScore'] - batter_stats['RespectScore'].mean()) / batter_stats['RespectScore'].std(ddof=0)
)

In [188]:
most_respected = batter_stats.sort_values('Respected+', ascending=False).head(10)
least_respected = batter_stats.sort_values('Respected+', ascending=True).head(10)

print("=== MOST Respected ===")
print(most_respected[['Batter', 'Respected+']])
print("\n=== LEAST Respected ===")
print(least_respected[['Batter', 'Respected+']])

print("\n=== Summary Statistics ===")
print(batter_stats[['in_zone', 'chase_rate', 'RespectScore', 'Respected+']].describe().round(3))

=== MOST Respected ===
                     Batter  Respected+
7            Bastian, Jaden  199.851844
56          Gasparino, Will  165.199846
170        Wright, Garrett   155.503597
5             Bailey, Myles  155.425442
135            Rogers, Drew  151.578149
95            Martin, Roman  150.395256
65          Hernandez, Alex  150.340276
105       Mexico, Sebastian  144.774078
122  O'Shaughnessy, Michael  142.638726
37               Cyr, Blake  137.505565

=== LEAST Respected ===
                   Batter  Respected+
132            Reese, Ace   14.290099
63       Harrison, Connor   52.230658
41           Dobie, Jayce   52.937955
114          Myatt, Tyler   53.247391
124  Pisacreta, Sebastian   56.889109
33    Covarrubias, Abbrie   57.381895
58           Groves, Nick   58.261359
54         Gargett, Kyuss   59.120741
109       Mitchell, Devin   60.900642
74          Keenan, Jimmy   61.436911

=== Summary Statistics ===
       in_zone  chase_rate  RespectScore  Respected+
count  174.00

In [189]:
team_info = df[['Batter', 'BatterTeam']].drop_duplicates()
batter_stats = batter_stats.merge(team_info, on='Batter', how='left')

fal_batters = batter_stats[batter_stats['BatterTeam'] == 'FAL_COM']

most_respected_fal = fal_batters.sort_values('Respected+', ascending=False).head(10)
least_respected_fal = fal_batters.sort_values('Respected+', ascending=True).head(10)

print("=== MOST Respected (FAL_COM) ===")
print(most_respected_fal[['Batter', 'Respected+']])

print("\n=== LEAST Respected (FAL_COM) ===")
print(least_respected_fal[['Batter', 'Respected+']])

=== MOST Respected (FAL_COM) ===
                Batter  Respected+
20   Brumbaugh, Cayden  119.899526
91       Lopez, Adrian  108.363969
118    Newstrom, Chris   99.767136
129     Quatrani, Mark   98.840996
176    Zuckerman, Ryan   94.921279
144      Schmidt, Kent   92.126450
143      Schmidt, Carl   84.343851
120         Niu, Maika   83.940623
99       McCann, David   83.264303
112   Morales, Antonio   82.344682

=== LEAST Respected (FAL_COM) ===
                 Batter  Respected+
63     Harrison, Connor   52.230658
169      Wilson, Ashton   68.866306
138    Salinas, Tristan   71.488997
123  Osterhouse, Justin   76.839410
62       Harrison, Bear   77.968162
113      Morrison, Kyle   78.196859
112    Morales, Antonio   82.344682
99        McCann, David   83.264303
120          Niu, Maika   83.940623
143       Schmidt, Carl   84.343851


In [190]:
batter_stats.to_csv(r"C:\Users\brend\OneDrive - Stonehill College\RespectedPlus.csv", index=False)