## 1. Data Preparation & Filtering

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

DATA_DIR = Path("Trackman CSVs")   

In [2]:
import pandas as pd
import numpy as np

# -------------------------------
# Load the two datasets
# -------------------------------
ncaa_df = pd.read_csv("all_games.csv")
ucsd_df = pd.read_csv("ucsd_fall_pitching_data.csv")  # <-- match your exact filename on the left

# -------------------------------
# Backward-compatible aliases
# (so the rest of the notebook works)
# -------------------------------
all_df = ncaa_df.copy()      # NCAA-wide (used for baselines / percentiles / betas)
ucsd_all = ucsd_df.copy()    # UCSD-only (used for your tables / rankings)

print("Loaded NCAA rows:", len(all_df), "| UCSD rows:", len(ucsd_all))
display(all_df.head(3))
display(ucsd_all.head(3))

  ncaa_df = pd.read_csv("all_games.csv")


Loaded NCAA rows: 1501313 | UCSD rows: 7268


Unnamed: 0,PitchNo,Date,Time,PAofInning,PitchofPA,Pitcher,PitcherId,PitcherThrows,PitcherTeam,Batter,...,ThrowTrajectoryZc1,ThrowTrajectoryZc2,PitchReleaseConfidence,PitchLocationConfidence,PitchMovementConfidence,HitLaunchConfidence,HitLandingConfidence,CatcherThrowCatchConfidence,CatcherThrowReleaseConfidence,CatcherThrowLocationConfidence
0,1,2025-02-14,18:04:47.71,1,1,"Stuprich, Brennan",1000099000.0,Right,SOU_LIO,"Isom-McCall, Charlie",...,,,High,High,High,,,,,
1,2,2025-02-14,18:05:00.16,1,2,"Stuprich, Brennan",1000099000.0,Right,SOU_LIO,"Isom-McCall, Charlie",...,,,High,High,High,,,,,
2,3,2025-02-14,18:05:14.48,1,3,"Stuprich, Brennan",1000099000.0,Right,SOU_LIO,"Isom-McCall, Charlie",...,-78.35097,12.46061,High,High,High,,,Medium,Medium,Low


Unnamed: 0,PitchNo,Date,Time,PAofInning,PitchofPA,Pitcher,PitcherId,PitcherThrows,PitcherTeam,Batter,...,ThrowTrajectoryZc1,ThrowTrajectoryZc2,PitchReleaseConfidence,PitchLocationConfidence,PitchMovementConfidence,HitLaunchConfidence,HitLandingConfidence,CatcherThrowCatchConfidence,CatcherThrowReleaseConfidence,CatcherThrowLocationConfidence
0,1,2025-11-01,11:35:12.81,1.0,1.0,"Gregson, Niccolas",1000039282,Right,CSD_TRI,"Crossland, Michael",...,,,High,High,High,,,,,
1,2,2025-11-01,11:35:34.61,1.0,2.0,"Gregson, Niccolas",1000039282,Right,CSD_TRI,"Crossland, Michael",...,,,High,High,High,,,,,
2,3,2025-11-01,11:36:01.08,1.0,3.0,"Gregson, Niccolas",1000039282,Right,CSD_TRI,"Crossland, Michael",...,,,High,High,High,,,,,


In [3]:
ncaa_df.groupby(['Pitcher', 'PitcherTeam']).size().sort_values(ascending=False)

Pitcher            PitcherTeam
Grinsell, Grayson  ORE_DUC        1486
Lockwood, Connor   VAL_CRU        1343
Hugus, Griffin     MIA_HUR        1331
Wright, Davis      WCC            1308
Jacobs, Ben        ARI_SUN        1284
                                  ... 
Shaw, Bradyn       AKR_ZIP           1
Smith, Caleb       HAR_SIM           1
Bevis, Blake       BAL_CAR           1
Monheimer, Riley   LIN_WIL1          1
Hervat, Jake       SLC_CCB           1
Length: 7332, dtype: int64

## 2. Pitch-Type–Specific Release Variability Metrics

In [4]:
df = all_df.copy()

def pick_col(candidates):
    for c in candidates:
        if c in df.columns:
            return c
    return None

PITCHER_COL = pick_col(["Pitcher", "PitcherName", "PitcherNameFull"])
TEAM_COL    = pick_col(["PitcherTeam", "Team", "Pitcher Team", "PitcherTeamAbbrev"])
PITCHTYPE_COL = pick_col(["TaggedPitchType", "PitchType", "AutoPitchType"])

HRA_COL = pick_col(["HorzRelAngle"])
VRA_COL = pick_col(["VertRelAngle"])

# Horizontal + vertical release POINTS (usually X and Z)
RELX_COL = pick_col(["RelSide"])
RELZ_COL = pick_col(["RelHeight"])

print("Pitcher:", PITCHER_COL)
print("Team:", TEAM_COL)
print("PitchType:", PITCHTYPE_COL)
print("HRA:", HRA_COL)
print("VRA:", VRA_COL)
print("RelX:", RELX_COL)
print("RelZ:", RELZ_COL)

req = [PITCHER_COL, TEAM_COL, PITCHTYPE_COL, HRA_COL, VRA_COL, RELX_COL, RELZ_COL]
missing = [r for r in req if r is None]
if missing:
    raise ValueError("Missing required columns. Fix candidates list above. Missing: " + str(missing))

# Keep only what we need + clean
df = df.dropna(subset=[PITCHER_COL, TEAM_COL, PITCHTYPE_COL, HRA_COL, VRA_COL, RELX_COL, RELZ_COL]).copy()
df[PITCHTYPE_COL] = df[PITCHTYPE_COL].astype(str).str.strip()
df[PITCHER_COL]   = df[PITCHER_COL].astype(str).str.strip()
df[TEAM_COL]      = df[TEAM_COL].astype(str).str.strip()

df.head()

Pitcher: Pitcher
Team: PitcherTeam
PitchType: TaggedPitchType
HRA: HorzRelAngle
VRA: VertRelAngle
RelX: RelSide
RelZ: RelHeight


Unnamed: 0,PitchNo,Date,Time,PAofInning,PitchofPA,Pitcher,PitcherId,PitcherThrows,PitcherTeam,Batter,...,ThrowTrajectoryZc1,ThrowTrajectoryZc2,PitchReleaseConfidence,PitchLocationConfidence,PitchMovementConfidence,HitLaunchConfidence,HitLandingConfidence,CatcherThrowCatchConfidence,CatcherThrowReleaseConfidence,CatcherThrowLocationConfidence
0,1,2025-02-14,18:04:47.71,1,1,"Stuprich, Brennan",1000099000.0,Right,SOU_LIO,"Isom-McCall, Charlie",...,,,High,High,High,,,,,
1,2,2025-02-14,18:05:00.16,1,2,"Stuprich, Brennan",1000099000.0,Right,SOU_LIO,"Isom-McCall, Charlie",...,,,High,High,High,,,,,
2,3,2025-02-14,18:05:14.48,1,3,"Stuprich, Brennan",1000099000.0,Right,SOU_LIO,"Isom-McCall, Charlie",...,-78.35097,12.46061,High,High,High,,,Medium,Medium,Low
3,4,2025-02-14,18:05:39.46,2,1,"Stuprich, Brennan",1000099000.0,Right,SOU_LIO,"Edwards, Kameron",...,,,High,High,High,,,,,
4,5,2025-02-14,18:05:53.55,2,2,"Stuprich, Brennan",1000099000.0,Right,SOU_LIO,"Edwards, Kameron",...,,,High,High,High,,,,,


## NCAA-Wide Spread Table

In [5]:
MIN_PITCHES_PER_TYPE = 0  # adjust if you want (prevents noisy tiny samples)

g = df.groupby([PITCHER_COL, PITCHTYPE_COL])

spread = g.agg(
    n_pitches=(PITCHTYPE_COL, "size"),
    sd_hra=(HRA_COL, "std"),
    sd_vra=(VRA_COL, "std"),
    sd_relx=(RELX_COL, "std"),
    sd_relz=(RELZ_COL, "std"),
).reset_index()

spread = spread[spread["n_pitches"] >= MIN_PITCHES_PER_TYPE].dropna()
spread.head()

Unnamed: 0,Pitcher,TaggedPitchType,n_pitches,sd_hra,sd_vra,sd_relx,sd_relz
0,"Abad, Adrian",Sinker,31,0.829183,1.192912,0.196204,0.118152
1,"Abad, Adrian",Slider,17,1.254848,1.380489,0.219038,0.119576
2,"Abbadessa, Jude",ChangeUp,7,1.00573,1.908953,0.290681,0.136591
3,"Abbadessa, Jude",Fastball,174,0.83736,0.954544,0.216094,0.11465
4,"Abbadessa, Jude",Sinker,20,0.751234,1.019078,0.300658,0.126558


## NCAA Percentile-Based Command Scores

In [6]:
# Percentile-based command (higher = better)
for col in ["sd_hra", "sd_vra", "sd_relx", "sd_relz"]:
    spread[f"{col}_pct"] = 1 - spread.groupby(PITCHTYPE_COL)[col].rank(pct=True)

## Regression Model for Beta Weights

In [7]:
# Select Features + Target (Plate Location)

# Features (predictors)
FEATURES = [
    "VertRelAngle",   # VRA
    "HorzRelAngle",   # HRA
    "RelHeight",      # vRel
    "RelSide"         # hRel
]

# Targets (plate location)
TARGETS = [
    "PlateLocHeight",  # Z location
    "PlateLocSide"     # X location
]

model_df = df.dropna(subset=FEATURES + TARGETS)

X = model_df[FEATURES]
y = model_df[TARGETS]

In [8]:
# Test/Train Split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [9]:
# Linear Regression (Kirby-style)

lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

y_pred = lin_reg.predict(X_test)

print("MSE:", mean_squared_error(y_test, y_pred))
print("R²:", r2_score(y_test, y_pred))

MSE: 0.6286337962161277
R²: 0.3795154898604312


In [10]:
# Extract Beta Weights

beta_df = pd.DataFrame(
    lin_reg.coef_,
    columns=FEATURES,
    index=["PlateSide", "PlateHeight"]
)

beta_df

Unnamed: 0,VertRelAngle,HorzRelAngle,RelHeight,RelSide
PlateSide,0.389138,-0.029815,0.679094,-0.035437
PlateHeight,-0.057208,0.524754,-0.074106,0.69717


In [11]:
beta_weights = beta_df.abs().mean(axis=0)
beta_weights = beta_weights / beta_weights.sum()  # normalize

beta_weights

VertRelAngle    0.179492
HorzRelAngle    0.223012
RelHeight       0.302889
RelSide         0.294608
dtype: float64

## Build Pitch Command Spread (Raw SDs)

In [12]:
import numpy as np

# --- 1) Identify the correct release point columns in YOUR CSV ---
# Trackman commonly uses RelHeight (vertical release) and RelSide (horizontal release)
CAND_VREL = ["RelHeight", "ReleaseHeight", "release_pos_z", "RelZ", "vRel"]
CAND_HREL = ["RelSide", "ReleaseSide", "release_pos_x", "RelX", "hRel"]

vrel_col = next((c for c in CAND_VREL if c in df.columns), None)
hrel_col = next((c for c in CAND_HREL if c in df.columns), None)

print("Using vRel column:", vrel_col)
print("Using hRel column:", hrel_col)

if vrel_col is None or hrel_col is None:
    raise ValueError(
        f"Could not find release point columns. "
        f"Columns in df include: {list(df.columns)[:40]} ..."
    )

# --- 2) Compute SDs per (Pitcher, PitchType) ---
PITCHER_COL = "Pitcher"
PITCHTYPE_COL = "TaggedPitchType"

spread = (
    df.groupby([PITCHER_COL, PITCHTYPE_COL])
      .agg(
          sd_vra=("VertRelAngle", lambda s: s.std(ddof=1)),
          sd_hra=("HorzRelAngle", lambda s: s.std(ddof=1)),
          sd_vrel=(vrel_col, lambda s: s.std(ddof=1)),
          sd_hrel=(hrel_col, lambda s: s.std(ddof=1)),
          n=("VertRelAngle", "size")
      )
      .reset_index()
)

# (Optional but recommended) drop tiny sample sizes to reduce noise
MIN_PITCHES_PER_TYPE = 0
spread = spread[spread["n"] >= MIN_PITCHES_PER_TYPE].copy()

# --- 3) Z-score each SD metric within pitch type (NCAA-wide benchmark) ---
def z_by_pitchtype(series):
    mu = series.mean()
    sd = series.std(ddof=0)
    return (series - mu) / sd if sd != 0 else np.nan

spread["z_sd_vra"]  = spread.groupby(PITCHTYPE_COL)["sd_vra"].transform(z_by_pitchtype)
spread["z_sd_hra"]  = spread.groupby(PITCHTYPE_COL)["sd_hra"].transform(z_by_pitchtype)
spread["z_sd_vrel"] = spread.groupby(PITCHTYPE_COL)["sd_vrel"].transform(z_by_pitchtype)
spread["z_sd_hrel"] = spread.groupby(PITCHTYPE_COL)["sd_hrel"].transform(z_by_pitchtype)

spread.head()

Using vRel column: RelHeight
Using hRel column: RelSide


Unnamed: 0,Pitcher,TaggedPitchType,sd_vra,sd_hra,sd_vrel,sd_hrel,n,z_sd_vra,z_sd_hra,z_sd_vrel,z_sd_hrel
0,"Abad, Adrian",Sinker,1.192912,0.829183,0.118152,0.196204,31,0.926395,-0.290006,0.032746,0.169239
1,"Abad, Adrian",Slider,1.380489,1.254848,0.119576,0.219038,17,0.68751,0.406221,-0.133067,0.152883
2,"Abbadessa, Jude",ChangeUp,1.908953,1.00573,0.136591,0.290681,7,1.893653,-0.05087,0.047106,0.59443
3,"Abbadessa, Jude",Fastball,0.954544,0.83736,0.11465,0.216094,174,-0.222635,-0.489315,-0.186771,0.079375
4,"Abbadessa, Jude",Sinker,1.019078,0.751234,0.126558,0.300658,20,0.388122,-0.528884,0.131199,0.930157


## NCAA-Normalized Release Consistency Percentiles

In [13]:
# ============================================================
# Convert release SDs to percentiles (lower SD = better)
# Done per pitch type (NCAA-normalized)
# Creates: sd_vra_pct, sd_hra_pct, sd_vrel_pct, sd_hrel_pct
# ============================================================

PCT_COLS = {
    "sd_vra":  "sd_vra_pct",
    "sd_hra":  "sd_hra_pct",
    "sd_vrel": "sd_vrel_pct",
    "sd_hrel": "sd_hrel_pct",
}

missing_raw = [c for c in PCT_COLS.keys() if c not in spread.columns]
if missing_raw:
    raise ValueError(f"Missing raw SD columns needed for percentiles: {missing_raw}")

for raw_col, pct_col in PCT_COLS.items():
    # rank within each pitch type; invert so lower SD = higher percentile (better)
    spread[pct_col] = 1 - spread.groupby(PITCHTYPE_COL)[raw_col].rank(pct=True)

# round for display (hundredths)
for c in PCT_COLS.values():
    spread[c] = spread[c].round(2)

display(spread[list(PCT_COLS.values())].describe())

Unnamed: 0,sd_vra_pct,sd_hra_pct,sd_vrel_pct,sd_hrel_pct
count,29727.0,29727.0,29727.0,29727.0
mean,0.499765,0.499765,0.499765,0.499765
std,0.288716,0.288716,0.288716,0.288716
min,0.0,0.0,0.0,0.0
25%,0.25,0.25,0.25,0.25
50%,0.5,0.5,0.5,0.5
75%,0.75,0.75,0.75,0.75
max,1.0,1.0,1.0,1.0


## Random Forest -> Feature Importance Betas

In [14]:
# ================================
# Random Forest: Feature Importances (Beta Weights)
# (nonlinear; already normalized to sum to 1)
# ================================

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import pandas as pd

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

rf_regressor = RandomForestRegressor(
    n_estimators=300,
    random_state=42,
    n_jobs=-1,
    min_samples_leaf=5,
    max_features="sqrt"   # <-- FIX: don't use "auto"
)

rf_regressor.fit(X_train, y_train)

rf_importance = pd.Series(
    rf_regressor.feature_importances_,
    index=FEATURES
).sort_values(ascending=False)

# already sums to 1, but keep for safety
beta_weights = rf_importance / rf_importance.sum()

print("RF importances sum:", rf_importance.sum())
display(rf_importance)

print("Beta weights sum:", beta_weights.sum())
display(beta_weights)

RF importances sum: 0.9999999999999999


HorzRelAngle    0.328630
VertRelAngle    0.297787
RelSide         0.215161
RelHeight       0.158421
dtype: float64

Beta weights sum: 1.0000000000000002


HorzRelAngle    0.328630
VertRelAngle    0.297787
RelSide         0.215161
RelHeight       0.158421
dtype: float64

## TriKirby Equation - Composite Pitch Command Metric

In [15]:
# TriKirby Equation (percentile-based, higher = better)

betas = {
    "sd_vra_pct": beta_weights["VertRelAngle"],
    "sd_hra_pct": beta_weights["HorzRelAngle"],
    "sd_vrel_pct": beta_weights["RelHeight"],
    "sd_hrel_pct": beta_weights["RelSide"],
}

spread["TriKirby"] = (
    betas["sd_vra_pct"] * spread["sd_vra_pct"] +
    betas["sd_hra_pct"] * spread["sd_hra_pct"] +
    betas["sd_vrel_pct"] * spread["sd_vrel_pct"] +
    betas["sd_hrel_pct"] * spread["sd_hrel_pct"]
).round(3)

display(
    spread[
        ["Pitcher", "TaggedPitchType", "TriKirby",
         "sd_vra_pct", "sd_hra_pct", "sd_vrel_pct", "sd_hrel_pct"]
    ].head()
)

Unnamed: 0,Pitcher,TaggedPitchType,TriKirby,sd_vra_pct,sd_hra_pct,sd_vrel_pct,sd_hrel_pct
0,"Abad, Adrian",Sinker,0.364,0.11,0.67,0.36,0.25
1,"Abad, Adrian",Slider,0.266,0.17,0.26,0.48,0.25
2,"Abbadessa, Jude",ChangeUp,0.247,0.04,0.47,0.36,0.11
3,"Abbadessa, Jude",Fastball,0.552,0.58,0.73,0.5,0.28
4,"Abbadessa, Jude",Sinker,0.391,0.24,0.77,0.31,0.08


In [20]:
# ================================
# NCAA D1: Top/Bottom 20 TriKirby per Pitch Type (min pitches = 30)
# Requires: spread dataframe with TriKirby already computed
# ================================

MIN_PITCHES = 30
TOP_N = 10

PITCHER_COL   = "Pitcher"
PITCHTYPE_COL = "TaggedPitchType"
TRIKIRBY_COL  = "TriKirby"

# detect pitch count column (if you have one)
COUNT_COL = None
for c in ["n", "n_pitches", "count", "N"]:
    if c in spread.columns:
        COUNT_COL = c
        break

# optional team column (only included if present)
TEAM_COL = "PitcherTeam" if "PitcherTeam" in spread.columns else None

# build a clean base table
cols = [PITCHER_COL, PITCHTYPE_COL, TRIKIRBY_COL]
if COUNT_COL is not None:
    cols.insert(2, COUNT_COL)
if TEAM_COL is not None:
    cols.insert(1, TEAM_COL)

base = (
    spread.loc[:, cols]
    .dropna(subset=[TRIKIRBY_COL])
)

if COUNT_COL is not None:
    base = base[base[COUNT_COL] >= MIN_PITCHES]

# loop each pitch type and display top/bottom 20
pitch_types = sorted(base[PITCHTYPE_COL].dropna().unique())

print(f"Using MIN_PITCHES = {MIN_PITCHES} (count col = {COUNT_COL})")
print("Pitch types found:", pitch_types)

for pt in pitch_types:
    df_pt = base[base[PITCHTYPE_COL] == pt].copy()

    # sort high->low for top
    top20 = df_pt.sort_values(TRIKIRBY_COL, ascending=False).head(TOP_N)
    # sort low->high for bottom
    bot20 = df_pt.sort_values(TRIKIRBY_COL, ascending=True).head(TOP_N)

    print(f"\n=== {pt} — TOP {TOP_N} TriKirby (min {MIN_PITCHES} pitches) ===")
    display(top20.reset_index(drop=True))

    print(f"\n=== {pt} — BOTTOM {TOP_N} TriKirby (min {MIN_PITCHES} pitches) ===")
    display(bot20.reset_index(drop=True))

Using MIN_PITCHES = 30 (count col = n)
Pitch types found: ['ChangeUp', 'Curveball', 'Cutter', 'Fastball', 'FourSeamFastBall', 'Knuckleball', 'Sinker', 'Slider', 'Splitter', 'Sweeper', 'TwoSeamFastBall', 'Undefined']

=== ChangeUp — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Clack, Miles",ChangeUp,43,0.87
1,"Beuter, Gavin",ChangeUp,30,0.864
2,"McCullough, Jon",ChangeUp,33,0.862
3,"Husak, Joe",ChangeUp,38,0.838
4,"Chrest, Evan",ChangeUp,39,0.819
5,"Barroqueiro, Jayden",ChangeUp,47,0.807
6,"Boccabello, Tanner",ChangeUp,32,0.799
7,"Rooney, Noah",ChangeUp,35,0.797
8,"Herrera, Tyler",ChangeUp,35,0.779
9,"Galy, Gavin",ChangeUp,67,0.776



=== ChangeUp — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Smith, Tyler",ChangeUp,34,0.042
1,"Cychosz, Isaac",ChangeUp,30,0.054
2,"Madariaga, Reid",ChangeUp,31,0.067
3,"Nelson, Drew",ChangeUp,166,0.071
4,"Miner, Jace",ChangeUp,41,0.079
5,"Bender, Pryce",ChangeUp,45,0.082
6,"Sexton, Landon",ChangeUp,58,0.103
7,"Lewis, Kyle",ChangeUp,35,0.106
8,"Brown, Dylan",ChangeUp,206,0.106
9,"Jibben, Payten",ChangeUp,37,0.113



=== Curveball — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Clack, Miles",Curveball,37,0.846
1,"Hadden, Jacob",Curveball,33,0.809
2,"Hagerty, Blake",Curveball,37,0.763
3,"Rose, Hagen",Curveball,33,0.763
4,"Kuromoto, Matthew",Curveball,56,0.763
5,"Eaton, Austin",Curveball,52,0.761
6,"Lavelle, Declan",Curveball,148,0.745
7,"McDougall, Collin",Curveball,33,0.745
8,"Lawson, Ty",Curveball,39,0.744
9,"Lutz, Travis",Curveball,36,0.735



=== Curveball — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Nelson, Drew",Curveball,42,0.059
1,"Tobin, Will",Curveball,32,0.066
2,"Gebhardt, Braden",Curveball,36,0.072
3,"Hagler, Micah",Curveball,30,0.091
4,"Robert, Nick",Curveball,32,0.092
5,"Johnston, Connor",Curveball,31,0.101
6,"Keene, Sam",Curveball,55,0.104
7,"Budd, Kade",Curveball,37,0.105
8,"Chaffee, Evan",Curveball,60,0.11
9,"Walker, Charlie",Curveball,47,0.113



=== Cutter — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Fix, Isaac",Cutter,51,0.751
1,"Wiessinger, Jack",Cutter,30,0.747
2,"Vanek, Jacob",Cutter,31,0.745
3,"Murphy, Colin",Cutter,32,0.713
4,"Chrest, Evan",Cutter,39,0.706
5,"Martin, Jevarra",Cutter,31,0.704
6,"Cole, Lawson",Cutter,38,0.696
7,"Morrissey, Joe",Cutter,36,0.694
8,"Johnson, Ashton",Cutter,69,0.691
9,"Saloman, Logan",Cutter,243,0.682



=== Cutter — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Tylicki, JJ",Cutter,62,0.073
1,"Smith, Mason",Cutter,114,0.104
2,"Gebhardt, Braden",Cutter,46,0.143
3,"Rumberg, Logan",Cutter,128,0.165
4,"Jones, Gavyn",Cutter,40,0.176
5,"Solimine , Justin",Cutter,37,0.176
6,"Strother, Grant",Cutter,37,0.183
7,"Rossow, David",Cutter,47,0.185
8,"Whelan, Will",Cutter,56,0.194
9,"Steele, Anthony",Cutter,80,0.196



=== Fastball — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Murray, Landon",Fastball,53,0.917
1,"Hayden, Connor",Fastball,30,0.904
2,"Schumann, Eric",Fastball,44,0.899
3,"Merson, Trystin",Fastball,40,0.881
4,"Curtis, Connor",Fastball,30,0.879
5,"Beasley, James",Fastball,97,0.878
6,"Kolze, Nick",Fastball,61,0.875
7,"Long, Richard",Fastball,81,0.868
8,"Yates, Braxton",Fastball,59,0.867
9,"Hickey, Josh",Fastball,38,0.867



=== Fastball — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Murphy, Josh",Fastball,83,0.0
1,"Croghan, Kamden",Fastball,148,0.003
2,"Adams, R'Mani",Fastball,41,0.003
3,"Webb, Joseph",Fastball,208,0.008
4,"Brown, Matthew",Fastball,155,0.012
5,"Stephens, Brady",Fastball,58,0.013
6,"Tobin, Will",Fastball,72,0.016
7,"Nelson, Drew",Fastball,301,0.018
8,"Cheyne, Owen",Fastball,97,0.021
9,"Byers, Evan",Fastball,32,0.028



=== FourSeamFastBall — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Templeton, Cody",FourSeamFastBall,53,0.892
1,"Remington, Kyle",FourSeamFastBall,32,0.858
2,"Cychosz, Isaac",FourSeamFastBall,32,0.858
3,"Larson, Jake",FourSeamFastBall,56,0.814
4,"Ryan, Brayden",FourSeamFastBall,42,0.791
5,"Roberts, Cole",FourSeamFastBall,31,0.769
6,"Nichols, Mason",FourSeamFastBall,46,0.76
7,"Bouchard, Alec",FourSeamFastBall,35,0.759
8,"Parache, Nuno",FourSeamFastBall,50,0.749
9,"Colarusso, AJ",FourSeamFastBall,33,0.747



=== FourSeamFastBall — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Mejia, Ryan",FourSeamFastBall,50,0.087
1,"Grunkemeyer, Jack",FourSeamFastBall,60,0.099
2,"Davis, Alton",FourSeamFastBall,89,0.104
3,"Westfall, Derek",FourSeamFastBall,33,0.104
4,"Mitchell, Joey",FourSeamFastBall,115,0.111
5,"Pazos, Bryan",FourSeamFastBall,71,0.116
6,"Polledo, Jeffrey",FourSeamFastBall,31,0.121
7,"Brooks, Connor",FourSeamFastBall,119,0.125
8,"Barberi, Jackson",FourSeamFastBall,147,0.141
9,"Rokose, Davis",FourSeamFastBall,148,0.146



=== Knuckleball — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Barr, Kurt",Knuckleball,37,0.404



=== Knuckleball — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Barr, Kurt",Knuckleball,37,0.404



=== Sinker — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Desch, Dominic",Sinker,30,0.841
1,"Kent, Tommy",Sinker,32,0.806
2,"Stellano, Jack",Sinker,37,0.749
3,"Galy, Gavin",Sinker,92,0.741
4,"Burns, Dane",Sinker,43,0.735
5,"Clark, Caleb",Sinker,50,0.73
6,"Hyde, Jace",Sinker,66,0.728
7,"Beaty, Kyle",Sinker,39,0.727
8,"Podeszwa, Connor",Sinker,39,0.718
9,"Diamond, Camden",Sinker,30,0.718



=== Sinker — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Vota, Cade",Sinker,45,0.015
1,"Croghan, Kamden",Sinker,40,0.022
2,"Kehoe, Jaxon",Sinker,59,0.031
3,"Brown, Dylan",Sinker,188,0.032
4,"Bender, Pryce",Sinker,99,0.053
5,"Walker, Ethan",Sinker,82,0.066
6,"Yoak, Bryant",Sinker,67,0.071
7,"Meeks, Gardner",Sinker,74,0.081
8,"Kelly, Carson",Sinker,125,0.086
9,"Tenscher, Miles",Sinker,88,0.098



=== Slider — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Kieck, Chayce",Slider,56,0.867
1,"Chrest, Evan",Slider,36,0.864
2,"Stuprich, Brennan",Slider,33,0.851
3,"Detmers, Parker",Slider,55,0.85
4,"Barquin, Blake",Slider,45,0.835
5,"Bowery, Jack",Slider,43,0.827
6,"Osbolt, Braden",Slider,48,0.823
7,"Hilker, Michael",Slider,105,0.823
8,"Mazza, Joe",Slider,83,0.819
9,"Pehrson, Ryan",Slider,37,0.819



=== Slider — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Murphy, Josh",Slider,38,0.009
1,"Nelson, Drew",Slider,104,0.013
2,"Oppenheim, Drew",Slider,32,0.055
3,"Ingram, Chase",Slider,38,0.055
4,"Brown, Matthew",Slider,91,0.056
5,"Soliday, Cody",Slider,37,0.057
6,"Kaplan, Max",Slider,105,0.066
7,"Croghan, Kamden",Slider,63,0.07
8,"Tylicki, JJ",Slider,62,0.082
9,"Clark, Brady",Slider,58,0.087



=== Splitter — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Aoki, Caden",Splitter,34,0.765
1,"Hernandez, Ryan",Splitter,39,0.646
2,"Drake, Jt",Splitter,31,0.642
3,"Goodpaster, Marcus",Splitter,38,0.602
4,"Tuttoilmondo, Joey",Splitter,87,0.591
5,"Chadwick, Tyrelle",Splitter,61,0.579
6,"Green, Mason",Splitter,41,0.578
7,"Rady, Aidan",Splitter,108,0.551
8,"Brown, Kade",Splitter,146,0.547
9,"Yates, Chad",Splitter,91,0.525



=== Splitter — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Foster, Christopher",Splitter,37,0.042
1,"Eyanson, Anthony",Splitter,47,0.158
2,"Bencosme, Sean",Splitter,55,0.197
3,"Clapp, Reece",Splitter,66,0.2
4,"Martinez, Victor",Splitter,231,0.237
5,"Foutch, Christian",Splitter,49,0.266
6,"Sheets, Joe",Splitter,60,0.272
7,"Jenkins, Sean",Splitter,66,0.29
8,"Corbett, Ryan",Splitter,39,0.306
9,"Hartman, Lucas",Splitter,127,0.313



=== Sweeper — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Kolarov, Connor",Sweeper,36,0.655
1,"Baker, Trace",Sweeper,95,0.57
2,"Kleinschmit, Ethan",Sweeper,32,0.555
3,"Seebold, Gavin",Sweeper,82,0.547
4,"Boyer, Matt",Sweeper,39,0.542
5,"Smith, Blake",Sweeper,36,0.525
6,"Marshburn, Connor",Sweeper,163,0.515
7,"Little, Branton",Sweeper,35,0.493
8,"Thornton, Bromley",Sweeper,51,0.484
9,"Ramos, Alex",Sweeper,49,0.481



=== Sweeper — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Egan, Tommy",Sweeper,31,0.151
1,"Foltz Jr., Michael",Sweeper,78,0.201
2,"Flora, Jackson",Sweeper,142,0.241
3,"Pace, Jackson",Sweeper,38,0.267
4,"Patel, Sahil",Sweeper,52,0.29
5,"Savary, Aaron",Sweeper,43,0.292
6,"Tryba, Cole",Sweeper,95,0.297
7,"Barnett, Michael",Sweeper,121,0.303
8,"Barnes II, Matthew",Sweeper,54,0.303
9,"Schutte, Nic",Sweeper,54,0.306



=== TwoSeamFastBall — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Tronson, Hayden",TwoSeamFastBall,37,0.713
1,"Jones, Luke",TwoSeamFastBall,35,0.669
2,"Roettgen, Jacob",TwoSeamFastBall,40,0.647
3,"Eberle, Ethan",TwoSeamFastBall,43,0.647
4,"Peters, Treyson",TwoSeamFastBall,67,0.625
5,"Bennett, Jack",TwoSeamFastBall,48,0.57
6,"Blanco, Evan",TwoSeamFastBall,51,0.568
7,"Miller, Griffin",TwoSeamFastBall,60,0.565
8,"Kelly, Owen",TwoSeamFastBall,65,0.562
9,"Nichols, Luke",TwoSeamFastBall,124,0.551



=== TwoSeamFastBall — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Maloney, Collin",TwoSeamFastBall,270,0.105
1,"Monaco, Dominic",TwoSeamFastBall,39,0.141
2,"Voth, Brant",TwoSeamFastBall,43,0.142
3,"Iglesias, Joey",TwoSeamFastBall,70,0.173
4,"West, Manning",TwoSeamFastBall,31,0.174
5,"Peteson, Sam",TwoSeamFastBall,38,0.175
6,"Arroyo, Chris",TwoSeamFastBall,49,0.181
7,"Van Der Lelie , Jelle",TwoSeamFastBall,131,0.188
8,"Turner , Kole",TwoSeamFastBall,220,0.197
9,"White, Caleb",TwoSeamFastBall,40,0.2



=== Undefined — TOP 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Tollett, JR",Undefined,50,0.867
1,"Mabee, Oliver",Undefined,40,0.853
2,"Barquin, Blake",Undefined,43,0.827
3,"Merkel, Alex",Undefined,33,0.824
4,"Mankoski, Nathan",Undefined,57,0.818
5,"Whitmer, Marshall",Undefined,30,0.805
6,"Mertens, Nathan",Undefined,80,0.802
7,"Tejada, Anthony",Undefined,32,0.79
8,"Johnson, Nolan",Undefined,94,0.787
9,"Herrmann, Andrew",Undefined,73,0.786



=== Undefined — BOTTOM 10 TriKirby (min 30 pitches) ===


Unnamed: 0,Pitcher,TaggedPitchType,n,TriKirby
0,"Mitchell, Joey",Undefined,251,0.044
1,"Hunsaker, Riely",Undefined,181,0.054
2,"Kimball, Blake",Undefined,182,0.069
3,"Renuart, Jake",Undefined,66,0.076
4,"Neal, Austin",Undefined,85,0.079
5,"Arrichiello, Anthony",Undefined,80,0.08
6,"Cooksey, Braxton",Undefined,35,0.082
7,"Westfall, Derek",Undefined,44,0.1
8,"Dottavio, Dom",Undefined,65,0.109
9,"Posey, Jack",Undefined,82,0.117


## NCAA D1 Average TriKirby Index Score Per Pitch

In [33]:
# ============================================
# NCAA D1 Average TriKirby Score by Pitch Type
# ============================================

ncaa_pitch_averages = (
    spread
    .dropna(subset=["TriKirby"])
    .groupby("TaggedPitchType")
    .agg(
        NCAA_Avg_TriKirby=("TriKirby", "mean"),
        Num_Pitchers=("Pitcher", "nunique"),
        Num_Pitcher_Pitch_Pairs=("TriKirby", "count")
    )
    .sort_values("NCAA_Avg_TriKirby", ascending=False)
    .round(3)
    .reset_index()
)

display(ncaa_pitch_averages)


Unnamed: 0,TaggedPitchType,NCAA_Avg_TriKirby,Num_Pitchers,Num_Pitcher_Pitch_Pairs
0,Fastball,0.5,6758,6758
1,Slider,0.5,6226,6226
2,ChangeUp,0.5,5125,5125
3,Curveball,0.5,3730,3730
4,Cutter,0.5,1988,1988
5,FourSeamFastBall,0.5,1773,1773
6,Sinker,0.5,1845,1845
7,Undefined,0.5,1010,1010
8,TwoSeamFastBall,0.499,477,477
9,Splitter,0.499,439,439
