In [7]:
import pandas as pd
import numpy as np
import pybaseball

### Read in BRef data & Calculate Height and Weight Percentiles

In [9]:
# Load the BRef pitcher table; its "Ht" and "Wt" columns feed the size metrics below.
pitchers = pd.read_csv(filepath_or_buffer="pitchers_br.csv")

def height_to_inches(height_str):
    """Convert a feet-and-inches string such as ``6' 2"`` to total inches.

    Parameters
    ----------
    height_str : str
        Height written as ``<feet>' <inches>"``. The inches part may be
        absent (``6'``), in which case it counts as zero.

    Returns
    -------
    int or float
        Total height in inches, or ``nan`` when ``height_str`` is a missing
        value (``None`` / ``NaN`` / ``pd.NA``) so that ``Series.apply`` does
        not abort on an empty cell.

    Raises
    ------
    ValueError
        If the string does not contain exactly one ``'`` separator, or if
        either part is not an integer.
    """
    # Missing cells arrive as NaN/None rather than str; propagate them as NaN.
    if pd.isna(height_str):
        return float("nan")

    feet, inches = height_str.split("'")
    inches = inches.replace('"', '').strip()
    # A feet-only value like 6' leaves an empty inches part; treat it as 0.
    return int(feet) * 12 + (int(inches) if inches else 0)

# Convert the "Ht" strings (feet'inches" form) into a numeric inch count.
pitchers['height_inches'] = pitchers['Ht'].apply(height_to_inches)

# Rank the numeric inch count, not the raw "Ht" strings: ranking strings is
# lexicographic, which would place 6' 10" below 6' 2".
pitchers['height_percentile'] = pitchers['height_inches'].rank(pct=True).round(2)
pitchers['weight_percentile'] = pitchers['Wt'].rank(pct=True).round(2)

# pitchers_sorted = pitchers.sort_values(by='height_percentile', ascending=False)
# print(pitchers_sorted)

# pitchers_sorted = pitchers.sort_values(by='weight_percentile', ascending=False)
# print(pitchers_sorted)


### Calculate Size Score

In [10]:
# Size score blends the two percentiles: height is weighted 10, weight is weighted 90.
pitchers["size_score"] = (
    pitchers["height_percentile"].mul(10)
    + pitchers["weight_percentile"].mul(90)
)
# Keep a view ordered from the largest size score down.
pitchers_sorted = pitchers.sort_values("size_score", ascending=False)
# print(pitchers_sorted)

### Big-Body Score as an alternative to Size Score

Capture a score somewhere between BMI and the Corpulence Index (using Imperial units, since the raw values are converted to percentiles anyway).

Big-Body Score = $\frac{\text{Weight (lbs)}}{\text{Height (in)}^{2.5}}$

In [11]:
# Raw index: weight divided by height raised to the 2.5 power.
pitchers["big_body_raw"] = pitchers["Wt"].div(pitchers["height_inches"].pow(2.5))
# Express the raw index as a percentile rank on a 0-100 scale.
pitchers["big_body_score"] = pitchers["big_body_raw"].rank(pct=True).round(4).mul(100)
# Keep a view ordered from the highest big-body score down.
pitchers_sorted = pitchers.sort_values("big_body_score", ascending=False)
# print(pitchers_sorted)

### Calculate Dominance Score from stats
This year's stats from FanGraphs

In [12]:
# Load the FanGraphs pitching stats export.
pitching_stats = pd.read_csv(filepath_or_buffer="pitchers_fg.csv")

In [13]:
# Qualification filter: keep only pitchers with more than 10 games.
# .copy() makes the filtered frame independent of the original, so the column
# assignment below does not raise pandas' SettingWithCopyWarning.
pitching_stats = pitching_stats[pitching_stats["G"] > 10].copy()

# Label a pitcher "SP" when more than half of his games were starts, else "RP".
pitching_stats["Pos"] = np.where(
    pitching_stats["GS"] > 0.5 * pitching_stats["G"],
    "SP",
    "RP",
)

In [None]:
# Show the column labels of pitching_stats (reference for names such as "SO" and "K/9" used below).
pitching_stats.columns



In [14]:
def _with_strikeout_percentiles(stats):
    """Return a copy of ``stats`` with strikeout percentile columns added.

    ``SO_percentile`` and ``SO9_percentile`` are the percentile ranks (rounded
    to two decimals) of the ``SO`` and ``K/9`` columns, computed within
    ``stats`` itself, so pass only the rows that should be compared together.
    """
    return stats.assign(
        SO_percentile=stats["SO"].rank(pct=True).round(2),
        SO9_percentile=stats["K/9"].rank(pct=True).round(2),
    )


# Rank starters and relievers separately so each pitcher is compared only
# against others in the same role.
pitching_stats_sp = _with_strikeout_percentiles(pitching_stats[pitching_stats["Pos"] == "SP"])
pitching_stats_rp = _with_strikeout_percentiles(pitching_stats[pitching_stats["Pos"] == "RP"])

# Recombine the two roles: starters first, then relievers, with a fresh index.
pitching_stats = pd.concat([pitching_stats_sp, pitching_stats_rp], ignore_index=True)

# Raw blend of the two percentiles: total strikeouts weighted 10, K/9 weighted 80.
# NOTE(review): these weights sum to 90, whereas size_score uses 10 + 90 -- confirm 80 is intended.
pitching_stats["dominance_score_raw"] = 10 * pitching_stats["SO_percentile"] + 80 * pitching_stats["SO9_percentile"]
# Final score is the percentile rank of the raw blend on a 0-100 scale.
pitching_stats["dominance_score"] = pitching_stats['dominance_score_raw'].rank(pct=True).round(4) * 100

print(pitching_stats.sort_values(by='dominance_score', ascending=False).head(5))

     Unnamed: 0              Name  WAR   G  GS   SO    K/9   FIP     K%  \
185         180      Mason Miller  1.2  44   0   73  14.71  2.71  0.420   
5            72      Zack Wheeler  4.0  24  24  195  11.73  2.98  0.333   
24          281       Dylan Cease  2.8  24  24  169  11.76  3.44  0.307   
166         296       Griffin Jax  1.6  57   0   79  13.76  2.24  0.354   
168          58  Jeremiah Estrada  1.5  58   0   81  13.02  2.79  0.358   

     FB% 2  ...  vFC (sc)  Zone%  SwStr%  Stuff+  Pitching+  Pos  \
185  0.546  ...       NaN  0.395   0.212     124        118   RP   
5    0.581  ...      91.8  0.428   0.145     117        124   SP   
24   0.441  ...       NaN  0.395   0.159     115        117   SP   
166  0.267  ...      93.5  0.412   0.187     109        132   RP   
168  0.607  ...       NaN  0.439   0.175     122        135   RP   

     SO_percentile SO9_percentile  dominance_score_raw  dominance_score  
185           0.95           1.00                 89.5           1

### Calculate HOSS = Size Score (here: Big-Body Score) + Dominance Score

In [15]:
# Left-join the size columns onto the stats rows by player name, so every row
# of pitching_stats is kept even when no matching name exists in pitchers.
pitchers = pitching_stats.merge(pitchers, how="left", on="Name")

# HOSS adds the big-body score to the dominance score.
# Alternative kept for reference: pitchers["size_score"] + pitchers["dominance_score"]
pitchers["HOSS"] = pitchers["big_body_score"].add(pitchers["dominance_score"])

# Show the ten highest HOSS values with their two components.
pitchers_sorted = pitchers.sort_values("HOSS", ascending=False)
print(pitchers_sorted.loc[:, ["Name", "big_body_score", "dominance_score", "HOSS"]].head(10))

                   Name  big_body_score  dominance_score    HOSS
165        David Bednar           99.46            95.38  194.84
164         Bryan Abreu           93.87            98.64  192.51
207       Fernando Cruz           93.33            97.01  190.34
0          Tarik Skubal           89.96            97.83  187.79
306         Kirby Yates           92.45            89.95  182.40
110        Grant Holmes           96.09            83.56  179.65
248         Edwin Uceta           84.57            94.84  179.41
263  Garrett Cleavinger           90.63            87.77  178.40
2           Paul Skenes           87.67            90.35  178.02
6            Logan Webb           89.15            87.23  176.38
