In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

file_path = r"C:\Users\brend\OneDrive - Stonehill College\All Cape League Trackman.xlsx"
df = pd.read_excel(file_path)

batted_ball_df = df[
    (df["PitchCall"] == "InPlay") &
    df["ExitSpeed"].notna() &
    df["Angle"].notna()
].copy()

batted_ball_df["isHit"] = batted_ball_df["PlayResult"].isin(["Single", "Double", "Triple", "HomeRun"]).astype(int)

X = batted_ball_df[["ExitSpeed", "Angle"]]
y = batted_ball_df["isHit"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)

batted_ball_df["xBA"] = model.predict_proba(X)[:, 1]

In [2]:
valid_ab_results = ["Single", "Double", "Triple", "HomeRun", "Out", "Error", "FieldersChoice"]
falcom_ab_df = df[
    (df["BatterTeam"] == "FAL_COM") &
    (
        (df["PlayResult"].isin(valid_ab_results)) |
        (df["KorBB"] == "Strikeout")
    )
].copy()

falcom_ab_df["AB"] = 1
falcom_ab_df["Hit"] = falcom_ab_df["PlayResult"].isin(["Single", "Double", "Triple", "HomeRun"]).astype(int)

falcom_ab_df = falcom_ab_df.merge(
    batted_ball_df[["PlayID", "xBA"]],
    on="PlayID",
    how="left"
)

In [3]:
name_mapping = {
    "Harrison, Bear": "Harrison, Bear",
    "Harrison, Connor": "Harrison, Bear"
}

falcom_ab_df["Batter"] = falcom_ab_df["Batter"].replace(name_mapping)

player_xba = (
    falcom_ab_df
    .groupby("Batter")
    .agg(
        AB=("AB", "sum"),
        Hits=("Hit", "sum"),
        ActualAVG=("Hit", lambda x: x.sum() / x.count()),
        Total_xBA=("xBA", "sum"),
        xBA_PA_Count=("xBA", "count")
    )
    .reset_index()
)

player_xba["xBA_AVG"] = player_xba["Total_xBA"] / player_xba["AB"]
player_xba["AVG_minus_xBA"] = player_xba["ActualAVG"] - player_xba["xBA_AVG"]

player_xba = player_xba.sort_values("AVG_minus_xBA", ascending=False)
player_xba = player_xba.drop(columns=["Total_xBA", "xBA_PA_Count"])

player_xba = player_xba.rename(columns={
    "ActualAVG": "AVG",
    "xBA_AVG": "xBA",
    "AVG_minus_xBA": "AVG - xBA"
})

print(player_xba)

                Batter   AB  Hits       AVG       xBA  AVG - xBA
11      Moroney, Reece   14     6  0.428571  0.243886   0.184685
24       Yamin, Edward    9     3  0.333333  0.154086   0.179248
6       Maroney, Reece    4     1  0.250000  0.179663   0.070337
14          Niu, Maika  124    34  0.274194  0.212870   0.061323
25     Zuckerman, Ryan   50    13  0.260000  0.206310   0.053690
15  Osterhouse, Justin  119    31  0.260504  0.220908   0.039596
0    Brumbaugh, Cayden   21     5  0.238095  0.198836   0.039259
4       Harrison, Bear   78    21  0.269231  0.231677   0.037554
23      Wilson, Ashton   37     7  0.189189  0.156253   0.032937
20       Schmidt, Carl  136    38  0.279412  0.255671   0.023741
16      Quatrani, Mark   33     8  0.242424  0.231299   0.011126
21       Schmidt, Kent  123    34  0.276423  0.274488   0.001935
17       Quigley, Jack    1     0  0.000000  0.000000   0.000000
9     Morales, Antonio  127    29  0.228346  0.234066  -0.005720
5        Lopez, Adrian  1

In [4]:
batted_ball_df["SLG_Value"] = batted_ball_df["PlayResult"].map({
    "Single": 1,
    "Double": 2,
    "Triple": 3,
    "HomeRun": 4
}).fillna(0)

X = batted_ball_df[["ExitSpeed", "Angle"]]
y = batted_ball_df["SLG_Value"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

slg_model = LinearRegression()
slg_model.fit(X_train, y_train)

batted_ball_df["xTB"] = slg_model.predict(X)

In [5]:
falcom_ab_df = falcom_ab_df.drop(columns=["xTB", "SLG_Value"], errors="ignore")

falcom_ab_df = falcom_ab_df.merge(
    batted_ball_df[["PlayID", "xTB", "SLG_Value"]],
    on="PlayID",
    how="left"
)

player_xba_slg = (
    falcom_ab_df
    .groupby("Batter")
    .agg(
        AB=("AB", "sum"),
        Hits=("Hit", "sum"),
        AVG=("Hit", lambda x: x.sum() / x.count()),
        SLG=("SLG_Value", "sum"),
        xSLG_Total=("xTB", "sum")
    )
    .reset_index()
)

player_xba_slg["xSLG"] = player_xba_slg["xSLG_Total"] / player_xba_slg["AB"]
player_xba_slg["SLG - xSLG"] = player_xba_slg["SLG"] / player_xba_slg["AB"] - player_xba_slg["xSLG"]

player_xba_slg = player_xba_slg.drop(columns=["xSLG_Total", "SLG", "AVG"])
player_xba_slg = player_xba_slg.sort_values("SLG - xSLG", ascending=False)

print(player_xba_slg)

                Batter   AB  Hits      xSLG  SLG - xSLG
14          Niu, Maika  124    34  0.309380    0.182555
10       Moran, Landon   18     3  0.302478    0.141967
24       Yamin, Edward    9     3  0.191725    0.141608
25     Zuckerman, Ryan   50    13  0.303486    0.136514
7        McCann, David   32     6  0.302822    0.103428
4       Harrison, Bear   78    21  0.341831    0.081246
11      Moroney, Reece   14     6  0.351652    0.076920
23      Wilson, Ashton   37     7  0.227037    0.070260
16      Quatrani, Mark   33     8  0.341157    0.052782
0    Brumbaugh, Cayden   21     5  0.248088    0.037626
15  Osterhouse, Justin  119    31  0.298171    0.029560
20       Schmidt, Carl  136    38  0.351094    0.016553
17       Quigley, Jack    1     0  0.000000    0.000000
6       Maroney, Reece    4     1  0.259881   -0.009881
13     Newstrom, Chris   79    11  0.207341   -0.030125
21       Schmidt, Kent  123    34  0.399895   -0.058431
12      Morrison, Kyle   35     6  0.348583   -0