In [40]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Location of the fall-ball logging workbook.
# NOTE(review): absolute, user-specific path -- the notebook only runs on a
# machine where this exact file exists; consider a configurable data directory.
filepath = r"C:\Users\brend\OneDrive\Desktop\my-dashboard\data\2025 SB Fall Ball Logging.xlsx"

# Load the workbook into the raw log frame that the later cells clean up.
df = pd.read_excel(io=filepath)

In [41]:
# Keep only the columns the negative-PA analysis reads.
cols = ["Hitter", "AB Result", "BBT", "ExitVelocity", "HH"]
df = df.loc[:, cols].copy()

# Trim surrounding whitespace on the text columns; non-string cells
# (numbers, NaN) pass through untouched.
for text_col in ("Hitter", "AB Result", "BBT"):
    df[text_col] = df[text_col].map(
        lambda cell: cell.strip() if isinstance(cell, str) else cell
    )

# A row counts as having an AB result when the cell is neither missing nor
# blank. This must be computed before missing values are turned into "" below.
ab_result = df["AB Result"]
df["AB_present"] = ab_result.notna() & ab_result.astype(str).str.strip().ne("")
print("✅ Rows with AB results (completed PAs):", df["AB_present"].sum(), "of", len(df))

# Replace every NaN in the frame -- numeric columns included -- with "".
df = df.replace(to_replace=np.nan, value="")

# Upper-cased, trimmed copies used for exact string comparisons later on.
df = df.assign(
    AB_result_clean=df["AB Result"].astype(str).str.upper().str.strip(),
    BBT_clean=df["BBT"].astype(str).str.upper().str.strip(),
)

✅ Rows with AB results (completed PAs): 688 of 2633


In [42]:
def negative_components(row, max_weak_gb_ev=86):
    """Flag one logged row as a negative plate appearance and say why.

    Three independent checks are made on rows that have an AB result:

    * ``AB_result_clean == "K"``      -> ``k = 1``
    * ``BBT_clean == "IFFB"``         -> ``iffb = 1``
    * ``BBT_clean == "GB"`` and the ball was weakly hit -> ``gb = 1``

    A ground ball is "weakly hit" when ``ExitVelocity`` is a usable number
    that is ``<= max_weak_gb_ev``.  When the exit velocity is unusable
    (blank, non-numeric, ``None`` or NaN) the ``HH`` column decides instead:
    ``HH == 0`` counts, anything else does not.
    (Presumably K = strikeout, IFFB = infield fly ball, GB = ground ball and
    HH = hard-hit flag -- confirm against the logging sheet.)

    Parameters
    ----------
    row : mapping-like
        Must provide ``AB_present``, ``AB_result_clean``, ``BBT_clean``,
        ``ExitVelocity`` and ``HH`` by key (a DataFrame row or a dict).
    max_weak_gb_ev : float, default 86
        Highest exit velocity at which a ground ball still counts as negative.

    Returns
    -------
    tuple
        ``(negative, k, gb, iffb)`` where ``negative`` is a bool that is True
        when any component is 1, and the components are the ints 0 or 1.
        Rows without an AB result yield ``(False, 0, 0, 0)``.
    """
    if not row["AB_present"]:
        return (False, 0, 0, 0)

    k = 1 if row["AB_result_clean"] == "K" else 0
    iffb = 1 if row["BBT_clean"] == "IFFB" else 0
    gb = 0

    if row["BBT_clean"] == "GB":
        try:
            ev = float(row["ExitVelocity"])
        except (ValueError, TypeError):
            ev = float("nan")

        if np.isnan(ev):
            # float(nan) succeeds without raising, so a NaN reading has to be
            # routed to the HH fallback explicitly; otherwise the `<=` test
            # below is simply False and the ground ball is never counted.
            gb = 1 if row["HH"] == 0 else 0
        else:
            gb = 1 if ev <= max_weak_gb_ev else 0

    negative = bool(k + gb + iffb)
    return (negative, k, gb, iffb)

# Score every logged row; the four slots of the tuple returned by
# negative_components become four new columns on df.
df[["NegativePA", "Neg_K", "Neg_GB", "Neg_IFFB"]] = df.apply(
    negative_components, axis=1, result_type="expand"
)

# Per-hitter totals over the rows that have an AB result. Summing the boolean
# AB_present column on this filtered frame gives the row count per hitter.
completed_pas = df.loc[df["AB_present"]]
neg_pa = (
    completed_pas
    .groupby("Hitter", dropna=False)
    .agg(
        TotalPAs=pd.NamedAgg(column="AB_present", aggfunc="sum"),
        Neg_K=pd.NamedAgg(column="Neg_K", aggfunc="sum"),
        Neg_GB=pd.NamedAgg(column="Neg_GB", aggfunc="sum"),
        Neg_IFFB=pd.NamedAgg(column="Neg_IFFB", aggfunc="sum"),
        NegativePAs=pd.NamedAgg(column="NegativePA", aggfunc="sum"),
    )
    .reset_index()
)

# Share of negative PAs as a percentage; hitters with no PAs are given 0.0
# instead of a division-by-zero result.
has_pas = neg_pa["TotalPAs"].gt(0)
neg_pa["Negative_PA%"] = (
    neg_pa["NegativePAs"] / neg_pa["TotalPAs"] * 100
).where(has_pas, 0.0)

# Mark hitters whose rate is strictly above the median rate.
median_val = neg_pa["Negative_PA%"][has_pas].median()
neg_pa["Above_Median"] = neg_pa["Negative_PA%"].gt(median_val)

In [43]:
# Two rankings of the same summary table: highest rate first, then lowest first.
highest_neg_pa = neg_pa.sort_values("Negative_PA%", ascending=False)
lowest_neg_pa = neg_pa.sort_values("Negative_PA%")

# Show the first ten rows of each ranking as plain text without the index.
print(
    "\n--- Highest Negative PA% (Struggling Hitters) ---",
    highest_neg_pa.head(10).to_string(index=False),
    sep="\n",
)

print(
    "\n--- Lowest Negative PA% (Best Hitters) ---",
    lowest_neg_pa.head(10).to_string(index=False),
    sep="\n",
)

# Sanity check: size of the log versus the rows that carry an AB result.
print("\nTotal rows in raw data:", df.shape[0])
print("Rows with AB results (completed PAs):", df["AB_present"].sum())


--- Highest Negative PA% (Struggling Hitters) ---
           Hitter  TotalPAs  Neg_K  Neg_GB  Neg_IFFB  NegativePAs  Negative_PA%  Above_Median
  Robinson, Kacin        30     11       5         1           17     56.666667          True
     Durham, Noah        29      8       6         0           14     48.275862          True
 Rickert, Michael        25      7       3         2           12     48.000000          True
    Redden, Tyler        23      5       5         1           11     47.826087          True
   Connolly, Sean        21      7       3         0           10     47.619048          True
     Bureau, John        35      9       5         1           15     42.857143          True
Christopher, Will        28      8       4         0           12     42.857143          True
    Kearney, Nate        39     11       4         1           16     41.025641          True
 Howland, Jamison        29      6       4         1           11     37.931034          True
    Morri

In [39]:
# Write the per-hitter summary to CSV, leaving the positional index out.
# NOTE(review): absolute, user-specific output path -- adjust before running elsewhere.
neg_pa.to_csv(
    path_or_buf=r"C:\Users\brend\OneDrive\NegativePA_Results.csv",
    index=False,
)