In [24]:
import pandas as pd
import statsmodels.api as sm

# Read the sheet at position 1 (i.e. the second sheet) of the workbook below.
df = pd.read_excel(
    r"C:\Users\brend\OneDrive - Stonehill College\Documents\Cape_League\cape_cod_pbp_2024.xlsx",
    sheet_name=1,
)

# Response variable for the regression.
y = df['RA/9']

# Base predictors: each count is divided by the BF column.
X = pd.DataFrame(
    {
        'SO_PA': df['SO'].div(df['BF']),
        'BB_PA': df['BB'].div(df['BF']),
        'GB_FB_PU_PA': df['GB'].sub(df['FB']).sub(df['PU']).div(df['BF']),
    }
)

# Squared and interaction terms, appended after the base predictors in this order.
X = X.assign(
    SO_PA_sq=lambda rates: rates['SO_PA'] ** 2,
    GB_FB_PU_PA_sq=lambda rates: rates['GB_FB_PU_PA'] ** 2,
    SO_PA_x_GB_FB_PU_PA=lambda rates: rates['SO_PA'] * rates['GB_FB_PU_PA'],
    BB_PA_x_GB_FB_PU_PA=lambda rates: rates['BB_PA'] * rates['GB_FB_PU_PA'],
)

# Add the 'const' column so the fit includes an intercept.
X = sm.add_constant(X)

# Ordinary least squares fit of y on the design matrix; X is reused later for prediction.
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   RA/9   R-squared:                       0.235
Model:                            OLS   Adj. R-squared:                  0.208
Method:                 Least Squares   F-statistic:                     8.768
Date:                Tue, 30 Sep 2025   Prob (F-statistic):           2.17e-09
Time:                        14:20:24   Log-Likelihood:                -496.12
No. Observations:                 208   AIC:                             1008.
Df Residuals:                     200   BIC:                             1035.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                   6.8094    

In [25]:
# Predictions of the fitted model for the same design matrix it was trained on,
# stored next to the observed values.
fitted_ra9 = model.predict(X)
df['Predicted_RA9'] = fitted_ra9

# Preview the first 20 rows of observed vs. predicted values.
comparison_cols = ['RA/9', 'Predicted_RA9']
print(df.loc[:, comparison_cols].head(20))

         RA/9  Predicted_RA9
0   10.285714       8.333472
1    5.000000       6.595410
2    1.421240       5.494893
3    9.980431       7.050873
4    5.538462       7.849694
5    2.812500       3.490608
6    6.230769       6.338889
7    2.346806       7.408777
8    5.267008       5.262401
9   11.374408       4.003501
10   7.434783       5.859699
11   4.321729       8.487270
12   4.379562       3.340353
13   6.545455       5.914839
14   6.428571       5.726645
15   3.000000       5.421384
16   4.109589       5.037981
17   2.553709       5.443892
18   7.172996       4.640804
19  10.500000       9.220192


In [27]:
# Default coefficients, keyed by the feature column each one multiplies
# ('const' is the intercept). These are the four-decimal values that were
# previously written inline in the formula.
_CCBL_SIERA_COEFFICIENTS = {
    'const': 6.8094,
    'SO_PA': -13.3740,
    'BB_PA': 16.8621,
    'GB_FB_PU_PA': 10.0163,
    'SO_PA_sq': 4.2882,
    'GB_FB_PU_PA_sq': -9.7459,
    'SO_PA_x_GB_FB_PU_PA': -29.2185,
    'BB_PA_x_GB_FB_PU_PA': -33.0850,
}

# Non-intercept terms, in the order they are accumulated into the estimate.
_CCBL_SIERA_TERMS = (
    'SO_PA',
    'BB_PA',
    'GB_FB_PU_PA',
    'SO_PA_sq',
    'GB_FB_PU_PA_sq',
    'SO_PA_x_GB_FB_PU_PA',
    'BB_PA_x_GB_FB_PU_PA',
)


def calculate_ccbl_siera(df, coefficients=None):
    """Return a copy of ``df`` with rate features and a ``CCBL_SIERA`` column.

    The input frame is not modified. The returned copy gains the columns
    ``SO_PA``, ``BB_PA``, ``GB_FB_PU_PA``, ``SO_PA_sq``, ``GB_FB_PU_PA_sq``,
    ``SO_PA_x_GB_FB_PU_PA``, ``BB_PA_x_GB_FB_PU_PA`` and ``CCBL_SIERA``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``SO``, ``BB``, ``GB``, ``FB``, ``PU`` and
        ``BF``. Rows where ``BF`` is 0 are not special-cased; the division
        then produces inf/NaN values that propagate into ``CCBL_SIERA``.
    coefficients : mapping or pandas.Series, optional
        Anything supporting ``coefficients[name]`` for ``'const'`` and each of
        the seven feature column names (for example the ``params`` of a
        statsmodels fit on the same design matrix). Defaults to the
        hard-coded four-decimal coefficients.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the feature and ``CCBL_SIERA`` columns added.

    Raises
    ------
    KeyError
        If ``df`` lacks a required column or ``coefficients`` lacks a term.
    """
    if coefficients is None:
        coefficients = _CCBL_SIERA_COEFFICIENTS

    df = df.copy()

    # Per-BF rates.
    df['SO_PA'] = df['SO'] / df['BF']
    df['BB_PA'] = df['BB'] / df['BF']
    df['GB_FB_PU_PA'] = (df['GB'] - df['FB'] - df['PU']) / df['BF']

    # Squared and interaction terms.
    df['SO_PA_sq'] = df['SO_PA'] ** 2
    df['GB_FB_PU_PA_sq'] = df['GB_FB_PU_PA'] ** 2
    df['SO_PA_x_GB_FB_PU_PA'] = df['SO_PA'] * df['GB_FB_PU_PA']
    df['BB_PA_x_GB_FB_PU_PA'] = df['BB_PA'] * df['GB_FB_PU_PA']

    # Accumulate left to right, starting from the intercept, so the default
    # coefficients reproduce the original inline formula's arithmetic order.
    estimate = coefficients['const']
    for term in _CCBL_SIERA_TERMS:
        estimate = estimate + coefficients[term] * df[term]
    df['CCBL_SIERA'] = estimate

    return df

# Rebind df to the copy returned by calculate_ccbl_siera, which carries the
# feature columns and the CCBL_SIERA estimate.
df = calculate_ccbl_siera(df)

# Preview the first 20 rows: name, observed RA/9 and the formula's estimate.
siera_preview_cols = ['Name', 'RA/9', 'CCBL_SIERA']
print(df.loc[:, siera_preview_cols].head(20))

                  Name       RA/9  CCBL_SIERA
0      A.J. Colarusso*  10.285714    8.333460
1      Aaron Mishoulam   5.000000    6.595388
2         Aaron Savary   1.421240    5.494866
3         Aidan Hunter   9.980431    7.050855
4        Aiden Moffett   5.538462    7.849672
5         Aiven Cabral   2.812500    3.490581
6        Alex Kranzler   6.230769    6.338869
7   Alizaeh Gutierrez*   2.346806    7.408761
8         Andres Galan   5.267008    5.262381
9       Andrew Behnke*  11.374408    4.003476
10        Andrew Evans   7.434783    5.859681
11    Andrew Williams*   4.321729    8.487252
12       Anthony Susac   4.379562    3.340324
13    Ashton Crowther*   6.545455    5.914822
14        Augie Mojica   6.428571    5.726626
15        Beau Bryans*   3.000000    5.421362
16           Ben Bybee   4.109589    5.037960
17         Blaine Wynk   2.553709    5.443871
18   Blake Morningstar   7.172996    4.640778
19     Boston Flannery  10.500000    9.220179
