In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import make_column_transformer
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.metrics import mean_squared_error, accuracy_score
from xgboost import XGBRegressor, XGBClassifier
from load import dataset

In [57]:
# Initialize Variables
predict_week = 7
predict_season = 2020
today_localize = pd.to_datetime('today').tz_localize('US/Central') - pd.Timedelta(hours=3)

final_columns = ['game_id', 'season', 'week', 'season_type', 'home_team',
                'home_conference', 'away_team', 'away_conference',  'pre_game_spread',
                'predicted_spread', 'regression_spread_pred', 'classification_spread_pred', 'classification_confidence']

# Load Dataset
df = dataset(predict_season=predict_season, predict_week=predict_week, window_size=4, update_data=True, update_seasons=[pd.to_datetime('today').year])


In [67]:

# Prep for ML Models
df[df.columns[df.columns.str.contains('home|Home')]] = df[df.columns[df.columns.str.contains(
    'home|Home')]].groupby('home_team', as_index=False, group_keys=False).apply(lambda x: x.fillna(x.mean()))

df[df.columns[df.columns.str.contains('away|Away')]] = df[df.columns[df.columns.str.contains(
    'away|Away')]].groupby('away_team', as_index=False, group_keys=False).apply(lambda x: x.fillna(x.mean()))

df = df.dropna(axis=1, thresh=int(len(df) * .9))

df = df.fillna(0)

df[df.select_dtypes('bool').columns] = df.select_dtypes('bool').astype('int')

ct = make_column_transformer(
    (OrdinalEncoder(), ['season_type', 'home_conference', 'away_conference']),
    remainder='passthrough'
)

drop_columns =  ['game_id', 'season', 'venue', 'start_date', 'home_team', 'away_team', 
                'start_date', 'spread_target', 'pre_game_spread' , 'pre_game_home_win_prob', 'pre_game_away_win_prob'
                ]


In [59]:
# Use in production
X_train = df[(df.start_date.dt.tz_convert('US/Central') < today_localize)]
X_test = df[(df.start_date.dt.tz_convert('US/Central') >= today_localize)] 

# Use in development
# X_train = df[(df['week'] < predict_week) | (df['season'] < predict_season)] 
# X_test = df[(df['week'] == predict_week) & (df['season'] == predict_season)] 

model_regressor = XGBRegressor(learning_rate=0.1, 
                                colsample_bytree=0.9,
                                gamma=0.5,
                                max_depth=2,
                                min_child_weight=4,
                                n_estimators=100,
                                subsample=0.8)

model_regressor.fit(
    ct.fit_transform(
        X_train.drop(drop_columns, axis=1)), 
        X_train['spread_target']
        )

X_train['predicted_spread'] = model_regressor.predict(ct.fit_transform(X_train.drop(drop_columns, axis=1)))
X_test['predicted_spread'] = model_regressor.predict(ct.fit_transform(X_test.drop(drop_columns, axis=1)))

# Clean Up
X_train['predicted_spread'] = np.around(X_train['predicted_spread'] / .5, decimals=0) * .5
X_test['predicted_spread'] = np.around(X_test['predicted_spread'] / .5, decimals=0) * .5

X_train['regression_spread_pred'] = np.where(X_train['predicted_spread'] <= X_train['pre_game_spread'], 1, 0)
X_test['regression_spread_pred'] = np.where(X_test['predicted_spread'] <= X_test['pre_game_spread'], 1, 0)

X_train['spread_result'] = np.where(X_train['spread_target'] <= X_train['pre_game_spread'], 1, 0)
X_test['spread_result'] = np.where(X_test['spread_target'] <= X_test['pre_game_spread'], 1, 0)

model_classifier = XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
            colsample_bynode=1, colsample_bytree=0.8, gamma=5, gpu_id=-1,
            importance_type='gain', interaction_constraints='',
            learning_rate=0.1, max_delta_step=0, max_depth=5,
            min_child_weight=10, monotone_constraints='()',
            n_estimators=100, n_jobs=0, num_parallel_tree=1,
            objective='binary:logistic', random_state=0, reg_alpha=0,
            reg_lambda=1, scale_pos_weight=1, subsample=1.0,
            tree_method='exact', validate_parameters=1, verbosity=None)

# Update drop columns list for Classifier Model
drop_columns.append('spread_result')
# drop_columns.remove('pre_game_spread')   

model_classifier.fit(ct.fit_transform(X_train.drop(drop_columns, axis=1)), X_train['spread_result'])

X_train['classification_spread_pred'] = model_classifier.predict(ct.fit_transform(X_train.drop(drop_columns, axis=1)))
X_test['classification_spread_pred'] = model_classifier.predict(ct.fit_transform(X_test.drop(drop_columns, axis=1)))

# Add so the Predict Probability is Aligned
drop_columns.append('classification_spread_pred')

X_train['classification_confidence'] = np.max(model_classifier.predict_proba(ct.fit_transform(X_train.drop(drop_columns, axis=1))), axis=1)
X_test['classification_confidence'] = np.max(model_classifier.predict_proba(ct.fit_transform(X_test.drop(drop_columns, axis=1))), axis=1)

ValueError: Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required.

In [68]:
try:
    predict_df = X_test[final_columns]
    predict_df.head(10)
    
    no_predictions = False
except:
    no_predictions = True

In [69]:
# Pull in games that have occurred within the current week to append to the predictions
filepath = Path('../zillion_picks/picks/free')

result_dfs = []

for file in filepath.rglob('*.csv'):
    result_df = pd.read_csv(file)


    result_dfs.append(result_df)

result_df = pd.concat(result_dfs)

result_df = result_df[(result_df['week'] == predict_week) & (result_df['season'] == predict_season)]

if no_predictions:
    result_df = result_df.drop(['actual_spread', 'spread_result', 'start_date'], axis=1)

else: 

    result_df = result_df[~result_df.game_id.isin(predict_df.game_id)]
    result_df = result_df.drop(['actual_spread', 'spread_result', 'start_date'], axis=1)

result_df.head()

Unnamed: 0,game_id,season,week,season_type,home_team,home_conference,away_team,away_conference,pre_game_spread,predicted_spread,regression_spread_pred,classification_spread_pred,classification_confidence,regression_spread_result,classification_spread_result
0,401236006,2020,7,regular,Baylor,Big 12,Oklahoma State,Big 12,7.0,4.5,1,1,0.710735,,
1,401215313,2020,7,regular,South Alabama,Sun Belt,Texas State,Sun Belt,-3.5,3.5,0,1,0.5269,Lost,Won
2,401234614,2020,7,regular,Miami,ACC,Pittsburgh,ACC,-13.0,-10.0,0,1,0.549595,Won,Lost
3,401212478,2020,7,regular,Temple,American Athletic,South Florida,American Athletic,-13.0,-1.5,0,0,0.722217,Won,Won
4,401237111,2020,7,regular,Tennessee,SEC,Kentucky,SEC,-6.5,-10.5,1,0,0.510851,Lost,Won


In [70]:
# Merge Predit and Result Dataframes
try:
    predict_df = pd.concat([result_df, predict_df])
    print('ran try')
except:
    predict_df = result_df

In [71]:
# Add spread target in case the game is complete
predict_df = predict_df.merge(df[['game_id', 'spread_target', 'start_date']], on='game_id', how='left')
predict_df.head()

Unnamed: 0,game_id,season,week,season_type,home_team,home_conference,away_team,away_conference,pre_game_spread,predicted_spread,regression_spread_pred,classification_spread_pred,classification_confidence,regression_spread_result,classification_spread_result,spread_target,start_date
0,401236006,2020,7,regular,Baylor,Big 12,Oklahoma State,Big 12,7.0,4.5,1,1,0.710735,,,0.0,2020-10-17 04:00:00+00:00
1,401215313,2020,7,regular,South Alabama,Sun Belt,Texas State,Sun Belt,-3.5,3.5,0,1,0.5269,Lost,Won,-10.0,2020-10-17 16:00:00+00:00
2,401234614,2020,7,regular,Miami,ACC,Pittsburgh,ACC,-13.0,-10.0,0,1,0.549595,Won,Lost,-12.0,2020-10-17 16:00:00+00:00
3,401212478,2020,7,regular,Temple,American Athletic,South Florida,American Athletic,-13.0,-1.5,0,0,0.722217,Won,Won,-2.0,2020-10-17 16:00:00+00:00
4,401237111,2020,7,regular,Tennessee,SEC,Kentucky,SEC,-6.5,-10.5,1,0,0.510851,Lost,Won,27.0,2020-10-17 16:00:00+00:00


In [72]:
predict_df['spread_result'] = np.where(predict_df['spread_target'] <= predict_df['pre_game_spread'], 1, 0)
predict_df.head()

Unnamed: 0,game_id,season,week,season_type,home_team,home_conference,away_team,away_conference,pre_game_spread,predicted_spread,regression_spread_pred,classification_spread_pred,classification_confidence,regression_spread_result,classification_spread_result,spread_target,start_date,spread_result
0,401236006,2020,7,regular,Baylor,Big 12,Oklahoma State,Big 12,7.0,4.5,1,1,0.710735,,,0.0,2020-10-17 04:00:00+00:00,1
1,401215313,2020,7,regular,South Alabama,Sun Belt,Texas State,Sun Belt,-3.5,3.5,0,1,0.5269,Lost,Won,-10.0,2020-10-17 16:00:00+00:00,1
2,401234614,2020,7,regular,Miami,ACC,Pittsburgh,ACC,-13.0,-10.0,0,1,0.549595,Won,Lost,-12.0,2020-10-17 16:00:00+00:00,0
3,401212478,2020,7,regular,Temple,American Athletic,South Florida,American Athletic,-13.0,-1.5,0,0,0.722217,Won,Won,-2.0,2020-10-17 16:00:00+00:00,0
4,401237111,2020,7,regular,Tennessee,SEC,Kentucky,SEC,-6.5,-10.5,1,0,0.510851,Lost,Won,27.0,2020-10-17 16:00:00+00:00,0


In [73]:
# Spread Result Calculation
predict_df['regression_spread_result'] = np.where(predict_df['spread_result'] == predict_df['regression_spread_pred'], 'Won', 'Lost')
predict_df['regression_spread_result'] = np.where(predict_df['spread_target'] == predict_df['pre_game_spread'], 'Push', predict_df['regression_spread_result'])

predict_df['classification_spread_result'] = np.where(predict_df['spread_result'] == predict_df['classification_spread_pred'], 'Won', 'Lost')
predict_df['classification_spread_result'] = np.where(predict_df['spread_target'] == predict_df['pre_game_spread'], 'Push', predict_df['classification_spread_result'])

# Remove Results from Games that haven't been played
predict_df[['regression_spread_result', 'classification_spread_result', 'spread_result']] = predict_df[['regression_spread_result', 'classification_spread_result', 'spread_result']].where(predict_df.spread_target.ne(0), np.nan)

# Rename Column
predict_df = predict_df.rename(columns={'spread_target':'actual_spread'})
predict_df['week'] = predict_df['week'].mode()[0]

predict_df.head()

Unnamed: 0,game_id,season,week,season_type,home_team,home_conference,away_team,away_conference,pre_game_spread,predicted_spread,regression_spread_pred,classification_spread_pred,classification_confidence,regression_spread_result,classification_spread_result,actual_spread,start_date,spread_result
0,401236006,2020,7,regular,Baylor,Big 12,Oklahoma State,Big 12,7.0,4.5,1,1,0.710735,,,0.0,2020-10-17 04:00:00+00:00,
1,401215313,2020,7,regular,South Alabama,Sun Belt,Texas State,Sun Belt,-3.5,3.5,0,1,0.5269,Lost,Won,-10.0,2020-10-17 16:00:00+00:00,1.0
2,401234614,2020,7,regular,Miami,ACC,Pittsburgh,ACC,-13.0,-10.0,0,1,0.549595,Won,Lost,-12.0,2020-10-17 16:00:00+00:00,0.0
3,401212478,2020,7,regular,Temple,American Athletic,South Florida,American Athletic,-13.0,-1.5,0,0,0.722217,Won,Won,-2.0,2020-10-17 16:00:00+00:00,0.0
4,401237111,2020,7,regular,Tennessee,SEC,Kentucky,SEC,-6.5,-10.5,1,0,0.510851,Lost,Won,27.0,2020-10-17 16:00:00+00:00,0.0


In [74]:
if no_predictions:
    predict_df = predict_df.dropna(subset=['regression_spread_result'])
else:
    pass
predict_df.head()

Unnamed: 0,game_id,season,week,season_type,home_team,home_conference,away_team,away_conference,pre_game_spread,predicted_spread,regression_spread_pred,classification_spread_pred,classification_confidence,regression_spread_result,classification_spread_result,actual_spread,start_date,spread_result
1,401215313,2020,7,regular,South Alabama,Sun Belt,Texas State,Sun Belt,-3.5,3.5,0,1,0.5269,Lost,Won,-10.0,2020-10-17 16:00:00+00:00,1.0
2,401234614,2020,7,regular,Miami,ACC,Pittsburgh,ACC,-13.0,-10.0,0,1,0.549595,Won,Lost,-12.0,2020-10-17 16:00:00+00:00,0.0
3,401212478,2020,7,regular,Temple,American Athletic,South Florida,American Athletic,-13.0,-1.5,0,0,0.722217,Won,Won,-2.0,2020-10-17 16:00:00+00:00,0.0
4,401237111,2020,7,regular,Tennessee,SEC,Kentucky,SEC,-6.5,-10.5,1,0,0.510851,Lost,Won,27.0,2020-10-17 16:00:00+00:00,0.0
5,401237109,2020,7,regular,South Carolina,SEC,Auburn,SEC,3.0,5.0,0,1,0.707988,Lost,Won,-8.0,2020-10-17 16:00:00+00:00,1.0


In [75]:
# Export
filepath = Path('../zillion_picks/picks')

for key, group in predict_df.groupby(['season', 'week', 'season_type']):
    if group.season_type.unique() == 'postseason':
        group.to_csv(filepath/f'premium/{key[0]}_postseason.csv', index=False)
    else:
        group.to_csv(filepath/f'premium/{key[0]}_{str(key[1]).rjust(2, "0")}.csv', index=False)

In [2]:
# REGRESSION AND CLASSIFIER MODEL FOR BACK TESTING

# Load
predict_season = 2019

final_columns = ['game_id', 'season', 'week', 'season_type', 'home_team',
                'home_conference', 'away_team', 'away_conference',  'pre_game_spread',
                'predicted_spread', 'regression_spread_pred', 'classification_spread_pred', 'classification_confidence']

for predict_week in range(1,17):

    df = dataset(predict_season=predict_season, predict_week=predict_week, window_size=4, update_data=False)

    # Prep Dataset
    df[df.columns[df.columns.str.contains('home|Home')]] = df[df.columns[df.columns.str.contains(
        'home|Home')]].groupby('home_team', as_index=False, group_keys=False).apply(lambda x: x.fillna(x.mean()))

    df[df.columns[df.columns.str.contains('away|Away')]] = df[df.columns[df.columns.str.contains(
        'away|Away')]].groupby('away_team', as_index=False, group_keys=False).apply(lambda x: x.fillna(x.mean()))

    df = df.dropna(axis=1, thresh=int(len(df) * .9))
    df = df.fillna(0)
    df[df.select_dtypes('bool').columns] = df.select_dtypes('bool').astype('int')

    # Model
    ct = make_column_transformer(
        (OrdinalEncoder(), ['season_type', 'home_conference', 'away_conference']),
        remainder='passthrough'
    )

    drop_columns =  ['game_id', 'season', 'venue', 'start_date', 'home_team', 'away_team', 
                    'start_date', 'spread_target', 'pre_game_spread', 'pre_game_home_win_prob', 'pre_game_away_win_prob']
                    

    # Use in production
    # X_train = df[(df.start_date.dt.tz_convert('US/Central') < today_localize)]
    # X_test = df[(df.start_date.dt.tz_convert('US/Central') >= today_localize)] 

    # Use in development
    X_train = df[(df['week'] < predict_week) | (df['season'] < predict_season)] 
    X_test = df[(df['week'] == predict_week) & (df['season'] == predict_season)] 

    model_regressor = XGBRegressor(learning_rate=0.1, 
                                    colsample_bytree=0.9,
                                    gamma=0.5,
                                    max_depth=2,
                                    min_child_weight=4,
                                    n_estimators=100,
                                    subsample=0.8)

    model_regressor.fit(ct.fit_transform(X_train.drop(drop_columns, axis=1)), X_train['spread_target'])
    
    # Use this for later to determine feature importance
    model_regressor_cols = X_train.drop(drop_columns, axis=1).columns

    X_train['predicted_spread'] = model_regressor.predict(ct.fit_transform(X_train.drop(drop_columns, axis=1)))
    X_test['predicted_spread'] = model_regressor.predict(ct.fit_transform(X_test.drop(drop_columns, axis=1)))

    # Clean Up
    X_train['predicted_spread'] = np.around(X_train['predicted_spread'] / .5, decimals=0) * .5
    X_test['predicted_spread'] = np.around(X_test['predicted_spread'] / .5, decimals=0) * .5

    X_train['regression_spread_pred'] = np.where(X_train['predicted_spread'] <= X_train['pre_game_spread'], 1, 0)
    X_test['regression_spread_pred'] = np.where(X_test['predicted_spread'] <= X_test['pre_game_spread'], 1, 0)

    X_train['spread_result'] = np.where(X_train['spread_target'] <= X_train['pre_game_spread'], 1, 0)
    X_test['spread_result'] = np.where(X_test['spread_target'] <= X_test['pre_game_spread'], 1, 0)

    model_classifier = XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
                                    colsample_bynode=1, colsample_bytree=0.8, gamma=5, gpu_id=-1,
                                    importance_type='gain', interaction_constraints='',
                                    learning_rate=0.1, max_delta_step=0, max_depth=5,
                                    min_child_weight=10, monotone_constraints='()',
                                    n_estimators=100, n_jobs=0, num_parallel_tree=1,
                                    objective='binary:logistic', random_state=0, reg_alpha=0,
                                    reg_lambda=1, scale_pos_weight=1, subsample=1.0,
                                    tree_method='exact', validate_parameters=1, verbosity=None)

    # Update drop columns list for Classifier Model
    drop_columns.append('spread_result')
    
    # for col in ['pre_game_spread', 'pre_game_home_win_prob', 'pre_game_away_win_prob']:
    #     drop_columns.remove(col)   

    model_classifier.fit(ct.fit_transform(X_train.drop(drop_columns, axis=1)), X_train['spread_result'])
    
    # Use this for later to determine feature importance
    model_classifier_cols = X_train.drop(drop_columns, axis=1).columns

    X_train['classification_spread_pred'] = model_classifier.predict(ct.fit_transform(X_train.drop(drop_columns, axis=1)))
    X_test['classification_spread_pred'] = model_classifier.predict(ct.fit_transform(X_test.drop(drop_columns, axis=1)))

    # Add so the Predict Probability is Aligned
    drop_columns.append('classification_spread_pred')

    X_train['classification_confidence'] = model_classifier.predict_proba(ct.fit_transform(X_train.drop(drop_columns, axis=1)))[:,0]
    X_test['classification_confidence'] =  np.max(model_classifier.predict_proba(ct.fit_transform(X_test.drop(drop_columns, axis=1))), axis=1)

    predict_df = X_test[final_columns]

    predict_df = predict_df.merge(df[['game_id', 'spread_target', 'start_date']], on='game_id', how='left')
    predict_df['spread_result'] = np.where(predict_df['spread_target'] <= predict_df['pre_game_spread'], 1, 0)

    predict_df['regression_spread_result'] = np.where(predict_df['spread_result'] == predict_df['regression_spread_pred'], 'Won', 'Lost')
    predict_df['regression_spread_result'] = np.where(predict_df['spread_target'] == predict_df['pre_game_spread'], 'Push', predict_df['regression_spread_result'])

    predict_df['classification_spread_result'] = np.where(predict_df['spread_result'] == predict_df['classification_spread_pred'], 'Won', 'Lost')
    predict_df['classification_spread_result'] = np.where(predict_df['spread_target'] == predict_df['pre_game_spread'], 'Push', predict_df['classification_spread_result'])

    # Remove Results from Games that haven't been played
    predict_df[['regression_spread_result', 'classification_spread_result', 'spread_result']] = predict_df[['regression_spread_result', 'classification_spread_result', 'spread_result']].where(predict_df.spread_target.ne(0), np.nan)

    # Rename Column
    predict_df = predict_df.rename(columns={'spread_target':'actual_spread'})
    predict_df['week'] = predict_df['week'].mode()[0]

    # Export
    filepath = Path('../zillion_picks/picks')

    for key, group in predict_df.groupby(['season', 'week', 'season_type']):
        if group.season_type.unique() == 'postseason':
            group.to_csv(filepath/f'premium/{key[0]}_postseason.csv', index=False)
        else:
            group.to_csv(filepath/f'premium/{key[0]}_{str(key[1]).rjust(2, "0")}.csv', index=False)

    print(f"{predict_season} Week {predict_week} Regression Accuracy: {round(accuracy_score(X_test['spread_result'], X_test['regression_spread_pred']) * 100,2)}%" )
    print(f"{predict_season} Week {predict_week} Classification Accuracy: {round(accuracy_score(X_test['spread_result'], X_test['classification_spread_pred']) * 100,2)}%" )
    print('--------------')



2019 Week 1 Regression Accuracy: 53.01%
2019 Week 1 Classification Accuracy: 53.01%
--------------
2019 Week 2 Regression Accuracy: 54.79%
2019 Week 2 Classification Accuracy: 53.42%
--------------
2019 Week 3 Regression Accuracy: 43.28%
2019 Week 3 Classification Accuracy: 55.22%
--------------
2019 Week 4 Regression Accuracy: 44.83%
2019 Week 4 Classification Accuracy: 55.17%
--------------
2019 Week 5 Regression Accuracy: 61.82%
2019 Week 5 Classification Accuracy: 47.27%
--------------
2019 Week 6 Regression Accuracy: 55.32%
2019 Week 6 Classification Accuracy: 59.57%
--------------
2019 Week 7 Regression Accuracy: 45.28%
2019 Week 7 Classification Accuracy: 52.83%
--------------
2019 Week 8 Regression Accuracy: 49.18%
2019 Week 8 Classification Accuracy: 45.9%
--------------
2019 Week 9 Regression Accuracy: 50.91%
2019 Week 9 Classification Accuracy: 47.27%
--------------
2019 Week 10 Regression Accuracy: 52.08%
2019 Week 10 Classification Accuracy: 60.42%
--------------
2019 Week

In [3]:
# Test Feature Importance

regression_important_features = pd.Series(data=model_regressor.feature_importances_, index=model_regressor_cols).sort_values(ascending=False)
classification_important_features = pd.Series(data=model_classifier.feature_importances_, index=model_classifier_cols).sort_values(ascending=False)

print(regression_important_features.head(10))
print('---------------------------------------')
print(classification_important_features.head(10))


pointsAway                         0.060463
away_points                        0.042084
rankHome                           0.030395
averageStars_All PositionsHome     0.029661
offense_successRateHome            0.028046
offense_totalPPAAway               0.026664
talentHome                         0.026290
home_conference                    0.025569
averageRating_All PositionsHome    0.024366
offense_successRateAway            0.023262
dtype: float32
---------------------------------------
regression_spread_pred                  0.008598
offense_lineYardsHome                   0.005699
sacksHome                               0.005600
tacklesForLossHome                      0.005529
season_type                             0.005423
year WRHome                             0.005348
averageRating_QuarterbackHome           0.005269
defensiveTDsHome                        0.005211
away_points                             0.005042
offense_passingPlays_successRateHome    0.004976
dtype: float32


In [9]:
    print(f"{predict_season} Week {predict_week} Classification Accuracy: {round(accuracy_score(X_train['spread_result'], X_train['classification_spread_pred']) * 100,2)}%" )

2019 Week 16 Classification Accuracy: 92.34%


In [4]:
#################### WORK ON THIS ###############################

# CHECK CONFIDENCE TO SEE IF THERE IS A WIN ADVANTAGE
dfs=[]

for file in (filepath/'free').rglob('*'):
    df = pd.read_csv(file)
    dfs.append(df)

df = pd.concat(dfs)
df.head()


df = df[df['regression_spread_pred'] == df['classification_spread_pred']]

df['bin'] = pd.cut(df.classification_confidence, [0,.20,.30,.40,.50,.55,.60,.65,.70,.80,1])
df.groupby('bin')['regression_spread_result'].agg(win=(lambda df: df.eq('Won').sum() / (df.ne('Push').sum() - df.isnull().sum())), count=( 'count'))

print(sum(df['classification_spread_result'].eq('Won')) / (sum(df['classification_spread_result'].ne('Push')) - sum(df['classification_spread_result'].isnull())))

0.5353901996370236


In [None]:
            df = pd.read_json(
                f'https://api.collegefootballdata.com/roster?team={urllib.parse.quote(team)}&year={season}')