In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.regression.mixed_linear_model import MixedLM
import statsmodels.formula.api as smf
from sklearn.preprocessing import StandardScaler

import ast

In [4]:
df = pd.read_csv('player_yearly_stats_aug_24.csv')
pos_info = pd.read_csv('career_stats_with_predicted_positions_aug_24.csv')

# df = df[df['year']>=2021]

In [3]:
df.columns

Index(['Unnamed: 0', 'playerID', 'name', 'gamesPlayed', 'scores', 'assists',
       'goals', 'plusMinus', 'completions', 'completionPercentage',
       'hockeyAssists', 'throwaways', 'stalls', 'drops', 'blocks', 'callahans',
       'pulls', 'teams', 'year', 'pointsPlayed', 'oPointsPlayed',
       'dPointsPlayed', 'minutesPlayed', 'possessions', 'oEfficiency',
       'yardsTotal', 'yardsThrown', 'yardsReceived', 'hucksCompleted',
       'huckPercentage'],
      dtype='object')

In [9]:
team_change_dict = {'ATX':'AUS', 'CHA':'CAR', 'RAL':'CAR'}
df['teams'] = df['teams'].apply(lambda x: team_change_dict[x] if x in team_change_dict.keys() else x)

df['predicted_position'] = df.apply(lambda row: pos_info[pos_info['name'] == row['name']]['predicted_position'].values[0] if not pos_info[pos_info['name'] == row['name']].empty else '', axis=1)
# result_df['position'] = df.apply(lambda row: pos_info[pos_info['name'] == row['name']]['position'].values[0] if not pos_info[pos_info['name'] == row['name']].empty else '', axis=1)
df['predicted_position'] = df.apply(lambda row: pos_info[pos_info['name'] == row['name']]['position'].iloc[0] if (len(pos_info[pos_info['name'] == row['name']]['position']) > 0 and pos_info[pos_info['name'] == row['name']]['position'].iloc[0] in ['Cutter', 'Handler', 'Defender']) else row['predicted_position'], axis=1)

df = df.dropna(subset = ['predicted_position', 'oEfficiency', 'teams'])
df = df[~df['teams'].str.contains(',')]

df['teams'] = df['teams'].astype('category')
df['predicted_position'] = df['predicted_position'].astype('category')

In [10]:
oeffmodel = smf.mixedlm('oEfficiency ~ C(teams) + C(predicted_position) + C(year) + oPointsPlayed + dPointsPlayed', data=df, groups=df['name'])
oeffresult = oeffmodel.fit()
print(oeffresult.summary())




                     Mixed Linear Model Regression Results
Model:                     MixedLM        Dependent Variable:        oEfficiency
No. Observations:          2700           Method:                    REML       
No. Groups:                1343           Scale:                     44.6627    
Min. group size:           1              Log-Likelihood:            -8934.2759 
Max. group size:           4              Converged:                 Yes        
Mean group size:           2.0                                                  
--------------------------------------------------------------------------------
                                   Coef.  Std.Err.    z    P>|z|  [0.025  0.975]
--------------------------------------------------------------------------------
Intercept                          50.624    0.908  55.770 0.000  48.845  52.404
C(teams)[T.AUS]                    -3.645    0.895  -4.074 0.000  -5.398  -1.891
C(teams)[T.BOS]                    -3.091    0.863

In [11]:
goalmodel = smf.mixedlm('goals ~ C(teams) + C(predicted_position) + C(year) + oPointsPlayed + dPointsPlayed', data=df, groups=df['name'])
goalresult = goalmodel.fit()
print(goalresult.summary())


                    Mixed Linear Model Regression Results
Model:                   MixedLM        Dependent Variable:        goals     
No. Observations:        2700           Method:                    REML      
No. Groups:              1343           Scale:                     0.2105    
Min. group size:         1              Log-Likelihood:            -2261.4683
Max. group size:         4              Converged:                 Yes       
Mean group size:         2.0                                                 
-----------------------------------------------------------------------------
                                  Coef.  Std.Err.    z    P>|z| [0.025 0.975]
-----------------------------------------------------------------------------
Intercept                         -0.049    0.086  -0.572 0.568 -0.217  0.119
C(teams)[T.AUS]                    0.003    0.090   0.036 0.971 -0.174  0.180
C(teams)[T.BOS]                    0.112    0.088   1.275 0.202 -0.060  0.283
C(team

In [12]:
assmodel = smf.mixedlm('assists ~ C(teams) + C(predicted_position) + C(year) + oPointsPlayed + dPointsPlayed', data=df, groups=df['name'])
assresult = assmodel.fit()
print(assresult.summary())


                   Mixed Linear Model Regression Results
Model:                    MixedLM       Dependent Variable:       assists   
No. Observations:         2700          Method:                   REML      
No. Groups:               1343          Scale:                    0.2165    
Min. group size:          1             Log-Likelihood:           -2465.0974
Max. group size:          4             Converged:                Yes       
Mean group size:          2.0                                               
----------------------------------------------------------------------------
                                  Coef.  Std.Err.   z    P>|z| [0.025 0.975]
----------------------------------------------------------------------------
Intercept                         -0.629    0.095 -6.601 0.000 -0.815 -0.442
C(teams)[T.AUS]                   -0.033    0.103 -0.326 0.745 -0.234  0.168
C(teams)[T.BOS]                   -0.212    0.100 -2.122 0.034 -0.408 -0.016
C(teams)[T.CAR]    

In [13]:
blockmodel = smf.mixedlm('blocks ~ C(teams) + C(predicted_position) + C(year) + oPointsPlayed + dPointsPlayed', data=df, groups=df['name'])
blockresult = blockmodel.fit()
print(blockresult.summary())


                   Mixed Linear Model Regression Results
Model:                   MixedLM        Dependent Variable:        blocks   
No. Observations:        2700           Method:                    REML     
No. Groups:              1343           Scale:                     0.0801   
Min. group size:         1              Log-Likelihood:            -823.7694
Max. group size:         4              Converged:                 Yes      
Mean group size:         2.0                                                
----------------------------------------------------------------------------
                                  Coef.  Std.Err.   z    P>|z| [0.025 0.975]
----------------------------------------------------------------------------
Intercept                         -0.349    0.048 -7.264 0.000 -0.443 -0.255
C(teams)[T.AUS]                   -0.055    0.050 -1.094 0.274 -0.153  0.043
C(teams)[T.BOS]                   -0.025    0.048 -0.516 0.606 -0.119  0.070
C(teams)[T.CAR]    

In [14]:
result_df = pd.DataFrame(oeffresult.random_effects)
result_df = pd.concat([result_df, pd.DataFrame(goalresult.random_effects)])
result_df = pd.concat([result_df, pd.DataFrame(assresult.random_effects)])
result_df = pd.concat([result_df, pd.DataFrame(blockresult.random_effects)])


result_df = result_df.transpose()
result_df.reset_index(inplace = True)
result_df.columns = ['name','oeff_rating', 'goal_rating', 'assist_rating', 'block_rating']

# Assuming df is your original DataFrame
columns_to_normalize = ['oeff_rating', 'goal_rating', 'assist_rating', 'block_rating']

# Fill missing values with the mean of each column
result_df[columns_to_normalize].fillna(result_df[columns_to_normalize].mean(), inplace=True)

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit and transform the selected columns in-place
result_df[columns_to_normalize] = scaler.fit_transform(result_df[columns_to_normalize])

# Calculate the mean of the normalized values along each row
result_df['composite_rating'] = result_df[columns_to_normalize].mean(axis=1)
result_df.sort_values('composite_rating', ascending= False).head(20)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_df[columns_to_normalize].fillna(result_df[columns_to_normalize].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_df[columns_to_normalize].fillna(result_df[columns_to_normalize].mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result_df[columns_t

Unnamed: 0,name,oeff_rating,goal_rating,assist_rating,block_rating,composite_rating
316,Daniel Lee,1.939692,2.151241,0.275424,4.910573,2.319233
520,Jack Hatchett,1.497477,2.489952,2.664641,1.964188,2.154064
1151,Sean McDougall,0.714968,3.126985,1.638263,3.041134,2.130338
1102,Ryan Osgar,0.63693,2.03999,6.7052,-0.983789,2.099583
1114,Sacha Poitte-Sokolsky,1.38757,3.980831,0.148283,2.458838,1.99388
817,Manuel Eckert,-0.793821,-1.061267,5.176585,4.446295,1.941948
388,Eli Friedman,2.667126,1.345494,2.585209,0.935718,1.883387
1060,Raphy Hayes,1.134236,0.648385,3.358205,2.341059,1.870471
976,Nick Boucher,1.74234,-0.63641,4.311398,2.056346,1.868418
553,Jake Felton,-0.201403,0.809801,5.411031,0.826067,1.711374


In [15]:
df = pd.read_csv('player_yearly_stats_aug_24.csv')

season_by_season_w_ratings = df.merge(result_df, on = 'name', how = 'left')
# season_by_season_w_ratings.to_csv('mixed_model_results_yearly_stats_21_24_aug_24.csv')

In [16]:
career_stats_w_ratings = pos_info.merge(result_df, on = 'name', how = 'left')
career_stats_w_ratings['position'] = career_stats_w_ratings.apply(
    lambda row: row['position'] if pd.notna(row['position']) else row['predicted_position'], axis=1
)
# career_stats_w_ratings.to_csv('mixed_model_ratings_21_24_w_ratings_aug_24.csv')