In [None]:
import pandas as pd
import numpy as np
from scipy.stats import zscore

# Season summaries for both leagues; these decide which goalies are rated.
df_ahl_init = pd.read_csv('2021-2022_AHL_all_goalies.csv')
df_nhl_init = pd.read_csv('2021-2022_NHLcom_Goalie_Summary.csv')

# Supplementary per-goalie tables, read in the order df_2 .. df_6.
df_2, df_3, df_4, df_5, df_6 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '2021-2022_NHLcom_Goalie_Bio.csv',
        '2021-2022_MoneyPuck_Goalies.csv',
        '2021-2022_NHLcom_Goalie_Shootout_Summary.csv',
        '2021-2022_NHLcom_Goalie_Career.csv',
        '2021-2022_NS_Goalie_Counts.csv',
    )
)

# Bounds of the rating scale used by the skill-type attributes.
min_rate_skill, max_rate_skill = 60, 99
rate_range_skill = max_rate_skill - min_rate_skill

# VHL Player Ratings Model

### Goalies with more games in NHL than AHL

In [None]:
# Build one (name, games played, league) table per league with matching
# column names so the two can be stacked.
df_ahl_init_cl = df_ahl_init.loc[:, ['Goalie', 'GP']].rename(
    columns={'Goalie': 'goalieFullName', 'GP': 'gamesPlayed'}
)
df_nhl_init_cl = df_nhl_init.loc[:, ['goalieFullName', 'gamesPlayed']]

df_ahl_init_cl['League'] = 'AHL'
df_nhl_init_cl['League'] = 'NHL'

# NHL rows are stacked first, so a goalie with the same games played in both
# leagues resolves to the NHL row (idxmax returns the first maximum).
df_names = pd.concat([df_nhl_init_cl, df_ahl_init_cl]).reset_index(drop=True)
df_names['goalieFullName'] = df_names['goalieFullName'].str.lower()

# idxmax() returns index *labels*, so select with .loc rather than .iloc;
# the two only coincide because of the reset_index above.
df_names = df_names.loc[df_names.groupby('goalieFullName')['gamesPlayed'].idxmax()]

# NOTE: despite its name, df_ahl_names holds the rows of goalies whose
# busiest league was the NHL. The name is kept for backward compatibility.
df_ahl_names = df_names[df_names['League'] == 'NHL'].values.tolist()
names_list = [name[0] for name in df_ahl_names]

# Names are compared in lower case everywhere, so the NHL summary is
# lower-cased before filtering.
df_nhl_init['goalieFullName'] = df_nhl_init['goalieFullName'].str.lower()
# .copy() makes file_clean an independent frame instead of a slice of df_nhl_init.
file_clean = df_nhl_init[df_nhl_init['goalieFullName'].isin(names_list)].copy()

file_clean

# Games Played Multiplier

In [None]:
# Games-played multiplier: standardise games played and stretch the z-scores
# onto the min_rate_skill..max_rate_skill scale. The "wins" names are
# historical; the input column is gamesPlayed.
df_wins = file_clean.loc[:, ['goalieFullName', 'gamesPlayed']]
df_wins['wins_zscore'] = zscore(df_wins['gamesPlayed'])

wins_z_min, wins_z_max = df_wins['wins_zscore'].min(), df_wins['wins_zscore'].max()
wins_z_range = wins_z_max - wins_z_min
wins_incr_du = wins_z_range / rate_range_skill  # z-score width of one rating point

# The highest z-score maps to max_rate_skill, the lowest to min_rate_skill.
df_wins['GP'] = max_rate_skill - (wins_z_max - df_wins['wins_zscore']) / wins_incr_du

df_winx = df_wins[['goalieFullName', 'GP']]

df_winx

# Skating (SK)

1. GAA (80%)
2. Games played rating (20%)

In [None]:
# SK: 80% goals-against average (lower is better) blended with 20% of the
# games-played rating from df_winx.
df_sk = file_clean.loc[:, ['goalieFullName', 'gamesPlayed', 'goalsAgainstAverage']]

# Goalies with 5 or fewer games get a fixed GAA of 3; any GAA of 4 or more is
# then capped at 4 before standardising.
df_sk['goalsAgainstAverage'] = np.where(df_sk['gamesPlayed'] <= 5, 3, df_sk['goalsAgainstAverage'])
df_sk['goalsAgainstAverage'] = np.where(df_sk['goalsAgainstAverage'] >= 4, 4, df_sk['goalsAgainstAverage'])

df_sk['gaa_zscore'] = zscore(df_sk['goalsAgainstAverage'])

# min and max are deliberately swapped: the LOWEST GAA z-score must map to
# max_rate_skill. This makes both the range and the increment negative.
sk_z_min = df_sk['gaa_zscore'].max()
sk_z_max = df_sk['gaa_zscore'].min()

sk_z_range = sk_z_max - sk_z_min
sk_z_incr = sk_z_range / rate_range_skill

df_sk['SK_'] = max_rate_skill - ((sk_z_max - df_sk['gaa_zscore']) / sk_z_incr)

df_sk_merge = df_sk.merge(df_winx, how='left', on='goalieFullName')

df_sk_merge['SK'] = (df_sk_merge['SK_'] * 0.8) + (df_sk_merge['GP'] * 0.2)

# sort_values returns a new frame; sorting a column subset in place mutates a
# derived object and can emit SettingWithCopyWarning.
df_sk_rate = df_sk_merge[['goalieFullName', 'SK']].sort_values(by='SK', ascending=False)

df_sk_rate

## Durability (DU)

In [None]:
# Bounds of the wider rating scale used by the DU, EN, EX and LD ratings.
min_rate, max_rate = 55, 99
rate_range = max_rate - min_rate

In [None]:
# DU: games played, bounded to 5..65, standardised and stretched onto the
# min_rate..max_rate scale.
df_du = file_clean.loc[:, ['goalieFullName', 'gamesPlayed']]
df_du['gamesPlayed'] = df_du['gamesPlayed'].clip(lower=5, upper=65)
df_du['gp_zscore'] = zscore(df_du['gamesPlayed'])

du_z_min, du_z_max = df_du['gp_zscore'].min(), df_du['gp_zscore'].max()
du_z_range = du_z_max - du_z_min
z_incr_du = du_z_range / rate_range  # z-score width of one rating point

# The highest z-score maps to max_rate, the lowest to min_rate.
df_du['DU'] = max_rate - (du_z_max - df_du['gp_zscore']) / z_incr_du

df_du_rate = df_du[['goalieFullName', 'DU']]

df_du_rate

## Endurance (EN)

In [None]:
# EN: the timeOnIce column, standardised and stretched onto the
# min_rate..max_rate scale.
df_en = file_clean.loc[:, ['goalieFullName', 'timeOnIce']]
df_en['toi_zscore'] = zscore(df_en['timeOnIce'])

en_z_min, en_z_max = df_en['toi_zscore'].min(), df_en['toi_zscore'].max()
en_z_range = en_z_max - en_z_min
z_incr_en = en_z_range / rate_range  # z-score width of one rating point

# The highest z-score maps to max_rate, the lowest to min_rate.
df_en['EN'] = max_rate - (en_z_max - df_en['toi_zscore']) / z_incr_en

df_en_rate = df_en[['goalieFullName', 'EN']]

df_en_rate

# Size (SZ)

1. Height (85%)
2. Weight (15%)

In [None]:
# Keep only the bio rows of goalies in names_list; names are matched in lower case.
df_2['goalieFullName'] = df_2['goalieFullName'].str.lower()
df2 = df_2.loc[df_2['goalieFullName'].isin(names_list)]

# Bounds of the rating scale used for the size rating.
min_rate_sz, max_rate_sz = 68, 99
rate_range_sz = max_rate_sz - min_rate_sz

In [None]:
# Standardise height and weight separately; each gets its own z-score range
# and per-rating-point increment on the size scale.
df_sz = df2.loc[:, ['goalieFullName', 'height', 'weight']]
df_sz['h_zscore'] = zscore(df_sz['height'])
df_sz['w_zscore'] = zscore(df_sz['weight'])

szh_z_min, szh_z_max = df_sz['h_zscore'].min(), df_sz['h_zscore'].max()
szw_z_min, szw_z_max = df_sz['w_zscore'].min(), df_sz['w_zscore'].max()

# Height: z-score width of one rating point.
szh_z_range = szh_z_max - szh_z_min
szh_z_incr = szh_z_range / rate_range_sz

# Weight: z-score width of one rating point.
szw_z_range = szw_z_max - szw_z_min
szw_z_incr = szw_z_range / rate_range_sz

In [None]:
# Map the height and weight z-scores onto the size scale (the highest z-score
# maps to max_rate_sz), then blend them 85% height / 15% weight.
df_sz['H'] = max_rate_sz - ((szh_z_max - df_sz['h_zscore']) / szh_z_incr)
df_sz['W'] = max_rate_sz - ((szw_z_max - df_sz['w_zscore']) / szw_z_incr)
df_sz['SZ'] = (df_sz['H'] * 0.85) + (df_sz['W'] * 0.15)

# height is carried along because the overall table reports it.
# sort_values returns a new frame; sorting a column subset in place mutates a
# derived object and can emit SettingWithCopyWarning.
df_sz_rate = df_sz[['goalieFullName', 'SZ', 'height']].sort_values(by='SZ', ascending=False)

df_sz_rate

# Agility (AG)

1. (High Danger xGoals − High Danger Goals) / TOI

In [None]:
# Align the name column with the other sources and compare names in lower case.
df_3 = df_3.rename(columns={'name': 'goalieFullName'})
df_3['goalieFullName'] = df_3['goalieFullName'].str.lower()

# Keep the rated goalies, then only their 'all' situation rows.
df3 = df_3[df_3['goalieFullName'].isin(names_list)]
df3 = df3[df3['situation'] == 'all']

df_ag = df3.loc[:, ['goalieFullName', 'icetime', 'highDangerShots', 'highDangerGoals', 'highDangerxGoals']]

# High-danger expected goals minus actual goals, per icetime / 3600
# (presumably icetime is in seconds, i.e. a per-hour rate — TODO confirm).
# Goalies with 15 or fewer high-danger shots get a fixed value of -0.33.
df_ag['HDxG/60'] = np.where(
    df_ag['highDangerShots'] <= 15,
    -0.33,
    (df_ag['highDangerxGoals'] - df_ag['highDangerGoals']) / (df_ag['icetime'] / 3600),
)
display(df_ag.sort_values(by='HDxG/60', ascending=True))

In [None]:
# Standardise HDxG/60 and derive the z-score width of one rating point.
df_ag['hdx_zscore'] = zscore(df_ag['HDxG/60'])

hdx_z_min, hdx_z_max = df_ag['hdx_zscore'].min(), df_ag['hdx_zscore'].max()
hdx_z_range = hdx_z_max - hdx_z_min

# The z-score range is spread over rate_range_skill + 3 rating points.
hdx_z_incr = hdx_z_range / (rate_range_skill + 3)

In [None]:
# AG: the highest HDxG/60 z-score maps to max_rate_skill; lower z-scores lose
# one rating point per hdx_z_incr.
df_ag['AG'] = max_rate_skill - ((hdx_z_max - df_ag['hdx_zscore']) / hdx_z_incr)

# The former rename of 'name' -> 'goalieFullName' was dropped: df_ag has no
# 'name' column at this point, so it was a no-op.
# sort_values returns a new frame; sorting a column subset in place mutates a
# derived object and can emit SettingWithCopyWarning.
df_ag_rate = df_ag[['goalieFullName', 'AG']].sort_values(by='AG', ascending=False)

df_ag_rate

# Rebounds (RB)

1. (xRebounds - rebounds)/rebounds

In [None]:
# RB: relative difference between expected and actual rebounds,
# (xRebounds - rebounds) / rebounds, standardised onto the skill scale.
df_rb = df3.loc[:, ['goalieFullName', 'icetime', 'xRebounds', 'rebounds']]

df_rb['xRB%'] = (df_rb['xRebounds'] - df_rb['rebounds']) / df_rb['rebounds']
# Assign the result back: replace(..., inplace=True) on a column selection is
# chained mutation and does not reliably write through to df_rb.
df_rb['xRB%'] = df_rb['xRB%'].replace(np.inf, 0)

# Goalies with 6 or fewer rebounds are set to 0.
df_rb['xRB%'] = np.where((df_rb['rebounds'] <= 6), 0, df_rb['xRB%'])
# NOTE(review): values >= 0.25 are replaced by 0.15, which ranks them below
# goalies between 0.15 and 0.25 — confirm this is intended rather than a cap
# at 0.25.
df_rb['xRB%'] = np.where((df_rb['xRB%'] >= 0.25), 0.15, df_rb['xRB%'])

df_rb['xrb_zscore'] = zscore(df_rb['xRB%'])

rb_z_min = df_rb['xrb_zscore'].min()
rb_z_max = df_rb['xrb_zscore'].max()

rb_z_range = rb_z_max - rb_z_min
rb_z_incr = rb_z_range / rate_range_skill  # z-score width of one rating point

# The highest z-score maps to max_rate_skill, the lowest to min_rate_skill.
df_rb['RB'] = max_rate_skill - ((rb_z_max - df_rb['xrb_zscore']) / rb_z_incr)

# The former rename of 'name' -> 'goalieFullName' was dropped: df_rb has no
# 'name' column, so it was a no-op.
# sort_values returns a new frame instead of mutating a column subset in place.
df_rb_rate = df_rb[['goalieFullName', 'RB']].sort_values(by='RB', ascending=False)

df_rb_rate

# Style Control (SC)

1. SV% (80%)
2. Games played rating (20%)

In [None]:
# SC: 80% save percentage blended with 20% of the games-played rating from
# df_winx.
df_sc = file_clean.loc[:, ['goalieFullName', 'gamesPlayed', 'savePct']]

# Goalies with 3 or fewer games get a fixed save percentage of 0.875, and
# every save percentage is floored at 0.875 before standardising.
df_sc['savePct'] = np.where((df_sc['gamesPlayed'] <= 3), 0.875, df_sc['savePct'])
df_sc['savePct'] = np.where((df_sc['savePct'] <= 0.875), 0.875, df_sc['savePct'])

# The column is named 'gaa_zscore' but holds the z-score of savePct.
df_sc['gaa_zscore'] = zscore(df_sc['savePct'])

sc_z_min = df_sc['gaa_zscore'].min()
sc_z_max = df_sc['gaa_zscore'].max()

sc_z_range = sc_z_max - sc_z_min
sc_z_incr = sc_z_range / rate_range_skill  # z-score width of one rating point

# The highest z-score maps to max_rate_skill, the lowest to min_rate_skill.
df_sc['SC_rate'] = max_rate_skill - ((sc_z_max - df_sc['gaa_zscore']) / sc_z_incr)

df_sc_merge = df_sc.merge(df_winx, how='left', on='goalieFullName')

df_sc_merge['SC'] = (df_sc_merge['SC_rate'] * 0.8) + (df_sc_merge['GP'] * 0.2)

# sort_values returns a new frame; sorting a column subset in place mutates a
# derived object and can emit SettingWithCopyWarning.
df_sc_rate = df_sc_merge[['goalieFullName', 'SC']].sort_values(by='SC', ascending=False)

df_sc_rate

# Hand Speed (HS)

1. GSAA (100%)

In [None]:
# HS: GSAA, floored at -15, standardised onto the skill scale.
df_6 = df_6.rename(columns={'Player': 'goalieFullName'})
# NOTE(review): names are matched by exact lower-cased spelling against
# names_list. A previously commented-out line mapped 'Cal Petersen' to
# 'Calvin Petersen', which suggests this source spells some names differently;
# unmatched goalies are dropped here — verify coverage.
df_6['goalieFullName'] = df_6['goalieFullName'].str.lower()
df6 = df_6[df_6['goalieFullName'].isin(names_list)]

df_hs = df6.loc[:, ['goalieFullName', 'GSAA']]
# Limit the influence of extreme negative outliers.
df_hs['GSAA'] = np.where(df_hs['GSAA'] < -15, -15, df_hs['GSAA'])

df_hs['gsaa_zscore'] = zscore(df_hs['GSAA'])

hs_z_min = df_hs['gsaa_zscore'].min()
hs_z_max = df_hs['gsaa_zscore'].max()
hs_z_range = hs_z_max - hs_z_min
hs_z_incr = hs_z_range / rate_range_skill  # z-score width of one rating point

# The highest z-score maps to max_rate_skill, the lowest to min_rate_skill.
df_hs['HS'] = max_rate_skill - ((hs_z_max - df_hs['gsaa_zscore']) / hs_z_incr)

# The former second rename of 'Player' was dropped: df_hs only has the
# 'goalieFullName' and derived columns, so it was a no-op.
# sort_values returns a new frame instead of mutating a column subset in place.
df_hs_rate = df_hs[['goalieFullName', 'HS']].sort_values(by='HS', ascending=False)

df_hs_rate

# Reaction Time (RT)

1. HDSV% (70%)
2. Games played rating (30%)

In [None]:
# RT: 70% high-danger save percentage blended with 30% of the games-played
# rating from df_winx.
df_rt = df3.loc[:, ['goalieFullName', 'highDangerShots', 'highDangerGoals']]

# Computed from df_rt's own columns. This previously read highDangerShots from
# df_ag, a frame built in a different cell, which made the result depend on
# that cell having run with an identical index.
df_rt['HDSv%'] = (df_rt['highDangerShots'] - df_rt['highDangerGoals']) / df_rt['highDangerShots']
# A perfect 1.0 or fewer than 10 high-danger shots is replaced by 0.5;
# missing values (e.g. zero shots) get the same 0.5.
df_rt['HDSv%'] = np.where((df_rt['HDSv%'] == 1) | (df_rt['highDangerShots'] < 10), 0.5, df_rt['HDSv%'])
df_rt['HDSv%'] = df_rt['HDSv%'].fillna(0.5)

df_rt['hdsv%_zscore'] = zscore(df_rt['HDSv%'])

rt_z_min = df_rt['hdsv%_zscore'].min()
rt_z_max = df_rt['hdsv%_zscore'].max()
rt_z_range = rt_z_max - rt_z_min
rt_z_incr = rt_z_range / rate_range_skill  # z-score width of one rating point

# The highest z-score maps to max_rate_skill, the lowest to min_rate_skill.
df_rt['RT_rate'] = max_rate_skill - ((rt_z_max - df_rt['hdsv%_zscore']) / rt_z_incr)

# The former rename of 'name' -> 'goalieFullName' was dropped: df_rt has no
# 'name' column, so it was a no-op.
df_rt_merge = df_rt.merge(df_winx, how='left', on='goalieFullName')

df_rt_merge['RT'] = (df_rt_merge['RT_rate'] * 0.7) + (df_rt_merge['GP'] * 0.3)

# sort_values returns a new frame instead of mutating a column subset in place.
df_rt_rate = df_rt_merge[['goalieFullName', 'RT']].sort_values(by='RT', ascending=False)

df_rt_rate

# Puck Handling (PH)

1. Assists/60 (goalies with no assists are rated on games played instead, topping out at 80)

In [None]:
# PH: goalies with at least one assist are rated on assists per
# (timeOnIce / 3600); goalies with none are rated on games played on a lower
# scale that tops out at 80.
df_ph = file_clean.loc[:, ['goalieFullName', 'gamesPlayed', 'assists', 'timeOnIce']]

# .copy() so the new columns are written to independent frames rather than to
# boolean-filtered slices of df_ph (avoids SettingWithCopyWarning).
df_ph2 = df_ph[df_ph['assists'] > 0].copy()
df_ph3 = df_ph[df_ph['assists'] == 0].copy()

# presumably timeOnIce is in seconds, making this a per-60-minutes rate — TODO confirm
df_ph2['a60'] = df_ph2['assists'] / (df_ph2['timeOnIce'] / 3600)
df_ph2['a60_zscore'] = zscore(df_ph2['a60'])

df_ph3['gamesPlayed_zscore'] = zscore(df_ph3['gamesPlayed'])

# Goalies with assists: z-scores are spread over rate_range_skill - 15 points
# below max_rate_skill.
ph_z_min = df_ph2['a60_zscore'].min()
ph_z_max = df_ph2['a60_zscore'].max()
ph_z_range = ph_z_max - ph_z_min
ph_z_incr = ph_z_range / (rate_range_skill - 15)

# Goalies without assists: z-scores are spread over rate_range_skill - 20
# points below 80.
ph2_z_min = df_ph3['gamesPlayed_zscore'].min()
ph2_z_max = df_ph3['gamesPlayed_zscore'].max()
ph2_z_range = ph2_z_max - ph2_z_min
ph2_z_incr = ph2_z_range / (rate_range_skill - 20)

df_ph2['PH'] = max_rate_skill - ((ph_z_max - df_ph2['a60_zscore']) / ph_z_incr)
df_ph3['PH'] = 80 - ((ph2_z_max - df_ph3['gamesPlayed_zscore']) / ph2_z_incr)

df_ph4 = pd.concat([df_ph2, df_ph3])

# sort_values returns a new frame instead of mutating a column subset in place.
df_ph_rate = df_ph4[['goalieFullName', 'PH']].sort_values(by='PH', ascending=False)

df_ph_rate

# Penalty Shots (PS)

1. Penalty Shot Saves (30%)
2. Penalty Shot SV% Season (50%)
3. Penalty Shot SV% Career (20%)

In [None]:
# PS: blend of three shootout measures — saves (30%), season save percentage
# (50%) and career save percentage (20%).
df_4['goalieFullName'] = df_4['goalieFullName'].str.lower()
df4 = df_4.loc[df_4['goalieFullName'].isin(names_list)]

df_ps = df4.loc[:, ['goalieFullName', 'careerShootoutSavePct', 'shootoutSavePct', 'shootoutSaves']]

df_ps['saves_zscore'] = zscore(df_ps['shootoutSaves'])
df_ps['career_zscore'] = zscore(df_ps['careerShootoutSavePct'])
df_ps['season_zscore'] = zscore(df_ps['shootoutSavePct'])

# Shootout saves: z-score width of one rating point.
ps_z_min, ps_z_max = df_ps['saves_zscore'].min(), df_ps['saves_zscore'].max()
ps_z_range = ps_z_max - ps_z_min
ps_z_incr = ps_z_range / rate_range_skill

# Career shootout save percentage.
ps2_z_min, ps2_z_max = df_ps['career_zscore'].min(), df_ps['career_zscore'].max()
ps2_z_range = ps2_z_max - ps2_z_min
ps2_z_incr = ps2_z_range / rate_range_skill

# Season shootout save percentage.
ps3_z_min, ps3_z_max = df_ps['season_zscore'].min(), df_ps['season_zscore'].max()
ps3_z_range = ps3_z_max - ps3_z_min
ps3_z_incr = ps3_z_range / rate_range_skill

# NOTE(review): each component tops out at max_rate_skill + 5, so PS can
# exceed max_rate_skill — confirm this is intended.
df_ps['saves'] = (max_rate_skill + 5) - (ps_z_max - df_ps['saves_zscore']) / ps_z_incr
df_ps['career'] = (max_rate_skill + 5) - (ps2_z_max - df_ps['career_zscore']) / ps2_z_incr
df_ps['season'] = (max_rate_skill + 5) - (ps3_z_max - df_ps['season_zscore']) / ps3_z_incr
df_ps['PS'] = (df_ps['saves'] * 0.3) + (df_ps['season'] * 0.5) + (df_ps['career'] * 0.2)

df_ps_rate = df_ps[['goalieFullName', 'PS']]

df_ps_rate

# Experience (EX)

1. Career Games Played

In [None]:
# EX: the GP_season column of the career table, capped at 900, standardised
# onto the min_rate..max_rate scale. Biographical columns are carried along
# for the overall table.
df_5['goalieFullName'] = df_5['goalieFullName'].str.lower()
df5 = df_5[df_5['goalieFullName'].isin(names_list)]

df_ex = df5.loc[:, ['Id', 'goalieFullName', 'Birthday', 'City', 'State/Province', 'Country', 'Weight', 'Rookie', 'GP_season']]

df_ex['GP_season'] = np.where(df_ex['GP_season'] >= 900, 900, df_ex['GP_season'])
df_ex['games_zscore'] = zscore(df_ex['GP_season'])

ex_z_min = df_ex['games_zscore'].min()
ex_z_max = df_ex['games_zscore'].max()
ex_z_range = ex_z_max - ex_z_min
ex_z_incr = ex_z_range / rate_range  # z-score width of one rating point

# The highest z-score maps to max_rate, the lowest to min_rate.
df_ex['EX'] = max_rate - ((ex_z_max - df_ex['games_zscore']) / ex_z_incr)

# URL slug: lower-cased name with spaces turned into hyphens. The two steps
# are chained; previously the second step re-read goalieFullName and silently
# discarded the lower-casing step.
df_ex['name'] = df_ex['goalieFullName'].str.lower().str.replace(' ', '-', regex=False)
df_ex['UrlLink'] = 'https://www.nhl.com/player/' + df_ex['name'] + '-' + df_ex['Id'].astype(str)

# sort_values returns a new frame instead of mutating a column subset in place.
df_ex_rate = df_ex[
    ['Id', 'goalieFullName', 'UrlLink', 'Birthday', 'City', 'State/Province', 'Country', 'Weight', 'Rookie', 'EX']
].sort_values(by='EX', ascending=False)

df_ex_rate

# Leadership (LD)

In [None]:
# LD: the GP_playoffs column of the career table, standardised and stretched
# onto the min_rate..max_rate scale.
df_ld = df5.loc[:, ['goalieFullName', 'GP_playoffs']]
df_ld['games_zscore'] = zscore(df_ld['GP_playoffs'])

ld_z_min, ld_z_max = df_ld['games_zscore'].min(), df_ld['games_zscore'].max()
ld_z_range = ld_z_max - ld_z_min
ld_z_incr = ld_z_range / rate_range  # z-score width of one rating point

# The highest z-score maps to max_rate, the lowest to min_rate.
df_ld['LD'] = max_rate - (ld_z_max - df_ld['games_zscore']) / ld_z_incr

df_ld_rate = df_ld[['goalieFullName', 'LD']]

df_ld_rate

# Overall (OV)

In [None]:
# Left-join every attribute table onto the SK table by goalie name, in the
# same order as the attribute sections of this notebook.
df_ov = df_sk_rate
for rate_frame in (
    df_du_rate, df_en_rate, df_sz_rate, df_ag_rate, df_rb_rate, df_sc_rate,
    df_hs_rate, df_rt_rate, df_ph_rate, df_ps_rate, df_ex_rate, df_ld_rate,
):
    df_ov = df_ov.merge(rate_frame, how='left', on=['goalieFullName'])

df_ov['Rookie'] = np.where(df_ov['Rookie'].astype(str).str.contains('True'), "Y", "N")

# Constant placeholder columns.
df_ov['Position'] = 1
df_ov['PO'] = 1
df_ov['Contract'] = 1
df_ov['Salary'] = 1

# Split Birthday by character position: 0-3, 5-6 and 8 onwards
# (presumably a YYYY-MM-DD string — TODO confirm).
df_ov['Year'] = df_ov['Birthday'].str.slice(start=0, stop=4)
df_ov['Month'] = df_ov['Birthday'].str.slice(start=5, stop=7)
df_ov['Day'] = df_ov['Birthday'].str.slice(start=8)

# Weight of each rating in the overall score, in summation order.
ov_weights = {
    'SK': 0.09, 'DU': 0.06, 'EN': 0.09, 'SZ': 0.06, 'AG': 0.08, 'RB': 0.11, 'SC': 0.13,
    'HS': 0.13, 'RT': 0.13, 'PH': 0.03, 'PS': 0.01, 'EX': 0.04, 'LD': 0.04,
}
rating_cols = list(ov_weights)

# A goalie missing from a source table gets the default rating of 55 for that
# attribute. Only rating columns are filled: a blanket fillna(55) would also
# write 55 into Id, Country, UrlLink, the birthday parts, Weight and height.
df_ov[rating_cols] = df_ov[rating_cols].fillna(55)

# .copy() so the OV column is added to an independent frame, not a column subset.
df_ov = df_ov[['Id', 'goalieFullName', 'Position', 'Country', 'Rookie', 'Year', 'Month', 'Day', 'Contract', 'Salary', 'UrlLink', 'Weight', 'height'] + rating_cols + ['PO']].copy()

# Weighted sum, accumulated in the same left-to-right order as ov_weights.
df_ov['OV'] = sum(df_ov[col] * weight for col, weight in ov_weights.items())
df_ov['goalieFullName'] = df_ov['goalieFullName'].str.title()
df_ov = df_ov.sort_values(by='OV', ascending=False)

df_ov