In [None]:
import pandas as pd
from scipy.stats import zscore
from datetime import datetime

# Rating scale used by get_rating(): ratings are stretched so the best goalie
# lands on max_rate and the worst on max_rate - rate_range (i.e. min_rate).
min_rate, max_rate = 54, 70
rate_range = max_rate - min_rate

# Raw inputs: AHL goalie stats, the per-goalie EP sheet, and the NHL goalie counts.
file_ahl = pd.read_csv('2021-2022_AHL_all_goalies.csv')
# NOTE(review): 'unicode_escape' also interprets backslash sequences inside the
# data; confirm the file's real encoding rather than relying on this codec.
file_ahl_ep = pd.read_csv('2021-2022_AHL_goalies_EP.csv', encoding='unicode_escape')
file_nhl = pd.read_csv('2021-2022_NS_Goalie_Counts.csv')

# Merge AHL goalie data with EP information

In [None]:
# Keep a single row per goalie: after the descending sort on (Goalie, GP) the
# first row seen for each name is the one with the highest GP.
file_ahl = (
    file_ahl
    .sort_values(by=['Goalie', 'GP'], ascending=False)
    .drop_duplicates(subset='Goalie', keep='first')
)

# Align the EP sheet's key column with the stats sheet and trim stray spaces
# so the names can be matched exactly.
file_ahl_ep = file_ahl_ep.rename(columns={'player': 'Goalie'})
file_ahl_ep['Goalie'] = file_ahl_ep['Goalie'].str.strip(' ')

# Left join: every AHL goalie is kept, EP columns are empty where no name matches.
file_ahl_merge = pd.merge(file_ahl, file_ahl_ep, how='left', on='Goalie')

file_ahl_merge

# Goalies with more AHL games than NHL games

In [None]:
# Games played per goalie in each league, tagged with the league the row came from.
df_ahl_init_cl = file_ahl_merge.loc[:, ['Goalie', 'GP']].assign(League='AHL')
df_nhl_init_cl = (
    file_nhl.loc[:, ['Player', 'GP']]
    .rename(columns={'Player': 'Goalie'})
    .assign(League='NHL')
)

# NHL rows are stacked first, so when a goalie's GP is tied between leagues
# idxmax (first occurrence) keeps the NHL row and the goalie is excluded below.
df_names = pd.concat([df_nhl_init_cl, df_ahl_init_cl]).reset_index(drop=True)
df_names['Goalie'] = df_names['Goalie'].str.lower()

# For every (lower-cased) name keep only the row with the most games played.
# idxmax returns index *labels*, so the rows must be selected with .loc; the
# previous positional .iloc lookup was only correct because of reset_index above.
df_names = df_names.loc[df_names.groupby('Goalie')['GP'].idxmax()]

# Names whose busiest league was the AHL.
df_ahl_names = df_names[df_names['League'] == 'AHL'].values.tolist()
names_list = [goalie for goalie, *_ in df_ahl_names]

# Restrict the merged AHL sheet to those goalies (names compared in lower case).
file_ahl_merge['Goalie'] = file_ahl_merge['Goalie'].str.lower()
file_clean = file_ahl_merge[file_ahl_merge['Goalie'].isin(names_list)]

# NOTE(review): absolute, user-specific path - this line fails on any other
# machine; consider a path relative to the notebook.
file_clean.to_csv('C:/Users/adoog/Desktop/goalie_names.csv')

In [None]:
def _scale_to_rating(values, direction=1):
    """Map a numeric column onto the shared rating scale.

    The column is z-scored, multiplied by ``direction`` (-1 flips the ordering
    so that lower raw values rate higher) and then stretched linearly so the
    largest z-score maps to the module-level ``max_rate`` and the smallest to
    ``max_rate - rate_range``.

    A constant column (or one containing NaN) has no usable spread and yields
    NaN for every row.
    """
    z = pd.Series(zscore(values), index=values.index) * direction
    z_top = z.max()
    z_step = (z_top - z.min()) / rate_range  # z-score width of one rating point
    return max_rate - (z_top - z) / z_step


def get_rating(rating,stat1,stat2,stat1_wgt,stat2_wgt,inv):
    """Build one rating column as a weighted blend of two scaled stats.

    Parameters
    ----------
    rating : str
        Name of the output rating column.
    stat1, stat2 : str
        Columns of the module-level ``file_clean`` frame to rate.
    stat1_wgt, stat2_wgt : float
        Multipliers applied to the scaled ``stat1`` / ``stat2`` ratings before
        they are summed.
    inv : int
        Multiplier applied to the ``stat1`` z-scores only; pass -1 when a lower
        raw ``stat1`` value should earn a higher rating. ``stat2`` is never
        inverted.

    Returns
    -------
    pandas.DataFrame
        Columns ``['Goalie', rating]`` sorted by ``rating``, highest first.

    Notes
    -----
    A stat whose weight is 0 is not scaled at all. Previously its rating was
    still computed and multiplied by 0, so a constant or NaN-containing
    zero-weight column turned every rating into NaN (``NaN * 0`` is NaN).
    """
    df = file_clean.loc[:,['Goalie',stat1,stat2]]

    total = 0
    for stat, weight, direction in ((stat1, stat1_wgt, inv), (stat2, stat2_wgt, 1)):
        if weight == 0:
            # Contributes nothing; skipping keeps NaN out of the sum.
            continue
        total = total + _scale_to_rating(df[stat], direction) * weight

    df[rating] = total
    return df.loc[:,['Goalie',rating]].sort_values(by=rating,ascending=False)

# Skating (SK)

#### 1. Wins

In [None]:
# SK is driven entirely by the 'W' column; 'GP' is passed with weight 0.
sk = get_rating(rating='SK', stat1='W', stat2='GP', stat1_wgt=1, stat2_wgt=0, inv=1)
sk

# Durability (DU)

In [None]:
# DU blends the 'GP' rating (90%) with the 'W' rating (10%).
du = get_rating(rating='DU', stat1='GP', stat2='W', stat1_wgt=0.9, stat2_wgt=0.1, inv=1)
du

# Endurance (EN)

In [None]:
# EN is driven entirely by the 'MINS' column; 'W' is passed with weight 0.
en = get_rating(rating='EN', stat1='MINS', stat2='W', stat1_wgt=1, stat2_wgt=0, inv=1)
en

# Size (SZ)

In [None]:
# Size is rated on its own 52-80 scale rather than the shared min_rate/max_rate.
sz_max = 80
sz_min = 52
sz_range = sz_max - sz_min

sz_calc = file_clean.loc[:,['Goalie','height','weight']]
# Dividing by 2.54 and rounding gives whole inches - assumes the source height
# is in centimetres (TODO confirm against the EP sheet).
sz_calc['height'] = (sz_calc['height'] / 2.54).round().astype(int)
# Blend 75% height with 25% weight. The previous expression used 'height' for
# both terms, which collapsed to plain height and silently ignored 'weight'.
# NOTE(review): the two columns are combined on their raw scales, so the one
# with the wider spread dominates - confirm this weighting is what is wanted.
sz_calc['ratio'] = (sz_calc['height'] * 0.75) + (sz_calc['weight'] * 0.25)

# Stretch the z-scores linearly so the largest maps to sz_max and the smallest to sz_min.
sz_calc['ratio_zscore'] = zscore(sz_calc['ratio'])
z_max = sz_calc['ratio_zscore'].max()
z_min = sz_calc['ratio_zscore'].min()
z_range = z_max-z_min
z_incr = z_range/sz_range  # z-score width of one rating point

sz_calc['SZ'] = sz_max-((z_max-sz_calc['ratio_zscore'])/z_incr)
sz = sz_calc.loc[:,['Goalie','height','weight','SZ']].sort_values(by='SZ',ascending=False)

sz

# Agility (AG)

In [None]:
# Intermediate rating from 'GAA' alone; inv=-1 makes a lower GAA rate higher.
ag1 = get_rating(rating='AG_', stat1='GAA', stat2='W', stat1_wgt=1, stat2_wgt=0, inv=-1)

# Bring in the EN rating computed in the Endurance cell, matched by goalie name.
ag_1 = pd.merge(ag1, en, on='Goalie', how='left')

# Final AG: 70% of the GAA-based rating plus 30% of EN.
ag_1['AG'] = ag_1['AG_'] * 0.7 + ag_1['EN'] * 0.3

ag = ag_1[['Goalie', 'AG']].sort_values(by='AG', ascending=False)

ag

# Rebounds (RB)

In [None]:
# RB blends the 'SO' rating (90%) with the 'MINS' rating (10%).
rb = get_rating(rating='RB', stat1='SO', stat2='MINS', stat1_wgt=0.9, stat2_wgt=0.1, inv=1)
rb

# Style Control (SC)

In [None]:
# SC is driven entirely by the 'SV%' column; 'MINS' is passed with weight 0.
sc = get_rating(rating='SC', stat1='SV%', stat2='MINS', stat1_wgt=1, stat2_wgt=0, inv=1)
sc

# Hand Speed (HS)

In [None]:
# HS is driven entirely by the 'SVS' column; 'MINS' is passed with weight 0.
hs = get_rating(rating='HS', stat1='SVS', stat2='MINS', stat1_wgt=1, stat2_wgt=0, inv=1)
hs

# Reaction Time (RT)

In [None]:
# Reaction time is rated on its own 54-69 scale.
rt_max, rt_min = 69, 54
rt_range = rt_max - rt_min

# Rate goalies on the 'SVS' / 'MINS' ratio rather than on either raw column.
# NOTE(review): a row with MINS == 0 would produce inf/NaN here and spoil the
# z-scores for everyone - confirm the input never contains such rows.
rt_calc = file_clean.loc[:, ['Goalie', 'SVS', 'MINS']]
rt_calc['SV/MIN'] = rt_calc['SVS'].div(rt_calc['MINS'])
rt_calc['ratio_zscore'] = zscore(rt_calc['SV/MIN'])

# Linear stretch: largest z-score -> rt_max, smallest -> rt_min.
z_max, z_min = rt_calc['ratio_zscore'].max(), rt_calc['ratio_zscore'].min()
z_range = z_max - z_min
z_incr = z_range / rt_range  # z-score width of one rating point

rt_calc['RT'] = rt_max - (z_max - rt_calc['ratio_zscore']) / z_incr
rt = rt_calc[['Goalie', 'RT']].sort_values(by='RT', ascending=False)

rt

# Puck Handling (PH)

In [None]:
# PH is an even blend of the 'GAA' and 'MINS' ratings.
# NOTE(review): GAA is not inverted here (inv=1), whereas the Agility cell
# rates GAA with inv=-1 - confirm that a higher GAA should raise PH.
ph = get_rating(rating='PH', stat1='GAA', stat2='MINS', stat1_wgt=0.5, stat2_wgt=0.5, inv=1)
ph

# Penalty Shot (PS)

In [None]:
# PS blends the 'SO%' rating (70%) with the 'MINS' rating (30%).
# inv=-1 flips 'SO%' so that lower values rate higher.
# NOTE(review): confirm that a lower SO% really is the better outcome.
ps = get_rating(rating='PS', stat1='SO%', stat2='MINS', stat1_wgt=0.7, stat2_wgt=0.3, inv=-1)
ps

# Experience (EX)

In [None]:
# Experience is rated on its own 52-85 scale.
ex_max, ex_min = 85, 52
ex_range = ex_max - ex_min

ex_calc = file_clean.loc[:, ['Goalie', 'nation', 'dob', 'link']]

# Split 'dob' by character position: chars 0-3, 5-6 and 8 onward.
# Presumably the field is formatted YYYY-MM-DD - verify against the EP sheet.
ex_calc['Year'] = ex_calc['dob'].str.slice(stop=4)
ex_calc['Month'] = ex_calc['dob'].str.slice(start=5, stop=7)
ex_calc['Day'] = ex_calc['dob'].str.slice(start=8)

# Age is approximated as 2021 minus the birth year (month and day are ignored).
ex_calc['Age'] = 2021 - ex_calc['Year'].astype(int)

# Linear stretch of the age z-scores: oldest -> ex_max, youngest -> ex_min.
ex_calc['ex_zscore'] = zscore(ex_calc['Age'])
z_ex_max, z_ex_min = ex_calc['ex_zscore'].max(), ex_calc['ex_zscore'].min()
z_ex_range = z_ex_max - z_ex_min
z_ex_incr = z_ex_range / ex_range  # z-score width of one rating point

ex_calc['EX'] = ex_max - (z_ex_max - ex_calc['ex_zscore']) / z_ex_incr
ex = (
    ex_calc[['Goalie', 'nation', 'Year', 'Month', 'Day', 'link', 'EX']]
    .sort_values(by='EX', ascending=False)
)

ex

# Leadership (LD)

In [None]:
# Leadership is rated on its own 50-70 scale.
ld_max, ld_min = 70, 50
ld_range = ld_max - ld_min

ld_calc = file_clean.loc[:, ['Goalie', 'dob']]

# Same age proxy as the Experience cell: 2021 minus the first four characters of 'dob'.
ld_calc['Year'] = ld_calc['dob'].str.slice(stop=4)
ld_calc['Age'] = 2021 - ld_calc['Year'].astype(int)

# Linear stretch of the age z-scores: oldest -> ld_max, youngest -> ld_min.
ld_calc['ld_zscore'] = zscore(ld_calc['Age'])
z_ld_max, z_ld_min = ld_calc['ld_zscore'].max(), ld_calc['ld_zscore'].min()
z_ld_range = z_ld_max - z_ld_min
z_ld_incr = z_ld_range / ld_range  # z-score width of one rating point

ld_calc['LD'] = ld_max - (z_ld_max - ld_calc['ld_zscore']) / z_ld_incr
ld = ld_calc[['Goalie', 'LD']].sort_values(by='LD', ascending=False)

ld

# Overall (OV)

In [None]:
# Join every per-attribute table onto the SK table by goalie name, in the same
# order as before so the merged column order is unchanged.
df_ov = sk
for part in (du, en, sz, ag, rb, hs, sc, rt, ph, ps, ex, ld):
    df_ov = df_ov.merge(part, how='left', on='Goalie')

# Constant placeholder columns written to the output file.
df_ov = df_ov.assign(Rookie='N', PO=1, Contract=1, Salary=1, Position='G')

# Output column order.
df_ov = df_ov[[
    'Goalie', 'Position', 'nation', 'Rookie', 'Year', 'Month', 'Day',
    'Contract', 'Salary', 'link', 'weight', 'height',
    'SK', 'DU', 'EN', 'SZ', 'AG', 'RB', 'SC', 'HS', 'RT', 'PH', 'PS', 'EX', 'LD', 'PO',
]]

# Overall rating: weighted sum of the attribute ratings (the weights add up to 1.00).
# The terms are accumulated in the listed order.
ov_weights = {
    'SK': 0.08, 'DU': 0.06, 'EN': 0.09, 'SZ': 0.08, 'AG': 0.08,
    'RB': 0.10, 'SC': 0.13, 'HS': 0.12, 'RT': 0.12, 'PH': 0.04,
    'PS': 0.02, 'EX': 0.04, 'LD': 0.04,
}
df_ov['OV'] = sum(df_ov[col] * wgt for col, wgt in ov_weights.items())

# Names were lower-cased for matching; restore title case for the export.
df_ov['Goalie'] = df_ov['Goalie'].str.title()
df_to_print = df_ov.sort_values(by=['OV'], ascending=False)
df_to_print.to_csv('2021-2022_AHL_goalies.csv', index=False)
df_to_print