In [21]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
import os

In [22]:
# Stat columns selected from every game-log file and fed to the scaler.
fantasy_stats_list = [
    # The two *_PCT columns are excluded from the games-played adjustment.
    'FG_PCT',
    'FT_PCT',
    # Remaining columns also get a games-played adjusted '<name>_ADJ' variant.
    'FG3M',
    'PTS',
    'REB',
    'AST',
    'STL',
    'BLK',
    'TOV',
]

# Lower bound of the denominator used when spreading a player's totals over a
# season (presumably the regular-season game count -- confirm).
total_reg_games = 82

In [23]:
# Columns identifying one player-season row in the aggregated frame.
_group_keys = ['SEASON_YEAR', 'PLAYER_ID', 'PLAYER_NAME']
# Percentage stats are never rescaled by games played.
_pct_stats = ['FG_PCT', 'FT_PCT']
# Stats where a LOWER value is better; they are subtracted from the z-score
# totals and compared with "<" in the head-to-head matchups.
_negative_stats = ['TOV']


def _z_score_total(frame, categories, lower_is_better=()):
    """Sum the '<category>_STD' columns of ``frame`` into one score per row.

    Categories listed in ``lower_is_better`` are subtracted instead of added so
    that a higher total is always better. NaN z-scores propagate to the total.
    """
    total = pd.Series(0.0, index=frame.index)
    for category in categories:
        z_scores = frame[category + '_STD']
        total = total - z_scores if category in lower_is_better else total + z_scores
    return total


def _with_rank(frame, score_col, rank_col):
    """Return ``frame`` sorted by ``score_col`` (descending) with a 1-based
    ``rank_col`` inserted as the first column."""
    ranked = frame.sort_values(by=score_col, ascending=False, ignore_index=True)
    ranked.insert(0, rank_col, np.arange(1, len(ranked) + 1))
    return ranked


def _h2h_wins(frame, categories, lower_is_better=()):
    """Count, for every row, how many other rows it beats head-to-head.

    A row beats another when it wins strictly more of ``categories`` than the
    other row does. Equal (or NaN) values in a category count for neither
    side, and a matchup with equally many category wins on both sides gives a
    win to neither row.

    Returns a 1-D integer array aligned with the rows of ``frame``.
    """
    values = frame[list(categories)].to_numpy(dtype=float)
    # Flip the sign of "lower is better" categories so that ">" always means "better".
    signs = np.array([-1.0 if c in lower_is_better else 1.0 for c in categories])
    values = values * signs

    n_players = len(values)
    # cats_won[a, b] = number of categories in which row a strictly beats row b.
    cats_won = np.zeros((n_players, n_players), dtype=np.int16)
    for k in range(values.shape[1]):
        column = values[:, k]
        cats_won += column[:, None] > column[None, :]

    return (cats_won > cats_won.T).sum(axis=1)


# Make sure the output folder exists before doing any expensive work.
os.makedirs('./stats', exist_ok=True)

# sorted() gives a deterministic processing order across platforms.
for f in sorted(os.listdir('./data')):
    # Skip hidden entries and sub-directories (e.g. .ipynb_checkpoints, .DS_Store).
    if f.startswith('.') or not os.path.isfile(f'./data/{f}'):
        continue

    df_temp = pd.read_csv(f'./data/{f}')
    df_temp = df_temp[_group_keys + fantasy_stats_list + ['MIN']]

    # Rows per player-season = games played. groupby().size() does not depend on
    # the column names produced by value_counts().reset_index(), which changed
    # in pandas 2.0 and made the previous rename/merge fail.
    per_player = df_temp.groupby(by=_group_keys)
    df_temp_count = per_player.size().reset_index(name='GAMES')

    # Per-game averages of every stat column.
    df_temp_agg = per_player.mean().reset_index()
    df_temp_agg = df_temp_agg.merge(df_temp_count, how='left', on=_group_keys)

    # Games-played adjusted averages: season total divided by at least
    # total_reg_games, so missed games pull the average down.
    counting_stats = [ele for ele in fantasy_stats_list if ele not in _pct_stats]
    season_games = np.maximum(total_reg_games, df_temp_agg['GAMES'])
    for c in counting_stats:
        df_temp_agg[c + '_ADJ'] = (df_temp_agg[c] * df_temp_agg['GAMES']) / season_games

    # Z-score every raw and adjusted stat across all players in the file.
    fantasy_stats_list_standardized = fantasy_stats_list + [c + '_ADJ' for c in counting_stats]
    ct = ColumnTransformer([('standardized', StandardScaler(), fantasy_stats_list_standardized)])
    standardized = ct.fit_transform(df_temp_agg)
    df_temp_standardized = pd.DataFrame(
        standardized,
        columns=[ele + '_STD' for ele in fantasy_stats_list_standardized],
        index=df_temp_agg.index,
    )
    df_temp_agg = df_temp_agg.join(df_temp_standardized)

    # Category lists for the raw and the games-played adjusted variants.
    raw_categories = list(fantasy_stats_list)
    dnp_categories = [ele if ele in _pct_stats else ele + '_ADJ' for ele in fantasy_stats_list]
    raw_negative = [ele for ele in raw_categories if ele in _negative_stats]
    dnp_negative = [ele if ele in _pct_stats else ele + '_ADJ' for ele in raw_negative]

    # Composite z-score totals; turnovers are subtracted so the totals agree
    # with the head-to-head rule that fewer turnovers is better.
    df_temp_agg['TOTAL_RAW'] = _z_score_total(df_temp_agg, raw_categories, raw_negative)
    df_temp_agg['TOTAL_DNP_ADJUSTED'] = _z_score_total(df_temp_agg, dnp_categories, dnp_negative)

    df_temp_agg = _with_rank(df_temp_agg, 'TOTAL_RAW', 'RANK_RAW')
    df_temp_agg = _with_rank(df_temp_agg, 'TOTAL_DNP_ADJUSTED', 'RANK_DNP_ADJUSTED')

    # Every player against every other player, category by category.
    df_temp_agg['H2H_RAW_WINS'] = _h2h_wins(df_temp_agg, raw_categories, raw_negative)
    df_temp_agg['H2H_DNP_ADJUSTED_WINS'] = _h2h_wins(df_temp_agg, dnp_categories, dnp_negative)

    # Share of opponents beaten; max(..., 1) avoids 0/0 for a single-player file.
    n_opponents = max(len(df_temp_agg) - 1, 1)
    df_temp_agg['H2H_RAW_WIN_PCT'] = df_temp_agg['H2H_RAW_WINS'] / n_opponents * 100
    df_temp_agg['H2H_DNP_ADJUSTED_WIN_PCT'] = df_temp_agg['H2H_DNP_ADJUSTED_WINS'] / n_opponents * 100

    df_temp_agg = _with_rank(df_temp_agg, 'H2H_RAW_WIN_PCT', 'RANK_H2H_RAW')
    df_temp_agg = _with_rank(df_temp_agg, 'H2H_DNP_ADJUSTED_WIN_PCT', 'RANK_H2H_DNP_ADJUSTED')

    # Characters 9-12 of the input file name tag the output file
    # (presumably the season year -- confirm against the ./data naming scheme).
    df_temp_agg.to_csv(f'./stats/per_game_stat_{f[9:13]}.txt', index=None)