In [1]:
# from rankaggregation import rankaggregator as ra
import rankaggregation as ra
import pandas as pd
import os

In [2]:
# Show the installed rankaggregation version so the results below can be tied to a library release.
ra.__version__

'0.1.2'

In [3]:
# Module-level aggregator instance. NOTE(review): no visible cell reads this name --
# all_methods() constructs its own RankAggregator -- so it may be a leftover.
agg = ra.RankAggregator()

# data prep

Read the rankings from the base rankers and store them in a DataFrame. Also recreate some of the FantasyPros summary numbers (average rank, standard deviation, min, max), plus the median rank.

In [4]:
def get_ranks(year, scoring='STD',
              base_dir='/Users/Daniel/Documents/Projects/fantasy_football_rank_aggregation/'):
    """Load the checked experts' draft rankings for one season and scoring format.

    Parameters
    ----------
    year : int or str
        Season; used to build ``rankings/{year}/draft`` and ``experts/{year}``
        underneath ``base_dir``.
    scoring : str, default 'STD'
        One of 'STD', 'HALF', 'PPR' (case-insensitive).
    base_dir : str
        Project root that contains the ``rankings`` and ``experts`` folders.
        Defaults to the location this notebook was originally written against.

    Returns
    -------
    rank_df : pandas.DataFrame
        One row per (Player, Position, Team), one ``{source}_{expert}`` column
        per kept expert, plus ``mean_rank``, ``median_rank``, ``std_dev``,
        ``best`` and ``worst``. Rows that no kept expert ranked are dropped and
        the frame is sorted by ``mean_rank``.
    rank_list : list of list of str
        One list per kept expert holding ``Player_Position_Team`` keys ordered
        by that expert's rank; players the expert left unranked are omitted.

    Raises
    ------
    ValueError
        If ``scoring`` is not one of the supported formats.
    FileNotFoundError
        If no ranking file for ``scoring`` exists in the rankings folder.
    """
    id_cols = ['Player', 'Position', 'Team']

    scoring = scoring.upper()
    if scoring not in ['STD', 'HALF', 'PPR']:
        raise ValueError("scoring must be one of ['STD', 'HALF', 'PPR']")

    rank_dir = os.path.join(base_dir, f'rankings/{year}/draft')
    expert_dir = os.path.join(base_dir, f'experts/{year}')

    # os.listdir returns entries in arbitrary order; sorting keeps the column
    # order of rank_df and the order of rank_list reproducible between runs.
    files = sorted(x for x in os.listdir(rank_dir) if f'_{scoring}_' in x)
    if not files:
        raise FileNotFoundError(f'no {scoring} ranking files found in {rank_dir}')

    expert_df = pd.read_csv(os.path.join(expert_dir, f'{year}_expert_list_draft_{scoring}.csv'))
    # Expert columns are matched by their site-qualified name, e.g. '{Site}_{Expert}'.
    qualified_names = expert_df['Site'] + '_' + expert_df['Expert']
    expert_list = set(qualified_names[expert_df['checked'] == True])

    rank_list = []
    rank_df = None
    for file in files:
        tmp_df = pd.read_csv(os.path.join(rank_dir, file))
        tmp_df = tmp_df.drop(columns='Staff Composite')
        # Strip digit runs from Position. regex=True is required explicitly:
        # pandas >= 2.0 treats the pattern as a literal string by default.
        tmp_df['Position'] = tmp_df['Position'].str.replace(r'\d+', '', regex=True)

        # The source is the third underscore-separated token of the file name;
        # it prefixes every non-identifier column.
        source = file.split('_')[2]
        tmp_df.columns = [x if x in id_cols else f'{source}_{x}' for x in tmp_df.columns]
        keep_experts = [x for x in tmp_df.columns if x in expert_list]
        tmp_df = tmp_df[id_cols + keep_experts]

        for expert in keep_experts:
            ordered = tmp_df[id_cols + [expert]].sort_values(expert)
            ordered = ordered[ordered[expert].notna()]
            keys = ordered['Player'] + '_' + ordered['Position'] + '_' + ordered['Team']
            rank_list.append(list(keys))

        if rank_df is None:
            rank_df = tmp_df
        else:
            rank_df = rank_df.merge(tmp_df, on=id_cols, how='outer')

    rank_cols = [x for x in rank_df.columns if x not in id_cols]
    ranks = rank_df[rank_cols]
    # assign() builds a new frame, so a single-file result is not written
    # through to the slice it was taken from.
    rank_df = rank_df.assign(
        mean_rank=ranks.mean(axis=1),
        median_rank=ranks.median(axis=1),
        std_dev=ranks.std(axis=1),
        best=ranks.min(axis=1),
        worst=ranks.max(axis=1),
    )
    rank_df = rank_df[rank_df['mean_rank'].notna()]
    rank_df = rank_df.sort_values('mean_rank').reset_index(drop=True)

    return rank_df, rank_list

In [5]:
# Season to analyse; every path and output file name below is derived from it.
year = 2019

# Load the per-expert rankings once per scoring format, in STD / HALF / PPR order.
(std_df, std_list), (half_df, half_list), (ppr_df, ppr_list) = (
    get_ranks(year, fmt) for fmt in ('STD', 'HALF', 'PPR')
)

In [6]:
# First five rows of the standard-scoring summary statistics.
std_df.loc[:, ['Player', 'Position', 'Team',
               'mean_rank', 'median_rank', 'std_dev', 'best', 'worst']].head(n=5)

Unnamed: 0,Player,Position,Team,mean_rank,median_rank,std_dev,best,worst
0,Saquon Barkley,RB,NYG,1.60177,1.0,1.271528,1.0,11.0
1,Alvin Kamara,RB,NO,2.707965,3.0,1.42479,1.0,11.0
2,Christian McCaffrey,RB,CAR,2.778761,3.0,1.406201,1.0,12.0
3,Ezekiel Elliott,RB,DAL,6.088496,4.0,6.455282,1.0,36.0
4,Nick Chubb,RB,CLE,7.672566,5.0,4.300005,1.0,24.0


In [7]:
# First five rows of the half-PPR summary statistics.
half_df.loc[:, ['Player', 'Position', 'Team',
                'mean_rank', 'median_rank', 'std_dev', 'best', 'worst']].head(n=5)

Unnamed: 0,Player,Position,Team,mean_rank,median_rank,std_dev,best,worst
0,Saquon Barkley,RB,NYG,1.5625,1.0,0.867649,1.0,5.0
1,Christian McCaffrey,RB,CAR,2.446429,2.0,1.206882,1.0,10.0
2,Alvin Kamara,RB,NO,2.651786,3.0,1.054461,1.0,7.0
3,Ezekiel Elliott,RB,DAL,6.455357,4.0,6.615418,1.0,45.0
4,DeAndre Hopkins,WR,HOU,6.964286,6.0,2.022079,4.0,15.0


In [8]:
# First five rows of the PPR summary statistics.
ppr_df.loc[:, ['Player', 'Position', 'Team',
               'mean_rank', 'median_rank', 'std_dev', 'best', 'worst']].head(n=5)

Unnamed: 0,Player,Position,Team,mean_rank,median_rank,std_dev,best,worst
0,Saquon Barkley,RB,NYG,1.640351,1.0,1.175918,1.0,8.0
1,Christian McCaffrey,RB,CAR,2.5,2.0,1.740969,1.0,15.0
2,Alvin Kamara,RB,NO,3.035088,3.0,1.677175,1.0,13.0
3,DeAndre Hopkins,WR,HOU,6.210526,6.0,1.902368,4.0,18.0
4,Davante Adams,WR,GB,6.368421,6.0,3.072179,4.0,22.0


# combine ranking methods

In [9]:
def all_methods(rank_df, rank_list):
    """Run four rank-aggregation methods and line their results up per player.

    Parameters
    ----------
    rank_df : pandas.DataFrame
        Must contain ``Player``, ``Position`` and ``Team`` columns. It is not
        modified; the join key is built on an internal copy.
    rank_list : list of list of str
        One ordered list of ``Player_Position_Team`` keys per expert, passed
        unchanged to each aggregation method.

    Returns
    -------
    pandas.DataFrame
        ``Player``, ``Position``, ``Team``; the overall rank from each method
        (``avg_rank``, ``irv_rank``, ``borda_rank``, ``dowdall_rank``); the
        matching within-position labels (``*_pos_rank``, e.g. ``RB1``); and
        ``irv_diff`` (``avg_rank - irv_rank``) with its absolute value
        ``irv_abs_diff``.
    """
    agg = ra.RankAggregator()

    # Ordered keys per method, best first. average_rank, borda and dowdall
    # results are indexed with [0] per entry; instant_runoff's result is used as-is.
    ordered_keys = {
        'avg': [x[0] for x in agg.average_rank(rank_list)],
        'irv': agg.instant_runoff(rank_list),
        'borda': [x[0] for x in agg.borda(rank_list)],
        'dowdall': [x[0] for x in agg.dowdall(rank_list)],
    }

    # Build the join key on a copy so the caller's frame does not silently
    # gain a 'key' column.
    combined_df = rank_df.copy()
    combined_df['key'] = combined_df['Player'] + '_' + combined_df['Position'] + '_' + combined_df['Team']

    for method, keys in ordered_keys.items():
        rank_col = f'{method}_rank'
        method_df = pd.DataFrame({'key': keys})
        # Position in the aggregated ordering, 1-based.
        method_df[rank_col] = method_df.index + 1

        combined_df = combined_df.merge(method_df, on='key', how='outer')
        # Rank again within each position to get labels such as 'RB1' or 'WR2'.
        # NOTE(review): astype(int) raises if the outer merge leaves a NaN rank,
        # i.e. when rank_df and rank_list do not cover the same keys.
        pos_rank = combined_df.groupby('Position')[rank_col].rank().astype(int)
        combined_df[f'{method}_pos_rank'] = combined_df['Position'] + pos_rank.astype(str)

    rank_cols = [f'{method}_rank' for method in ordered_keys]
    pos_cols = [f'{method}_pos_rank' for method in ordered_keys]
    # Copy the selection so the assignments below write to an independent frame.
    combined_df = combined_df[['Player', 'Position', 'Team'] + rank_cols + pos_cols].copy()

    combined_df['irv_diff'] = combined_df['avg_rank'] - combined_df['irv_rank']
    combined_df['irv_abs_diff'] = combined_df['irv_diff'].abs()

    return combined_df

In [10]:
# Aggregate each scoring format with every method, in STD / HALF / PPR order.
std_agg, half_agg, ppr_agg = (
    all_methods(summary_df, expert_lists)
    for summary_df, expert_lists in (
        (std_df, std_list),
        (half_df, half_list),
        (ppr_df, ppr_list),
    )
)

In [11]:
# First five rows of the aggregated standard-scoring ranks.
std_agg.head(n=5)

Unnamed: 0,Player,Position,Team,avg_rank,irv_rank,borda_rank,dowdall_rank,avg_pos_rank,irv_pos_rank,borda_pos_rank,dowdall_pos_rank,irv_diff,irv_abs_diff
0,Saquon Barkley,RB,NYG,1,1,1,1,RB1,RB1,RB1,RB1,0,0
1,Alvin Kamara,RB,NO,2,2,2,2,RB2,RB2,RB2,RB2,0,0
2,Christian McCaffrey,RB,CAR,3,3,3,3,RB3,RB3,RB3,RB3,0,0
3,Ezekiel Elliott,RB,DAL,4,4,4,4,RB4,RB4,RB4,RB4,0,0
4,Nick Chubb,RB,CLE,5,5,5,6,RB5,RB5,RB5,RB6,0,0


In [12]:
# First five rows of the aggregated half-PPR ranks.
half_agg.head(n=5)

Unnamed: 0,Player,Position,Team,avg_rank,irv_rank,borda_rank,dowdall_rank,avg_pos_rank,irv_pos_rank,borda_pos_rank,dowdall_pos_rank,irv_diff,irv_abs_diff
0,Saquon Barkley,RB,NYG,1,1,1,1,RB1,RB1,RB1,RB1,0,0
1,Christian McCaffrey,RB,CAR,2,2,2,2,RB2,RB2,RB2,RB2,0,0
2,Alvin Kamara,RB,NO,3,3,3,3,RB3,RB3,RB3,RB3,0,0
3,Ezekiel Elliott,RB,DAL,4,4,4,4,RB4,RB4,RB4,RB4,0,0
4,DeAndre Hopkins,WR,HOU,5,6,5,6,WR1,WR1,WR1,WR1,-1,1


In [13]:
# First five rows of the aggregated PPR ranks.
ppr_agg.head(n=5)

Unnamed: 0,Player,Position,Team,avg_rank,irv_rank,borda_rank,dowdall_rank,avg_pos_rank,irv_pos_rank,borda_pos_rank,dowdall_pos_rank,irv_diff,irv_abs_diff
0,Saquon Barkley,RB,NYG,1,1,1,1,RB1,RB1,RB1,RB1,0,0
1,Christian McCaffrey,RB,CAR,2,2,2,2,RB2,RB2,RB2,RB2,0,0
2,Alvin Kamara,RB,NO,3,3,3,3,RB3,RB3,RB3,RB3,0,0
3,DeAndre Hopkins,WR,HOU,4,6,4,6,WR1,WR2,WR1,WR2,-2,2
4,Davante Adams,WR,GB,5,5,5,5,WR2,WR1,WR2,WR1,0,0


In [14]:
# to_csv does not create missing parent folders, so make sure the
# per-season output directory exists before writing.
out_dir = f'./aggregation/{year}'
os.makedirs(out_dir, exist_ok=True)

# One CSV per scoring format; the file names are unchanged.
for scoring_label, agg_df in (('STD', std_agg), ('HALF', half_agg), ('PPR', ppr_agg)):
    agg_df.to_csv(f'{out_dir}/draft_checked_experts_{scoring_label}.csv', index=False)

# check correlations

In [15]:
# Spearman correlation between the numeric rank columns. Select them explicitly:
# pandas >= 2.0 no longer drops the string columns (Player, Position, ...) silently.
std_agg.select_dtypes(include='number').corr(method='spearman')

Unnamed: 0,avg_rank,irv_rank,borda_rank,dowdall_rank,irv_diff,irv_abs_diff
avg_rank,1.0,0.960703,0.955208,0.95678,0.280496,0.668824
irv_rank,0.960703,1.0,0.992866,0.993458,0.045949,0.70843
borda_rank,0.955208,0.992866,1.0,0.999446,0.055957,0.708146
dowdall_rank,0.95678,0.993458,0.999446,1.0,0.05727,0.704833
irv_diff,0.280496,0.045949,0.055957,0.05727,1.0,0.167996
irv_abs_diff,0.668824,0.70843,0.708146,0.704833,0.167996,1.0


In [16]:
# Spearman correlation between the numeric rank columns. Select them explicitly:
# pandas >= 2.0 no longer drops the string columns (Player, Position, ...) silently.
half_agg.select_dtypes(include='number').corr(method='spearman')

Unnamed: 0,avg_rank,irv_rank,borda_rank,dowdall_rank,irv_diff,irv_abs_diff
avg_rank,1.0,0.976837,0.968603,0.972422,0.088149,0.184966
irv_rank,0.976837,1.0,0.992437,0.994794,-0.086283,0.197962
borda_rank,0.968603,0.992437,1.0,0.998517,-0.08401,0.210576
dowdall_rank,0.972422,0.994794,0.998517,1.0,-0.083157,0.202823
irv_diff,0.088149,-0.086283,-0.08401,-0.083157,1.0,0.111604
irv_abs_diff,0.184966,0.197962,0.210576,0.202823,0.111604,1.0


In [17]:
# Spearman correlation between the numeric rank columns. Select them explicitly:
# pandas >= 2.0 no longer drops the string columns (Player, Position, ...) silently.
ppr_agg.select_dtypes(include='number').corr(method='spearman')

Unnamed: 0,avg_rank,irv_rank,borda_rank,dowdall_rank,irv_diff,irv_abs_diff
avg_rank,1.0,0.940785,0.936892,0.935257,0.294496,0.632543
irv_rank,0.940785,1.0,0.994866,0.995272,0.014262,0.668549
borda_rank,0.936892,0.994866,1.0,0.999694,0.018708,0.659834
dowdall_rank,0.935257,0.995272,0.999694,1.0,0.010131,0.656095
irv_diff,0.294496,0.014262,0.018708,0.010131,1.0,0.171147
irv_abs_diff,0.632543,0.668549,0.659834,0.656095,0.171147,1.0


In [18]:
# Same correlation, restricted to players with avg_rank <= 100. Numeric columns are
# selected explicitly because pandas >= 2.0 raises on the string columns otherwise.
(
    std_agg.loc[std_agg['avg_rank'] <= 100]
    .select_dtypes(include='number')
    .corr(method='spearman')
)

Unnamed: 0,avg_rank,irv_rank,borda_rank,dowdall_rank,irv_diff,irv_abs_diff
avg_rank,1.0,0.995512,0.993711,0.996328,-0.148568,0.430304
irv_rank,0.995512,1.0,0.987699,0.99724,-0.230046,0.422512
borda_rank,0.993711,0.987699,1.0,0.991911,-0.124646,0.435983
dowdall_rank,0.996328,0.99724,0.991911,1.0,-0.197281,0.429756
irv_diff,-0.148568,-0.230046,-0.124646,-0.197281,1.0,-0.154542
irv_abs_diff,0.430304,0.422512,0.435983,0.429756,-0.154542,1.0


In [19]:
# Same correlation, restricted to players with avg_rank <= 100. Numeric columns are
# selected explicitly because pandas >= 2.0 raises on the string columns otherwise.
(
    half_agg.loc[half_agg['avg_rank'] <= 100]
    .select_dtypes(include='number')
    .corr(method='spearman')
)

Unnamed: 0,avg_rank,irv_rank,borda_rank,dowdall_rank,irv_diff,irv_abs_diff
avg_rank,1.0,0.997288,0.992103,0.997012,-0.060449,0.249174
irv_rank,0.997288,1.0,0.989535,0.996892,-0.126456,0.243749
borda_rank,0.992103,0.989535,1.0,0.991179,-0.054001,0.250491
dowdall_rank,0.997012,0.996892,0.991179,1.0,-0.093305,0.234559
irv_diff,-0.060449,-0.126456,-0.054001,-0.093305,1.0,-0.067871
irv_abs_diff,0.249174,0.243749,0.250491,0.234559,-0.067871,1.0


In [20]:
# Same correlation, restricted to players with avg_rank <= 100. Numeric columns are
# selected explicitly because pandas >= 2.0 raises on the string columns otherwise.
(
    ppr_agg.loc[ppr_agg['avg_rank'] <= 100]
    .select_dtypes(include='number')
    .corr(method='spearman')
)

Unnamed: 0,avg_rank,irv_rank,borda_rank,dowdall_rank,irv_diff,irv_abs_diff
avg_rank,1.0,0.996988,0.995836,0.997024,-0.082182,0.407649
irv_rank,0.996988,1.0,0.991707,0.997192,-0.152682,0.397927
borda_rank,0.995836,0.991707,1.0,0.994275,-0.062563,0.408321
dowdall_rank,0.997024,0.997192,0.994275,1.0,-0.116686,0.396228
irv_diff,-0.082182,-0.152682,-0.062563,-0.116686,1.0,-0.04304
irv_abs_diff,0.407649,0.397927,0.408321,0.396228,-0.04304,1.0
