## Nomenclatura dos Dados
* _p: Previous
    * _p _win_pct: Acertos prévios

In [1]:
import requests 
import json
import pandas as pd
pd.set_option('display.max_columns', 200)
import numpy as np
import itertools
import pickle
import os

In [2]:
def pickle_data(path, data):
    if not path.endswith('.pickle'):
        path += '.pickle'
    with open(path, 'wb') as handle:
        pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return

In [3]:
def read_pickle(path):
    with open(path, 'rb') as handle:
        data = pickle.load(handle)
    return data

In [4]:
def fetch_data(week, year=2019):
    url = "https://api.nflpickwatch.com/v1/nfl/expert-picks-su/" + str(year) + "/" + str(week)
    resp = requests.get(url).text
    data = json.loads(resp)
    return data

In [5]:
def get_weekly_picks(week, data):
    games = pd.json_normalize(data['games'])[['id', 'rt_id', 'ht_id', 'game_state']]
    games.index = games.id
    cols = ['user_id']
    for g in list(games.id):
        cols.append("picks." + str(g) + ".t")
    picks = pd.json_normalize(data['expertPicks'])[cols].T
    picks.columns = picks.loc['user_id']
    picks = picks.drop(['user_id'])
    picks.index = games.index
    
    gp = pd.concat([games, picks], axis=1)
    gp = gp.fillna(0)

    gp = gp.rename(columns={"game_state":"game_result"})
    gp['game_result'] = np.where(gp['game_result']=='home-win', 1, 
                                np.where(gp['game_result']=='road-win', -1, 0))

    teams_dict = {}
    for ht in gp.ht_id:
        teams_dict[ht] = 1
    for rt in gp.rt_id:
        teams_dict[rt] = -1
    gp.iloc[:,4:] = gp.iloc[:,4:].replace(teams_dict)

    return gp

In [169]:
def get_votes(week, rank, picks, top, strategy="rank_squared"):
    best = rank.sort_values(by='week' + str(week-1) + '_rank')
    best = best.iloc[0:top]
#     best['votes'] = (top - best['week' + str(week-1) + '_rank'])
    
    if strategy == "rank_squared":
        best['pct_rank'] = best['week' + str(week-1) + '_rank'].rank(pct=True, ascending=False)
        best['votes'] = best.pct_rank**3
        best['votes'] = np.where(best['votes'] < 0, 0, best['votes'])
        
    elif strategy == 'democracy':
        best['votes'] = 1
        
    
    bid = list(best.user_id)
    
    votes = picks[bid]*best['votes']
    votes['vote'] = votes.sum(axis=1)
    votes['vote'] = np.where(votes['vote']==0, 1, votes['vote']) #In case of votes summing up to zero, the first ranked expert gets the Minerva vote.
    
    votes['bet'] = np.sign(votes.vote)
    return votes

In [161]:
year = 2016
bla = pd.read_csv('Data/' + str(year) + '/rank_' + str(year) +'.csv')
bla.index = bla.user_id

In [167]:
week = 4
bla = calculate_rank(bla,consider_last_season=False, season_hits_modifier=1, hist_modifier=9, hist_decay=True)
best = bla.sort_values(by='week' + str(week-1) + '_rank')
best = best.iloc[0:18]
best['pct_rank'] = best['week' + str(week-1) + '_rank'].rank(pct=True, ascending=False)
best['votes'] = best.pct_rank**2
# best['votes'] = np.where(best['votes'] == 0, 0, best['votes'])
best

Unnamed: 0_level_0,user_id,name,affiliation,prev_rank,p_wins,p_win_pct,season_hits,week0_rank,week1_hits,week1_rate,week1_rank,week2_hits,week2_rate,week2_rank,week3_hits,week3_rate,week3_rank,week4_hits,week4_rate,week4_rank,week5_hits,week5_rate,week5_rank,week6_hits,week6_rate,week6_rank,week7_hits,week7_rate,week7_rank,week8_hits,week8_rate,week8_rank,week9_hits,week9_rate,week9_rank,week10_hits,week10_rate,week10_rank,week11_hits,week11_rate,week11_rank,week12_hits,week12_rate,week12_rank,week13_hits,week13_rate,week13_rank,week14_hits,week14_rate,week14_rank,week15_hits,week15_rate,week15_rank,week16_hits,week16_rate,week16_rank,week17_hits,week17_rate,week17_rank,pct_rank,votes
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1
229,229,Andrew Lynch,FOX,48,0,0.0,170,117.0,12,114.0,17.0,12,40.5,4.0,10,27.864198,1.0,8,22.141602,1.0,8,19.39296,4.0,8,17.797454,12.0,9,17.094098,9.0,8,16.303024,7.0,8,15.721881,16.0,8,15.431562,12.0,11,15.903619,10.0,12,16.575958,13.0,10,16.758129,16.0,11,17.111697,15.0,13,17.807914,6.0,10,17.817327,2.0,12,18.194091,6.0,1.0,1.0
202,202,Best Defense (PPG),Pickwatch,28,158,59.398,145,53.0,11,104.5,44.0,11,37.125,20.0,10,26.37037,2.0,7,20.560059,14.0,8,18.29328,16.0,8,16.964185,27.0,8,16.094157,35.0,7,15.195923,48.0,6,14.29137,87.0,10,14.660751,48.0,9,14.784958,64.0,9,14.930179,97.0,9,15.084022,92.0,9,15.205972,95.0,11,15.710726,96.0,2,14.394653,109.0,10,14.748083,107.0,0.944444,0.891975
252,252,Mia Khalifa,Fansided,48,0,0.0,146,117.0,10,95.0,68.0,11,35.671875,27.0,10,25.705761,3.0,6,19.478027,40.0,10,18.54288,12.0,12,18.73026,3.0,7,17.117808,8.0,7,16.017288,15.0,7,15.221913,34.0,9,15.248261,18.0,10,15.513506,23.0,10,15.773965,58.0,10,16.019927,57.0,10,16.198972,59.0,10,16.369046,73.0,0,14.606464,107.0,7,14.355933,108.0,0.888889,0.790123
59,59,Elliot Harrison,NFL,26,159,59.551,170,51.0,11,104.5,44.0,11,37.125,20.0,9,25.376543,7.0,9,21.308594,5.0,9,19.29264,6.0,8,17.697338,15.0,10,17.332094,6.0,8,16.481552,5.0,11,16.610736,3.0,8,16.241279,1.0,11,16.627128,1.0,11,16.985707,4.0,10,17.084003,6.0,11,17.369662,5.0,12,17.813493,5.0,9,17.595138,9.0,12,17.971845,9.0,0.75,0.5625
63,63,NFL.com Fan Picks,NFL,10,169,63.296,168,17.0,11,104.5,44.0,11,37.125,20.0,9,25.376543,7.0,8,20.643066,12.0,7,17.84352,23.0,10,17.430684,17.0,8,16.451241,22.0,9,16.052994,13.0,9,15.749646,14.0,8,15.485732,11.0,12,16.195298,6.0,12,16.839864,6.0,10,16.980394,11.0,11,17.292263,10.0,12,17.732959,10.0,10,17.733152,7.0,11,17.895089,17.0,0.75,0.5625
248,248,David Steele,Sporting News,48,0,0.0,164,117.0,12,114.0,17.0,9,34.734375,38.0,10,25.376543,7.0,8,20.643066,12.0,8,18.3432,15.0,10,17.797261,13.0,7,16.403641,26.0,8,15.731598,26.0,12,16.235719,5.0,8,15.919072,2.0,9,15.912487,9.0,12,16.565575,14.0,10,16.727184,17.0,11,17.05912,19.0,10,17.132978,34.0,10,17.202799,26.0,10,17.227575,45.0,0.75,0.5625
228,228,Kate Hairopoulos,DallasMorningNews,48,0,0.0,166,117.0,12,114.0,17.0,9,34.734375,38.0,10,25.376543,7.0,8,20.643066,12.0,11,19.84224,1.0,10,18.896991,1.0,8,17.593979,3.0,8,16.695831,2.0,8,16.041305,6.0,8,15.724657,8.0,11,16.1726,7.0,11,16.562128,15.0,11,16.914114,13.0,9,16.795214,29.0,12,17.3107,20.0,8,16.994421,37.0,12,17.457162,36.0,0.75,0.5625
131,131,Joel Thorman,SB Nation,32,157,58.801,161,63.0,12,114.0,17.0,10,36.65625,21.0,9,25.211934,9.0,8,20.560059,14.0,10,19.29264,6.0,8,17.697338,15.0,10,17.332094,6.0,6,15.910172,21.0,9,15.638533,18.0,9,15.612128,9.0,9,15.629676,18.0,12,16.319059,24.0,11,16.713259,19.0,11,17.045233,21.0,10,17.146807,32.0,6,16.43544,69.0,11,16.765194,70.0,0.583333,0.340278
114,114,Chris Simms,PFT,48,0,0.0,150,117.0,11,104.5,44.0,12,39.046875,7.0,8,25.211934,9.0,5,18.563477,64.0,9,17.44368,39.0,7,15.93107,79.0,7,14.951419,88.0,8,14.553085,82.0,9,14.499672,77.0,6,13.919072,87.0,8,13.912506,101.0,11,14.621179,104.0,8,14.612879,105.0,8,14.568107,109.0,12,15.341317,105.0,10,15.620313,96.0,11,16.03722,90.0,0.583333,0.340278
235,235,Jeff Ratcliffe,ProFootballFocus,48,0,0.0,160,117.0,12,114.0,17.0,11,38.578125,10.0,8,25.047325,12.0,8,20.477051,16.0,7,17.74368,27.0,8,16.5643,47.0,9,16.117957,33.0,10,16.070847,12.0,8,15.513525,23.0,7,15.038506,24.0,10,15.355676,33.0,14,16.541248,16.0,10,16.723973,18.0,10,16.870665,28.0,11,17.169078,28.0,8,16.825162,49.0,9,16.699024,73.0,0.444444,0.197531


In [166]:
best.votes.sum()

6.5092592592592595

In [7]:
def get_rank(year):
    data = fetch_data(year=year, week=1)
    rank = pd.json_normalize(data['expertPicks'])[['user_id', 'name', 'affiliation', 'prev_rank', 'p_wins', 'p_win_pct']]
    rank['season_hits'] = 0
    rank['week0_rank'] = rank.prev_rank.rank(method='max')
    ht = ['week' + str(w) + '_hits' for w in range (1,18)]
    rt = ['week' + str(w) + '_rate' for w in range (1,18)]
    rk = ['week' + str(w) + '_rank' for w in range (1,18)]

    for w in range (0,17):
        rank[ht[w]] = 0
        rank[rt[w]] = 0
        rank[rk[w]] = 0

    rank.index = rank.user_id
    
    for week in range (1, 18):
        data = fetch_data(week, year)
        week_pct = pd.json_normalize(data['expertPicks'])[['user_id', 't_wins']]
        week_pct.index = week_pct.user_id
        rank['week'+str(week)+"_hits"]=week_pct.t_wins
        for d in data['expertPicks']:
            if d['picks'] == {} and d['user_id'] in rank.user_id:
                rank = rank.drop(d['user_id'])
                print("Removed " + str(d['user_id']))
        

    return rank

In [153]:
def calculate_rank(rank, hist_modifier, season_hits_modifier, consider_last_season, hist_decay):
    for week in range(1, 18):
        rate = 'week' + str(week) + "_rate"

        rank['season_hits'] = 0
        for w in range (1, week+1):
            rank['season_hits'] += rank['week'+str(w)+'_hits']

        n = 0
        if consider_last_season:
            while (n < hist_modifier):
                if n == 0 and hist_modifier > week:
                    rank[rate]  = (hist_modifier - week + 1) * (rank.p_wins/16)
                    n += (hist_modifier - week)
                else:
                    rank[rate] += rank['week' + str(week-hist_modifier+n+1) + "_hits"]
                    n+=1
                rank[rate] = rank[rate]/(hist_modifier)

        else:
            if hist_decay:
                hd = {}
                width = max(0,(week-hist_modifier))
                rg = min(hist_modifier, week)
                for i in range(rg):
                    hd[week-rg+i+1] = (i+1)/(week-width)
            n = week
            while (n > week - hist_modifier) and (n > 0):
                if hist_decay:
                    rank[rate] += rank['week' + str(n) + "_hits"]*hd[n]
#                     print("Week: {}; n: {}; Decay: {}".format(week, n, hd[n]))
            
                else:
                    rank[rate] += rank['week' + str(n) + "_hits"]   
                n -= 1
            rank[rate] = rank[rate]/(min(hist_modifier, week))

        rank[rate] += (rank['season_hits'] * season_hits_modifier)/week
        rank['week' + str(week) + "_rank"] = rank[rate].rank(method='max', ascending=False)
    
    return rank 

In [137]:
rank = pd.read_csv('Data/2019/rank_2019.csv')

In [147]:
rank = calculate_rank(rank, 4, 1, False, True)

Week: 1; n: 1; Decay: 1.0


Week: 2; n: 2; Decay: 1.0
Week: 2; n: 1; Decay: 0.5


Week: 3; n: 3; Decay: 1.0
Week: 3; n: 2; Decay: 0.6666666666666666
Week: 3; n: 1; Decay: 0.3333333333333333


Week: 4; n: 4; Decay: 1.0
Week: 4; n: 3; Decay: 0.75
Week: 4; n: 2; Decay: 0.5
Week: 4; n: 1; Decay: 0.25


Week: 5; n: 5; Decay: 1.0
Week: 5; n: 4; Decay: 0.75
Week: 5; n: 3; Decay: 0.5
Week: 5; n: 2; Decay: 0.25


Week: 6; n: 6; Decay: 1.0
Week: 6; n: 5; Decay: 0.75
Week: 6; n: 4; Decay: 0.5
Week: 6; n: 3; Decay: 0.25


Week: 7; n: 7; Decay: 1.0
Week: 7; n: 6; Decay: 0.75
Week: 7; n: 5; Decay: 0.5
Week: 7; n: 4; Decay: 0.25


Week: 8; n: 8; Decay: 1.0
Week: 8; n: 7; Decay: 0.75
Week: 8; n: 6; Decay: 0.5
Week: 8; n: 5; Decay: 0.25


Week: 9; n: 9; Decay: 1.0
Week: 9; n: 8; Decay: 0.75
Week: 9; n: 7; Decay: 0.5
Week: 9; n: 6; Decay: 0.25


Week: 10; n: 10; Decay: 1.0
Week: 10; n: 9; Decay: 0.75
Week: 10; n: 8; Decay: 0.5
Week: 10; n: 7; Decay: 0.25


Week: 11; n: 11; Decay: 1.0
Week: 11; n: 10; De

In [148]:
def win_a_bet(hp):
    results_cols = []
    hits_cols=[]
    gms_cols = []
    for y in range(2014,2020):
        results_cols+= [str(y)+"_hits", str(y)+"_gms", str(y)+"_rate"]
        hits_cols.append(str(y)+'_hits')
        gms_cols.append(str(y)+'_gms')

    results = pd.DataFrame(columns=results_cols)

    for year in range (2014,2020):
        season = read_pickle('Data/' + str(year) + '/' + "picks_" + str(year) + ".pickle")
        total_hits = 0
        total_games = 0
        for week in range(1,18):
            rank = pd.read_csv('Data/' + str(year) + '/rank_' + str(year) +'.csv')
            rank.index = rank.user_id
            rank = calculate_rank(rank, hist_modifier=hp['hist_modifier'], season_hits_modifier=hp['season_hits_modifier'], consider_last_season=hp['consider_last_season'], hist_decay=hp['hist_decay'])

    #         season[week] = get_weekly_picks(week, data)
            picks = season[week]

            votes = get_votes(week, rank, picks, top=hp['n_top_experts'], strategy=hp['vote_strategy'])
            picks['vote'] = votes.vote
            picks['bet'] = votes.bet
            picks['hit'] = np.where(picks.bet == picks.game_result, 1, 0)

            hits = picks['hit'].sum()
            games = picks['hit'].count()
            total_hits += hits
            total_games += games

            results.loc['week'+str(week), str(year)+'_gms'] = games
            results.loc['week'+str(week), str(year)+'_hits'] = hits
            results.loc['week'+str(week), str(year)+'_rate'] = hits/games

        results.loc['total', str(year)+'_gms'] = total_games
        results.loc['total', str(year)+'_hits'] = total_hits
        results.loc['total', str(year)+'_rate'] = total_hits/total_games

        print("{} Hits - Total: {}/{} = {:.2%}".format(year,total_hits,total_games,total_hits/total_games))

    results['mean_hits'] = results[hits_cols].mean(1)
    results['total_hits'] = results[hits_cols].sum(1)
    results['mean_gms'] = results[gms_cols].mean(1)
    results['total_gms'] = results[gms_cols].sum(1)
    results['total_rate'] = results['total_hits']/results['total_gms']
    #         print("Hits - Week {}: {}/{} = {:.2%}".format(week,hits,games,hits/games))

    return results

In [202]:
season = {}
total_hits = 0
total_games = 0
for week in range(1,18):
    data = fetch_data(week)
    season[week] = get_weekly_picks(week, data)
    picks = season[week]
    votes = get_votes(week, rank, picks, top=_TOP_N)
    picks['vote'] = votes.vote
    picks['bet'] = votes.bet
    picks['hit'] = np.where(picks.bet == picks.game_result, 1, 0)
    hits = picks['hit'].sum()
    games = picks['hit'].count()
    total_hits += hits
    total_games += games
    print("Hits - Week {}: {}/{} = {:.2%}".format(week,hits,games,hits/games))
print("Hits - TOTAL: {}/{} = {:.2%}".format(total_hits,total_games,total_hits/total_games))

Hits - Week 1: 11/16 = 68.75%
Hits - Week 2: 10/16 = 62.50%
Hits - Week 3: 10/16 = 62.50%
Hits - Week 4: 6/15 = 40.00%
Hits - Week 5: 8/15 = 53.33%
Hits - Week 6: 8/14 = 57.14%
Hits - Week 7: 8/14 = 57.14%
Hits - Week 8: 14/15 = 93.33%
Hits - Week 9: 7/14 = 50.00%
Hits - Week 10: 6/13 = 46.15%
Hits - Week 11: 11/14 = 78.57%
Hits - Week 12: 10/14 = 71.43%
Hits - Week 13: 7/16 = 43.75%
Hits - Week 14: 14/16 = 87.50%
Hits - Week 15: 11/16 = 68.75%
Hits - Week 16: 11/16 = 68.75%
Hits - Week 17: 9/16 = 56.25%
Hits - TOTAL: 161/256 = 62.89%


In [81]:
rank=get_rank()

NameError: name 'rank' is not defined

In [9]:
def gather_data(year):
    season = {}
    for week in range(1,18):
        data = fetch_data(week, year)
        season[week] = get_weekly_picks(week, data)
        
    if not os.path.exists('Data/' + str(year)):
        os.makedirs('Data/' + str(year))
        
    pickle_data(path='Data/' + str(year) + '/picks_' + str(year) + '.pickle', data=season)     
    
    return

In [23]:
for year in range(2014,2016):
    r = get_rank(year)
    r = calculate_rank(r, hist_modifier=hyperparameters['hist_modifier'], season_hits_modifier=hyperparameters['season_hits_modifier'], consider_last_season=hyperparameters['consider_last_season'])
    r.to_csv('Data/' + str(year) + '/rank_' + str(year) +'.csv', index=False)

Removed 127
Removed 124
Removed 123
Removed 143
Removed 36
Removed 128
Removed 113
Removed 30
Removed 71
Removed 102
Removed 122
Removed 153
Removed 170
Removed 169
Removed 58
Removed 135
Removed 167
Removed 122
Removed 156
Removed 54
Removed 69


In [22]:
%%time
for year in range(2014, 2016):
    print(year)
    gather_data(year)

2014
2015
Wall time: 1min 7s


In [149]:
hyperparameters = {
    'hist_modifier': 9,
    'n_top_experts': 18,
    'consider_last_season': False,
    'season_hits_modifier': 0,
    'vote_strategy': 'rank_squared',
    'hist_decay': True,
}

In [None]:
ntop_list = []
for hm in range(1,16):
    hyperparameters['hist_modifier']=hm
    ntop_change = {}
    for nt in range (18,19):
        print ((" HM = " + str(hm) + "; nTop = " +str(nt) + " ").center(80,'='))
        hyperparameters['n_top_experts'] = nt
        hyperparameters['season_hits_modifier'] = hm/4
        results = win_a_bet(hyperparameters)
        print("Grand-Total: {}/{} = {:.2%}".format(results.loc['total','total_hits'],results.loc['total','total_gms'],results.loc['total','total_rate']))
        print(''.center(80,'=') + '\n')
        ntop_change[nt]=results.loc['total','total_rate']
    ntop_list.append(ntop_change)

2014 Hits - Total: 173/256 = 67.58%
2015 Hits - Total: 159/256 = 62.11%
2016 Hits - Total: 165/256 = 64.45%
2017 Hits - Total: 168/256 = 65.62%
2018 Hits - Total: 166/256 = 64.84%
2019 Hits - Total: 162/256 = 63.28%
Grand-Total: 993.0/1536.0 = 64.65%

2014 Hits - Total: 169/256 = 66.02%
2015 Hits - Total: 160/256 = 62.50%
2016 Hits - Total: 167/256 = 65.23%
2017 Hits - Total: 169/256 = 66.02%
2018 Hits - Total: 165/256 = 64.45%
2019 Hits - Total: 160/256 = 62.50%
Grand-Total: 990.0/1536.0 = 64.45%

2014 Hits - Total: 171/256 = 66.80%
2015 Hits - Total: 154/256 = 60.16%
2016 Hits - Total: 166/256 = 64.84%
2017 Hits - Total: 171/256 = 66.80%
2018 Hits - Total: 165/256 = 64.45%
2019 Hits - Total: 161/256 = 62.89%
Grand-Total: 988.0/1536.0 = 64.32%

2014 Hits - Total: 168/256 = 65.62%
2015 Hits - Total: 156/256 = 60.94%
2016 Hits - Total: 162/256 = 63.28%
2017 Hits - Total: 172/256 = 67.19%
2018 Hits - Total: 163/256 = 63.67%
2019 Hits - Total: 155/256 = 60.55%
Grand-Total: 976.0/1536.0 = 

In [72]:
ntop = pd.DataFrame(ntop_list).T

cols = ["hm"+str(c+1) for c in list(ntop.columns)]

ntop.columns = cols

for c in list(ntop.columns):
    ntop['rank' + (c[2:])] = ntop[c].rank(pct=True)

In [74]:
ntop.iloc[:,:12].mean(1).sort_values()

7     0.634223
8     0.635471
11    0.635634
9     0.636068
10    0.636068
12    0.636068
14    0.636447
13    0.636773
5     0.637207
15    0.637804
16    0.637804
6     0.639757
17    0.640082
19    0.641005
18    0.641330
20    0.642090
21    0.642144
22    0.642632
23    0.644097
25    0.644260
24    0.645182
dtype: float64

In [75]:
hmod = pd.DataFrame(ntop_list)

hmod.index = hmod.index +1

cols = ["top"+str(c) for c in list(hmod.columns)]

hmod.columns = cols

for c in list(hmod.columns):
    hmod['rank' + (c[3:])] = hmod[c].rank(pct=True)

In [80]:
hmod.iloc[:,:21].mean(1).sort_values()

9     0.635448
7     0.636595
6     0.637339
11    0.637339
8     0.637494
12    0.637649
4     0.638300
10    0.638858
3     0.641307
5     0.641524
2     0.643818
1     0.644128
dtype: float64

In [429]:
ntop.to_csv('Data/hyperparameters/n_top_experts.csv')
hmod.to_csv('Data/hyperparameters/hist_modifiers.csv')