In [1]:
#my intent is to use this notebook to pull together all the finished pieces of data and handle score/metric calculations here

import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np
import statistics as stats
import math
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['figure.figsize'] = [13,8]
import json

#load data
full_rs_matchups_df=pd.read_csv('full_rs_matchups_w_is_drafted.csv') #this file is created separately
full_seasons_draft_df = pd.read_csv('full_seasons_draft_df.csv') #load draft data - this is compiled in a separate notebook
full_faab=pd.read_csv('faab_thru_2022.csv')

In [12]:
def create_metric_dict(metrics_dict, values, metric, ascending):
    metric_dict = {}
    df = pd.DataFrame(values,columns=['value']).sort_values(by=['value'],ascending=ascending).drop_duplicates().reset_index(drop=True)
    if metric not in ['rs_points_against','playoff_points_against']:
        variances = []
        for index, row in df.iterrows():
            if index == 0:
                variances.append(0)
            else:
                variance = abs(df.iloc[index-1]['value'] - row['value'])
                variances.append(variance)

        sum_variance = sum(variances)
        #alternate way I decided against because it would create too much variance (ironically) in scores
        #variance_percents = []
        #for v in variances:
        #    variance_percents.append(v/sum_variance)
        for i in range(len(variances)):
            #metric_percent = sum(variance_percents[i:])
            #metric_dict[df.iloc[i]['value']] = metrics_dict[metric] * metric_percent
            if i ==0:
                #assign the max value
                metric_dict[df.iloc[i]['value']] = metrics_dict[metric]
                last_value = metrics_dict[metric]
            else:
                percent_variance = variances[i] / sum_variance
                next_value = last_value - (last_value * percent_variance)
                metric_dict[df.iloc[i]['value']] = next_value
                last_value = next_value
    else:
        #handle points against
        for value in df.value:
            metric_dict[value] = value * metrics_dict[metric]
        
    return metric_dict

def intersection(lst1, lst2):
    lst3 = [value for value in lst1 if value in lst2]
    return lst3

def calculate_composite_ranks(Master,start_year,pre_managers,recency_bonus,recency_window,Metrics_dict,use_model_weights,
                              manager_controlled_overall_weight,invert_manager_uncontrolled_weights,season_rank_weight):

    #weights informed by trained regression models
    manager_controlled_weights={
        'rs_points':.151,
        'playoff_points':.103,
        'draft_efficiency':.035,
        'faab_efficiency':.023,
        'undrafted_savvy':.014
    }
    manager_uncontrolled_weights={
        'rs_win_percentage':.183,
        'playoff_win_percentage':.132,
        'playoff_points_against':.053,
        'rs_points_against':.016
    }

    #get metric proportions from model feature weights
    if use_model_weights:
        metrics_dict={}
        available_weight=100-season_rank_weight
        #print('Features managers have control over:')
        sum_manager_controlled=sum(manager_controlled_weights.values())
        for k in manager_controlled_weights:
            proportion = manager_controlled_weights[k] / sum_manager_controlled
            points_weight = proportion*manager_controlled_overall_weight*available_weight
            #print(f'{k}: {proportion} - {points_weight}')
            metrics_dict[k]=points_weight
        #print('\nFeatures managers do not have control over:')
        sum_manager_uncontrolled=sum(manager_uncontrolled_weights.values())
        i=1
        for k in manager_uncontrolled_weights:
            if invert_manager_uncontrolled_weights:
                inverted_key=list(manager_uncontrolled_weights.keys())[-i]
                proportion = proportion = manager_uncontrolled_weights[inverted_key] / sum_manager_uncontrolled
            else:
                proportion = manager_uncontrolled_weights[k] / sum_manager_uncontrolled
            points_weight = proportion*(1-manager_controlled_overall_weight)*available_weight
            #print(f'{k}: {proportion} - {points_weight}')
            metrics_dict[k]=points_weight
            i=i+1
        metrics_dict['season_rank']=season_rank_weight
    else:
        metrics_dict=Metrics_dict
        for k in metrics_dict.keys():
            print(f'    {k}: {round(metrics_dict[k],3)}')

    #print out weights for convenience
    weights_df=pd.DataFrame()
    weights_df['metric']=metrics_dict.keys()
    weights_df['weight']=metrics_dict.values()
    weights_df.sort_values(by=['weight'],inplace=True,ascending=False)
    if use_model_weights:
        print('Using MODEL-derived metric weights:')
    else:
        print('Using USER-derived metric weights:')
    weights_df.sort_values(by=['weight'],inplace=True,ascending=False)
    for index, row in weights_df.iterrows():
        metric=row['metric']
        weight=row['weight']
        print(f'    {metric}: {round(weight,3)}')
    print('')

    #check weights sum to 100
    sum_weights=round(sum(list(metrics_dict.values())))
    if sum_weights != 100:
        print(f'Sum Metric Weights: {sum_weights}')
        print('WARNING: Metric weights do not sum to 100. To help manage relative metric weighting, consider modifying weights.\n')

    seasons = list(Master.season.drop_duplicates())
    latest_season = Master.season.max()

    #add necessary features to master
    Master['rs_win_percentage'] = Master.wins/(Master.losses + Master.wins) #note: this does not account for ties

    #need to add playoff matches because I've neglected to already have that feature
    playoff_match_counts =[]
    for index, row in Master.iterrows():
        number_managers = Master[Master.season==row['season']].shape[0]
        if math.isnan(row['playoff_seed']):
            #playoff seed is null
            playoff_matches = np.nan
        else:
            if number_managers == 6:
                if row['playoff_seed'] <= 4:
                    playoff_matches = 2
                else:
                    playoff_matches = 1
            elif number_managers == 8:
                if row['playoff_seed'] <= 2:
                    playoff_matches = 2
                elif row['playoff_seed'] <= 6:
                    if row['rank'] in [5,6]:
                        playoff_matches = 2
                    else:
                        playoff_matches = 3
                else:
                    playoff_matches = 1
            elif number_managers == 10:
                if row['playoff_seed'] <= 2:
                    playoff_matches = 2
                elif row['playoff_seed'] <= 6:
                    if row['rank'] in [5,6]:
                        playoff_matches = 2
                    else:
                        playoff_matches = 3
                else:
                    playoff_matches = 2
        playoff_match_counts.append(playoff_matches)
    Master['playoff_matches'] = playoff_match_counts
    Master['avg_playoff_points'] = Master['revised_p_score'] / Master['playoff_matches'] #revised p_score accounts for manual fixes for 2020 and 2022
    Master['playoff_win_percent'] = Master['playoff_wins'] / Master['playoff_matches']

    #need to add playoff_points_AGAINST
    sum_p_points_against = []
    for index, row in Master.iterrows():
        manager_id = int(row['team_key'].split('.')[-1])
        season = row['season']
        playoff_df = pd.read_csv(f'{season}_pre_playoffs.csv')
        playoff_df = playoff_df[playoff_df.score != '–']
        playoff_df['score'] = playoff_df['score'].astype(float)
        #playoff_opponents = playoff_df[playoff_df.manager_id == manager_id].opponent_id.drop_duplicates()
        opponent_scores = playoff_df[playoff_df.opponent_id==manager_id].score
        sum_p_points_against.append(sum(opponent_scores))
    Master['p_points_against'] = sum_p_points_against
    Master['avg_playoff_points_against'] = Master['p_points_against'] / Master['playoff_matches']

    #season data dict that we will use for aggregations by season
    #reminder why we need this: it is (primarily) so we can see relative performance of a specific season to reward recency bonus
    season_data_dict ={}
    for season in Master.season.drop_duplicates():
        season_dict = {}
        season_df = Master[Master.season == season]
        #stuff for win percents
        win_percents = season_df.rs_win_percentage
        win_percent_mean = sum(win_percents) / len(win_percents)
        win_percent_stdev = stats.stdev(win_percents)
        season_dict['win_percent_mean'] = win_percent_mean
        season_dict['win_percent_stdev'] = win_percent_stdev

        #stuff for rs_points
        season_points = season_df.points_for
        rs_points_mean = sum(season_points) / len(season_points)
        rs_points_stdev = stats.stdev(season_points)
        season_dict['rs_points_mean'] = rs_points_mean
        season_dict['rs_points_stdev'] = rs_points_stdev

        #stuff for rs_points_against
        season_points_against = season_df.points_against
        rs_points_against_mean = sum(season_points_against) / len(season_points_against)
        rs_points_against_stdev = stats.stdev(season_points_against)
        season_dict['rs_points_against_mean'] = rs_points_against_mean
        season_dict['rs_points_against_stdev'] = rs_points_against_stdev

        #stuff for playoff win percents
        playoff_win_percents = season_df[season_df.playoff_win_percent.notnull()].playoff_win_percent
        p_win_percents_mean = sum(playoff_win_percents) / len(playoff_win_percents)
        p_win_percents_stdev = stats.stdev(playoff_win_percents)
        season_dict['p_win_percents_mean'] = p_win_percents_mean
        season_dict['p_win_percents_stdev'] = p_win_percents_stdev

        #stuff for playoff points
        avg_p_points = season_df[season_df.avg_playoff_points.notnull()].avg_playoff_points
        p_points_mean = sum(avg_p_points) / len(avg_p_points)
        p_points_stdev = stats.stdev(avg_p_points)
        season_dict['p_points_mean'] = p_points_mean
        season_dict['p_points_stdev'] = p_points_stdev

        #stuff for playoff points against
        avg_p_points_against = season_df[season_df.avg_playoff_points_against >0].avg_playoff_points_against
        p_points_against_mean = sum(avg_p_points_against) / len(avg_p_points_against)
        p_points_against_stdev = stats.stdev(avg_p_points_against)
        season_dict['p_points_against_mean'] = p_points_against_mean
        season_dict['p_points_against_stdev'] = p_points_against_stdev

        season_data_dict[season] = season_dict


    final_score_dfs=[]
    for iter in range(latest_season-start_year+1):
        print(f'Calculating data thru {start_year+iter}')
        master=Master[Master.season<=(start_year+iter)]
        season_managers=intersection(list(master.manager),pre_managers)
        #season managers approach only works for us so long as each next season does not include fewer pre_managers than year before

        #variable governing whether we will get the raw values together, we should only do this in the most recent year
        if master.season.max() == Master.season.max():
            compile_raw_values=True
            raw_seasons=[]
            raw_managers=[]
            raw_metrics=[]
            raw_values=[]
        else:
            compile_raw_values=False

        #---------------------REGULAR SEASON WIN PERCENTAGE-------------------------------------
        win_percent_dict = {}
        raw_win_percent_dict ={}
        for manager in season_managers:
            win_percent_values = []
            manager_df = master[master.manager == manager]
            for index, row in manager_df.iterrows():
                if row['season'] < (latest_season - recency_window):
                    win_percent_values.append(row['rs_win_percentage'])
                else:
                    #recent season so it gets bonus
                    win_percent_z_score = (row['rs_win_percentage'] - season_data_dict[row['season']]['win_percent_mean']) / season_data_dict[row['season']]['win_percent_stdev']
                    score_modifier = win_percent_z_score * recency_bonus
                    modified_score = (win_percent_z_score + score_modifier) * season_data_dict[row['season']]['win_percent_stdev'] + season_data_dict[row['season']]['win_percent_mean']
                    win_percent_values.append(modified_score)
                if compile_raw_values:
                    raw_seasons.append(row['season'])
                    raw_managers.append(manager)
                    raw_metrics.append('avg_rs_win_percent')
                    raw_values.append(row['rs_win_percentage'])
            avg_win_percent = sum(win_percent_values) / len(win_percent_values)
            win_percent_dict[manager] = avg_win_percent

        win_percent_score_dict = create_metric_dict(metrics_dict,win_percent_dict.values(),'rs_win_percentage',False)

        #create df for final scores, add first metric
        final_scores_df = pd.DataFrame(index=win_percent_dict.keys(),data=win_percent_dict.values(),columns=['avg_rs_win_percent'])
        final_scores_df['rs_win_percent_score'] = final_scores_df.avg_rs_win_percent.map(win_percent_score_dict)

        #---------------------REGULAR SEASON POINTS FOR-----------------------------
        rs_points_z_scores_dict = {}
        for manager in season_managers:
            manager_z_scores = []
            manager_df = master[master.manager==manager]
            for index, row in manager_df.iterrows():
                z_score = (row['points_for'] - season_data_dict[row['season']]['rs_points_mean']) / season_data_dict[row['season']]['rs_points_stdev']
                if row['season'] < (latest_season - recency_window):
                    manager_z_scores.append(z_score)
                else:
                    modified_score = z_score * (1 + recency_bonus)
                    manager_z_scores.append(modified_score)
                if compile_raw_values:
                    raw_seasons.append(row['season'])
                    raw_managers.append(manager)
                    raw_metrics.append('rs_points')
                    raw_values.append(z_score)
            avg_rs_points_z_score = sum(manager_z_scores) / len(manager_z_scores)
            rs_points_z_scores_dict[manager] = avg_rs_points_z_score

        points_for_dict = create_metric_dict(metrics_dict,rs_points_z_scores_dict.values(),'rs_points',False)
        final_scores_df['rs_points_z_score'] = final_scores_df.index.map(rs_points_z_scores_dict)
        final_scores_df['rs_points_score'] = final_scores_df.rs_points_z_score.map(points_for_dict)

        #-----------------------REGULAR SEASON POINTS AGAINST-------------------------
        rs_points_against_z_scores_dict = {}
        for manager in season_managers:
            manager_z_scores = []
            manager_df = master[master.manager==manager]
            for index, row in manager_df.iterrows():
                z_score = (row['points_against'] - season_data_dict[row['season']]['rs_points_against_mean']) / season_data_dict[row['season']]['rs_points_against_stdev']
                if row['season'] < (latest_season - recency_window):
                    manager_z_scores.append(z_score)
                else:
                    modified_score = z_score * (1 + recency_bonus)
                    manager_z_scores.append(modified_score)
                if compile_raw_values:
                    raw_seasons.append(row['season'])
                    raw_managers.append(manager)
                    raw_metrics.append('rs_points_against')
                    raw_values.append(z_score)
            avg_rs_points_against_z_score = sum(manager_z_scores) / len(manager_z_scores)
            rs_points_against_z_scores_dict[manager] = avg_rs_points_against_z_score

        points_against_dict = create_metric_dict(metrics_dict,rs_points_against_z_scores_dict.values(),'rs_points_against',True)
        final_scores_df['rs_points_against_z_score'] = final_scores_df.index.map(rs_points_against_z_scores_dict)
        final_scores_df['rs_points_against_score'] = final_scores_df.rs_points_against_z_score.map(points_against_dict)

        #-----------------------PLAYOFF WINS------------------------
        playoff_wins_dict = {}
        for manager in season_managers:
            playoff_win_percents = []
            manager_df = master[master.manager==manager]
            for index, row in manager_df.iterrows():
                if not math.isnan(row['playoff_win_percent']):
                    if row['season'] < (latest_season - recency_window):
                        playoff_win_percents.append(row['playoff_win_percent'])
                    else:
                        playoff_win_percent_z_score = (row['playoff_win_percent'] - season_data_dict[row['season']]['p_win_percents_mean']) / season_data_dict[row['season']]['p_win_percents_stdev']
                        score_modifier = playoff_win_percent_z_score * recency_bonus
                        modified_score = (playoff_win_percent_z_score + score_modifier) * season_data_dict[row['season']]['p_win_percents_mean'] + season_data_dict[row['season']]['p_win_percents_stdev']
                        playoff_win_percents.append(modified_score)
                    if compile_raw_values:
                        raw_seasons.append(row['season'])
                        raw_managers.append(manager)
                        raw_metrics.append('playoff_win_percentage')
                        raw_values.append(row['playoff_win_percent'])
                avg_rs_points_against_z_score = sum
            playoff_wins_dict[manager] = sum(playoff_win_percents) / len(playoff_win_percents)

        playoff_win_percent_score_dict = create_metric_dict(metrics_dict,playoff_wins_dict.values(),'playoff_win_percentage',False)
        final_scores_df['avg_p_win_percent'] = final_scores_df.index.map(playoff_wins_dict)
        final_scores_df['p_win_percent_score'] = final_scores_df.avg_p_win_percent.map(playoff_win_percent_score_dict)

        #-----------------------PLAYOFF POINTS-----------------------
        playoff_points_dict = {}
        for manager in season_managers:
            avg_playoff_points = []
            manager_df = master[master.manager==manager]
            for index, row in manager_df.iterrows():
                if not math.isnan(row['avg_playoff_points']):
                    z_score_playoff_points = (row['avg_playoff_points'] - season_data_dict[row['season']]['p_points_mean']) / season_data_dict[row['season']]['p_points_stdev']
                    if row['season'] < (latest_season - recency_window):
                        avg_playoff_points.append(z_score_playoff_points)
                    else:
                        modified_score = z_score_playoff_points * (1 + recency_bonus)
                        avg_playoff_points.append(modified_score)
                    if compile_raw_values:
                        raw_seasons.append(row['season'])
                        raw_managers.append(manager)
                        raw_metrics.append('playoff_points')
                        raw_values.append(z_score_playoff_points)
            playoff_points_dict[manager] = sum(avg_playoff_points) / len(avg_playoff_points)
        playoff_points_score_dict = create_metric_dict(metrics_dict,playoff_points_dict.values(),'playoff_points',False)
        final_scores_df['p_points_z_score'] = final_scores_df.index.map(playoff_points_dict)
        final_scores_df['p_points_score'] = final_scores_df.p_points_z_score.map(playoff_points_score_dict)

        #-----------------------PLAYOFF POINTS AGAINST--------------------
        p_points_against_z_scores_dict = {}
        for manager in season_managers:
            manager_z_scores = []
            manager_df = master[master.manager==manager]
            for index, row in manager_df.iterrows():
                if not math.isnan(row['avg_playoff_points']):
                    z_score = (row['avg_playoff_points_against'] - season_data_dict[row['season']]['p_points_against_mean']) / season_data_dict[row['season']]['p_points_against_stdev']
                    if row['season'] < (latest_season - recency_window):
                        manager_z_scores.append(z_score)
                    else:
                        modified_score = z_score * (1 + recency_bonus)
                        manager_z_scores.append(modified_score)
                    if compile_raw_values:
                        raw_seasons.append(row['season'])
                        raw_managers.append(manager)
                        raw_metrics.append('playoff_points_against')
                        raw_values.append(z_score)
            avg_p_points_against_z_score = sum(manager_z_scores) / len(manager_z_scores)
            p_points_against_z_scores_dict[manager] = avg_p_points_against_z_score

        p_points_against_dict = create_metric_dict(metrics_dict,p_points_against_z_scores_dict.values(),'playoff_points_against',True)
        final_scores_df['p_points_against_z_score'] = final_scores_df.index.map(p_points_against_z_scores_dict)
        final_scores_df['p_points_against_score'] = final_scores_df.p_points_against_z_score.map(p_points_against_dict)

        #-----------------------WEIGHTED RANK-----------------------
        rank_weights_dict = {
            6: {
            1:15,
            2:10,
            3:7,
            4:4,
            5:1,
            6:0
            },
            8: {
            1:18,
            2:13,
            3:10,
            4:7,
            5:5,
            6:4,
            7:1,
            8:0
            },
            10: {
            1:20,
            2:15,
            3:12,
            4:9,
            5:7,
            6:6,
            7:3,
            8:2,
            9:1,
            10:0
            }
        }

        weighted_rank_stdevs_dict = {}
        for k in rank_weights_dict.keys():
            stdev = stats.stdev(rank_weights_dict[k].values())
            weighted_rank_stdevs_dict[k] = stdev

        weighted_rank_dict = {}
        for manager in season_managers:
            manager_weighted_ranks = []
            for index, row in master[master.manager==manager].iterrows():
                number_managers = master[master.season==row['season']].shape[0]
                raw_weighted_rank = rank_weights_dict[number_managers][row['rank']]
                if row['season'] < (latest_season - recency_window):
                    manager_weighted_ranks.append(raw_weighted_rank)
                else:
                    #apply recency bonus
                    avg_weights = sum(rank_weights_dict[number_managers].values()) / number_managers
                    z_score_weighted_rank = (raw_weighted_rank - avg_weights) / weighted_rank_stdevs_dict[number_managers]
                    score_modifier = z_score_weighted_rank * recency_bonus
                    modified_score = (z_score_weighted_rank + score_modifier) * weighted_rank_stdevs_dict[number_managers] + avg_weights
                    manager_weighted_ranks.append(modified_score)
                if compile_raw_values:
                    raw_seasons.append(row['season'])
                    raw_managers.append(manager)
                    raw_metrics.append('season_rank')
                    raw_values.append(raw_weighted_rank)
            weighted_rank_dict[manager] = sum(manager_weighted_ranks) / len(manager_weighted_ranks)

        weighted_rank_score_dict = create_metric_dict(metrics_dict,weighted_rank_dict.values(),'season_rank',False)
        final_scores_df['weighted_rank'] = final_scores_df.index.map(weighted_rank_dict)
        final_scores_df['weighted_rank_score'] = final_scores_df.weighted_rank.map(weighted_rank_score_dict)

        #----------------------------DRAFT EFFICIENCY--------------------------
        #notes
        #I'm questioning whether this is the right approach to grade draft efficiency.  well, it has the draft price....so that's probably the dimension I need
        #but, I could use regular season starting lineups to gather points for drafted and non-drafted players
        #at the least, this will be used for some "in season pickup savvy" metric.  but arguably it could be used for draft skill also
        #for non draft, one idea is to take an average number of points for non-drafted (divided by non-drafted starting player count)
        #can't use a raw count because this will unfairly advantage people who simply have more non-drafted because they drafted poorly
        
        #get df of just seasons thru current assessment season
        assessment_draft_seasons_df=full_seasons_draft_df[full_seasons_draft_df.Year<=master.season.max()]

        #now get draft score for each season for use in calculating the final scores
        draft_scores_dfs=[]
        for season in assessment_draft_seasons_df.Year.drop_duplicates():
            season_df = assessment_draft_seasons_df[assessment_draft_seasons_df.Year==season]
            roster_spots = max(season_df.Owner.value_counts())
            owners=[]
            draft_scores=[]
            for owner in season_df.Owner.drop_duplicates():
                drafted_players = season_df[season_df.Owner==owner].shape[0]
                penalty = (roster_spots - drafted_players) * .3
                draft_score = (sum(season_df[season_df.Owner==owner].draft_score) / season_df[season_df.Owner==owner].shape[0]) - penalty
                owners.append(owner)
                draft_scores.append(draft_score)
            draft_scores_df = pd.DataFrame()
            draft_scores_df['Owner']=owners
            draft_scores_df['Year']=season
            draft_scores_df['draft_score']=draft_scores
            draft_scores_dfs.append(draft_scores_df)
        full_draft_scores_df=pd.concat(draft_scores_dfs)

        #quick and dirty scores
        #full_draft_scores_df.groupby('Owner').agg({'draft_score':'mean'})

        #we need to calculate recency bonus
        #I think I need to calculate the score for everyone in a season, then can get the standard deviation
        #so we could pre-calc and save in season dict, or do it here...
        for season in full_draft_scores_df.Year.drop_duplicates():
            season_df = full_draft_scores_df[full_draft_scores_df.Year == season]
            #stuff for draft scores
            draft_score_values=season_df.draft_score
            draft_score_mean = sum(draft_score_values) / len(draft_score_values)
            draft_score_stdev = stats.stdev(draft_score_values)
            season_data_dict[season]['draft_score_mean']=draft_score_mean
            season_data_dict[season]['draft_score_stdev']=draft_score_stdev

        draft_efficiency_dict={}
        for manager in season_managers:
            manager_draft_scores=[]
            manager_draft_df= full_draft_scores_df[full_draft_scores_df.Owner==manager]
            for season in manager_draft_df.Year.drop_duplicates():
                raw_draft_efficiency = sum(manager_draft_df[manager_draft_df.Year==season]['draft_score']) / manager_draft_df[manager_draft_df.Year==season].shape[0]
                draft_efficiency_z_score = (raw_draft_efficiency - season_data_dict[season]['draft_score_mean']) / season_data_dict[season]['draft_score_stdev']
                if season < (latest_season - recency_window):
                    manager_draft_scores.append(draft_efficiency_z_score)
                else:
                    #score_modifier = draft_efficiency_z_score * recency_bonus
                    #modified_score = (draft_efficiency_z_score + score_modifier) * season_data_dict[season]['draft_score_stdev'] + season_data_dict[season]['draft_score_mean']
                    modified_score = draft_efficiency_z_score * (1 + recency_bonus)
                    manager_draft_scores.append(modified_score)
                if compile_raw_values:
                    raw_seasons.append(season)
                    raw_managers.append(manager)
                    raw_metrics.append('draft_efficiency')
                    raw_values.append(draft_efficiency_z_score)
            draft_efficiency_dict[manager] = sum(manager_draft_scores) / len(manager_draft_scores)

        #holy shit this took so many lines of code to get here
        draft_efficiency_score_dict = create_metric_dict(metrics_dict,draft_efficiency_dict.values(),'draft_efficiency',False)
        final_scores_df['draft_efficiency'] = final_scores_df.index.map(draft_efficiency_dict)
        final_scores_df['draft_efficiency_score'] = final_scores_df.draft_efficiency.map(draft_efficiency_score_dict)

        #----------------------IN-SEASON PICKUP SAVVY---------------------------

        #soooooo I think I want to load all the matchup files, then isolate non-drafted players...
        #then...I guess take an average?  need to avoid regarding those who simply had poor drafts and therefore a lot of non-drafted
        
        non_drafted_avg_scores =[]
        for season in master.season.drop_duplicates():
            manager_avg_scores={}
            season_matchups = full_rs_matchups_df[full_rs_matchups_df.season==season]
            for manager in season_matchups.manager_name.drop_duplicates():
                manager_non_drafted = season_matchups[(season_matchups.manager_name==manager) & (season_matchups.is_drafted==0)]
                cleaned_scores=[]
                for score in manager_non_drafted.score:
                    if score=='–':
                        cleaned_scores.append(0)
                    else:
                        cleaned_scores.append(score)
                manager_non_drafted['score'] = cleaned_scores
                manager_non_drafted['score'] = manager_non_drafted['score'].astype(float)
                avg_score = manager_non_drafted.score.sum() / manager_non_drafted.shape[0]
                manager_avg_scores[manager]=avg_score
            df=pd.DataFrame(index=manager_avg_scores.keys(),data=manager_avg_scores.values(),columns=['avg_non_draft_score'])
            stdev=stats.stdev(df.avg_non_draft_score)
            m= df.avg_non_draft_score.sum() / df.shape[0]
            zscores=[]
            for v in df.avg_non_draft_score:
                zscore=(v-m)/stdev
                zscores.append(zscore)
            df['zscore_non_draft_scores'] =zscores
            df['season']= season
            non_drafted_avg_scores.append(df)
        full_non_drafted_avg_scores=pd.concat(non_drafted_avg_scores)

        rs_non_drafted_dict = {}
        for manager in season_managers:
            manager_z_scores = []
            manager_df = full_non_drafted_avg_scores[full_non_drafted_avg_scores.index==manager]
            for index, row in manager_df.iterrows():
                if row['season'] < (latest_season - recency_window):
                    manager_z_scores.append(row['zscore_non_draft_scores'])
                else:
                    modified_score = row['zscore_non_draft_scores'] * (1 + recency_bonus)
                    manager_z_scores.append(modified_score)
                if compile_raw_values:
                    raw_seasons.append(row['season'])
                    raw_managers.append(manager)
                    raw_metrics.append('undrafted_savvy')
                    raw_values.append(row['zscore_non_draft_scores'])
            avg_rs_non_drafted_points = sum(manager_z_scores) / len(manager_z_scores)
            rs_non_drafted_dict[manager] = avg_rs_non_drafted_points

        non_drafted_score_dict = create_metric_dict(metrics_dict,rs_non_drafted_dict.values(),'undrafted_savvy',False)
        final_scores_df['undrafted_avg_z_score'] = final_scores_df.index.map(rs_non_drafted_dict)
        final_scores_df['undrafted_savvy_score'] = final_scores_df.undrafted_avg_z_score.map(non_drafted_score_dict)

        #--------------------------FAAB EFFICIENCY---------------------------------------
        #clean the failed_bids column so it is usable
        new_bids=[]
        for failed_bids in full_faab.failed_bids:
            if isinstance(failed_bids,float):
                new_items=''
            else:
                clean1=failed_bids.replace('[','').replace(']]','').replace(', ',',').split(']')
                new_items=[]
                for failed in clean1:
                    failed_items=[]
                    for clean2 in failed.split(','):
                        if len(clean2)>0:
                            failed_items.append(clean2[1:-1])
                    new_items.append(failed_items)
            new_bids.append(new_items)
        full_faab['failed_bids_clean'] = new_bids

        top_losing_bids=[]
        for f in full_faab.failed_bids_clean:
            if len(f)==0:
                top_losing_bids.append(0)
            else:
                top_bid = int(f[0][2].split()[0].replace('$',''))
                top_losing_bids.append(top_bid)
        full_faab['top_losing_bid']=top_losing_bids

        full_faab['bid_differential'] = full_faab.faab_dollars - full_faab.top_losing_bid

        #map manager names based on id
        manager_ids=[]
        for awardee_url in full_faab.awardee_url:
            manager_id = int(awardee_url.split('/')[-1])
            manager_ids.append(manager_id)
        full_faab['manager_id']=manager_ids

        season_dfs=[]
        for season in full_faab.season.drop_duplicates():
            season_faab_df=full_faab[full_faab.season==season]
            manager_lkup={}
            for index, row in Master[Master.season==season].iterrows():
                manager_id = int(row['team_key'].split('.')[-1])
                manager= row['manager']
                if manager_id not in manager_lkup.keys():
                    manager_lkup[manager_id]=manager
            season_faab_df['manager_name'] = season_faab_df.manager_id.map(manager_lkup)
            season_dfs.append(season_faab_df)
        full_faab_m=pd.concat(season_dfs)

        #meh do it again now to get the actual season scores
        #It looks like what I did in the past for this metric is [avg differential of winning bid] + [count bids *.2] + [unused faab *.1]
        season_faab_score_dfs=[]
        for season in master.season.drop_duplicates():
            if season in list(full_faab_m.season):
                #print(season)
                season_df=full_faab_m[full_faab_m.season==season]
                manager_faab_dict={}
                for manager in season_df.manager_name.drop_duplicates():
                    #print(manager)
                    manager_df=season_df[season_df.manager_name==manager]
                    avg_differential=manager_df.bid_differential.sum() / manager_df.shape[0]
                    unused_dollars=100-manager_df.faab_dollars.sum()
                    faab_efficiency=avg_differential+(unused_dollars*.1)+(manager_df.shape[0]*.2)
                    manager_faab_dict[manager] =faab_efficiency
                    #print(f'avg_differential: {avg_differential}')
                    #print(f'unused: {unused_dollars}')
                    #print(f'faab_efficiency: {faab_efficiency}')
                manager_faab_df=pd.DataFrame(index=manager_faab_dict.keys())
                manager_faab_df['faab_efficiency']=manager_faab_dict.values()
                season_faab_stdev=stats.stdev(manager_faab_df.faab_efficiency)
                season_faab_mean=manager_faab_df.faab_efficiency.sum() / manager_faab_df.shape[0]
                manager_faab_df['faab_efficiency_zscore'] = (manager_faab_df.faab_efficiency-season_faab_mean) / season_faab_stdev
                manager_faab_df['season']=season
                #update season_data_dict
                season_data_dict[season]['faab_efficiency_stdev'] =season_faab_stdev
                season_data_dict[season]['faab_efficiency_mean'] = season_faab_mean
                season_faab_score_dfs.append(manager_faab_df)
            else:
                manager_faab_df=pd.DataFrame(index=season_managers)
                manager_faab_df['faab_efficiency_zscore']=0
                manager_faab_df['season']=season
                season_faab_score_dfs.append(manager_faab_df)

        full_season_faab_dfs=pd.concat(season_faab_score_dfs)

        faab_efficiency_dict = {}
        for manager in season_managers:
            manager_faab_scores = []
            manager_df = full_season_faab_dfs[full_season_faab_dfs.index==manager]
            for index, row in manager_df.iterrows():
                if row['season'] < (latest_season - recency_window):
                    raw_faab_efficiency_score=row['faab_efficiency_zscore']
                    manager_faab_scores.append(raw_faab_efficiency_score)
                else:
                    faab_efficiency_zscore = row['faab_efficiency_zscore']
                    score_modifier = faab_efficiency_zscore * recency_bonus
                    modified_score = (faab_efficiency_zscore + score_modifier) * season_data_dict[season]['faab_efficiency_stdev'] + season_data_dict[season]['faab_efficiency_mean']
                    manager_faab_scores.append(modified_score)
                if compile_raw_values:
                    if row['faab_efficiency_zscore']!=0:
                        raw_seasons.append(row['season'])
                        raw_managers.append(manager)
                        raw_metrics.append('faab_efficiency')
                        raw_values.append(row['faab_efficiency_zscore'])
            avg_faab_efficiency = sum(manager_faab_scores) / len(manager_faab_scores)
            faab_efficiency_dict[manager] = avg_faab_efficiency

        faab_efficiency_score_dict = create_metric_dict(metrics_dict,faab_efficiency_dict.values(),'faab_efficiency',True)
        final_scores_df['avg_faab_efficiency'] = final_scores_df.index.map(faab_efficiency_dict)
        final_scores_df['faab_efficiency_score'] = final_scores_df.avg_faab_efficiency.map(faab_efficiency_score_dict)

        final_scores_df['total_score'] = final_scores_df['draft_efficiency_score']+ final_scores_df['rs_win_percent_score'] + final_scores_df['rs_points_score'] + final_scores_df['rs_points_against_score'] + final_scores_df['p_win_percent_score'] + final_scores_df['p_points_score'] + final_scores_df['p_points_against_score'] + final_scores_df['weighted_rank_score'] + final_scores_df['undrafted_savvy_score'] + final_scores_df['faab_efficiency_score']
        final_scores_df.sort_values(by=['total_score'],ascending=False,inplace=True)
        final_scores_df['thru'] = start_year+iter
        #TODO need to start compiling a data df containing finished metric values like my formula excel tab
        final_score_dfs.append(final_scores_df)

    #put everything together and cleanup
    compiled_final_scores_df = pd.concat(final_score_dfs)

    #clean up manager names
    manager_name_dict = {
        'Kevin':'KJ',
        'David Casstevens':'David',
        'Scott Gunter':'Scott',
        'Benjamin':'Ben',
        'Patrick':'Pat'
    }
    corrected_names=[]
    for manager in compiled_final_scores_df.index:
        if manager in manager_name_dict.keys():
            corrected_names.append(manager_name_dict[manager])
        else:
            corrected_names.append(manager)
    compiled_final_scores_df['manager']=corrected_names
    compiled_final_scores_df.set_index('manager',inplace=True)

    #compile raw_values df
    raw_scores_df=pd.DataFrame()
    raw_scores_df['season']=raw_seasons
    raw_scores_df['manager']=raw_managers
    raw_scores_df['metric']=raw_metrics
    raw_scores_df['score']=raw_values
    raw_scores_df=raw_scores_df.drop_duplicates()

    #need to inverse the rank weights
    inv_rank_weights_dict={}
    for key in rank_weights_dict.keys():
        inv_dict= {v: k for k, v in rank_weights_dict[key].items()}
        inv_rank_weights_dict[key]=inv_dict

    normalized_scores=[]
    for index, row in raw_scores_df.iterrows():
        if row['metric'] =='avg_rs_win_percent':
            normalized_score = (row['score'] - season_data_dict[row['season']]['win_percent_mean']) / season_data_dict[row['season']]['win_percent_stdev']
        elif row['metric'] =='playoff_win_percentage':
            normalized_score = (row['score'] - season_data_dict[row['season']]['p_win_percents_mean']) / season_data_dict[row['season']]['p_win_percents_stdev']
        elif row['metric'] == 'season_rank':
            number_managers=raw_scores_df[raw_scores_df.season==row['season']].drop_duplicates(subset=['manager']).shape[0]
            if row['season'] == 2007:
                number_managers=6
            elif row['season'] == 2009:
                number_managers=8
            actual_rank=inv_rank_weights_dict[number_managers][row['score']]
            normalized_score=actual_rank/number_managers
        else:
            normalized_score=row['score']
        normalized_scores.append(normalized_score)
    raw_scores_df['normalized_score']=normalized_scores

    #VISUALIZE
    plt.rcParams['figure.figsize'] = [13,8]
    plt.rcParams['figure.figsize'] = [13,8]
    sns.set(style='white')
    #create stacked bar chart for final scores
    thru=compiled_final_scores_df.thru.max()
    score_cols = ['rs_win_percent_score','rs_points_score','rs_points_against_score','p_win_percent_score','p_points_score','p_points_against_score','weighted_rank_score','draft_efficiency_score','undrafted_savvy_score','faab_efficiency_score']
    graph_data=compiled_final_scores_df[compiled_final_scores_df.thru==thru][score_cols]
    #consolidate the points against score cols with an appopriate sibling metric to avoid negative scores in our graph
    graph_data['rs_points_score']= graph_data['rs_points_score'] + graph_data['rs_points_against_score']
    graph_data['p_win_percent_score']= graph_data['p_win_percent_score'] + graph_data['p_points_against_score']
    for col in ['rs_points_against_score','p_points_against_score']:
        graph_data.drop(col,inplace=True,axis=1)
    graph_data.plot(kind='bar', stacked=True)
    plt.xlabel('Manager')
    plt.ylabel('Score')
    plt.title(f'PRE Fantasy League Composite Scores Thru {thru}')
    plt.show()

    #graph manager composite scores over time
    linegraph_data = compiled_final_scores_df[['total_score','thru']]
    sns.lineplot(data=linegraph_data,y='total_score',x='thru',hue=linegraph_data.index,linewidth=3.5)
    plt.xlabel('Season')
    plt.ylabel('Score')
    plt.title(f'PRE Fantasy League Composite Over Time')
    plt.show()

    #now let's show each metric over time
    for metric in score_cols:
        metric_data = compiled_final_scores_df[[metric,'thru']]
        sns.lineplot(data=metric_data,y=metric,x='thru',hue=metric_data.index,linewidth=3.5)
        plt.xlabel('Season')
        plt.ylabel(metric)
        plt.title(f'{metric} Over Time')
        plt.show()

    return compiled_final_scores_df, raw_scores_df

In [13]:
#set parameters
Master = pd.read_csv('consolidated_master.csv')
#Master=Master[Master.season!=2018] #remove Pat's terrible season to demonstrate what happens
start_year=2015
pre_managers = ['Benjamin','Bryan','David Casstevens','Duncan','Kevin','Krista','Luke','Mark','Patrick','Scott Gunter']
recency_bonus = 0
recency_window = 5 #number of recent seasons that we will give a boost to
use_model_weights=True
manager_controlled_overall_weight=.8
invert_manager_uncontrolled_weights=False
season_rank_weight=15

#master dictionary we will use to set weights for metrics
Metrics_dict = {
    #metrics getting at effectiveness of player management
    'draft_efficiency':6,
    'faab_efficiency':3,
    'undrafted_savvy':6,
    #rs perormance
    'rs_win_percentage':5,
    'rs_points':38,
    'rs_points_against':8,
    #playoff performance
    'playoff_win_percentage':6,
    'playoff_points':8,
    'playoff_points_against':5,
    #overall
    'season_rank':15,
}

compiled_final_scores, raw_scores = calculate_composite_ranks(Master,start_year,pre_managers,recency_bonus,recency_window,Metrics_dict
                                                              ,use_model_weights,manager_controlled_overall_weight,
                                                              invert_manager_uncontrolled_weights,season_rank_weight)
compiled_final_scores[compiled_final_scores.thru==2022]

Using MODEL-derived metric weights:
    rs_points: 31.496932515337416
    playoff_points: 21.48466257668711
    season_rank: 15.0
    rs_win_percentage: 8.101562499999998
    draft_efficiency: 7.300613496932516
    playoff_win_percentage: 5.843749999999998
    faab_efficiency: 4.797546012269938
    undrafted_savvy: 2.9202453987730057
    playoff_points_against: 2.3463541666666656
    rs_points_against: 0.7083333333333331

Calculating data thru 2015
Calculating data thru 2016
Calculating data thru 2017
Calculating data thru 2018
Calculating data thru 2019
Calculating data thru 2020
Calculating data thru 2021


In [7]:
metrics_dict

NameError: name 'metrics_dict' is not defined

In [None]:
compiled_final_scores_df[compiled_final_scores_df.index=='Duncan']

Unnamed: 0_level_0,avg_rs_win_percent,rs_win_percent_score,rs_points_z_score,rs_points_score,rs_points_against_z_score,rs_points_against_score,avg_p_win_percent,p_win_percent_score,p_points_z_score,p_points_score,...,weighted_rank,weighted_rank_score,draft_efficiency,draft_efficiency_score,undrafted_avg_z_score,undrafted_savvy_score,avg_faab_efficiency,faab_efficiency_score,total_score,thru
manager,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Duncan,0.562821,4.302703,-0.102374,26.797738,-0.638083,-5.104667,0.533333,3.609375,0.031479,5.417617,...,10.2,15.0,4.714286,4.331805,-0.442298,2.813275,0.0,3.0,61.699153,2012
Duncan,0.571581,5.0,-0.109385,23.161997,-0.855259,-6.842068,0.527778,4.033751,-0.068918,3.816585,...,9.333333,12.407524,3.207143,3.09602,-0.179958,3.783541,0.0,3.0,51.861743,2013
Duncan,0.555861,4.691506,-0.101475,27.131506,-0.754979,-6.039835,0.452381,3.240946,-0.057434,5.060988,...,8.857143,7.144788,4.271429,3.616306,-0.116108,3.670342,0.0,3.0,52.90087,2014
Duncan,0.563301,4.625223,0.093234,26.445773,-0.65889,-5.271116,0.479167,3.786627,0.035506,5.432473,...,9.25,9.053254,6.291071,5.811279,-0.059562,3.541953,0.0,3.0,57.907831,2015
Duncan,0.577635,5.0,0.212055,33.562968,-0.630438,-5.043504,0.462963,3.376846,0.082053,5.665563,...,9.222222,12.758621,6.912857,6.0,0.083226,4.223879,0.0,3.0,69.649242,2016
Duncan,0.589103,5.0,0.386847,36.996448,-0.681718,-5.453747,0.450913,3.501293,0.23364,6.667755,...,9.5,15.0,7.277381,6.0,0.185619,4.592079,0.108092,2.59162,75.961743,2017
Duncan,0.605478,5.0,0.468295,38.0,-0.585327,-4.682617,0.442771,3.199656,0.232159,6.480701,...,9.727273,15.0,7.396939,6.0,0.164628,4.359086,0.167976,2.823539,77.233775,2018
Duncan,0.625534,5.0,0.596982,38.0,-0.632861,-5.062886,0.434412,3.495161,0.294774,6.959067,...,10.166667,15.0,7.895049,6.0,0.247635,4.82063,0.236967,2.721829,78.271103,2019
Duncan,0.612919,5.0,0.524415,38.0,-0.579281,-4.634249,0.428792,3.570468,0.349314,7.661467,...,9.538462,15.0,7.692821,6.0,0.29638,4.634904,0.248949,2.633699,78.917874,2020
Duncan,0.589547,5.0,0.443966,38.0,-0.501375,-4.011,0.427922,3.703507,0.356113,8.0,...,8.928571,13.402597,7.329421,6.0,0.264591,4.057063,0.289786,2.762715,77.646091,2021


In [None]:
raw_scores[raw_scores.metric=='draft_efficiency']

Unnamed: 0,season,manager,metric,score,normalized_score
12705,2012.0,Patrick,draft_efficiency,-1.870062,-1.870062
12706,2013.0,Patrick,draft_efficiency,0.188118,0.188118
12707,2014.0,Patrick,draft_efficiency,0.449411,0.449411
12708,2015.0,Patrick,draft_efficiency,0.776419,0.776419
12709,2016.0,Patrick,draft_efficiency,0.126133,0.126133
...,...,...,...,...,...
13279,2018.0,Benjamin,draft_efficiency,0.679152,0.679152
13280,2019.0,Benjamin,draft_efficiency,-0.376637,-0.376637
13281,2020.0,Benjamin,draft_efficiency,-0.126437,-0.126437
13282,2021.0,Benjamin,draft_efficiency,-0.166030,-0.166030


In [None]:
#compile data for modeling
raw_scores_df=pd.DataFrame()
raw_scores_df['season']=raw_seasons
raw_scores_df['manager']=raw_managers
raw_scores_df['metric']=raw_metrics
raw_scores_df['score']=raw_values
raw_scores_df=raw_scores_df.drop_duplicates()

#need to inverse the rank weights
inv_rank_weights_dict={}
for key in rank_weights_dict.keys():
    inv_dict= {v: k for k, v in rank_weights_dict[key].items()}
    inv_rank_weights_dict[key]=inv_dict

normalized_scores=[]
for index, row in raw_scores_df.iterrows():
    if row['metric'] =='avg_rs_win_percent':
        normalized_score = (row['score'] - season_data_dict[row['season']]['win_percent_mean']) / season_data_dict[row['season']]['win_percent_stdev']
    elif row['metric'] =='playoff_win_percentage':
        normalized_score = (row['score'] - season_data_dict[row['season']]['p_win_percents_mean']) / season_data_dict[row['season']]['p_win_percents_stdev']
    elif row['metric'] == 'season_rank':
        number_managers=raw_scores_df[raw_scores_df.season==row['season']].drop_duplicates(subset=['manager']).shape[0]
        if row['season'] == 2007:
            number_managers=6
        elif row['season'] == 2009:
            number_managers=8
        actual_rank=inv_rank_weights_dict[number_managers][row['score']]
        normalized_score=actual_rank/number_managers
    else:
        normalized_score=row['score']
    normalized_scores.append(normalized_score)

raw_scores_df['normalized_score']=normalized_scores
raw_scores_df=raw_scores_df.set_index(['manager','season'])

transformed_dfs=[]
for metric in raw_scores_df.metric.drop_duplicates():
    metric_df=raw_scores_df[raw_scores_df.metric==metric][['normalized_score']]
    metric_df.rename(columns={'normalized_score':metric},inplace=True)
    transformed_dfs.append(metric_df)

transformed_raw_df=pd.concat(transformed_dfs,axis=1)
transformed_raw_df

Unnamed: 0_level_0,Unnamed: 1_level_0,avg_rs_win_percent,rs_points,rs_points_against,playoff_win_percentage,playoff_points,playoff_points_against,season_rank,draft_efficiency,undrafted_savvy,faab_efficiency
manager,season,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Patrick,2007.0,0.185041,-0.457184,-0.076762,1.118034,1.592217,0.205434,0.166667,,-0.457184,
Patrick,2009.0,1.527525,0.997591,-0.359494,-1.140532,-1.512676,1.112101,0.500000,,0.997591,
Patrick,2010.0,-0.935414,-0.368335,1.299256,-0.429198,0.011889,0.460636,0.500000,,-0.414008,
Patrick,2011.0,0.585142,0.969865,0.050845,-1.287593,-0.607253,1.063553,0.750000,,0.969865,
Patrick,2012.0,-0.585142,-0.672375,0.514127,-1.287593,-1.183163,-0.668073,1.000000,-1.870062,-0.362362,
...,...,...,...,...,...,...,...,...,...,...,...
Benjamin,2018.0,0.230089,-0.209043,-1.828109,1.530981,1.703617,0.255019,0.100000,0.679152,0.759980,-0.950154
Benjamin,2019.0,-0.230089,-0.830613,0.517510,0.000000,-0.985571,-1.243074,0.900000,-0.376637,0.873651,2.327015
Benjamin,2020.0,0.271607,0.184991,-0.646841,0.049386,-0.097848,-0.202828,0.500000,-0.126437,-1.015709,-0.006240
Benjamin,2021.0,0.884652,0.360860,-0.687133,0.088852,-1.603351,0.258713,0.500000,-0.166030,-1.097138,-0.526180


In [None]:
transformed_raw_df.to_csv('transformed_pre_fanasy_data.csv')

In [None]:
full_faab=pd.read_csv('faab_thru_2022.csv')

#clean the failed_bids column so it is usable
new_bids=[]
for failed_bids in full_faab.failed_bids:
    if isinstance(failed_bids,float):
        new_items=''
    else:
        clean1=failed_bids.replace('[','').replace(']]','').replace(', ',',').split(']')
        new_items=[]
        for failed in clean1:
            failed_items=[]
            for clean2 in failed.split(','):
                if len(clean2)>0:
                    failed_items.append(clean2[1:-1])
            new_items.append(failed_items)
    new_bids.append(new_items)
full_faab['failed_bids_clean'] = new_bids

top_losing_bids=[]
for f in full_faab.failed_bids_clean:
    if len(f)==0:
        top_losing_bids.append(0)
    else:
        top_bid = int(f[0][2].split()[0].replace('$',''))
        top_losing_bids.append(top_bid)
full_faab['top_losing_bid']=top_losing_bids

full_faab['bid_differential'] = full_faab.faab_dollars - full_faab.top_losing_bid

#map manager names based on id
manager_ids=[]
for awardee_url in full_faab.awardee_url:
    manager_id = int(awardee_url.split('/')[-1])
    manager_ids.append(manager_id)
full_faab['manager_id']=manager_ids

season_dfs=[]
for season in full_faab.season.drop_duplicates():
    season_faab_df=full_faab[full_faab.season==season]
    manager_lkup={}
    for index, row in Master[Master.season==season].iterrows():
        manager_id = int(row['team_key'].split('.')[-1])
        manager= row['manager']
        if manager_id not in manager_lkup.keys():
            manager_lkup[manager_id]=manager
    season_faab_df['manager_name'] = season_faab_df.manager_id.map(manager_lkup)
    season_dfs.append(season_faab_df)
full_faab_m=pd.concat(season_dfs)

#meh do it again now to get the actual season scores
#It looks like what I did in the past for this metric is [avg differential of winning bid] + [count bids *.2] + [unused faab *.1]
season_faab_score_dfs=[]
for season in full_faab_m.season.drop_duplicates():
    #print(season)
    season_df=full_faab_m[full_faab_m.season==season]
    manager_faab_dict={}
    for manager in season_df.manager_name.drop_duplicates():
        #print(manager)
        manager_df=season_df[season_df.manager_name==manager]
        avg_differential=manager_df.bid_differential.sum() / manager_df.shape[0]
        unused_dollars=100-manager_df.faab_dollars.sum()
        faab_efficiency=avg_differential+(unused_dollars*.1)+(manager_df.shape[0]*.2)
        manager_faab_dict[manager] =faab_efficiency
        #print(f'avg_differential: {avg_differential}')
        #print(f'unused: {unused_dollars}')
        #print(f'faab_efficiency: {faab_efficiency}')
    manager_faab_df=pd.DataFrame(index=manager_faab_dict.keys())
    manager_faab_df['faab_efficiency']=manager_faab_dict.values()
    season_faab_stdev=stats.stdev(manager_faab_df.faab_efficiency)
    season_faab_mean=manager_faab_df.faab_efficiency.sum() / manager_faab_df.shape[0]
    manager_faab_df['faab_efficiency_zscore'] = (manager_faab_df.faab_efficiency-season_faab_mean) / season_faab_stdev
    manager_faab_df['season']=season
    #update season_data_dict
    season_data_dict[season]['faab_efficiency_stdev'] =season_faab_stdev
    season_data_dict[season]['faab_efficiency_mean'] = season_faab_mean
    season_faab_score_dfs.append(manager_faab_df)

full_season_faab_dfs=pd.concat(season_faab_score_dfs)

faab_efficiency_dict = {}
for manager in pre_managers:
    manager_faab_scores = []
    manager_df = full_season_faab_dfs[full_season_faab_dfs.index==manager]
    for index, row in manager_df.iterrows():
        if row['season'] < (latest_season - recency_window):
            raw_faab_efficiency_score=row['faab_efficiency_zscore']
            manager_faab_scores.append(raw_faab_efficiency_score)
        else:
            faab_efficiency_zscore = row['faab_efficiency_zscore']
            score_modifier = faab_efficiency_zscore * recency_bonus
            modified_score = (faab_efficiency_zscore + score_modifier) * season_data_dict[season]['faab_efficiency_stdev'] + season_data_dict[season]['faab_efficiency_mean']
            manager_faab_scores.append(modified_score)
    avg_faab_efficiency = sum(manager_faab_scores) / len(manager_faab_scores)
    faab_efficiency_dict[manager] = avg_faab_efficiency

faab_efficiency_score_dict = create_metric_dict(faab_efficiency_dict.values(),'faab_efficiency',True)
final_scores_df['avg_faab_efficiency'] = final_scores_df.index.map(faab_efficiency_dict)
final_scores_df['faab_efficiency_score'] = final_scores_df.avg_faab_efficiency.map(faab_efficiency_score_dict)

final_scores_df


Unnamed: 0,avg_rs_win_percent,rs_win_percent_score,rs_points_z_score,rs_points_score,rs_points_against_z_score,rs_points_against_score,avg_p_win_percent,p_win_percent_score,p_points_z_score,p_points_score,...,weighted_rank,weighted_rank_score,draft_efficiency,draft_efficiency_score,undrafted_avg_z_score,undrafted_savvy_score,total_score,thru,avg_faab_efficiency,faab_efficiency_score
Duncan,0.596691,5.0,0.503632,38.0,-0.484863,-3.878901,0.379153,3.052883,0.351345,8.0,...,8.773333,14.359744,7.265538,6.0,0.393071,4.227817,75.72841,2022,7.36677,2.187318
Kevin,0.472027,2.164336,0.372548,33.337488,0.585366,4.682925,0.373282,2.994737,0.16662,6.310829,...,7.666667,10.227967,3.615691,2.954664,0.759662,6.0,70.675747,2022,6.805133,3.0
Patrick,0.550085,3.604089,0.477459,37.05004,-0.306183,-2.449465,0.265574,2.035675,-0.061966,4.811165,...,8.8,14.46,5.853379,4.596663,-0.107598,2.711471,67.742403,2022,8.113126,1.462409
Bryan,0.51344,2.815906,0.119819,25.290146,-0.216068,-1.728543,0.52995,5.14552,0.014361,5.271028,...,8.938462,15.0,6.876638,5.587205,-0.047221,2.850117,61.23946,2022,8.154114,1.433498
Luke,0.490659,2.431632,-0.004794,22.280063,0.429587,3.436695,0.432204,3.656879,-0.114669,4.521331,...,7.385714,9.480838,3.81817,3.064432,-0.132915,2.656162,51.313491,2022,7.957313,1.581246
David Casstevens,0.445177,1.816227,-0.262177,17.199238,0.729426,5.835407,0.330886,2.582887,-0.118752,4.500227,...,6.555556,7.575211,5.987606,4.708469,-0.481489,1.967184,44.839345,2022,7.894139,1.630941
Mark,0.489963,2.421494,-0.142026,19.359695,-0.143489,-1.147914,0.451044,3.894916,0.065588,5.598868,...,6.76,8.000481,4.40078,3.415433,0.235986,3.692726,43.56287,2022,8.459326,1.223461
Scott Gunter,0.514078,2.826705,-0.213945,18.029846,-0.237741,-1.901924,0.508659,4.790153,0.093511,5.783456,...,7.04,8.628646,2.579565,2.413079,-0.303358,2.295928,41.648549,2022,8.878407,0.982154
Krista,0.429755,1.648437,-0.490498,13.448476,0.398976,3.191809,0.412464,3.422717,-0.203915,4.06216,...,5.092308,4.797432,4.383884,3.405224,-0.306302,2.290483,35.766182,2022,8.449255,1.229433
Benjamin,0.474237,2.193373,-0.543342,12.769699,-0.466404,-3.731228,0.573853,6.0,-0.52352,2.578173,...,6.4,7.268836,1.612865,2.000403,-0.150042,2.619509,28.773568,2022,8.517752,1.188983


In [None]:
faab_efficiency_dict

{'Benjamin': 7.326291203916711,
 'Bryan': 7.326291203916711,
 'David Casstevens': 7.326291203916711,
 'Duncan': 7.326291203916711,
 'Kevin': 7.326291203916711,
 'Krista': 7.326291203916711,
 'Luke': 7.326291203916711,
 'Mark': 7.326291203916711,
 'Patrick': 7.326291203916711,
 'Scott Gunter': 7.326291203916711}

In [None]:
full_season_faab_dfs[full_season_faab_dfs.season==2020].sort_values(by=['faab_efficiency'])

Unnamed: 0,faab_efficiency,season
Bryan,5.4,2020
Mark,6.857143,2020
Luke,6.907692,2020
Kevin,6.929412,2020
David Casstevens,7.061538,2020
Patrick,7.333333,2020
Benjamin,7.785714,2020
Duncan,7.822222,2020
Krista,9.754545,2020
Scott Gunter,12.122222,2020


In [None]:
full_faab_m.manager_id.iloc[0]

'3'

In [None]:
#playoffs cleanup
managers=[]
opponents=[]
p_2020 = pd.read_csv('2020_pre_playoffs.csv')
manager_lkup={}
for index, row in Master[Master.season==2020].iterrows():
    manager_id = int(row['team_key'].split('.')[-1])
    manager= row['manager']
    if manager_id not in manager_lkup.keys():
        manager_lkup[manager_id]=manager
p_2020['manager_name']=p_2020.manager_id.map(manager_lkup)
p_2020['opponent_name']=p_2020.opponent_id.map(manager_lkup)

p_2020.to_csv('p_2020.csv')


In [None]:
#in season pickups
#this code will produce the df of all rs matchups and append a flag for whether the player was drafted by the corresponding manager
#it gets saved to file because it takes a bit of time to compute, probably because I am doing it inefficiently but eh
rs_matchup_datas=[]
for season in master.season.drop_duplicates():
    rs_matchups_df = pd.read_csv(f'{season}_pre_matchups.csv')
    rs_matchups_df['season'] = season
    manager_key_dict={}
    for index, row in master[master.season==season].iterrows():
        team_key = int(row['team_key'].split('.')[-1])
        manager_key_dict[team_key] = row['manager']
    rs_matchups_df['manager_name'] = rs_matchups_df.manager_id.map(manager_key_dict)
    player_ids=[]
    for index, row in rs_matchups_df.iterrows():
        try:
            if row['player_url'][-1]=='/':
                url=row['player_url'][:-1]
                player_id=url.split('/')[-1]
            else:
                player_id=row['player_url'].split('/')[-1]
            player_ids.append(player_id)
        except:
            player_ids.append('')
    rs_matchups_df['player_id'] = player_ids
    is_drafted=[]
    for index, row in rs_matchups_df.iterrows():
        #for each row check if they are drafted by the corresponding manager
        filter_df = full_seasons_draft_df[(full_seasons_draft_df.Year==season) & (full_seasons_draft_df.player_id==row['player_id']) & (full_seasons_draft_df.Owner==row['manager_name'])]
        filter_rows=filter_df.shape[0]
        if filter_rows>0:
            if filter_rows==1:
                is_drafted.append(1)
            else:
                print(filter_df)
        else:
            is_drafted.append(0)
    rs_matchups_df['is_drafted'] = is_drafted
    rs_matchup_datas.append(rs_matchups_df)

full_rs_matchups_df=pd.concat(rs_matchup_datas)

full_rs_matchups_df

Unnamed: 0.1,Unnamed: 0,position,manager_id,opponent_id,matchup_url,player,player_url,score,manager,league_url,season,manager_name,player_id,is_drafted
0,0,QB,1,2,https://football.fantasysports.yahoo.com//2007...,D. McNabb,https://sports.yahoo.com/nfl/players/4650,7,Philadelphia Freedom,https://football.fantasysports.yahoo.com/2007/...,2007,Ryan,4650,0
1,1,WR,1,2,https://football.fantasysports.yahoo.com//2007...,M. Harrison,https://sports.yahoo.com,10,Philadelphia Freedom,https://football.fantasysports.yahoo.com/2007/...,2007,Ryan,sports.yahoo.com,0
2,2,WR,1,2,https://football.fantasysports.yahoo.com//2007...,C. Johnson,https://sports.yahoo.com/nfl/players/5483,10,Philadelphia Freedom,https://football.fantasysports.yahoo.com/2007/...,2007,Ryan,5483,0
3,3,WR,1,2,https://football.fantasysports.yahoo.com//2007...,J. Walker,https://sports.yahoo.com/nfl/players/5906,5,Philadelphia Freedom,https://football.fantasysports.yahoo.com/2007/...,2007,Ryan,5906,0
4,4,RB,1,2,https://football.fantasysports.yahoo.com//2007...,L. Tomlinson,https://sports.yahoo.com/nfl/players/5452,15,Philadelphia Freedom,https://football.fantasysports.yahoo.com/2007/...,2007,Ryan,5452,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1535,6,TE,10,5,https://football.fantasysports.yahoo.com//f1/7...,P. Freiermuth Pit - TE,https://sports.yahoo.com/nfl/players/33443,11.63,Fresh Princ(ess) of Helaire,https://football.fantasysports.yahoo.com/2022/...,2022,Krista,33443,0
1536,7,W/R/T,10,5,https://football.fantasysports.yahoo.com//f1/7...,D. Swift Det - RB,https://sports.yahoo.com/nfl/players/32705,6.38,Fresh Princ(ess) of Helaire,https://football.fantasysports.yahoo.com/2022/...,2022,Krista,32705,1
1537,8,Q/WR/T,10,5,https://football.fantasysports.yahoo.com//f1/7...,M. Jones NE - QB,https://sports.yahoo.com/nfl/players/33403,9.63,Fresh Princ(ess) of Helaire,https://football.fantasysports.yahoo.com/2022/...,2022,Krista,33403,1
1538,9,DEF,10,5,https://football.fantasysports.yahoo.com//f1/7...,Philadelphia Phi - DEF,https://sports.yahoo.com/nfl/teams/philadelphia/,19.0,Fresh Princ(ess) of Helaire,https://football.fantasysports.yahoo.com/2022/...,2022,Krista,philadelphia,1


In [None]:
#saving this to file because this step takes a bit of time and would be annoying to have to recalculate
full_rs_matchups_df.to_csv('full_rs_matchups_w_is_drafted.csv',index=False)

In [None]:
full_rs_matchups_df=pd.read_csv('full_rs_matchups_w_is_drafted.csv')
non_drafted_avg_scores =[]
for season in master.season.drop_duplicates():
    manager_avg_scores={}
    season_matchups = full_rs_matchups_df[full_rs_matchups_df.season==season]
    for manager in season_matchups.manager_name.drop_duplicates():
        manager_non_drafted = season_matchups[(season_matchups.manager_name==manager) & (season_matchups.is_drafted==0)]
        cleaned_scores=[]
        for score in manager_non_drafted.score:
            if score=='–':
                cleaned_scores.append(0)
            else:
                cleaned_scores.append(score)
        manager_non_drafted['score'] = cleaned_scores
        manager_non_drafted['score'] = manager_non_drafted['score'].astype(float)
        avg_score = manager_non_drafted.score.sum() / manager_non_drafted.shape[0]
        manager_avg_scores[manager]=avg_score
    df=pd.DataFrame(index=manager_avg_scores.keys(),data=manager_avg_scores.values(),columns=['avg_non_draft_score'])
    stdev=stats.stdev(df.avg_non_draft_score)
    m= df.avg_non_draft_score.sum() / df.shape[0]
    zscores=[]
    for v in df.avg_non_draft_score:
        zscore=(v-m)/stdev
        zscores.append(zscore)
    df['zscore_non_draft_scores'] =zscores
    df['season']= season
    non_drafted_avg_scores.append(df)
full_non_drafted_avg_scores=pd.concat(non_drafted_avg_scores)

rs_non_drafted_dict = {}
for manager in pre_managers:
    manager_z_scores = []
    manager_df = full_non_drafted_avg_scores[full_non_drafted_avg_scores.index==manager]
    for index, row in manager_df.iterrows():
        if row['season'] < (latest_season - recency_window):
            manager_z_scores.append(row['zscore_non_draft_scores'])
        else:
            modified_score = row['zscore_non_draft_scores'] * (1 + recency_bonus)
            manager_z_scores.append(modified_score)
    avg_rs_non_drafted_points = sum(manager_z_scores) / len(manager_z_scores)
    rs_non_drafted_dict[manager] = avg_rs_non_drafted_points

non_drafted_score_dict = create_metric_dict(rs_non_drafted_dict.values(),'undrafted_savvy',False)
final_scores_df['undrafted_avg_z_score'] = final_scores_df.index.map(rs_non_drafted_dict)
final_scores_df['undrafted_savvy_score'] = final_scores_df.undrafted_avg_z_score.map(non_drafted_score_dict)

final_scores_df


KeyError: 'undrafted_savvy'

In [None]:
df=pd.DataFrame(index=manager_avg_scores.keys(),data=manager_avg_scores.values(),columns=['avg_non_draft_score'])
stdev=stats.stdev(df.avg_non_draft_score)
m= df.avg_non_draft_score.sum() / df.shape[0]
zscores=[]
for v in df.avg_non_draft_score:
    zscore=(v-m)/stdev
    zscores.append(zscore)
df['zscore_non_draft_scores'] =zscores
df

Unnamed: 0,avg_non_draft_score,zscore_non_draft_scores
Scott Gunter,12.530299,0.264064
Benjamin,11.531613,-0.157267
Duncan,15.066267,1.333952
Luke,10.39322,-0.637539
Mark,8.696,-1.353572
David Casstevens,9.393077,-1.059485
Patrick,12.214462,0.130817
Bryan,9.77075,-0.90015
Kevin,15.273243,1.421273
Krista,14.174923,0.957907
