In [3]:
import pandas as pd
from scipy.stats import kendalltau

In [4]:
# Load the contest results table: the first CSV column becomes the index and
# `contest_date` is parsed into datetimes.
df = pd.read_csv(
    '../data/contest_results_trueskill.csv',
    index_col=0,
    parse_dates=['contest_date'],
)

In [5]:
# Preview the first five rows of the loaded results.
df.head(n=5)

Unnamed: 0,organization,contest_name,contest_date,division,class,competitor_name,placing,trueskill_rating,trueskill_rank
17038,ifbb,ifbb flex pro,2012-02-18,fitness,open,adela garcia,1.0,25.0,1.0
17037,ifbb,ifbb flex pro,2012-02-18,fitness,open,oksana grishina,2.0,25.0,2.0
17036,ifbb,ifbb flex pro,2012-02-18,fitness,open,myriam capes,3.0,25.0,3.0
17035,ifbb,ifbb flex pro,2012-02-18,fitness,open,tanji johnson,4.0,25.0,4.0
17034,ifbb,ifbb flex pro,2012-02-18,fitness,open,bethany wagner,5.0,25.0,5.0


# Evaluating Ranking Metrics

In [6]:
# Order the rows chronologically, then bucket them so that each group holds
# every row sharing one (organization, division, class, contest_date) combination.
df_sorted = df.sort_values("contest_date")
df_groupby = df_sorted.groupby(
    by=["organization", "division", "class", "contest_date"]
)

In [7]:
# Build one metrics row per (organization, division, class, contest_date) group.
# Every contest inside a group is scored on its own and the per-contest scores
# are then averaged, so each output row describes one date of one series.

TOP_K = 5  # cutoff used for the precision@k metric


def _mean_or_nan(values):
    """Return the arithmetic mean of `values`, or NaN when `values` is empty.

    NaN (rather than None) keeps the resulting DataFrame column numeric even
    when no contest contributed a score.
    """
    return sum(values) / len(values) if values else float("nan")


def _contest_metrics(contest_df, k=TOP_K):
    """Score one contest's TrueSkill ranking against the actual placings.

    Parameters
    ----------
    contest_df : pandas.DataFrame
        Rows of a single contest, already free of missing values, with the
        columns `placing`, `trueskill_rank` and `competitor_name`.
    k : int, default TOP_K
        Size of the top-k lists compared for the precision metric.

    Returns
    -------
    tuple
        `(tau, precision)` where `tau` is the Kendall tau statistic between
        `placing` and `trueskill_rank` (NaN when it is undefined, e.g. one of
        the rankings is constant) and `precision` is the fraction of the top
        `k` competitors by placing that are also in the top `k` by
        `trueskill_rank`, or None when the contest has `k` or fewer rows.
    """
    tau = kendalltau(contest_df["placing"], contest_df["trueskill_rank"]).statistic

    # With k or fewer competitors both top-k lists are simply the whole field,
    # so the score carries no information and is skipped.
    precision = None
    if len(contest_df) > k:
        top_by_placing = contest_df.sort_values(by="placing", ascending=True).head(k)
        top_by_trueskill = contest_df.sort_values(by="trueskill_rank", ascending=True).head(k)
        # Set intersection counts each competitor once; an inner merge would
        # cross-multiply duplicated names and could push the score above 1.0.
        shared_names = set(top_by_placing["competitor_name"]) & set(
            top_by_trueskill["competitor_name"]
        )
        precision = len(shared_names) / k
    return tau, precision


rows_list = []
for name, df_dt in df_groupby:
    kt_corr = []
    prec_5_scores = []
    scored_contests = 0
    for ctst in df_dt["contest_name"].unique():
        temp_df = df_dt[df_dt["contest_name"] == ctst].dropna()
        # A rank correlation needs at least two complete rows.
        if len(temp_df) < 2:
            continue
        scored_contests += 1
        tau, precision = _contest_metrics(temp_df)
        # An undefined tau must not turn the whole per-date average into NaN.
        if pd.notna(tau):
            kt_corr.append(tau)
        if precision is not None:
            prec_5_scores.append(precision)
    # Emit a row for every group in which at least one contest was scored.
    if scored_contests:
        rows_list.append(
            {
                "organization": name[0],
                "division": name[1],
                "class": name[2],
                "date": name[3],
                "kt_corr": _mean_or_nan(kt_corr),
                "prec_5_score": _mean_or_nan(prec_5_scores),
            }
        )

In [8]:
# Turn the collected metric rows into a frame and, within each
# (organization, division, class) series, add a trailing rolling mean of each
# metric as a new `30dayrollavg_<metric>` column.
# NOTE(review): the window below is 30 *rows* (min_periods=1), not 30 calendar
# days, even though the column prefix says "30day" -- confirm which is intended.
metric_df = pd.DataFrame(rows_list)
series_keys = ['organization', 'division', 'class']


def _trailing_mean(values):
    """Rolling mean over the last 30 entries, defined from the first entry on."""
    return values.rolling(window=30, min_periods=1).mean()


for met in ('kt_corr', 'prec_5_score'):
    per_series = metric_df.groupby(series_keys)[met]
    metric_df[f'30dayrollavg_{met}'] = per_series.transform(_trailing_mean)

In [9]:
# Preview the first five rows of the metric table.
metric_df.head(n=5)

Unnamed: 0,organization,division,class,date,kt_corr,prec_5_score,30dayrollavg_kt_corr,30dayrollavg_prec_5_score
0,cpa,bikini,class a,2018-03-25,1.0,1.0,1.0,1.0
1,cpa,bikini,class a,2021-08-07,1.0,,1.0,1.0
2,cpa,bikini,class a,2021-08-26,1.0,1.0,1.0,1.0
3,cpa,bikini,class a,2021-09-10,0.666667,,0.916667,1.0
4,cpa,bikini,class a,2021-09-11,0.333333,,0.8,1.0


In [10]:
# Persist the metric table (index included, the `to_csv` default) for later use.
metric_df.to_csv(
    '../data/metric_data_trueskill.csv',
)