In [1]:
import pandas as pd
from scipy.stats import kendalltau

In [2]:
# Load the contest results; `contest_date` is parsed into datetimes at read time
# so the later chronological sort and date-keyed grouping work on real dates.
df = pd.read_csv(
    filepath_or_buffer="../../data/contest_results_elo_adjusted.csv",
    parse_dates=["contest_date"],
)

In [3]:
# Preview the first five rows of the loaded results.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,organization,contest_name,contest_date,division,class,competitor_name,placing,elo_rating,elo_rank
0,17038,ifbb,ifbb flex pro,2012-02-18,fitness,open,adela garcia,1.0,1516.0,1.0
1,17030,ifbb,ifbb flex pro,2012-02-18,fitness,open,allison ethier,9.0,1516.0,2.0
2,17034,ifbb,ifbb flex pro,2012-02-18,fitness,open,bethany wagner,5.0,1516.0,3.0
3,17031,ifbb,ifbb flex pro,2012-02-18,fitness,open,camala rodriguez,8.0,1516.0,4.0
4,17033,ifbb,ifbb flex pro,2012-02-18,fitness,open,jodi boam,6.0,1516.0,5.0


# Evaluating Ranking Metrics

In [8]:
# Put the rows in chronological order, then bucket them so that each group is
# one (date, organization, division, class) combination.
df_sorted = df.sort_values("contest_date")
df_groupby = df_sorted.groupby(
    by=["contest_date", "organization", "division", "class"]
)

In [9]:
# Cut-off used for the precision@k score: the share of the top-k competitors by
# `placing` that also appear in the top-k by `elo_rank`.
PRECISION_K = 5

# One output record per (date, organization, division, class) group, holding the
# per-contest metrics averaged over every contest in that group.
rows_list = []
for name, df_dt in df_groupby:
    kt_corr = []
    prec_5_scores = []
    for ctst in df_dt["contest_name"].unique():
        # Rows with any missing value are discarded before scoring.
        contest_df = df_dt[df_dt["contest_name"] == ctst].dropna()
        if len(contest_df) < 2:
            # Kendall's tau needs at least two observations.
            continue

        # Rank agreement between the recorded placing and the Elo-derived rank.
        kt_corr.append(
            kendalltau(contest_df["placing"], contest_df["elo_rank"]).statistic
        )

        # Precision@k is only computed when the field is strictly larger than k.
        if len(contest_df) > PRECISION_K:
            top_by_placing = set(
                contest_df.sort_values(by="placing")
                .head(PRECISION_K)["competitor_name"]
            )
            top_by_elo = set(
                contest_df.sort_values(by="elo_rank")
                .head(PRECISION_K)["competitor_name"]
            )
            # Set intersection counts each competitor once, so a duplicated
            # name cannot inflate the overlap the way a many-to-many merge
            # would. Dividing by k avoids the float error of `0.2 * n`
            # (0.2 * 3 == 0.6000000000000001).
            prec_5_scores.append(len(top_by_placing & top_by_elo) / PRECISION_K)

    if kt_corr:
        # kendalltau yields NaN when one of its inputs is constant; skip those
        # so a single degenerate contest does not turn the group mean into NaN.
        finite_tau = [tau for tau in kt_corr if pd.notna(tau)]
        mean_tau = (
            sum(finite_tau) / len(finite_tau) if finite_tau else float("nan")
        )
        if prec_5_scores:
            prec_5_score = sum(prec_5_scores) / len(prec_5_scores)
        else:
            # No contest in this group had more than k competitors.
            prec_5_score = None
        rows_list.append(
            {
                "date": name[0],
                "organization": name[1],
                "division": name[2],
                "class": name[3],
                "kt_corr": mean_tau,
                "prec_5_score": prec_5_score,
            }
        )

In [10]:
# Build the metrics table and add a rolling mean of each metric, computed
# separately inside every (organization, division, class) series.
# NOTE(review): `rolling(window=30, min_periods=1)` is a window of 30 *rows*,
# not 30 calendar days, despite the "30dayrollavg_" column prefix -- confirm
# which one is intended before relying on the column name.
metric_df = pd.DataFrame(rows_list)
group_keys = ["organization", "division", "class"]
for met in ("kt_corr", "prec_5_score"):
    per_series = metric_df.groupby(group_keys)[met]
    metric_df[f"30dayrollavg_{met}"] = per_series.transform(
        lambda values: values.rolling(window=30, min_periods=1).mean()
    )

In [11]:
# Preview the first five rows of the metrics table.
metric_df.head(n=5)

Unnamed: 0,organization,division,class,date,kt_corr,prec_5_score,30dayrollavg_kt_corr,30dayrollavg_prec_5_score
0,cpa,bikini,class a,2018-03-25,0.333333,0.8,0.333333,0.8
1,cpa,bikini,class a,2021-08-07,0.333333,,0.333333,0.8
2,cpa,bikini,class a,2021-08-26,0.357143,0.6,0.34127,0.7
3,cpa,bikini,class a,2021-09-10,-0.333333,,0.172619,0.7
4,cpa,bikini,class a,2021-09-11,0.0,,0.138095,0.7


In [12]:
# Persist the metrics table. `index=True` is the pandas default: the row index
# is written as an unnamed first column, which shows up as "Unnamed: 0" when
# the file is read back without `index_col`.
metric_df.to_csv(
    path_or_buf="../../data/metric_data_adjusted.csv",
    index=True,
)