In [1]:
import pandas as pd
from scipy.stats import kendalltau

In [3]:
# Load the contest results with Elo columns; contest_date is parsed to datetimes on read.
df = pd.read_csv(
    "../data/contest_results_elo.csv",
    parse_dates=["contest_date"],
)

In [4]:
# Preview the first five rows of the loaded results.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,organization,contest_name,contest_date,division,class,competitor_name,placing,elo_rating,elo_rank
0,17038,ifbb,ifbb flex pro,2012-02-18,fitness,open,adela garcia,1.0,1500.0,1.0
1,17030,ifbb,ifbb flex pro,2012-02-18,fitness,open,allison ethier,9.0,1500.0,2.0
2,17034,ifbb,ifbb flex pro,2012-02-18,fitness,open,bethany wagner,5.0,1500.0,3.0
3,17031,ifbb,ifbb flex pro,2012-02-18,fitness,open,camala rodriguez,8.0,1500.0,4.0
4,17033,ifbb,ifbb flex pro,2012-02-18,fitness,open,jodi boam,6.0,1500.0,5.0


In [5]:
# The last whitespace-separated token of each competitor name becomes "last_name".
name_tokens = df["competitor_name"].str.split()
df["last_name"] = name_tokens.str.get(-1)

In [7]:
def assign_alphabetical_order(group):
    """Number the rows of ``group`` from 1 to ``len(group)`` in their current order.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single group. The numbering follows the row order the
        caller supplies, so any sorting must happen before this call.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` with an added ``alphabetical_order`` column that
        holds each row's 1-based position. The input frame is not modified.
    """
    # ``assign`` builds a new frame. Writing the column directly onto the
    # object handed over by ``groupby.apply`` mutates the caller's data, which
    # pandas documents as unsupported for functions passed to ``apply``.
    return group.assign(alphabetical_order=range(1, len(group) + 1))

In [16]:
# Alphabetical baseline: within every contest (per division and date), number
# the competitors 1..n by last name.
contest_keys = ["division", "contest_date", "contest_name"]
df_alpha = (
    # groupby silently skips rows whose key is missing; dropping them up front
    # keeps the same row set as a groupby(...).apply(...) based numbering.
    df.dropna(subset=contest_keys)
    # The full name breaks ties between competitors sharing a last name, so the
    # numbering does not depend on the incoming row order.
    .sort_values(by=["last_name", "competitor_name"])
    # cumcount numbers rows 0..n-1 per group in their current (sorted) order.
    # Unlike groupby.apply it is vectorised and does not operate on the
    # grouping columns, which recent pandas versions deprecate.
    .assign(
        alphabetical_order=lambda d: d.groupby(by=contest_keys).cumcount() + 1
    )
    .reset_index(drop=True)
)

  .apply(assign_alphabetical_order)


In [17]:
# Order the rows by contest date, ascending.
df_sorted = df_alpha.sort_values("contest_date")

# Evaluating Ranking Metrics

In [26]:
# One group per (organization, division, class, contest date).
df_groupby = df_sorted.groupby(
    by=["organization", "division", "class", "contest_date"]
)

In [30]:
def _mean_or_nan(values):
    """Return the arithmetic mean of ``values``; NaN when ``values`` is empty."""
    return sum(values) / len(values) if values else float("nan")


def _top_names(contest_df, rank_col, k=5):
    """Return the set of competitor names on the first ``k`` rows of
    ``contest_df`` after sorting ascending by ``rank_col``."""
    ranked = contest_df.sort_values(by=[rank_col], ascending=True)
    return set(ranked["competitor_name"].head(k))


# Only these columns feed the metrics, so only they decide whether a row is
# usable; a missing value in an unrelated column must not discard the row.
metric_cols = ["competitor_name", "placing", "elo_rank", "alphabetical_order"]

rows_list = []
for name, df_dt in df_groupby:
    # Per-contest scores for this (organization, division, class, date) group.
    kt_corr = []
    kt_corr_alpha = []
    prec_5_scores = []
    prec_5_scores_alpha = []
    contests_scored = 0
    for ctst in df_dt["contest_name"].unique():
        temp_df = df_dt[df_dt["contest_name"] == ctst].dropna(subset=metric_cols)
        # A rank correlation needs at least two competitors.
        if len(temp_df) < 2:
            continue
        contests_scored += 1
        y_true = temp_df["placing"]
        tau_elo = kendalltau(y_true, temp_df["elo_rank"]).statistic
        tau_alpha = kendalltau(y_true, temp_df["alphabetical_order"]).statistic
        # kendalltau yields NaN when tau is undefined (e.g. every competitor
        # shares the same placing). Appending that NaN would turn the whole
        # per-date mean into NaN, so undefined contests are left out instead.
        if pd.notna(tau_elo):
            kt_corr.append(tau_elo)
        if pd.notna(tau_alpha):
            kt_corr_alpha.append(tau_alpha)
        # Precision@5 is only computed for fields of more than five competitors.
        if len(temp_df) > 5:
            top_5_placing = _top_names(temp_df, "placing")
            # Set overlap counts each name once (a merge would multiply
            # repeated names), and dividing by 5 avoids float artefacts such
            # as 0.2 * 3 == 0.6000000000000001.
            prec_5_scores.append(
                len(top_5_placing & _top_names(temp_df, "elo_rank")) / 5
            )
            prec_5_scores_alpha.append(
                len(top_5_placing & _top_names(temp_df, "alphabetical_order")) / 5
            )
    # Emit a row whenever at least one contest was evaluated; a metric with no
    # usable contest is recorded as NaN.
    if contests_scored:
        rows_list.append(
            {
                "organization": name[0],
                "division": name[1],
                "class": name[2],
                "date": name[3],
                "kt_corr": _mean_or_nan(kt_corr),
                "kt_corr_alpha": _mean_or_nan(kt_corr_alpha),
                "prec_5_score": _mean_or_nan(prec_5_scores),
                "prec_5_score_alpha": _mean_or_nan(prec_5_scores_alpha),
            }
        )

In [32]:
# Turn the collected rows into a table and add one smoothed column per metric.
# NOTE(review): the window is the last 30 *rows* of each
# (organization, division, class) series, with at least 1 observation -- not
# 30 calendar days, despite the "30dayrollavg" column prefix. Confirm intent.
metric_df = pd.DataFrame(rows_list)
metric_df = metric_df.assign(
    **{
        f"30dayrollavg_{metric_name}": metric_df.groupby(
            ["organization", "division", "class"]
        )[metric_name].transform(
            lambda scores: scores.rolling(window=30, min_periods=1).mean()
        )
        for metric_name in (
            "kt_corr",
            "kt_corr_alpha",
            "prec_5_score",
            "prec_5_score_alpha",
        )
    }
)

In [33]:
# Preview the first five rows of the metrics table.
metric_df.head(n=5)

Unnamed: 0,organization,division,class,date,kt_corr,kt_corr_alpha,prec_5_score,prec_5_score_alpha,30dayrollavg_kt_corr,30dayrollavg_kt_corr_alpha,30dayrollavg_prec_5_score,30dayrollavg_prec_5_score_alpha
0,cpa,bikini,class a,2018-03-25,0.333333,-0.2,0.8,0.8,0.333333,-0.2,0.8,0.8
1,cpa,bikini,class a,2021-08-07,0.333333,0.333333,,,0.333333,0.066667,0.8,0.8
2,cpa,bikini,class a,2021-08-26,0.357143,-0.214286,0.6,0.4,0.34127,-0.026984,0.7,0.6
3,cpa,bikini,class a,2021-09-10,-0.333333,-0.166667,,,0.172619,-0.061905,0.7,0.6
4,cpa,bikini,class a,2021-09-11,0.0,0.333333,,,0.138095,0.017143,0.7,0.6


In [34]:
# Persist the metrics table; the DataFrame index is written as well (to_csv default).
metric_df.to_csv(path_or_buf="../data/metric_data_elo.csv")