In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

In [None]:
# CSV with one evidence value per match; read below with "match_id" as index.
EVIDENCE_FILENAME = "estimations/raago_tobi/prior_evidence_dynamic_time.csv"
# CSV with the matches themselves; read below with "id" as index.
MATCHES_FILENAME = "data/aago/aago_original_filtered.adapted.csv"
# CSV with the priors; read below with ("event_id", "player_id") as index.
PRIORS_FILENAME = "estimations/raago_tobi/priors.csv"

In [None]:
# Evidence table, indexed by match id so it can be joined index-on-index
# with the matches table further down.
df_evidence = pd.read_csv(
    filepath_or_buffer=EVIDENCE_FILENAME,
    index_col="match_id",
)
df_evidence

In [None]:
# Histogram of the evidence values; both axes use a log scale.
sns.histplot(
    x="evidence",
    data=df_evidence,
    log_scale=(True, True),
    bins=50,
)

In [None]:
# Fraction of matches whose evidence is strictly greater than 0.5.
int(df_evidence["evidence"].gt(0.5).sum()) / len(df_evidence)

In [None]:
def geometric_mean(data):
    """Return the geometric mean of ``data`` as a Python float.

    The value is computed as ``exp(mean(log(data)))``: the natural log is
    taken element-wise with NumPy, averaged, and exponentiated again.

    Parameters
    ----------
    data
        Numeric values accepted by ``np.log`` whose result exposes
        ``.mean()`` (e.g. a NumPy array or a pandas Series).
    """
    mean_of_logs = np.log(data).mean()
    return math.exp(mean_of_logs)

In [None]:
# Geometric mean of the evidence over every match in the table.
geometric_mean(df_evidence.loc[:, "evidence"])

In [None]:
# Matches table, indexed by the match "id" column.
df_matches = pd.read_csv(
    filepath_or_buffer=MATCHES_FILENAME,
    index_col="id",
)
df_matches

In [None]:
# Priors table with a two-level index (event, player); the join below looks
# rows up by exactly that pair.
df_priors = pd.read_csv(
    filepath_or_buffer=PRIORS_FILENAME,
    index_col=["event_id", "player_id"],
)
df_priors

In [None]:
# One row per match: evidence + match details + the priors of both players.
df_join = (
    df_evidence
    # Index-on-index join: evidence "match_id" against matches "id".
    .join(df_matches, how="inner")
    # Priors of the black player for the event the match belongs to.
    .join(df_priors, on=["event_id", "black"], how="inner", rsuffix="_black")
    # Priors of the white player; these columns clash with the ones just
    # added, so they receive the "_white" suffix.
    .join(df_priors, on=["event_id", "white"], how="inner", rsuffix="_white")
    # rsuffix is only applied on a name clash, so the black player's prior
    # columns are renamed explicitly here.
    .rename(
        columns={
            "category": "category_black",
            "mu": "mu_black",
            "sigma": "sigma_black",
            "age_in_days": "age_in_days_black",
        }
    )
    .assign(
        # Combined age of both players' priors; a missing age counts as 0.
        age_in_days=lambda joined: (
            joined["age_in_days_black"].fillna(value=0.0)
            + joined["age_in_days_white"].fillna(value=0.0)
        ),
        log10_evidence=lambda joined: np.log10(joined["evidence"]),
    )
)


def mu_by_category(category):
    """Convert a rank label such as ``"5k"`` or ``"2d"`` into a signed number.

    The label is split into a leading integer ``n`` and a trailing rank
    letter, which is matched case-insensitively:

    * ``"<n>k"`` maps to ``-(n + 0.5)``
    * ``"<n>d"`` maps to ``n + 0.5``

    Parameters
    ----------
    category : str
        Rank label. ``None`` and float NaN (how pandas represents a missing
        cell) are accepted and treated as "no category".

    Returns
    -------
    float
        The signed value, or NaN when the category is missing or its rank
        letter is neither ``k`` nor ``d``. NaN is returned explicitly instead
        of falling off the end of the function with ``None``, so the result
        stays numeric and can take part in arithmetic.

    Raises
    ------
    ValueError
        If the text before the rank letter is not an integer.
    """
    # A missing cell is a float NaN (or None), which cannot be sliced; report
    # it as NaN so a caller can handle it with fillna().
    if category is None or (isinstance(category, float) and math.isnan(category)):
        return float("nan")

    num = int(category[:-1])
    rank = category[-1].lower()
    if rank == "k":
        return -(num + 0.5)
    if rank == "d":
        return num + 0.5
    # Unrecognised rank letter.
    return float("nan")

# Absolute gap between the value implied by a player's category and the prior
# mean actually used for that player; gaps that come out as NaN are set to 0.
df_join["category_mu_diff_black"] = (
    df_join["category_black"]
    .apply(mu_by_category)
    .sub(df_join["mu_black"])
    .abs()
    .fillna(value=0)
)
df_join["category_mu_diff_white"] = (
    df_join["category_white"]
    .apply(mu_by_category)
    .sub(df_join["mu_white"])
    .abs()
    .fillna(value=0)
)

df_join

In [None]:
# Alternative views of evidence by handicap, currently disabled:
# sns.countplot(data=df_join[df_join["evidence"] <1e-4], x="handicap")
# sns.histplot(data=df_join, x="evidence", hue="handicap", bins=20, log_scale=(True,True), palette="tab10", multiple="stack")

# Spread of log10 evidence for each handicap value.
sns.boxplot(x="handicap", y="log10_evidence", data=df_join)

In [None]:
# Handicap counts restricted to matches with log10 evidence below -2.
sns.countplot(
    x="handicap",
    data=df_join.loc[df_join["log10_evidence"] < -2],
)

In [None]:
# Handicap counts over all joined matches, as a baseline for the filtered plot.
sns.countplot(x="handicap", data=df_join)

In [None]:
# Spread of log10 evidence for each value of "winner".
sns.boxplot(x="winner", y="log10_evidence", data=df_join)

In [None]:
# Winner counts restricted to matches with log10 evidence below -2.
sns.countplot(
    x="winner",
    data=df_join.loc[df_join["log10_evidence"] < -2],
)

In [None]:
# Events that contain matches with log10 evidence below -4.
sns.countplot(
    x="event_id",
    data=df_join.loc[df_join["log10_evidence"] < -4],
)
# Total number of joined matches that belong to event 69.
len(df_join.loc[df_join["event_id"] == 69])

In [None]:
# log10 evidence against the combined "age_in_days" of both players' priors.
sns.scatterplot(x="age_in_days", y="log10_evidence", data=df_join)

In [None]:
# log10 evidence against the black player's category/mu gap.
sns.scatterplot(x="category_mu_diff_black", y="log10_evidence", data=df_join)

In [None]:
# log10 evidence against the white player's category/mu gap.
sns.scatterplot(x="category_mu_diff_white", y="log10_evidence", data=df_join)

In [None]:
# Matches with log10 evidence below -8, shown with the columns needed to
# inspect them: outcome, handicap, and both players' category and prior.
df_join.loc[
    df_join["log10_evidence"] < -8,
    [
        "winner",
        "handicap",
        "category_black",
        "category_white",
        "mu_black",
        "sigma_black",
        "mu_white",
        "sigma_white",
    ],
]

In [None]:
# Number of matches with log10 evidence below -4 per value of "black".
sns.countplot(
    x="black",
    data=df_join.loc[df_join["log10_evidence"] < -4],
)