# Validation Check

In [None]:
# load libraries
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numerapi import NumerAPI
from utils.api_keys import PUBLIC_ID, SECRET_KEY

In [None]:
# Build the Numerai API client from the imported credentials, then look up
# and print the current round number.
napi = NumerAPI(
    public_id=PUBLIC_ID,
    secret_key=SECRET_KEY,
)
current_round = napi.get_current_round()
print(f"Current round #: {current_round}")

In [None]:
# era corr function
def compute_corr_by_era(val_df, model_list, corr_method):
    """Correlate each model's predictions with the target, separately per era.

    Parameters
    ----------
    val_df : pandas.DataFrame
        Must contain an ``era`` column, a ``target`` column and one column
        for every name in ``model_list``.
    model_list : list of str
        Names of the prediction columns to score against ``target``.
    corr_method : str or callable
        Forwarded unchanged to pandas' ``corr`` (e.g. ``"pearson"`` or
        ``"spearman"``).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``era`` with one column per model. Eras in which any
        model's correlation is NaN are dropped.
    """
    corr_by_model = []
    for m in model_list:
        # One 2x2 correlation matrix per era, stacked under an (era, column) index.
        era_corr = val_df.groupby("era")[["target", m]].corr(method=corr_method)
        # Select the "target" row of each matrix by label and read off its
        # correlation with `m`; the result is a Series indexed by era, named `m`.
        corr_by_model.append(era_corr.xs("target", level=1)[m])
    return pd.concat(corr_by_model, axis=1).dropna()

---
# Load validation datasets

In [None]:
# Read the validation eras/targets and each model's validation predictions.
# reset_index() turns the parquet index into a regular column
# (presumably `id`, which the later join uses as its key — TODO confirm).
validation_data = pd.read_parquet("data/validation.parquet", columns=["era", "target"]).reset_index()
foxhound_val = pd.read_csv("predictions/dh_foxhound_v0_val_preds.csv")
deadcell_val = pd.read_csv("predictions/dh_deadcell_v0_val_preds.csv")
cobra_val = pd.read_csv("predictions/dh_cobra_v0_val_preds.csv")
beautybeast_val = pd.read_csv("predictions/dh_beautybeast_v0_val_preds.csv")
skulls_val = pd.read_csv("predictions/dh_skulls_v0_val_preds.csv")
desperado_val = pd.read_csv("predictions/dh_desperado_v0_val_preds.csv")

# Print the row count of every source so mismatched lengths are visible
# before the frames are joined.
row_counts = {
    "Numerai validation": len(validation_data),
    "Foxhound predictions": len(foxhound_val),
    "Deadcell predictions": len(deadcell_val),
    "Cobra predictions": len(cobra_val),
    "BeautyBeast predictions": len(beautybeast_val),
    "Skulls predictions": len(skulls_val),
    "Desperado predictions": len(desperado_val),
}
label_width = max(len(label) for label in row_counts)
for label, n_rows in row_counts.items():
    print(f"    {label:<{label_width}} : {n_rows}")

In [None]:
# Join the validation targets with every model's predictions on `id`.
# The first merge uses pandas' default suffixes; each later merge keeps the
# left-hand names and tags clashing right-hand columns with a numeric suffix.
val = validation_data.merge(right=foxhound_val, how="inner", on="id")
later_merges = (
    (deadcell_val, "2"),
    (cobra_val, "3"),
    (beautybeast_val, "4"),
    (skulls_val, "5"),
    (desperado_val, "6"),
)
for preds, right_suffix in later_merges:
    val = val.merge(right=preds, how="inner", on="id", suffixes=("", right_suffix))

# Positional rename: assumes each prediction file contributes exactly one
# non-id column, in merge order — TODO confirm against the CSV schema.
val.columns = [
    "id",
    "era",
    "target",
    "foxhound",
    "deadcell",
    "cobra",
    "beautybeast",
    "skulls",
    "desperado",
]
val.info()

In [None]:
# Preview the first five rows of the joined validation frame.
val.head(n=5)

---
# Get correlation by era

In [None]:
# Prediction columns in `val` that are scored against the target.
model_names = [
    "foxhound",
    "deadcell",
    "cobra",
    "beautybeast",
    "skulls",
    "desperado",
]

In [None]:
# Per-era correlation of every model with the target, once per method.
pearson_corr, spearman_corr = (
    compute_corr_by_era(val_df=val, model_list=model_names, corr_method=method)
    for method in ("pearson", "spearman")
)

In [None]:
# Rolling-mean correlation per model, Pearson on the left and Spearman on the right.
ROLLING_FACTOR = 52  # rolling window length, in eras

fig, axes = plt.subplots(nrows=1, ncols=2, sharex=True, sharey=True, figsize=(12, 5))
panels = (
    (axes[0], pearson_corr, "Pearson's Correlations"),
    (axes[1], spearman_corr, "Spearman's Rank Correlations"),
)
for ax, corr_by_era, panel_title in panels:
    corr_by_era.rolling(ROLLING_FACTOR).mean().plot(ax=ax)
    ax.set_title(panel_title)
    # zero line separates positive from negative correlation
    ax.axhline(0, color="black", linewidth=1)
plt.show()

In [None]:
# Sharpe ratio per model: mean of the per-era correlations divided by their
# standard deviation, computed for both correlation methods.
sharpe_df = pd.DataFrame(
    {
        "sharpe_pearson": pearson_corr.mean() / pearson_corr.std(),
        "sharpe_spearman": spearman_corr.mean() / spearman_corr.std(),
    }
)
sharpe_df.round(2)

In [None]:
# Share of validation eras in which each model has the highest correlation.
# Models are ranked within each era; rank 1 is the highest correlation.
pearson_rank_per_era = pearson_corr.rank(axis=1, ascending=False)
spearman_rank_per_era = spearman_corr.rank(axis=1, ascending=False)

# `.eq(1).mean()` gives, per model column, the fraction of eras ranked first.
# This covers whatever model columns the correlation frames hold, so no model
# names are hard-coded here.
# With `rank`'s default tie handling, models tied for first share an averaged
# rank above 1, so a tied era is credited to no model.
rank_as_top = pd.DataFrame(
    {
        "pearson_corr": pearson_rank_per_era.eq(1).mean(),
        "spearman_corr": spearman_rank_per_era.eq(1).mean(),
    }
).T

rank_as_top.round(2)


# Submit for Validation Diagnosis

In [None]:
# Load the model-name JSON file used by the diagnostics upload.
# NOTE: this rebinds `model_names`, which earlier in the notebook is the list
# of prediction column names; re-running the correlation cells after this one
# would pick up the JSON content instead.
with open("data/model_names.json", mode="r") as names_file:
    model_names = json.load(names_file)

model_names

In [None]:
# Upload each model's validation predictions for diagnostics.
# Each key of `model_names` selects the predictions file; its value is passed as the model id.
for model_name, model_id in model_names.items():
    print(f"Submitting validation predictions for: {model_name}...")
    napi.upload_diagnostics(
        file_path=f"predictions/{model_name}_val_preds.csv",
        model_id=model_id,
    )