<a href="https://colab.research.google.com/github/jonrtaylor/twitch/blob/master/latest_correlations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install numerapi
import datetime
import pathlib

import pandas as pd
import numerapi
import matplotlib.pyplot as plt

# Today's date as an ISO "YYYY-MM-DD" string; used in cache file names and
# in the date filters further down.
today = datetime.date.today().isoformat()

# Client used for every API call in this notebook.
api = numerapi.NumerAPI()

# Start from the current round, but step back one round on Monday-Wednesday
# (weekday() 0-2). NOTE(review): presumably the newest round has no daily
# scores yet that early in the week - confirm.
current_round = api.get_current_round()
is_early_week = datetime.date.today().weekday() < 3
round_number = current_round - 1 if is_early_week else current_round

def get_round_df(round_number, today=today):
    """Return the details of one round as a DataFrame, using an on-disk cache.

    The cache lives in the ``daily_scores_cache`` directory (relative to the
    current working directory, created if missing) and holds one file per
    round and day, named ``r{round_number}_{today}.csv.xz``.

    Args:
        round_number: Round to load; passed to ``api.round_details`` and
            stored in a ``round_number`` column of the result.
        today: Date string used in the cache file name and compared against
            the ``date`` column. Defaults to the module-level ``today`` as it
            was when this function was defined.

    Returns:
        The cached frame (with ``date`` parsed by ``pd.to_datetime``) when a
        cache file exists; otherwise the freshly fetched frame with its first
        two columns swapped.
    """
    cache_root = pathlib.Path("daily_scores_cache")
    cache_root.mkdir(exist_ok=True)
    cache_path = cache_root / f"r{round_number}_{today}.csv.xz"

    if not cache_path.exists():
        scores = pd.DataFrame(api.round_details(round_number))
        scores['round_number'] = round_number

        # Swap the first two columns; the rest keep their order.
        columns = list(scores.columns)
        reordered = [columns[1], columns[0], *columns[2:]]
        scores = scores[reordered]

        # Only persist once the round already has a row dated `today`, so a
        # fetch made before today's scores exist is not frozen into the cache.
        if (scores.date == today).any():
            scores.to_csv(cache_path)
        return scores

    cached = pd.read_csv(cache_path, index_col=0)
    # CSV stores dates as text; restore them to datetimes.
    cached.date = pd.to_datetime(cached.date)
    return cached

# Load the four most recent rounds (round_number, round_number - 1, ...)
# and stack them into a single frame.
dfs = [get_round_df(round_number - offset, today) for offset in range(4)]

df = pd.concat(dfs)

# Percentile rank of each correlation score within its round and date.
# 'correlation' is selected explicitly so rank() returns a single Series:
# ranking the whole grouped frame produces one column per remaining column
# (and tries to rank non-numeric ones such as usernames), which cannot be
# assigned to the single 'percentile_rank' column.
df['percentile_rank'] = (
    df.groupby(['round_number', 'date'])['correlation'].rank(pct=True)
)

# Keep only today's daily scores and those from `days_delta` days earlier.
# The look-back is 1 day, except on Monday and Tuesday (weekday() < 2) where
# it is 3 days. NOTE(review): presumably this skips days without scores -
# confirm the Tuesday case.
days_delta = 3 if datetime.date.today().weekday() < 2 else 1
df = df[(df.date == today) | (df.date + pd.Timedelta(days=days_delta) == today)].copy()

names = ['arbitrage', 'arbitrage2', 'arbitrage3', 'arbitrage4', 'leverage',
         'leverage2', 'leverage3', 'culebracapital', 'culebracapital2',
         'culebracapital3', 'integration_test']
# Replace the model names above with whichever models you want!

# Collect the rows of the selected models, grouped in the order of `names`.
# A single pd.concat is used because DataFrame.append was removed in
# pandas 2.0 and, inside a loop, re-copies the accumulated frame each time.
all_data = pd.concat(
    [df[df.username == name] for name in names],
    ignore_index=True,
)

# Change from the earlier date to the later one, per round and model.
# Only the two score columns are diffed; the result is aligned back onto
# all_data by index (unique thanks to ignore_index=True above).
diffs = (
    all_data.sort_values(by='date')
    .groupby(['round_number', 'username'])[['correlation', 'percentile_rank']]
    .diff()
)
all_data['corr_diff'] = diffs['correlation']
all_data['percentile_diff'] = diffs['percentile_rank']

# Keep only today's rows; they now carry the differences computed above.
all_data = all_data[all_data.date == today].copy()



In [None]:
def color_styling(val):
    """Return the CSS text-colour declaration for one table cell.

    Args:
        val: Scalar cell value (as passed by ``Styler.applymap``).

    Returns:
        ``'color: red'`` for negative values, ``'color: green'`` for zero and
        positive values, and ``''`` (no styling) for missing values.
    """
    # A missing value (NaN/None) is neither a gain nor a loss; without this
    # guard NaN fails the `< 0` test and would be painted green.
    if pd.isna(val):
        return ''
    color = 'red' if val < 0 else 'green'
    return f'color: {color}'

In [None]:
# Per-round mean of the selected models' scores, colour-coded by sign.
summary_cols = ['correlation', 'percentile_rank', 'corr_diff', 'percentile_diff']
colored_cols = ['correlation', 'corr_diff', 'percentile_diff']
(
    all_data
    .sort_values(by=['round_number', 'correlation'], ascending=False)
    .groupby('round_number')[summary_cols]
    .mean()
    .style
    .applymap(color_styling, subset=colored_cols)
)

In [None]:
# Every selected model's row, newest round and highest correlation first,
# colour-coded by sign.
colored_cols = ['correlation', 'corr_diff', 'percentile_diff']
(
    all_data
    .sort_values(by=['round_number', 'correlation'], ascending=False)
    .reset_index(drop=True)
    .style
    .applymap(color_styling, subset=colored_cols)
)

In [None]:
print(f"Top 10 models with highest correlation scores in r{round_number}")
# Rows of the selected round on the most recent date present in df.
latest_scores = df[(df.round_number == round_number) & (df.date == df.date.max())]
# Take the ten highest correlations, then display them ordered by percentile rank.
top_ten = latest_scores.sort_values(by="correlation", ascending=False).head(10)
top_ten.sort_values(by="percentile_rank", ascending=False)