<a href="https://colab.research.google.com/github/jonrtaylor/twitch/blob/master/latest_correlations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!pip install numerapi
import datetime
import pathlib

import pandas as pd
import numerapi
import matplotlib.pyplot as plt

# Run date as an ISO "YYYY-MM-DD" string; it is used both in cache file names
# and in comparisons against the `date` column further down.
today = datetime.date.today().isoformat()

# API client, constructed without arguments.
api = numerapi.NumerAPI()

# Start from the round the API reports as current. On Monday through
# Wednesday (weekday() values 0, 1, 2) step back one round.
# NOTE(review): presumably the newest round has no daily scores yet that
# early in the week -- confirm against the round schedule.
round_number = api.get_current_round()
if datetime.date.today().weekday() in (0, 1, 2):
    round_number -= 1

def get_round_df(round_number, today=today):
    """Return the score rows for one round, using an on-disk CSV cache.

    The cache lives in ``daily_scores_cache/`` (created on demand) and is keyed
    by round number and by ``today``, so each calendar day gets its own file.

    Args:
        round_number: Round to load; passed to ``api.round_details``.
        today: Date string used in the cache file name and to decide whether
            the fetched data is worth caching. Defaults to the module-level
            ``today`` captured when this function was defined.

    Returns:
        A DataFrame of the round's details with a ``round_number`` column
        added and the first two columns swapped. When read from the cache,
        ``date`` is converted with ``pd.to_datetime``.
    """
    cache_root = pathlib.Path("daily_scores_cache")
    cache_root.mkdir(exist_ok=True)
    cache_path = cache_root / f"r{round_number}_{today}.csv.xz"

    # Cache hit: reload and restore the datetime dtype lost in the CSV round-trip.
    if cache_path.exists():
        cached = pd.read_csv(cache_path, index_col=0)
        cached['date'] = pd.to_datetime(cached.date)
        return cached

    # Cache miss: fetch from the API and tag every row with its round.
    fetched = pd.DataFrame(api.round_details(round_number))
    fetched['round_number'] = round_number

    # Swap the first two columns, leaving the rest in their original order.
    column_names = list(fetched.columns)
    swapped = [column_names[1], column_names[0], *column_names[2:]]
    fetched = fetched[swapped]

    # Only persist once today's scores are present, so that a fetch made
    # before they are published is not frozen into today's cache file.
    if (fetched.date == today).any():
        fetched.to_csv(cache_path)
    return fetched

# Get the last 4 rounds, newest first, starting at `round_number`.
dfs = [get_round_df(round_number - offset, today) for offset in range(4)]

# ignore_index gives the combined frame a unique RangeIndex. Each per-round
# frame carries its own index, and duplicate labels would make the
# index-aligned column assignments below ambiguous.
df = pd.concat(dfs, ignore_index=True)

# Calculate percentile rank of the correlation score within each round and date.
# The `correlation` column is ranked explicitly: ranking the whole frame
# produces one rank column per remaining column, which cannot be assigned to
# a single `percentile_rank` column.
df['percentile_rank'] = (
    df.groupby(['round_number', 'date'])['correlation'].rank(pct=True)
)

# Filter to today's and yesterday's daily scores.
df = df[(df.date == today) | (df.date + pd.Timedelta(days=1) == today)].copy()

names = ['arbitrage', 'arbitrage2', 'arbitrage3', 'arbitrage4', 'leverage',
         'leverage2', 'leverage3', 'culebracapital', 'culebracapital2',
         'culebracapital3', 'integration_test']
# Replace the model names above with whichever models you want!

# Keep only the selected models with one vectorised filter.
# (DataFrame.append was removed in pandas 2.0, and growing a frame row-block
# by row-block in a loop is quadratic.) Row order follows `df`; the display
# cells sort explicitly, so it does not affect what is shown.
all_data = df[df.username.isin(names)].reset_index(drop=True)

# Calculate differences from yesterday: within each (round, model) pair,
# ordered by date, subtract the previous row. The two score columns are
# selected before diff() so that `date` is not differenced as well.
score_diffs = (
    all_data.sort_values(by='date')
    .groupby(['round_number', 'username'])[['correlation', 'percentile_rank']]
    .diff()
    .rename(columns={'correlation': 'corr_diff',
                     'percentile_rank': 'percentile_diff'})
)
# join() aligns on the unique index, which undoes the date sort above.
all_data = all_data.join(score_diffs)

# Replace with only data from today.
all_data = all_data[(all_data.date == today)].copy()



In [6]:
def color_styling(val):
    """Return the CSS text-colour rule for a single table cell.

    Args:
        val: Scalar cell value, compared against zero.

    Returns:
        ``'color: red'`` when ``val`` is negative, ``'color: green'`` when it
        is zero or positive, and ``''`` (no styling) when it is missing.
    """
    # NaN fails every comparison, so without this guard a missing value
    # would fall through to green and look like a positive result.
    if pd.isna(val):
        return ''
    color = 'red' if val < 0 else 'green'
    return 'color: %s' % color

In [7]:
# Per-round averages across the selected models, with negative values shown
# in red and the rest in green (percentile_rank is left unstyled).
(
    all_data
    .sort_values(by=['round_number', 'correlation'], ascending=False)
    .groupby('round_number')[['correlation', 'percentile_rank', 'corr_diff', 'percentile_diff']]
    .mean()
    .style
    .applymap(color_styling, subset=['correlation', 'corr_diff', 'percentile_diff'])
)

Unnamed: 0_level_0,correlation,percentile_rank,corr_diff,percentile_diff
round_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
248,0.021724,0.54293,-0.004422,-0.015043
249,0.036976,0.630127,-0.001804,-0.073935
250,0.020072,0.498569,0.007985,0.115697
251,0.001549,0.443617,,


In [8]:
# Today's raw stats for every selected model, newest round first and best
# correlation first within each round, with the same red/green colouring.
(
    all_data
    .sort_values(by=['round_number', 'correlation'], ascending=False)
    .reset_index(drop=True)
    .style
    .applymap(color_styling, subset=['correlation', 'corr_diff', 'percentile_diff'])
)

Unnamed: 0,correlation,date,username,round_number,percentile_rank,corr_diff,percentile_diff
0,0.024179,2021-02-18 00:00:00,arbitrage3,251,0.953496,,
1,0.020571,2021-02-18 00:00:00,leverage3,251,0.924707,,
2,0.017887,2021-02-18 00:00:00,culebracapital,251,0.884847,,
3,0.011183,2021-02-18 00:00:00,leverage,251,0.696299,,
4,0.000926,2021-02-18 00:00:00,culebracapital3,251,0.422651,,
5,0.000783,2021-02-18 00:00:00,integration_test,251,0.405252,,
6,-0.003242,2021-02-18 00:00:00,arbitrage,251,0.265739,,
7,-0.010027,2021-02-18 00:00:00,arbitrage2,251,0.12243,,
8,-0.012156,2021-02-18 00:00:00,leverage2,251,0.096805,,
9,-0.014074,2021-02-18 00:00:00,arbitrage4,251,0.073711,,
