#### Import Dependencies

In [2]:
import sys
import numpy as np
import pandas as pd
from pathlib import Path
sys.path.append('./asc-library-temp')
from library import Dataset
import plotly.express as px
import datetime

---
##### primary formatting

In [5]:
scoring_system = Path(r"c:\\Users\\RyanLing\\OneDrive - Alpha Sherpa Capital\\Desktop\\Kepler Platform (tidy)\\Scoring system")
monte_carlos = scoring_system/"monte carlo"

# r_with_sl is a csv with d0 to d2 r, market data, and trading side determined
# those with no action (nither long or short)
# r_with_sl = pd.read_csv(monte_carlos/"r_with_sl.csv", low_memory=False)
# r_with_sl = pd.read_csv(monte_carlos/"dataset2.csv", low_memory=False)
r_with_sl = pd.read_csv(monte_carlos/"stifel_abg.csv", low_memory=False)
# r_with_sl = r_with_sl[r_with_sl["max_risk"]>1000] ## filter out trades with max risk < 1000

r_with_sl.exch_region.value_counts()

Europe    8328
Name: exch_region, dtype: int64

---
#### set which region to backtest


In [6]:
df = r_with_sl[r_with_sl["exch_region"] == "Europe"].copy(deep=True)

In [9]:
df.keys()
df.rename({"entry_date": "d0_date"}, axis=1, inplace=True)

In [10]:
df['d0_date'] = pd.to_datetime(df['d0_date'])
df[["year", "week", "day"]] = df['d0_date'].dt.isocalendar()

In [11]:
first_year = df["year"].min()
all_years = df["year"]
year_diff = all_years-first_year

weeks_dict = {}
counter = 1
for year in range(year_diff.max()+1):
    this_year = df[df["year"] == first_year+year]
    num_of_weeks = this_year["week"].max() - this_year["week"].min() + 1
    for i in range(num_of_weeks):
        week_num = this_year["week"].min() + i
        weeks_dict[counter] = (first_year+year, week_num)
        counter+=1

In [15]:
weeks_dict.__len__()

262

In [16]:
## modify this to get a nice train to test ratio for production
training_weeks = 200
testing_weeks = 60
step_weeks = 6

In [17]:
start_idx = 1 # if you do not
train_start_year, train_start_week = weeks_dict[start_idx] 
train_end_year, train_end_week = weeks_dict[start_idx+training_weeks-1]
test_start_year, test_start_week = weeks_dict[start_idx+training_weeks]
test_end_year, test_end_week = weeks_dict[start_idx+training_weeks+testing_weeks-1]

In [18]:
print(train_start_year, train_start_week)
print(train_end_year, train_end_week)
print(test_start_year, test_start_week)
print(test_end_year, test_end_week)

2017 2
2020 47
2020 48
2022 3


In [19]:
train_start_day = datetime.date.fromisocalendar(year=train_start_year, week=train_start_week, day=1)
train_end_day = datetime.date.fromisocalendar(year=train_end_year, week=train_end_week, day=6)

train_range = (df['d0_date'].dt.date>train_start_day) & (df['d0_date'].dt.date<train_end_day)
train = df[train_range]

In [20]:
test_start_day = datetime.date.fromisocalendar(year=test_start_year, week=test_start_week, day=1)
test_end_day = datetime.date.fromisocalendar(year=test_end_year, week=test_end_week, day=6)

test_range = (df['d0_date'].dt.date>test_start_day) & (df['d0_date'].dt.date<test_end_day)
test = df[test_range]

In [23]:
## set which day
train_day_exp = 'd0_r'
test_day_exp = 'd0_r'
test_date = 'd0_date'

In [24]:
# pivot
table = pd.pivot_table(train, values=train_day_exp, index='analyst_pri', columns='side', aggfunc={train_day_exp:[np.count_nonzero, np.mean, np.std]})
table.columns = ['_'.join(col) for col in table.columns.values]
table.reset_index(inplace=True)

# filter analysts
long_anal = table[table["mean_long"] >= table["mean_long"].quantile(0.75)]['analyst_pri']
short_anal = table[table["mean_short"] >= table["mean_short"].quantile(0.75)]['analyst_pri']
both_side_anal = list(set(long_anal) & set(short_anal))
print(f"# of long analyst: {len(long_anal)}")
print(f"# of short analyst: {len(short_anal)}")
print(f"analyst that are in good in both long and short: {both_side_anal}")
# second filter
test_long_anal = list(set(long_anal) & set(test["analyst_pri"]))
test_short_anal = list(set(short_anal) & set(test["analyst_pri"]))
both_side_anal = list(set(both_side_anal) & set(test["analyst_pri"]))

shld_long = (test["analyst_pri"].isin(test_long_anal) & (test['side'] == 'long'))
shld_long_exp = test[shld_long][test_day_exp].mean()
shld_short = (test["analyst_pri"].isin(test_short_anal) & (test['side'] == 'short'))
shld_short_exp = test[shld_short][test_day_exp].mean()
shld_both = test["analyst_pri"].isin(both_side_anal)
###
either_long_short = test[(shld_long | shld_short)].sort_values([test_date])
either_long_short_exp = either_long_short[test_day_exp].mean()
either_long_short = either_long_short.groupby(either_long_short[test_date])[test_day_exp].sum()

either_long_short = either_long_short.reset_index()
either_long_short['cum_sum'] = either_long_short[test_day_exp].cumsum()
###
both_long_short = test[shld_both].sort_values([test_date])
both_long_short_exp = both_long_short[test_day_exp].mean()
both_long_short = both_long_short.groupby(both_long_short[test_date])[test_day_exp].sum()

## output
print('-----------------------------------')
print(f"long_exp = {shld_long_exp}")
print(f"short_exp = {shld_short_exp}")
print(f"long + short exp = {either_long_short_exp}")
print(f"star star analyst exp = {both_long_short_exp}")

# of long analyst: 31
# of short analyst: 30
analyst that are in good in both long and short: ['Haakon Amundsen', 'Tobias Fahrenholz', 'Johan Nilsson Wall', 'Edvard Hagman', 'Oskar Vikström', 'Laurits Kjærgaard', 'Joe Sutcliffe', 'Andrew Carlsen', "George O'Connor", 'Christopher W. Uhde']
-----------------------------------
long_exp = 0.10030770605469755
short_exp = 0.2906537889955643
long + short exp = 0.1568764214668427
star star analyst exp = 0.21193019446360156


In [25]:
shld_short.value_counts()

False    3275
True      148
dtype: int64

In [27]:
long_or_short = px.line(data_frame=either_long_short, x = test_date, y = 'cum_sum', title=f'.75Q (long OR short) || [train:{train_day} | test:{test_day}]')
long_or_short.show()

In [224]:
either_long_short_exp

0.45828747526864416