In [1]:
import pandas as pd
import numpy as np

In [2]:
from portfoliolab.clustering import NestedClusteredOptimisation
from submission_utils import general_weights_fixer, get_submission_markowitz

In [3]:
# Optional one-off environment setup (uncomment only if these packages are missing):
#!pip install -U numpy
#!pip install tqdm

In [4]:
from data_utils import read_all_candles


In [5]:
# Load the per-asset candle tables from the training directory.
# NOTE(review): presumably a dict keyed by asset name — it is iterated with .items() below.
TRAIN_CANDLES_DIR = "trainTimeSeries/trainTimeSeries/TrainCandles"
candles = read_all_candles(TRAIN_CANDLES_DIR)

100%|██████████| 96/96 [00:02<00:00, 39.06it/s]


In [6]:
def compute_returns(v):
    """Compute the open-to-close simple return of every candle.

    Parameters
    ----------
    v : pandas.DataFrame
        Candle table containing at least the ``"open"`` and ``"close"`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with a single ``"returns"`` column holding
        ``(close - open) / open``, indexed like ``v``.

    Notes
    -----
    The input frame is left untouched. Mutating it in place would destroy
    the raw candle columns and make a second call on the same frame fail
    because ``"open"``/``"close"`` would no longer exist.
    """
    returns = (v["close"] - v["open"]) / v["open"]
    return returns.to_frame(name="returns")

In [7]:
# Replace each asset's candle table with its series of open-to-close returns.
candles = {
    asset: compute_returns(frame)["returns"]
    for asset, frame in candles.items()
}

In [8]:
def filter_candles(candles, remove):
    """Return a new dict holding the entries of ``candles`` whose key is not in ``remove``.

    The original mapping is not modified and the surviving entries keep
    their iteration order.
    """
    kept = {}
    for name, series in candles.items():
        if name in remove:
            continue
        kept[name] = series
    return kept

In [9]:
def process_data(candles, remove=None):
    """Assemble per-asset series into one table averaged per hour.

    Parameters
    ----------
    candles : dict
        Mapping of asset name to a time-indexed pandas object (one per asset).
    remove : collection, optional
        Asset names to leave out. ``None`` (the default) keeps every asset.

    Returns
    -------
    pandas.DataFrame
        One column per remaining asset, in the mapping's iteration order,
        resampled to hourly bins using the mean of each bin.
    """
    if remove is not None:
        candles = filter_candles(candles, remove)
    # Materialise keys and values together so column labels are guaranteed
    # to line up with the mapping's iteration order.
    names = list(candles.keys())
    df = pd.concat([candles[name] for name in names], keys=names, axis=1)
    # "h" is the hourly alias; the uppercase "H" spelling is deprecated in
    # recent pandas releases.
    return df.resample("h").mean()

In [10]:
def get_returns_and_cov(df):
    """Derive the optimiser inputs from a table of returns.

    Returns a tuple ``(cov, exp_returns)`` of NumPy arrays: the covariance
    matrix of the columns of ``df`` and the column means reshaped into a
    column vector with one row per column of ``df``.
    """
    mean_column = df.mean().to_numpy().reshape(-1, 1)
    covariance = df.cov().to_numpy()
    return covariance, mean_column

In [11]:
# Hourly table of returns for every asset (nothing excluded on this first pass).
df = process_data(candles, remove=None)

In [12]:
# Covariance matrix and column vector of mean returns, both as NumPy arrays,
# used as the optimiser inputs.
cov, exp_rets = get_returns_and_cov(df)

In [13]:
nco = NestedClusteredOptimisation()

# Find optimal weights using the NCO algorithm.
# NOTE(review): `n_init` is presumably the number of clustering restarts —
# confirm against the portfoliolab documentation.
# The returned weights are not normalised (the next cell shows them summing
# to roughly 1041); they are clipped and rescaled further down.
w_nco = nco.allocate_nco(cov, exp_rets, n_init=50)

In [14]:
# Total of the raw NCO weights; they do not sum to 1 at this stage.
w_nco.squeeze().sum()

1041.4610238825103

In [15]:
# Assets with a non-positive weight are excluded from the second pass.
# The weights are positional, so they are paired with the columns of the
# frame that produced `cov` rather than with `candles`: `candles` can hold
# more assets than `df`, and zip() would then misalign them silently.
raw_weights = np.ravel(w_nco)
assert len(raw_weights) == len(df.columns), "weights and asset columns are out of sync"
assets_remove = [asset for asset, weight in zip(df.columns, raw_weights) if weight <= 0]

In [16]:
# Number of assets excluded from the second optimisation pass.
len(assets_remove)

37

In [17]:
# Second pass: rebuild the hourly return table without the excluded assets
# and recompute the optimiser inputs from it.
df = process_data(candles, remove=assets_remove)
cov, exp_rets = get_returns_and_cov(df)

In [18]:
# Re-run NCO on the reduced set of assets.
w_nco = nco.allocate_nco(cov, exp_rets, n_init=50)

In [19]:
# Total of the second-pass weights (still unnormalised).
w_nco.squeeze().sum()

1892.842931754332

In [20]:
# Per-asset weights from the second pass; some are still negative and are
# floored at zero in the next step.
w_nco.squeeze()

array([113.08056095,  15.92964881,  -0.63444976,   3.18555745,
        27.02154565,  80.5896123 ,   1.77693784,  75.26044408,
         8.07663267,   3.92071602,  58.91617725,  28.56665186,
        64.41992821,  11.35954379, -20.84564187,  16.01144804,
        -0.31735663, -46.71909458,  -5.55576534,  14.30814795,
        29.34903309,  17.62361004,  35.86889157,  20.97444299,
        56.79017964, -11.79533933, -16.44598389,  18.02170913,
        52.9091972 , -10.75396608,  76.37001455,   1.75188238,
       -41.1898639 ,  40.30039126, 188.06554606,  60.90849645,
        -1.76789461, 250.50073647,   9.57079344, -31.37414446,
       122.8296087 ,   3.76284802,  33.87192638,  41.41620233,
         2.293209  ,  22.70023584,  -8.11461621,  26.02874701,
        -9.8985491 ,  53.81901408,  74.02345266,   5.07390274,
        54.62195886,  99.59735319,  27.17595082,   9.72618794,
        43.10202585,  63.94785785,  32.83663909])

In [21]:
# Long-only constraint: floor every negative weight at zero (no upper bound).
w_transformed = np.clip(np.squeeze(w_nco), a_min=0, a_max=None)

In [22]:
# Rescale the clipped weights so they sum to 1.
# NOTE: this divides by zero if every weight was clipped to 0.
w_transformed = w_transformed / w_transformed.sum()

In [23]:
# Sanity check: the normalised weights should total 1 up to floating-point error.
w_transformed.sum()

0.9999999999999999

In [24]:
# Same total as computed in the previous cell; nothing has changed in between.
w_transformed.sum()

0.9999999999999999

In [25]:
# Post-process the weights with the project helper imported from `submission_utils`.
# NOTE(review): judging by the 3-decimal values printed further down, this
# presumably rounds the weights while keeping their total at 1 — confirm in the helper.
weights = general_weights_fixer(w_transformed)

In [26]:
# Exact (not tolerance-based) equality check on the total of the fixed weights.
sum(weights) == 1.0

True

In [27]:
# Renormalise the fixed weights by their total.
weights = weights / sum(weights)

In [28]:
# Total of the final weights.
sum(weights)

1.0

In [29]:
# Final per-asset weights, in the column order of `df`.
weights

array([0.054, 0.008, 0.   , 0.002, 0.013, 0.038, 0.001, 0.036, 0.004,
       0.002, 0.028, 0.014, 0.031, 0.005, 0.   , 0.008, 0.   , 0.   ,
       0.   , 0.007, 0.014, 0.008, 0.017, 0.01 , 0.027, 0.   , 0.   ,
       0.009, 0.025, 0.   , 0.036, 0.001, 0.   , 0.019, 0.09 , 0.029,
       0.   , 0.117, 0.005, 0.   , 0.059, 0.002, 0.016, 0.02 , 0.001,
       0.011, 0.   , 0.012, 0.   , 0.026, 0.035, 0.002, 0.026, 0.047,
       0.013, 0.005, 0.021, 0.03 , 0.016])

In [30]:
# Map each asset column to its final weight (both sequences share the same order).
dict_weights = dict(zip(df.columns, weights))

In [31]:
# The mapping must carry the same total as the weight array.
sum(dict_weights.values())

1.0

In [32]:
def get_submission_markowitz(weights, assets, template_path="./submission/submission.csv"):
    """Build a submission table that repeats one fixed allocation on every row.

    NOTE: this definition shadows the function of the same name imported
    from ``submission_utils`` at the top of the notebook.

    Parameters
    ----------
    weights : mapping
        Allocation per asset, keyed by the names listed in ``assets``.
    assets : iterable of str
        Asset names to emit, in output column order. A ``"_close"``
        substring in a name is stripped from the resulting column label.
    template_path : str, optional
        CSV file providing the ``"eod_ts"`` column, one row per timestamp.
        Defaults to the path that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Column ``"eod_ts"`` followed by one ``"allo_<asset>"`` column per
        asset, each holding that asset's weight on every row.
    """
    timestamps = pd.read_csv(template_path)["eod_ts"]
    n_rows = len(timestamps)
    allocations = {
        f"allo_{asset.replace('_close', '')}": [weights[asset]] * n_rows
        for asset in assets
    }
    return pd.DataFrame({"eod_ts": timestamps, **allocations})

In [108]:
# Constant allocation table with one row per timestamp of the submission template.
subm = get_submission_markowitz(weights=dict_weights, assets=df.columns)


In [109]:
# Preview the first rows of the submission table.
subm.head()

Unnamed: 0,eod_ts,allo_NCT,allo_OOS,allo_GFJ,allo_USX,allo_TDD,allo_CIS,allo_UYZ,allo_TRO,allo_ERO,...,allo_ZUJ,allo_BSX,allo_LEN,allo_FNM,allo_ERQ,allo_OJG,allo_BOT,allo_DIG,allo_PHI,allo_RWJ
0,2020-08-18 00:00:00,0.063,0.006,0.0,0.006,0.014,0.034,0.002,0.043,0.0,...,0.023,0.036,0.003,0.025,0.047,0.013,0.012,0.013,0.036,0.015
1,2020-08-18 01:00:00,0.063,0.006,0.0,0.006,0.014,0.034,0.002,0.043,0.0,...,0.023,0.036,0.003,0.025,0.047,0.013,0.012,0.013,0.036,0.015
2,2020-08-18 02:00:00,0.063,0.006,0.0,0.006,0.014,0.034,0.002,0.043,0.0,...,0.023,0.036,0.003,0.025,0.047,0.013,0.012,0.013,0.036,0.015
3,2020-08-18 03:00:00,0.063,0.006,0.0,0.006,0.014,0.034,0.002,0.043,0.0,...,0.023,0.036,0.003,0.025,0.047,0.013,0.012,0.013,0.036,0.015
4,2020-08-18 04:00:00,0.063,0.006,0.0,0.006,0.014,0.034,0.002,0.043,0.0,...,0.023,0.036,0.003,0.025,0.047,0.013,0.012,0.013,0.036,0.015


In [115]:
# Every row's allocations must total 1. Only numeric columns are summed so the
# check also works while `eod_ts` is still a regular (string) column, and the
# comparison is tolerance-based because float sums rarely equal 1 exactly.
np.allclose(subm.select_dtypes("number").sum(axis=1), 1.0)

True

In [114]:
# Total allocation of the first row. Restricted to numeric columns so a
# string `eod_ts` column (if it has not been moved to the index) is ignored.
subm.select_dtypes("number").sum(axis=1).iloc[0]

1.0

In [112]:
# Move the timestamp into the index so row-wise arithmetic only touches the
# allocation columns. Guarded so that re-running the cell is a no-op instead
# of raising KeyError once `eod_ts` is already the index.
if "eod_ts" in subm.columns:
    subm = subm.set_index("eod_ts")

In [113]:
# Divide each row by its own total so every timestamp's allocations sum to 1.
subm = subm.div(subm.sum(axis=1), axis=0)


In [118]:
# Count negative allocations across *all* asset columns. `eod_ts` is the
# index at this point, so slicing with iloc[:, 1:] would skip the first asset.
int((subm.select_dtypes("number").to_numpy() < 0).sum())

0

In [119]:
# Preview the table after `eod_ts` became the index and rows were renormalised.
subm.head()

Unnamed: 0_level_0,allo_NCT,allo_OOS,allo_GFJ,allo_USX,allo_TDD,allo_CIS,allo_UYZ,allo_TRO,allo_ERO,allo_ZAB,...,allo_ZUJ,allo_BSX,allo_LEN,allo_FNM,allo_ERQ,allo_OJG,allo_BOT,allo_DIG,allo_PHI,allo_RWJ
eod_ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-08-18 00:00:00,0.063,0.006,0.0,0.006,0.014,0.034,0.002,0.043,0.0,0.003,...,0.023,0.036,0.003,0.025,0.047,0.013,0.012,0.013,0.036,0.015
2020-08-18 01:00:00,0.063,0.006,0.0,0.006,0.014,0.034,0.002,0.043,0.0,0.003,...,0.023,0.036,0.003,0.025,0.047,0.013,0.012,0.013,0.036,0.015
2020-08-18 02:00:00,0.063,0.006,0.0,0.006,0.014,0.034,0.002,0.043,0.0,0.003,...,0.023,0.036,0.003,0.025,0.047,0.013,0.012,0.013,0.036,0.015
2020-08-18 03:00:00,0.063,0.006,0.0,0.006,0.014,0.034,0.002,0.043,0.0,0.003,...,0.023,0.036,0.003,0.025,0.047,0.013,0.012,0.013,0.036,0.015
2020-08-18 04:00:00,0.063,0.006,0.0,0.006,0.014,0.034,0.002,0.043,0.0,0.003,...,0.023,0.036,0.003,0.025,0.047,0.013,0.012,0.013,0.036,0.015


In [120]:
# Write the final submission. `eod_ts` is the index here, so index=True is
# what keeps the timestamp column in the file.
subm.to_csv("submission_1602_3.csv", index=True, header=True)

In [None]:
# Column-vector view of the NCO weights.
# NOTE(review): `w_nco2` is not referenced by any other cell visible in this notebook.
w_nco2 = w_nco.reshape(-1, 1)

In [32]:
# Convex-optimisation (CVO) baseline on the same inputs as the NCO run.
# `cov` and `exp_rets` come from get_returns_and_cov, which already returns
# NumPy arrays with the means as a column vector, so they are passed as-is
# (the earlier `cov.values` / `returns.values` referred to names and
# attributes that do not exist in this notebook).
w_cvo = nco.allocate_cvo(cov, exp_rets)

In [34]:
# Total of the raw CVO weights, for comparison with the NCO totals above.
w_cvo.squeeze().sum()

1086.7710025806819

In [None]:
# Compare the NCO solutions to the CVO ones using MCOS
# Parameters are: 10 simulations, 100 observations in a simulation
# goal of minimum variance, no LW shrinkage
# `exp_rets` and `cov` are the estimates computed earlier in this notebook;
# the previous `assets_mean` / `assets_cov` names were never defined here.
# NOTE: this rebinds `w_cvo` and `w_nco` to the simulation results.
w_cvo, w_nco = nco.allocate_mcos(exp_rets, cov, 100, 10, 0.01, True, False)

# Find the errors in estimations of NCO and CVO in simulations
err_cvo, err_nco = nco.estim_errors_mcos(w_cvo, w_nco, exp_rets, cov, True)