# Imports and models

In [56]:
import random
from time import time

import numpy as np
import pandas as pd

In [57]:
# Baseline strategies this notebook can evaluate; MODE picks the one to run.
modes = [
    "buy_and_hold",
    "risk_parity",
    "ts_mom",
    "csec_mom",
]
MODE = modes[-1]

### Basic functions

In [58]:
from empyrical import (
    annual_return,
    annual_volatility,
    downside_risk,
    max_drawdown,
    sharpe_ratio,
    sortino_ratio,
    tail_ratio,
    value_at_risk,
)
from scipy.stats import kurtosis, skew


def get_data(data_config, problem_config, model_config):
    """
    Load every task's pickled frame and split it chronologically by row position.

    For each region in ``data_config["region"]`` and each task name listed under
    ``data_config[region]``, the file
    ``data_config["data_path"] + task + "_all_assets_data.pkl.gz"`` is read with
    ``pd.read_pickle``. The last ``holdout_period`` rows form the test split, the
    ``val_period`` rows before them the validation split and all earlier rows
    the training split. When ``val_period`` is 0 the validation split is the
    training split itself.

    :param data_config: dict with "data_path", "region" and one task list per region
    :param problem_config: dict with integer "val_period" and "holdout_period"
    :param model_config: unused; kept so existing callers keep working
    :return: (Xtrain_tasks, Xval_tasks, Xtest_tasks), each ``{region: {task: ndarray}}``
    """
    suffix = "_all_assets_data.pkl.gz"
    val_period = problem_config["val_period"]
    holdout_period = problem_config["holdout_period"]

    Xtrain_tasks, Xval_tasks, Xtest_tasks = {}, {}, {}
    for region in data_config["region"]:
        # pre-allocation
        Xtrain_tasks[region], Xval_tasks[region], Xtest_tasks[region] = {}, {}, {}

        for tk in data_config[region]:
            # NOTE(review): unpickling can execute arbitrary code - only load
            # files from a trusted source.
            df = pd.read_pickle(data_config["data_path"] + tk + suffix)

            # Split on non-negative positions. Negative-offset slices break when
            # an offset is 0: df.iloc[:-0] is empty and df.iloc[-0:] is the
            # whole frame, which would empty the training set and turn every
            # row into test data for holdout_period == 0.
            n_rows = len(df)
            test_start = max(n_rows - holdout_period, 0)
            val_start = max(test_start - val_period, 0)

            df_train = df.iloc[:val_start]
            # without a validation window, validation mirrors training
            df_val = df.iloc[val_start:test_start] if val_period != 0 else df_train
            df_test = df.iloc[test_start:]

            # keep plain arrays downstream
            Xtrain_tasks[region][tk] = df_train.values
            Xval_tasks[region][tk] = df_val.values
            Xtest_tasks[region][tk] = df_test.values
            print(region, tk, Xtrain_tasks[region][tk].shape)

    return Xtrain_tasks, Xval_tasks, Xtest_tasks


def calc_tcosts(signal):
    """
    Transaction costs implied by changes in a 2-D position array.

    The cost of each row is the absolute change from the previous row times the
    combined rate (commission plus slippage); the first row has no predecessor
    and is charged zero. Both rates are currently scaled by 0.00, so the
    returned costs are all zero.

    :param signal: 2-D array of positions, rows ordered in time
    :return: array with the same shape as ``signal``
    """
    commission = 0.0020 * 0.00
    slippage = 0.0005 * 0.00
    rate = commission + slippage

    turnover = np.abs(signal[1:, :] - signal[:-1, :])
    first_row = np.zeros(signal.shape[1]).reshape(1, -1)
    return np.concatenate([first_row, turnover * rate], axis=0)


def calmar_ratio(x):
    """Return ``annual_return(x)`` divided by the negated ``max_drawdown(x)``."""
    yearly_return = annual_return(x).values
    worst_drawdown = max_drawdown(x)
    return yearly_return / -worst_drawdown


def sharpe_ratio(x):
    """
    Return ``annual_return(x)`` divided by ``annual_volatility(x)``.

    This definition rebinds the ``sharpe_ratio`` name imported from empyrical
    earlier in the file, so later references resolve to this function.
    """
    yearly_return = annual_return(x).values
    yearly_vol = annual_volatility(x)
    return yearly_return / yearly_vol


def compute_performance_metrics(df_returns):
    """
    Tabulate performance statistics, one row per column of ``df_returns``.

    Each metric function is called once on the whole frame and its result is
    stored as one column of the output.

    :param df_returns: DataFrame whose columns are the return series to evaluate
    :return: DataFrame indexed by ``df_returns.columns`` with the columns
        SR, CR, MDD, ANN_RET, ANN_VOL, SortR, DownRisk, VaR, TailR, Skew, Kurt
    """
    # (output column, metric function) in output order
    labelled_metrics = [
        ("SR", sharpe_ratio),
        ("CR", calmar_ratio),
        ("MDD", max_drawdown),
        ("ANN_RET", annual_return),
        ("ANN_VOL", annual_volatility),
        ("SortR", sortino_ratio),
        ("DownRisk", downside_risk),
        ("VaR", value_at_risk),
        ("TailR", tail_ratio),
        ("Skew", skew),
        ("Kurt", kurtosis),
    ]

    df_metrics = pd.DataFrame(
        index=range(df_returns.shape[1]),
        columns=[label for label, _ in labelled_metrics],
    )
    for label, metric_fn in labelled_metrics:
        df_metrics[label] = np.array(metric_fn(df_returns))

    # label the rows with the names of the evaluated series
    df_metrics.index = df_returns.columns
    return df_metrics

### Baselines

In [59]:
from sklearn.linear_model import LinearRegression


In [60]:
class BuyAndHold:
    """
    Constant-position baseline.

    One LinearRegression is kept per (task, sub-task). After fitting, its
    coefficients are zeroed and its intercept set to 1.0, so the model is meant
    to emit the same signal regardless of its input.
    """

    def __init__(self, x_tasks, model_config):
        """Store the training data and create one model per (task, sub-task)."""
        # general params
        self.Xtrain_tasks = x_tasks
        self.export_path = model_config["export_path"]
        self.export_label = model_config["export_label"]

        # one regression object per sub-task, grouped by task
        self.mtl_list = self.Xtrain_tasks.keys()
        self.sub_mtl_list = {}
        self.signal = {}
        for tk in self.mtl_list:
            sub_tasks = self.Xtrain_tasks[tk].keys()
            self.sub_mtl_list[tk] = sub_tasks
            self.signal[tk] = {sub_tk: LinearRegression() for sub_tk in sub_tasks}

    def train(self):
        """Fit each model on (row t -> row t + 1) pairs, then force a constant output."""
        for tk in self.mtl_list:
            for sub_tk in self.sub_mtl_list[tk]:
                data = self.Xtrain_tasks[tk][sub_tk]
                model = self.signal[tk][sub_tk]
                model.fit(data[:-1, :], data[1:, :])
                # discard the fitted parameters: zero slope, unit intercept
                model.intercept_ = 1.0
                model.coef_ = model.coef_ * 0.0
                print(tk, sub_tk)

    def predict(self, x_test):
        """Return ``{task: {sub_task: prediction}}`` for the arrays in ``x_test``."""
        return {
            tk: {
                sub_tk: self.signal[tk][sub_tk].predict(x_test[tk][sub_tk])
                for sub_tk in self.sub_mtl_list[tk]
            }
            for tk in self.mtl_list
        }


class RiskParity:
    """
    Inverse-volatility baseline.

    Each row of the signal weights every column by the reciprocal of its
    rolling standard deviation, normalised so the weights of a row sum to 1.
    """

    def __init__(self, x_tasks, model_config):
        """
        :param x_tasks: ``{task: {sub_task: 2-D array}}`` training data
        :param model_config: dict with "export_path", "export_label" and
            ``["risk_parity"]["window"]`` (rolling window length in rows)
        """
        # general params
        self.Xtrain_tasks = x_tasks
        self.export_path = model_config["export_path"]
        self.export_label = model_config["export_label"]
        self.window = model_config["risk_parity"]["window"]

        # set learning model per transfer
        self.mtl_list = self.Xtrain_tasks.keys()
        self.sub_mtl_list = {}
        self.signal = {}

        for tk in self.mtl_list:
            self.sub_mtl_list[tk] = self.Xtrain_tasks[tk].keys()
            # kept for parity with the other baselines; predict() never uses it
            self.signal[tk] = {
                sub_tk: LinearRegression() for sub_tk in self.sub_mtl_list[tk]
            }

    def train(self):
        """Nothing is fitted; only report each (task, sub-task) pair."""
        for tk in self.mtl_list:
            for sub_tk in self.sub_mtl_list[tk]:
                print(tk, sub_tk)

    def predict(self, x_test):
        """
        Compute inverse-volatility weights for every row of ``x_test``.

        The training rows are stacked in front of ``x_test`` so the rolling
        window of the first test rows is filled from training data.

        :param x_test: ``{task: {sub_task: 2-D array}}`` with the same keys and
            column counts as the training data
        :return: ``{task: {sub_task: array}}`` shaped like the ``x_test`` arrays
        """
        y_pred = {}
        for tk in self.mtl_list:
            y_pred[tk] = {}
            for sub_tk in self.sub_mtl_list[tk]:
                new_rows = x_test[tk][sub_tk]
                history = pd.DataFrame(
                    np.concatenate([self.Xtrain_tasks[tk][sub_tk], new_rows], axis=0)
                )
                risk = (
                    history.rolling(window=self.window)
                    .std()
                    .values[-new_rows.shape[0] :, :]
                )
                inv_risk = 1.0 / risk
                # normalise each row so its weights sum to one
                y_pred[tk][sub_tk] = inv_risk / np.sum(inv_risk, axis=1, keepdims=True)

        return y_pred


class TimeSeriesMomentum:
    """
    Rolling-mean baseline: the signal of each column is the negated rolling
    mean of that column over ``window`` rows.

    NOTE(review): the rolling mean is negated, i.e. positions oppose the
    trailing average - confirm this sign is intended for a momentum strategy.
    """

    def __init__(self, x_tasks, model_config):
        """
        :param x_tasks: ``{task: {sub_task: 2-D array}}`` training data
        :param model_config: dict with "export_path", "export_label" and
            ``["ts_mom"]["window"]`` (rolling window length in rows)
        """
        # general params
        self.Xtrain_tasks = x_tasks
        self.export_path = model_config["export_path"]
        self.export_label = model_config["export_label"]
        self.window = model_config["ts_mom"]["window"]

        # set learning model per transfer
        self.mtl_list = self.Xtrain_tasks.keys()
        self.sub_mtl_list = {}
        self.signal = {}

        for tk in self.mtl_list:
            # pre-allocation
            self.signal[tk] = {}
            self.sub_mtl_list[tk] = self.Xtrain_tasks[tk].keys()

            for sub_tk in self.sub_mtl_list[tk]:
                # kept for parity with the other baselines; predict() never uses it
                self.signal[tk][sub_tk] = LinearRegression()
                print(tk, sub_tk)

    def train(self):
        """Nothing is fitted; only report each (task, sub-task) pair."""
        for tk in self.mtl_list:
            for sub_tk in self.sub_mtl_list[tk]:
                print(tk, sub_tk)

    def predict(self, x_test):
        """
        Compute the negated rolling mean for every row of ``x_test``.

        The training rows are stacked in front of ``x_test`` so the rolling
        window of the first test rows is filled from training data.

        :param x_test: ``{task: {sub_task: 2-D array}}`` with the same keys and
            column counts as the training data
        :return: ``{task: {sub_task: array}}`` shaped like the ``x_test`` arrays
        """
        y_pred = {}
        for tk in self.mtl_list:
            y_pred[tk] = {}
            for sub_tk in self.sub_mtl_list[tk]:
                new_rows = x_test[tk][sub_tk]
                history = pd.DataFrame(
                    np.concatenate([self.Xtrain_tasks[tk][sub_tk], new_rows], axis=0)
                )
                trailing_mean = (
                    history.rolling(window=self.window)
                    .mean()
                    .values[-new_rows.shape[0] :, :]
                )
                y_pred[tk][sub_tk] = -trailing_mean

        return y_pred


class CrossSectionalMomentum:
    """
    Cross-sectional baseline: per row, columns are ranked by their rolling
    mean; only columns whose scaled rank falls below ``fraction`` or above
    ``1 - fraction`` receive a signal (the negated rolling mean), all others
    get zero.

    NOTE(review): the rolling mean is negated, i.e. positions oppose the
    trailing average - confirm this sign is intended for a momentum strategy.
    """

    def __init__(self, x_tasks, model_config):
        """
        :param x_tasks: ``{task: {sub_task: 2-D array}}`` training data
        :param model_config: dict with "export_path", "export_label",
            ``["csec_mom"]["window"]`` (rolling window length in rows) and
            ``["csec_mom"]["fraction"]`` (rank cut-off at each tail)
        """
        # general params
        self.Xtrain_tasks = x_tasks
        self.export_path = model_config["export_path"]
        self.export_label = model_config["export_label"]
        self.window = model_config["csec_mom"]["window"]
        self.fraction = model_config["csec_mom"]["fraction"]

        # set learning model per transfer
        self.mtl_list = self.Xtrain_tasks.keys()
        self.sub_mtl_list = {}
        self.signal = {}

        for tk in self.mtl_list:
            # pre-allocation
            self.signal[tk] = {}
            self.sub_mtl_list[tk] = self.Xtrain_tasks[tk].keys()

            for sub_tk in self.sub_mtl_list[tk]:
                # kept for parity with the other baselines; predict() never uses it
                self.signal[tk][sub_tk] = LinearRegression()
                print(tk, sub_tk)

    def train(self):
        """Nothing is fitted; only report each (task, sub-task) pair."""
        for tk in self.mtl_list:
            for sub_tk in self.sub_mtl_list[tk]:
                print(tk, sub_tk)

    def predict(self, x_test):
        """
        Compute the tail-selected signal for every row of ``x_test``.

        The training rows are stacked in front of ``x_test`` so the rolling
        window of the first test rows is filled from training data.

        :param x_test: ``{task: {sub_task: 2-D array}}`` with the same keys and
            column counts as the training data
        :return: ``{task: {sub_task: array}}`` shaped like the ``x_test`` arrays
        """
        y_pred = {}
        for tk in self.mtl_list:
            y_pred[tk] = {}
            for sub_tk in self.sub_mtl_list[tk]:
                new_rows = x_test[tk][sub_tk]
                history = pd.DataFrame(
                    np.concatenate([self.Xtrain_tasks[tk][sub_tk], new_rows], axis=0)
                )
                signal = (
                    history.rolling(window=self.window)
                    .mean()
                    .values[-new_rows.shape[0] :, :]
                )

                # row-wise rank scaled into (0, 1]; computed once for both tails
                scaled_rank = (
                    pd.DataFrame(signal).rank(axis=1) / signal.shape[1]
                ).values
                bottom = scaled_rank < self.fraction
                top = scaled_rank > (1 - self.fraction)

                # zero out everything that is in neither tail
                y_pred[tk][sub_tk] = np.multiply(-signal, bottom | top)

        return y_pred

### Main loop configuration

In [61]:
# reproducibility: seed both the stdlib and the NumPy global generators
manualSeed = 999_999_999
random.seed(manualSeed)
np.random.seed(manualSeed)

# data params: where the task files live, which regions to process, and the
# task (exchange) names belonging to each region
data_config = {
    "data_path": ".\\Tasks\\",
    "region": ["Asia and Pacific", "Europe", "Americas", "MEA"],
}
data_config["Europe"] = [
    "Europe_AEX",
    "Europe_ASE",
    "Europe_ATX",
    "Europe_BEL20",
    "Europe_BUX",
    "Europe_BVLX",
    "Europe_CAC",
    "Europe_CYSMMAPA",
    "Europe_DAX",
    "Europe_HEX",
    "Europe_IBEX",
    "Europe_ISEQ",
    "Europe_KFX",
    "Europe_OBX",
    "Europe_OMX",
    "Europe_SMI",
    "Europe_UKX",
    "Europe_VILSE",
    "Europe_WIG20",
    "Europe_XU100",
    "Europe_SOFIX",
    "Europe_SBITOP",
    "Europe_PX",
    "Europe_CRO",
]
data_config["Asia and Pacific"] = [
    "Asia and Pacific_AS51",
    "Asia and Pacific_FBMKLCI",
    "Asia and Pacific_HSI",
    "Asia and Pacific_JCI",
    "Asia and Pacific_KOSPI",
    "Asia and Pacific_KSE100",
    "Asia and Pacific_NIFTY",
    "Asia and Pacific_NKY",
    "Asia and Pacific_NZSE50FG",
    "Asia and Pacific_PCOMP",
    "Asia and Pacific_STI",
    "Asia and Pacific_SHSZ300",
    "Asia and Pacific_TWSE",
]
data_config["Americas"] = [
    "Americas_IBOV",
    "Americas_MEXBOL",
    "Americas_MERVAL",
    "Americas_SPTSX",
    "Americas_SPX",
    "Americas_RTY",
]
data_config["MEA"] = [
    "MEA_DFMGI",
    "MEA_DSM",
    "MEA_EGX30",
    "MEA_FTN098",
    "MEA_JOSMGNFF",
    "MEA_KNSMIDX",
    "MEA_KWSEPM",
    "MEA_MOSENEW",
    "MEA_MSM30",
    "MEA_NGSE30",
    "MEA_PASISI",
    "MEA_SASEIDX",
    "MEA_SEMDEX",
    "MEA_TA-35",
    "MEA_TOP40",
]
data_config["additional_data_path"] = "_all_assets_data.pkl.gz"

# problem params
problem_config = dict(
    export_path="./Results/",
    val_period=0,  # if val is 0, then its results are the same as training
    holdout_period=756,
)

# model params: the selected baseline plus the settings of every baseline
model_config = dict(
    baseline=MODE,
    buy_and_hold={},
    risk_parity={"window": 252},
    ts_mom={"window": 252},
    csec_mom={"window": 252, "fraction": 0.33},
)

# label shared by every exported artefact of this run
export_label = (
    f"valperiod_{problem_config['val_period']}"
    f"_testperiod_{problem_config['holdout_period']}"
    f"_baseline_{model_config['baseline']}"
)
data_config["export_label"] = export_label
problem_config["export_label"] = export_label
model_config["export_label"] = export_label

# models export to the same folder as the problem-level results
model_config["export_path"] = problem_config["export_path"]


In [62]:
# load every task and split it into train / validation / test arrays
Xtrain_tasks, Xval_tasks, Xtest_tasks = get_data(
    data_config=data_config,
    problem_config=problem_config,
    model_config=model_config,
)

Asia and Pacific Asia and Pacific_AS51 (5623, 86)
Asia and Pacific Asia and Pacific_FBMKLCI (5623, 22)
Asia and Pacific Asia and Pacific_HSI (5623, 37)
Asia and Pacific Asia and Pacific_JCI (5623, 44)
Asia and Pacific Asia and Pacific_KOSPI (5623, 297)
Asia and Pacific Asia and Pacific_KSE100 (5623, 41)
Asia and Pacific Asia and Pacific_NIFTY (5623, 37)
Asia and Pacific Asia and Pacific_NKY (5623, 186)
Asia and Pacific Asia and Pacific_NZSE50FG (5623, 19)
Asia and Pacific Asia and Pacific_PCOMP (5623, 16)
Asia and Pacific Asia and Pacific_STI (5623, 25)
Asia and Pacific Asia and Pacific_SHSZ300 (5623, 18)
Asia and Pacific Asia and Pacific_TWSE (5623, 227)
Europe Europe_AEX (5623, 17)
Europe Europe_ASE (5623, 51)
Europe Europe_ATX (5623, 13)
Europe Europe_BEL20 (5623, 14)
Europe Europe_BUX (5623, 8)
Europe Europe_BVLX (5623, 17)
Europe Europe_CAC (5623, 35)
Europe Europe_CYSMMAPA (5623, 42)
Europe Europe_DAX (5623, 25)
Europe Europe_HEX (5623, 57)
Europe Europe_IBEX (5623, 23)
Europe Eu

### Training

In [63]:
# set model: map each baseline name to the class implementing it
baseline_classes = {
    "buy_and_hold": BuyAndHold,
    "risk_parity": RiskParity,
    "ts_mom": TimeSeriesMomentum,
    "csec_mom": CrossSectionalMomentum,
}
if model_config["baseline"] not in baseline_classes:
    # fail here with a clear message instead of a NameError on trad_strat below
    raise ValueError(
        "unknown baseline "
        + repr(model_config["baseline"])
        + "; expected one of "
        + ", ".join(baseline_classes)
    )
trad_strat = baseline_classes[model_config["baseline"]](Xtrain_tasks, model_config)
add_label = [""] * len(data_config["region"])  # no baseline adds a file-name suffix

# additional labelling: file-name suffix per region
to_add_label = dict(zip(data_config["region"], add_label))

# train model and report the elapsed wall-clock seconds
start = time()
trad_strat.train()
print(time() - start)

# get signals
Xtrain_signal = trad_strat.predict(Xtrain_tasks)
Xval_signal = trad_strat.predict(Xval_tasks)
Xtest_signal = trad_strat.predict(Xtest_tasks)

Asia and Pacific Asia and Pacific_AS51
Asia and Pacific Asia and Pacific_FBMKLCI
Asia and Pacific Asia and Pacific_HSI
Asia and Pacific Asia and Pacific_JCI
Asia and Pacific Asia and Pacific_KOSPI
Asia and Pacific Asia and Pacific_KSE100
Asia and Pacific Asia and Pacific_NIFTY
Asia and Pacific Asia and Pacific_NKY
Asia and Pacific Asia and Pacific_NZSE50FG
Asia and Pacific Asia and Pacific_PCOMP
Asia and Pacific Asia and Pacific_STI
Asia and Pacific Asia and Pacific_SHSZ300
Asia and Pacific Asia and Pacific_TWSE
Europe Europe_AEX
Europe Europe_ASE
Europe Europe_ATX
Europe Europe_BEL20
Europe Europe_BUX
Europe Europe_BVLX
Europe Europe_CAC
Europe Europe_CYSMMAPA
Europe Europe_DAX
Europe Europe_HEX
Europe Europe_IBEX
Europe Europe_ISEQ
Europe Europe_KFX
Europe Europe_OBX
Europe Europe_OMX
Europe Europe_SMI
Europe Europe_UKX
Europe Europe_VILSE
Europe Europe_WIG20
Europe Europe_XU100
Europe Europe_SOFIX
Europe Europe_SBITOP
Europe Europe_PX
Europe Europe_CRO
Americas Americas_IBOV
America

### Evaluation

In [64]:
# compute results
region_test_frames = []  # test metrics of each region, consolidated after the loop
for region in data_config["region"]:
    # per-exchange metric frames of this region; concatenated once per split
    train_frames, val_frames, test_frames = [], [], []

    for tk in data_config[region]:
        tk_path = tk + "_all_assets_data.pkl.gz"

        # get signal: the signal at row t is paired with the data row t + 1,
        # so the last signal row has no target and is dropped
        pred_train = Xtrain_signal[region][tk][:-1, :]
        pred_val = Xval_signal[region][tk][:-1, :]
        pred_test = Xtest_signal[region][tk][:-1, :]

        # get target
        Ytrain = Xtrain_tasks[region][tk][1:, :]
        Yval = Xval_tasks[region][tk][1:, :]
        Ytest = Xtest_tasks[region][tk][1:, :]

        # compute returns net of transaction costs
        df_train_ret = np.multiply(pred_train, Ytrain) - calc_tcosts(pred_train)
        df_val_ret = np.multiply(pred_val, Yval) - calc_tcosts(pred_val)
        df_test_ret = np.multiply(pred_test, Ytest) - calc_tcosts(pred_test)

        # get performance metrics; the raw frame is re-read for its column labels
        df = pd.read_pickle(data_config["data_path"] + tk_path)
        df_train_ret = pd.DataFrame(df_train_ret, columns=df.columns)
        df_train_metrics = compute_performance_metrics(df_train_ret)
        df_train_metrics["exchange"] = tk

        df_val_ret = pd.DataFrame(df_val_ret, columns=df.columns)
        df_val_metrics = compute_performance_metrics(df_val_ret)
        df_val_metrics["exchange"] = tk

        df_test_ret = pd.DataFrame(df_test_ret, columns=df.columns)
        df_test_metrics = compute_performance_metrics(df_test_ret)
        df_test_metrics["exchange"] = tk

        train_frames.append(df_train_metrics)
        val_frames.append(df_val_metrics)
        test_frames.append(df_test_metrics)

    # one concat per split; pd.concat raises ValueError if the region has no
    # tasks, rather than silently reusing the previous region's frames
    all_df_train_metrics = pd.concat(train_frames, axis=0)
    all_df_val_metrics = pd.concat(val_frames, axis=0)
    all_df_test_metrics = pd.concat(test_frames, axis=0)

    # export results
    all_df_train_metrics["region"] = region
    all_df_train_metrics["set"] = "train"
    all_df_val_metrics["region"] = region
    all_df_val_metrics["set"] = "val"
    all_df_test_metrics["region"] = region
    all_df_test_metrics["set"] = "test"

    pd.concat(
        [all_df_train_metrics, all_df_val_metrics, all_df_test_metrics], axis=0
    ).to_csv(
        problem_config["export_path"]
        + region
        + "_"
        + problem_config["export_label"]
        + to_add_label[region]
        + ".csv"
    )

    region_test_frames.append(all_df_test_metrics)

# consolidate test results across regions
global_df_test_metrics = pd.concat(region_test_frames, axis=0)

  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volatility(x)
  return annual_return(x).values / annual_volati