In [1]:
# Project-relative locations used throughout the notebook.
# Both sub-directories are spelled relative to `base_dir`, which is the working directory.
base_dir = './'
fig_dir = base_dir + 'figures/'
model_dir = base_dir + 'results/trained_encoder/'

In [2]:
import os
import sys

# Make the project-local modules under ./src importable (e.g. `encoder`, `investment`).
# os.path.join picks the right separator for the host OS; a hard-coded '\\src'
# only resolves on Windows.
sys.path.insert(0, os.path.join(os.getcwd(), 'src'))

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import trange

---

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [5]:
# Prefer the GPU when one is present, but fall back to the CPU so that the
# notebook still runs end-to-end on machines without CUDA.
print('CUDA available :', torch.cuda.is_available())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

CUDA available : True


In [6]:
# Window length and lag select which pre-processed dataset folder is read.
n_timestamps = 5
lag = 1

# Both the tensor splits and the cleansed CSV live in the same folder, so build it once.
dataset_dir = f'./data/n_timestamps_{n_timestamps}__lag_{lag}/'
# NOTE: despite its name, `data_dir` is the path of the split *file*, not of a directory.
data_dir = dataset_dir + 'data_split.pt'

# Each split is a dict whose values are unpacked positionally as
# (data, positional encoding, dates); this relies on the dict's insertion order.
data_split = torch.load(data_dir)
train_data, train_pos_enc, train_dates = data_split['train'].values()
valid_data, valid_pos_enc, valid_dates = data_split['valid'].values()
test_data, test_pos_enc, test_dates = data_split['test'].values()

# Concat Date Positional Encoding (appended along the last, feature, axis)
X_train = torch.cat((train_data, train_pos_enc), dim=-1)
X_valid = torch.cat((valid_data, valid_pos_enc), dim=-1)
X_test = torch.cat((test_data, test_pos_enc), dim=-1)

# Get currency names: only the header row is needed, so do not parse the data rows.
currency_names = pd.read_csv(dataset_dir + 'cleansed_data.csv', nrows=0).columns.tolist()

# The raw split dictionary is no longer needed once it has been unpacked.
del data_split

In [7]:
# Total sample count over the three splits, used to report each split's share.
N = sum(split.shape[0] for split in (X_train, X_valid, X_test))
# T and D are the last two dimensions of a sample; both are reused by later cells.
_, T, D = X_train.shape
print(f'Data shape : (*, {T}, {D})')
# One line per split: first date ~ last date (sample count, percentage of all samples).
for label, split, split_dates in (('Train', X_train, train_dates),
                                  ('Valid', X_valid, valid_dates),
                                  ('Test ', X_test, test_dates)):
    print(f'    - {label} : {split_dates[0]} ~ {split_dates[-1]} ({len(split):>4d}, {len(split)/N*100:.2f}%)')

Data shape : (*, 5, 36)
    - Train : 2002-01-08 ~ 2015-05-07 (3412, 63.99%)
    - Valid : 2015-05-08 ~ 2018-09-03 ( 853, 16.00%)
    - Test  : 2018-09-04 ~ 2022-10-31 (1067, 20.01%)


---

# Training Investment Module

In [8]:
from encoder import Encoder
from investment import Investment
from itertools import product

In [9]:
# Maps the short manifold key passed to the Encoder to the display name that is
# also used as the prefix of model / checkpoint names.
manifold_names = dict(
    euclidean='Euclidean',
    p_plane='Poincare-Halfplane',
    sphere='Hypersphere',
)

- Training

In [10]:
# Dimensions of one sample as reported by X_train.shape; handed to the Encoder constructor.
dim_data = (T, D)
# No layer overrides: the Encoder receives an empty configuration.
layer_configs = dict()

# Every (manifold key, embedding dimension) pair to process:
# three manifolds, each with embedding dimensions 16 through 36 inclusive.
candidates = [(manifold, dim)
              for manifold in ('sphere', 'p_plane', 'euclidean')
              for dim in range(16, 37)]

In [11]:
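# NOTE: the training loop below is currently disabled (commented out); the evaluation
# cells further down load previously saved `<model_name>_investment.pt` weights instead.
# for target_manifold, dim_embedding in candidates: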
#     print('target_manifold :', target_manifold, ', dim_embedding :', dim_embedding)
#     model_name = '_'.join([manifold_names[target_manifold], f'dim-{dim_embedding}'])
    
#     best_ckpt = torch.load(model_dir +'best/' + model_name + '_best.pt')
    
#     model = Encoder(dim_data, dim_embedding, target_manifold=target_manifold,
#                     layer_configs=layer_configs, device=device)
#     model.load_state_dict(best_ckpt['state_dict'])
    
#     investment = Investment(model, num_currencies=D-2)
#     investment.fit(X_train, X_valid, 500, 60, save_name=model_name + '_investment.pt')

- Evaluation

In [12]:
def investment_on_dataset(data, target_manifold, dim_embedding,
                          ckpt='best', portfolio_as=None, dates=None):
    """Run a trained encoder + investment module over ``data``.

    The encoder checkpoint is read from ``model_dir/<ckpt>/<model_name>_<ckpt>.pt``
    and the rebalancing weights from
    ``./results/investment/models/<model_name>_investment.pt``, where
    ``model_name`` is ``'<manifold display name>_dim-<dim_embedding>'``.

    Parameters
    ----------
    data :
        Input forwarded unchanged to ``Investment._trade``
        (presumably a tensor shaped like ``X_valid`` / ``X_test`` - TODO confirm).
    target_manifold : str
        Key of ``manifold_names`` ('euclidean', 'p_plane' or 'sphere').
    dim_embedding : int
        Embedding dimension of the encoder to load.
    ckpt : str, default 'best'
        Checkpoint tag; selects both the sub-folder and the file-name suffix.
    portfolio_as : {None, 'dataframe'}
        With 'dataframe' the portfolios are returned as a date-indexed DataFrame;
        any other value returns them exactly as produced by ``_trade``.
    dates : sequence, optional
        Required when ``portfolio_as == 'dataframe'``; ``dates[1:]`` becomes the index.

    Returns
    -------
    tuple
        ``(sharpe_ratio, position_values, portfolios)`` where ``sharpe_ratio`` is a
        Python float, ``position_values`` is a detached CPU tensor and
        ``portfolios`` is either the raw ``_trade`` output or a DataFrame.

    Raises
    ------
    AssertionError
        If a DataFrame is requested without ``dates``.
    """
    model_name = '_'.join([manifold_names[target_manifold], f'dim-{dim_embedding}'])

    # Keep the checkpoint *tag* (`ckpt`) and the loaded checkpoint in separate names.
    # map_location lets checkpoints saved on a GPU load on whichever device is in use.
    encoder_ckpt = torch.load(model_dir + f'{ckpt}/' + model_name + f'_{ckpt}.pt',
                              map_location=device)

    model = Encoder(dim_data, dim_embedding, target_manifold=target_manifold,
                    layer_configs=layer_configs, device=device)
    model.load_state_dict(encoder_ckpt['state_dict'])
    # NOTE(review): only the rebalancing network is switched to eval mode below;
    # confirm whether the encoder also needs `model.eval()` (or Investment handles it).

    investment = Investment(model, num_currencies=D-2)
    rebalance_state = torch.load('./results/investment/models/' + model_name + '_investment.pt',
                                 map_location=device)
    investment.rebalance.load_state_dict(rebalance_state)
    investment.rebalance.eval()

    # `_trade` returns the *negative* Sharpe ratio, so flip the sign for reporting.
    neg_sharpe_ratio, position_values, portfolios = investment._trade(data)
    sharpe_ratio = - neg_sharpe_ratio.item()

    if portfolio_as == 'dataframe':
        assert dates is not None, '"dates" is required.'
        # The first date is skipped for the index (presumably there is no portfolio
        # for the very first step - TODO confirm); 'USD' is prepended to the currencies.
        portfolios = pd.DataFrame(portfolios.detach().cpu().numpy(),
                                  index=dates[1:],
                                  columns=['USD'] + currency_names)
        portfolios.index.name = 'Date'
        portfolios.index = pd.to_datetime(portfolios.index)

    return sharpe_ratio, position_values.detach().cpu(), portfolios

In [13]:
def eval_investment_by_model(candidates, data_type):
    """Collect portfolio values (and portfolios) of every candidate model on one split.

    Results are cached in ``./results/investment/portfolio_values_on_<data_type>.pt``.
    Only the portfolio values are cached, so when the cache is used the second
    element of the returned tuple is ``None``.

    Parameters
    ----------
    candidates : iterable of (target_manifold, dim_embedding)
        Models to evaluate; each pair is forwarded to ``investment_on_dataset``.
    data_type : {'valid', 'test'}
        Which split to evaluate on.

    Returns
    -------
    tuple
        ``(PV_by_model, PFO_by_model)``. ``PV_by_model`` maps ``'Date'`` to the split's
        dates and each model name to its list of position values. ``PFO_by_model``
        maps each model name to its portfolio DataFrame, or is ``None`` on a cache hit.

    Raises
    ------
    ValueError
        If ``data_type`` is neither 'valid' nor 'test'.
    """
    if data_type not in ('valid', 'test'):
        raise ValueError(f'Unknown value for "data_type" : {data_type}')

    cache_path = f'./results/investment/portfolio_values_on_{data_type}.pt'
    try:
        # Cache hit: portfolios were never stored, hence the explicit None.
        return torch.load(cache_path), None
    except FileNotFoundError:
        pass  # nothing cached yet -> evaluate below
    except Exception as err:
        # Keep the best-effort behaviour (re-evaluate), but do not hide the reason.
        print(f'Could not read cache "{cache_path}" ({err!r}); re-evaluating.')

    if data_type == 'valid':
        data, dates = X_valid, valid_dates
    else:
        data, dates = X_test, test_dates

    PV_by_model = {'Date': dates}
    PFO_by_model = {'Date': dates}

    for target_manifold, dim_embedding in candidates:
        model_name = '_'.join([manifold_names[target_manifold], f'dim-{dim_embedding}'])
        # The Sharpe ratio returned first is not needed here.
        _, PV, PFO = investment_on_dataset(data, target_manifold, dim_embedding,
                                           ckpt='best', portfolio_as='dataframe',
                                           dates=dates)
        PV_by_model[model_name] = PV.tolist()
        PFO_by_model[model_name] = PFO

    # Make sure the output folder exists before writing the cache.
    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    torch.save(PV_by_model, cache_path)

    return PV_by_model, PFO_by_model

In [14]:
# Load the cached portfolio values when they exist; otherwise evaluate every candidate.
# eval_investment_by_model writes the cache file itself, so no additional save is needed.
# Portfolios are not part of the cache, so PFO_*_by_model is None when the cache is used.
try:
    PV_valid_by_model = torch.load('./results/investment/portfolio_values_on_valid.pt')
    PFO_valid_by_model = None
except Exception:  # missing or unreadable cache -> recompute (Ctrl-C is no longer swallowed)
    PV_valid_by_model, PFO_valid_by_model = eval_investment_by_model(candidates, 'valid')

try:
    PV_test_by_model = torch.load('./results/investment/portfolio_values_on_test.pt')
    PFO_test_by_model = None
except Exception:  # missing or unreadable cache -> recompute
    PV_test_by_model, PFO_test_by_model = eval_investment_by_model(candidates, 'test')

In [15]:
def get_marker(manifold_name):
    """Return the matplotlib marker used for a manifold key.

    'sphere' -> circle ('o'), 'euclidean' -> square ('s'),
    anything else -> downward triangle ('v').
    """
    if manifold_name == 'sphere':
        return 'o'
    if manifold_name == 'euclidean':
        return 's'
    return 'v'

In [16]:
def viz_portfolio_value(data_type):
    """Plot and export the portfolio values of all models for one split.

    Reads the module-level ``PV_valid_by_model`` / ``PV_test_by_model`` and writes,
    under ``./results/investment/``:

    * ``figures_inv/portfolio_values_on_<Validation|Test>_dataset_<manifold key>.png``
      - one figure per manifold,
    * ``figures_inv/portfolio_values_on_<Validation|Test>_dataset_all.png``
      - all manifolds in a single figure,
    * ``portfolio_values_on_<data_type>.csv`` - the underlying table.

    Parameters
    ----------
    data_type : {'valid', 'test'}
        Which split to visualise.

    Raises
    ------
    ValueError
        If ``data_type`` is neither 'valid' nor 'test'.
    """
    if data_type == 'valid':
        data_name = 'Validation'
        PV_by_model = PV_valid_by_model
    elif data_type == 'test':
        data_name = 'Test'
        PV_by_model = PV_test_by_model
    else:
        raise ValueError(f'Unknown value for "data_type" : {data_type}')

    # savefig does not create missing folders, so make sure the target exists.
    fig_root = './results/investment/figures_inv/'
    os.makedirs(fig_root, exist_ok=True)

    df_PV = pd.DataFrame(PV_by_model).set_index('Date')
    n_steps = df_PV.shape[0]

    def draw_manifold(ax, manifold_name):
        # A model belongs to a manifold when its column name contains the display name.
        label = manifold_names[manifold_name]
        models = [col for col in df_PV.columns.tolist() if label in col]
        df_PV[models].plot(ax=ax, linestyle='--', linewidth=1,
                           marker=get_marker(manifold_name), markersize=2, alpha=.7)

    def decorate(ax, title, ncol):
        # Horizontal reference line at 1e7
        # (presumably the initial portfolio value - TODO confirm).
        ax.axhline(1e7, color='grey', linestyle='--', alpha=.7)
        ax.set_title(title)
        ax.legend(loc='upper left', ncol=ncol)
        ax.set_xlim([0, n_steps])

    # One figure per manifold.
    for manifold_name in manifold_names.keys():
        fig, ax = plt.subplots(figsize=(20, 10))
        draw_manifold(ax, manifold_name)
        decorate(ax,
                 f'Portfolio Values - {manifold_names[manifold_name].title()} ({data_name})',
                 ncol=1)
        fig.savefig(fig_root + f'portfolio_values_on_{data_name}_dataset_{manifold_name}.png')
        plt.close(fig)

    # All manifolds overlaid in a single figure.
    fig, ax = plt.subplots(figsize=(20, 10))
    for manifold_name in manifold_names.keys():
        draw_manifold(ax, manifold_name)
    decorate(ax, f'Portfolio Value ({data_name})', ncol=3)
    fig.tight_layout()
    fig.savefig(fig_root + f'portfolio_values_on_{data_name}_dataset_all.png')
    plt.close(fig)

    df_PV.to_csv(f'./results/investment/portfolio_values_on_{data_type}.csv')

In [17]:
# Produce the portfolio-value figures and the CSV export for both held-out splits.
for split_name in ('valid', 'test'):
    viz_portfolio_value(split_name)

---

# Investment Performance

In [18]:
# Reload the portfolio-value tables written by viz_portfolio_value (validation first, then test).
PV_by_model_valid, PV_by_model_test = (
    pd.read_csv(f'./results/investment/portfolio_values_on_{split_name}.csv')
    for split_name in ('valid', 'test')
)

In [19]:
def performance_measure(PV):
    """Summarise each model's portfolio-value series with four performance measures.

    Parameters
    ----------
    PV : pandas.DataFrame
        A 'Date' column plus one column of portfolio values per model. Model columns
        must be named ``'<Manifold>_dim-<k>'`` because manifold and dimension are
        parsed from the column name.

    Returns
    -------
    pandas.DataFrame
        One row per model with columns
        ``['Manifold', 'Dimension', 'HPR', 'SR', 'SoR', 'MDD']``:

        * HPR - holding period return, last value / first value - 1.
        * SR  - Sharpe ratio of daily returns: mean / population std (not annualised).
        * SoR - Sortino-style ratio: mean daily return / sample std of the negative
          returns around their own mean; NaN with fewer than two negative returns.
        * MDD - maximum drawdown relative to the running peak (always <= 0).
    """
    PV = PV.set_index('Date')
    perf = pd.DataFrame(index=PV.columns.tolist())
    perf.index.name = 'Model'

    # Simple daily return, measured against the PREVIOUS day's value.
    daily_return = (PV / PV.shift(1) - 1).dropna().values
    mean_return = daily_return.mean(axis=0)

    # Holding Period Return
    perf['HPR'] = PV.values[-1] / PV.values[0] - 1

    # Degenerate series (zero variance, fewer than two losses) yield NaN/inf
    # instead of emitting divide-by-zero warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        # Sharpe Ratio
        perf['SR'] = mean_return / daily_return.std(axis=0)

        # Sortino Ratio: only the negative returns contribute to the risk term.
        is_loss = daily_return < 0
        n_loss = is_loss.sum(axis=0)
        loss_mean = (daily_return * is_loss).sum(axis=0) / n_loss
        loss_var = (((daily_return - loss_mean.reshape(1, -1)) * is_loss) ** 2).sum(axis=0) / (n_loss - 1)
        perf['SoR'] = mean_return / np.sqrt(loss_var)

    # Maximum Drawdown: largest loss relative to the running peak, bounded by [-1, 0].
    perf['MDD'] = (PV / PV.cummax() - 1).min()

    # Model names look like '<Manifold>_dim-<k>'.
    perf['Manifold'] = perf.index.map(lambda mn: mn.split('_')[0])
    perf['Dimension'] = perf.index.map(lambda mn: int(mn.split('_')[1].split('-')[-1]))
    perf = perf.reset_index(drop=True)
    perf = perf[['Manifold', 'Dimension', 'HPR', 'SR', 'SoR', 'MDD']]

    return perf

In [20]:
def viz_performance_measure(perf, data_type):
    """Plot HPR, SR, SoR and MDD against embedding dimension, one panel per manifold.

    The figure is saved to
    ``./results/investment/figures_inv/performance_measure_on_<data_type>.png``.

    Parameters
    ----------
    perf : pandas.DataFrame
        Table with 'Manifold', 'Dimension', 'HPR', 'SR', 'SoR' and 'MDD' columns,
        as returned by ``performance_measure``.
    data_type : {'valid', 'test'}
        Split the measures were computed on; used for the titles and the file name.

    Raises
    ------
    ValueError
        If ``data_type`` is neither 'valid' nor 'test'.
    """
    if data_type == 'valid':
        data_name = 'Validation'
    elif data_type == 'test':
        data_name = 'Test'
    else:
        raise ValueError(f'Unknown value for "data_type" : {data_type}')

    # savefig does not create missing folders, so make sure the target exists.
    fig_root = './results/investment/figures_inv/'
    os.makedirs(fig_root, exist_ok=True)

    # One panel per manifold (three manifolds -> the original 21x5 inch figure).
    n_panels = len(manifold_names)
    fig, axes = plt.subplots(ncols=n_panels, figsize=(7 * n_panels, 5), squeeze=False)
    for ax, manifold in zip(axes[0], manifold_names.values()):
        perf_m = perf.loc[perf.Manifold == manifold]
        for meas in ['HPR', 'SR', 'SoR', 'MDD']:
            perf_m.plot(x='Dimension', y=meas, ax=ax,
                        marker='o', ms=3, linestyle='--', linewidth=1)
        ax.axhline(0, c='grey', alpha=.5)
        # The panels show performance measures, not portfolio values.
        ax.set_title(f'Performance Measures - {manifold} ({data_name})')
        # Ticks span the embedding dimensions evaluated in this notebook (16..36).
        ax.set_xticks([16, 20, 24, 28, 32, 36])
    fig.savefig(fig_root + f'performance_measure_on_{data_type}.png')
    plt.close(fig)

In [21]:
# Compute and plot the performance measures for the validation split, then the test split.
for split_name, pv_table in (('valid', PV_by_model_valid), ('test', PV_by_model_test)):
    viz_performance_measure(performance_measure(pv_table), split_name)