In [1]:
import numpy as np
import os
from datetime import datetime
import argparse
import pandas as pd
import torch
from DGP import DGP_gu, DGP_t3
from AE import AE_Agent, AE_Factor_Agent

In [2]:
# Lookup from the English words 'zero'..'seven' to the integers 0..7
# (presumably meant for choosing a CUDA device by name -- it is not read by any visible cell).
cuda_dic = {
    word: index
    for index, word in enumerate(
        ('zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven')
    )
}

In [3]:
# ---- CUDA setting -----------------------------------------------------------
# Select the preferred GPU only when it actually exists. An unconditional
# torch.cuda.set_device(3) fails on hosts without CUDA or with fewer than four
# visible devices, which would stop the notebook in its configuration cell.
cuda_device_index = 3
if torch.cuda.is_available() and torch.cuda.device_count() > cuda_device_index:
    torch.cuda.set_device(cuda_device_index)
else:
    print(f'CUDA device {cuda_device_index} is not available; keeping the default device.')

# ---- DGP parameters ---------------------------------------------------------
# Seed both numpy and torch so repeated runs draw the same random numbers.
seed = 10
np.random.seed(seed)
torch.manual_seed(seed)
N = 1000
T = 400
P_x = P_c = 50
P_f = 3
# W is a (P_f, P_x) matrix: a P_f x P_f identity followed by zero columns.
W = np.hstack([np.identity(P_f), np.zeros([P_f, P_x - P_f])])
linear_index = True

# ---- network parameters -----------------------------------------------------
f_hidden_dim = 64
K = 1
lr = 1e-3
lam = 1e-4
AE_epoch_num = 5000
AE_factor_epoch_num = 5000

# ---- number of repetitions and log directory --------------------------------
repetition_num = 3
bandwidth = 10
log_name = 'test'
# The timestamp keeps the log directories of separate runs apart.
time_ = datetime.now().strftime("%Y%m%d-%H%M%S")
root_log_dir = f'{log_name}_{seed}_{time_}'
# exist_ok=True replaces the separate existence check and is safe to re-run.
os.makedirs(root_log_dir, exist_ok=True)

In [4]:
# This interactive walk-through handles repetition 0 only; its files go into
# a sub-directory of the root log directory named after the repetition index.
repetition_index = 0
log_dir = f'{root_log_dir}/{repetition_index}'

# Generate r and C with DGP_t3 (imported from the project's DGP module).
r, C = DGP_t3(
    N, T, P_f, P_x, P_c, W, linear_index
)
# Length of the third axis of C; used below as the AE input/output dimension.
P = C.shape[2]

In [5]:
# For every t, column t of `portfolio` is the least-squares coefficient vector
# of r[:, t] on C[:, t, :], i.e. the solution x of (C_t' C_t) x = C_t' r_t.
portfolio = np.zeros((P, T))
for t in range(T):
    C_t = C[:, t, :]
    # Solve the normal equations directly rather than forming an explicit
    # inverse: same result up to rounding, cheaper and numerically more stable.
    portfolio[:, t] = np.linalg.solve(C_t.T @ C_t, C_t.T @ r[:, t])

In [6]:
# Build the AE_Agent (from the project's AE module): input and output widths
# are both P, the latent width is K, and the seed is offset by the repetition
# index so every repetition gets its own seed.
AE_agent = AE_Agent(
    input_dim=P,
    latent_dim=K,
    output_dim=P,
    learning_rate=lr,
    seed=seed + repetition_index,
    log_dir=log_dir,
)

In [7]:
# Feature and label are the very same tensor: the transpose of `portfolio`,
# so each row corresponds to one t and has P entries.
AE_feature = torch.Tensor(portfolio.T)
AE_label = AE_feature
AE_agent.load_data(
    feature=AE_feature,
    label=AE_label,
    valid_size=1/3,
    test_size=1/3,
    num_cpu=0,
    batch_size=64,
)
# Second positional argument is 0 here (the later AE_factor.train call passes lam in that slot).
AE_agent.train(AE_epoch_num, 0)

Begining loading...
The data contains 133 training samples and 134 valid samples
Complete!
best score:0.6589850783348083


In [8]:
# Read back the object stored as AE_best.pth in this repetition's log directory
# and hand it to the factor agent as `model_para`.
AE_model_para = torch.load(f'{log_dir}/AE_best.pth')
AE_factor = AE_Factor_Agent(
    N=N,
    T=T,
    P=P,
    K=K,
    f_hidden_dim=f_hidden_dim,
    model_para=AE_model_para,
    learning_rate=lr,
    seed=seed + repetition_index,
    log_dir=log_dir,
)

In [9]:
# Pass C and r to the factor agent; one third of the data is requested for
# validation and one third for testing.
AE_factor.load_data(
    C=C,
    r=r,
    valid_size=1/3,
    test_size=1/3,
    batch_size=64,
    num_cpu=0,
)

Begining loading...
The data contains 133 training samples, 134 validation samples and 133 test samples
Complete!


In [10]:
# Train the factor agent, passing AE_factor_epoch_num and lam positionally
# (presumably the epoch count and a regularisation weight -- TODO confirm
# against AE_Factor_Agent.train).
# NOTE: the recorded output of this cell ends in KeyboardInterrupt around
# epoch 12 of 4999, so the saved run was stopped by hand, not run to completion.
AE_factor.train(AE_factor_epoch_num, lam)

Epoch 0/4999 Training Loss 1794.08 Time Consume 2.231
------------------------------------------------------------
Evaluation 0/4999 Loss 1752.40
Update Model
------------------------------------------------------------
Epoch 1/4999 Training Loss 1736.00 Time Consume 1.913
Epoch 2/4999 Training Loss 1696.19 Time Consume 1.902
Epoch 3/4999 Training Loss 1670.15 Time Consume 1.858
Epoch 4/4999 Training Loss 1652.98 Time Consume 1.842
Epoch 5/4999 Training Loss 1640.13 Time Consume 1.896
Epoch 6/4999 Training Loss 1629.39 Time Consume 1.900
Epoch 7/4999 Training Loss 1618.62 Time Consume 1.887
Epoch 8/4999 Training Loss 1607.63 Time Consume 1.907
Epoch 9/4999 Training Loss 1598.53 Time Consume 1.884
Epoch 10/4999 Training Loss 1588.70 Time Consume 1.897
------------------------------------------------------------
Evaluation 10/4999 Loss 1567.59
Update Model
------------------------------------------------------------
Epoch 11/4999 Training Loss 1578.80 Time Consume 1.909
Epoch 12/4999 Tra

KeyboardInterrupt: 

In [11]:
# Restore the weights saved as AEF_best.pth before scoring the test split.
AE_factor.network.load_state_dict(
    torch.load(f'{log_dir}/AEF_best.pth')
)

# Test labels, and the network output on the test features, as numpy arrays.
true_r = AE_factor.label_test.numpy()
r_hat_total = (
    AE_factor.network
    .forward(AE_factor.feature_test.to(AE_factor.network.device))
    .cpu()
    .detach()
    .numpy()
)

# R_total = 1 - (sum of squared errors) / (sum of squared true values);
# the denominator is not demeaned.
squared_error_sum = ((true_r - r_hat_total) ** 2).sum()
squared_true_sum = (true_r ** 2).sum()
R_total = 1 - squared_error_sum / squared_true_sum

In [12]:
# Display R_total: 1 - sum((true_r - r_hat_total)^2) / sum(true_r^2) on the test split.
R_total

0.4552391767501831

In [14]:
# Shape of the test features without their last column.
# Sliced directly from AE_factor so this cell does not depend on `c_test`,
# which is only assigned in a later cell: on a fresh Restart & Run All the
# bare `c_test.shape` would raise NameError here.
AE_factor.feature_test[:, :, :-1].shape

torch.Size([133, 1000, 50])

In [16]:
# R_pred: score the test split using a trailing mean of the encoder output
# (window length `bandwidth`) in place of the same-period encoder output.
device = AE_factor.network.device
feature_test = AE_factor.feature_test
r_test = feature_test[:, :, -1]    # last column of every test feature row
c_test = feature_test[:, :, :-1]   # all remaining columns

# Evaluation only: nothing below is back-propagated, so autograd tracking is
# switched off to avoid building a graph over the whole test split.
# NOTE(review): the network is not put into eval() mode here -- confirm it has
# no dropout / batch-norm layers that would need it.
with torch.no_grad():
    # Per-period least-squares coefficients of r_test[t] on c_test[t].
    portfolio_test = torch.zeros((r_test.shape[0], P), device=device)
    for t in range(r_test.shape[0]):
        c_t = c_test[t, :, :]
        portfolio_test[t, :] = torch.inverse(c_t.t() @ c_t) @ c_t.t() @ r_test[t, :]

    latent_ = AE_factor.network.Encoder(portfolio_test.to(device))

    # Row j holds the mean of the `bandwidth` rows strictly before j, so it
    # uses no information from period j itself; the first `bandwidth` rows
    # stay zero and are dropped below.
    moving_latent = torch.zeros_like(latent_)
    for j in range(bandwidth, latent_.shape[0], 1):
        moving_latent[j, :] = latent_[(j - bandwidth):j, :].mean(axis=0)

    beta = AE_factor.network.factor_loading_network(c_test.to(device))
    # Batched product beta[t] @ moving_latent[t] for every period t >= bandwidth.
    r_hat_pred = torch.bmm(
        beta[bandwidth:, :, :],
        torch.unsqueeze(moving_latent[bandwidth:, :], axis=2),
    ).cpu().detach().numpy()

# Same 1 - SSE / sum(true^2) measure as R_total, restricted to periods that
# have a full trailing window. `true_r` comes from the R_total cell above.
R_pred = 1 - np.sum(np.power(true_r[bandwidth:, :, :] - r_hat_pred, 2)) / np.sum(np.power(true_r[bandwidth:, :, :], 2))

In [17]:
# Display R_pred: the same 1 - SSE / sum(true^2) measure, computed with the
# trailing-mean latent values over test periods from index `bandwidth` onward.
R_pred

0.04230237007141113

In [None]:
# Collect R_total / R_pred for every repetition, one row per repetition index.
# NOTE(review): train_once is neither defined nor imported in the visible
# cells -- confirm where it comes from before running this cell.
result_tab = pd.DataFrame(columns=['R_total', 'R_pred'])
for repetition_index in range(repetition_num):
    R_total, R_pred = train_once(
        repetition_index, bandwidth,
        N, T, P_f, P_x, P_c, W, linear_index,
        K, lr, f_hidden_dim, lam,
        AE_epoch_num, AE_factor_epoch_num,
        root_log_dir, seed,
    )
    result_tab.loc[repetition_index, 'R_total'] = R_total
    result_tab.loc[repetition_index, 'R_pred'] = R_pred
    # The CSV is rewritten after each repetition (presumably so partial
    # results are kept if a long run is interrupted).
    result_csv_path = f'{root_log_dir}/result.csv'
    result_tab.to_csv(result_csv_path, index=False)