In [2]:
import os

import numpy as np
import pandas as pd
import torch

from models import DeepNetwork, DeepEnsemble
from utils import *
from train import *

# Report whether PyTorch can see a CUDA device; the same check decides config['device'] below.
print(torch.cuda.is_available())

True


In [3]:
# Maps each named feature subset to the column indices it selects.
# NOTE(review): judging only by the subset names, columns 0-45 look like stock
# features, 46-58 like fund features and 59 like sentiment -- confirm against the dataset.
# Values are deliberately a mix of lists and ranges; both support len() and iteration.
subset2col = {
    'flow+fund_mom+sentiment': [*range(56, 60), 47],
    # Columns 46-57 without 47 and 54-57, with column 59 placed first.
    'fund_ex_mom_flow': [59, 46, *range(48, 54)],
    'stock': range(46),
    'fund': range(46, 59),
    'fund+sentiment': range(46, 60),
    'stock+fund': range(59),
    'F_r12_2+sentiment': [58, 59],
    'stock+sentiment': [59, *range(46)],
    'stock+fund+sentiment': range(60),
    'F_r12_2+flow+sentiment': [47, 58, 59],
}

In [4]:
# Experiment settings shared by the cells below. The exact meaning of each
# hyper-parameter is defined by the project's models/train/utils modules.
config = dict(
    # Input files.
    data_path="./deep_learning/datasets/CharAll_na_rm_huge_train_variableall4_sentiment_full_new.npz",
    split_lists_path='./deep_learning/sampling_folds/random_sampling_folds.npy',
    # Feature subset to use; must be a key of subset2col.
    subset='fund+sentiment',
    # Network shape.
    num_layers=1,
    hidden_dim=[64],
    dropout=0.0,
    # Optimisation settings.
    learning_rate=0.001,
    epochs=512,
    weighted_loss=False,
    reg_l1=0.0,
    reg_l2=0.001,
    batch_size=300000,
    criteria='Factor_sharpe',
    # Ensemble size and seed.
    ensemble_members=8,
    random_seed=15,
    # Run on the GPU when PyTorch reports one, otherwise on the CPU.
    device='cuda' if torch.cuda.is_available() else 'cpu',
    num_workers=4,
)

# SECURITY NOTE: allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code. Only load split files that come from a trusted source.
config['split_lists'] = np.load(config['split_lists_path'], allow_pickle=True)
# The input width is the number of columns in the chosen subset.
config['input_dim'] = len(subset2col[config['subset']])

In [5]:
# Sanity check: build one network from the current config, move it to the
# configured device, and print its structure.
model = DeepNetwork(config)
model = model.to(device=config['device'])
print(model)

DeepNetwork(
  (hidden_layers): ModuleList(
    (0): Linear(in_features=14, out_features=64, bias=True)
  )
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
)


In [None]:
# Train on every feature subset in turn and keep what run_one_subset returns,
# keyed by subset name.
losses, sharpes = {}, {}
for subset in subset2col:
    print('\nTRAINING FOR SUBSET: {}'.format(subset))
    # config is the only argument passed to run_one_subset, so the subset name and
    # its matching input width are written into it (in place) before each run.
    config.update(subset=subset, input_dim=len(subset2col[subset]))
    losses[subset], sharpes[subset] = run_one_subset(config)

In [12]:
# Evaluate the saved ensembles: for each feature subset, average the test Sharpe
# over all cross-validation folds and collect the averages in `result`.

# Subsets that are not evaluated in this cell; they are recorded as 0.0.
# NOTE(review): presumably no trained ensembles exist for these yet -- confirm.
skipped_subsets = ('fund+sentiment', 'fund_ex_mom_flow', 'stock+fund')

# Collect plain floats first. Assigning a scalar column to an empty DataFrame
# broadcasts over its empty index and stores nothing, so the frame is built once
# at the end instead.
avg_test_sharpes = {}

for subset in subset2col:
    if subset in skipped_subsets:
        avg_test_sharpes[subset] = 0.
        continue

    # DeepEnsemble receives config, so keep the subset and input width in sync with the loop.
    config['subset'] = subset
    config['input_dim'] = len(subset2col[subset])

    # The loader arguments do not depend on the fold, so build the loaders once per subset.
    crossval_loaders, masks = get_crossval_dataloaders(
        config['data_path'], config['split_lists'], config['subset'],
        batch_size=config['batch_size'])

    avg_sharpe_fold_ens = 0.
    for fold in range(len(crossval_loaders)):
        # Saved members are looked up under ./Experiments/<subset>/fold<k>, with k starting at 1.
        model_dirs = os.path.join('./Experiments/', config['subset'], 'fold' + str(fold+1))
        ensemble = DeepEnsemble(config, model_dirs)

        # Only the third (test) loader and the third (test) mask of the fold are used.
        _, _, test_loader = crossval_loaders[fold]['dataloaders']
        X_test, y_test = unload_data(test_loader)
        _, _, test_mask = masks[fold]

        ens_pred_test = ensemble.predict(X_test.float().to(config['device']))
        sharpe_test = evaluate_sharpe(ens_pred_test, y_test, test_mask)
        avg_sharpe_fold_ens += sharpe_test

    avg_sharpe_fold_ens /= len(crossval_loaders)
    print(f'Avg. Sharpe Test for {subset}: {avg_sharpe_fold_ens:.4f}')
    avg_test_sharpes[subset] = float(avg_sharpe_fold_ens)

# One row, one column per subset, in subset2col order.
result = pd.DataFrame(avg_test_sharpes, index=['avg_sharpe_test'])

Avg. Sharpe Test for flow+fund_mom+sentiment: 0.2546
Avg. Sharpe Test for stock: 0.0284
Avg. Sharpe Test for fund: 0.2750
Avg. Sharpe Test for F_r12_2+sentiment: 0.2334
Avg. Sharpe Test for stock+sentiment: 0.0792
Avg. Sharpe Test for stock+fund+sentiment: 0.1870
Avg. Sharpe Test for F_r12_2+flow+sentiment: 0.1410
