In [1]:
import torch
import numpy as np
import os

from models import DeepNetwork, DeepEnsemble
from utils import *
from train import *

print(torch.cuda.is_available())

True


In [2]:
subset2col = {
        'flow+fund_mom+sentiment': list(range(56, 60)) + [47],
        'fund_ex_mom_flow': [59] + [x for x in range(46, 58) if x not in (list(range(54, 58)) + [47])],
        'stock': range(46),
        'fund': range(46, 59),
        'fund+sentiment': range(46, 60),
        'stock+fund': range(59),
        'F_r12_2+sentiment': [58, 59],
        'stock+sentiment': [59] + list(range(0, 46)),
        'stock+fund+sentiment': range(60),
        'F_r12_2+flow+sentiment': [47, 58, 59]
    }

In [3]:
config = {
	'data_path': "./deep_learning/datasets/CharAll_na_rm_huge_train_variableall4_sentiment_full_new.npz",
	'split_lists_path': './deep_learning/sampling_folds/random_sampling_folds.npy',
	'subset': 'fund+sentiment',
	'num_layers': 1,
	'hidden_dim': [2**6],
	'dropout': 0.,
	'learning_rate': 0.001,
	'epochs': 512,
	'weighted_loss': False,
	'reg_l1': 0.0,
	'reg_l2': 0.001,
	'batch_size': 300000,
	'criteria': 'Factor_sharpe',
	'ensemble_members': 8,
	'random_seed': 15,
	'device': 'cuda' if torch.cuda.is_available() else 'cpu',
	'num_workers': 4
}

config['split_lists'] = np.load(config['split_lists_path'], allow_pickle = True)
config['input_dim'] = len(subset2col[config['subset']])

In [4]:
model = DeepNetwork(config).to(device=config['device'])
print(model)

DeepNetwork(
  (hidden_layers): ModuleList(
    (0): Linear(in_features=14, out_features=64, bias=True)
  )
  (output_layer): Linear(in_features=64, out_features=1, bias=True)
)


In [5]:
losses = {}
sharpes = {}
for subset in ['flow+fund_mom+sentiment', 'stock+sentiment', 'stock+fund+sentiment']:
	print('\nTRAINING FOR SUBSET: {}'.format(subset))
	config['subset'] = subset
	config['input_dim'] = len(subset2col[config['subset']])
	losses[subset], sharpes[subset] = run_one_subset(config)

Running fold no. 1

TRAINING ENSEMBLE MEMBER 1


KeyboardInterrupt: 

In [None]:
crossval_loaders, masks = get_crossval_dataloaders(config['data_path'], config['split_lists'], config['subset'], batch_size=config['batch_size'])

avg_loss_fold = {'train': 0.0, 'val': 0.0, 'test': 0.0}
avg_sharpe_fold = {'train': 0.0, 'val': 0.0, 'test': 0.0}

for fold in range(len(crossval_loaders)):
	print('\nRUNNING FOLD NO. {}'.format(fold+1))
	avg_loss_ens, avg_sharpe_ens = train_ensembles(config, crossval_loaders, masks, fold, config['ensemble_members'])
	
	avg_loss_fold['train'] += avg_loss_ens['train']
	avg_loss_fold['val'] += avg_loss_ens['val']
	avg_loss_fold['test'] += avg_loss_ens['test']
	
	avg_sharpe_fold['train'] += avg_sharpe_ens['train']
	avg_sharpe_fold['val'] += avg_sharpe_ens['val']
	avg_sharpe_fold['test'] += avg_sharpe_ens['test']

avg_loss_fold = {key: value / (len(crossval_loaders)) for key, value in avg_loss_fold.items()}
avg_sharpe_fold = {key: value / (len(crossval_loaders)) for key, value in avg_sharpe_fold.items()}

print(avg_loss_fold)
print(avg_sharpe_fold)


RUNNING FOLD NO. 1

TRAINING ENSEMBLE MEMBER 1
Epoch 1 - Training Loss: 0.00719792, Val Loss: 0.00672514, Train Sharpe: -0.17059104, Validation Sharpe: -0.16196084
Best model updated at epoch 1
Epoch 2 - Training Loss: 0.00589450, Val Loss: 0.00551381, Train Sharpe: -0.16686632, Validation Sharpe: -0.20798695
Epoch 3 - Training Loss: 0.00486706, Val Loss: 0.00459242, Train Sharpe: -0.16529635, Validation Sharpe: -0.20762973
Epoch 4 - Training Loss: 0.00410437, Val Loss: 0.00393244, Train Sharpe: -0.17694706, Validation Sharpe: -0.20383722
Epoch 5 - Training Loss: 0.00357805, Val Loss: 0.00348807, Train Sharpe: -0.17902026, Validation Sharpe: -0.17700179
Epoch 6 - Training Loss: 0.00324291, Val Loss: 0.00319936, Train Sharpe: -0.17015506, Validation Sharpe: -0.17527135
Epoch 7 - Training Loss: 0.00304036, Val Loss: 0.00300372, Train Sharpe: -0.17288339, Validation Sharpe: -0.09014613
Best model updated at epoch 7
Epoch 8 - Training Loss: 0.00291008, Val Loss: 0.00284890, Train Sharpe: 

In [16]:
avg_sharpe_fold_ens = 0.
for fold in range(len(crossval_loaders)):
	model_dirs = os.path.join('./Experiments/', config['subset'], 'fold' + str(fold+1))
	ensemble = DeepEnsemble(config, model_dirs)
	
	crossval_loaders, masks = get_crossval_dataloaders(config['data_path'], config['split_lists'], config['subset'], batch_size=config['batch_size'])
	_, _, test_loader = crossval_loaders[fold]['dataloaders']
	X_test, y_test = unload_data(test_loader)
	_, _, test_mask = masks[fold]
	
	ens_pred_test = ensemble.predict(X_test.float().to(config['device']))
	sharpe_test = evaluate_sharpe(ens_pred_test, y_test, test_mask)
	print(sharpe_test)
	avg_sharpe_fold_ens += sharpe_test

avg_sharpe_fold_ens /= len(crossval_loaders)
print(avg_sharpe_fold_ens)

-0.32411616676669497
0.16614131398356732
-0.14898776745329467
-0.10232087341214076


In [None]:
if __name__ == "__main__":
	losses, sharpes = run_all_subsets(config)
	print(losses)
	print(sharpes)