In [None]:
import json
import os
import gc
from tqdm import tqdm

import numpy as np
import pandas as pd
import torch

import pytorch_lightning as pl
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import ModelCheckpoint

from data import JPXdataModule
from models import PortfolioOptimizer, ReturnsDeltaClassifier, VolatilityDeltaClassifier
from utils import spread_return_sharpe_from_weights

In [None]:
# Pin the NumPy and PyTorch global RNG seeds so stochastic steps are repeatable.
np.random.seed(0)
torch.manual_seed(0)

# Instantiate the data module once without and once with the secondary merge,
# discarding each instance immediately and releasing cached CUDA memory.
# NOTE(review): presumably the constructor builds/caches preprocessed data as a
# side effect, which is why the object itself is thrown away — confirm in data.py.
for merge_with_secondary in (False, True):
    data = JPXdataModule(merge_with_secondary=merge_with_secondary)
    del data
    torch.cuda.empty_cache()

In [None]:
# Returns-delta classifier, five CV folds, two passes over the folds.
#   * checkpoint './weights/jpx_returns_momentum_<fold>.ckpt' missing -> train the fold
#   * checkpoint present -> run the model over data.all_dataloader() and cache
#     the sigmoid outputs under data.settings['CACHE_DIR']
# The fold loop runs twice so that a fold trained during the first pass can be
# scored during the second one.
# NOTE(review): this relies on get_trainer() writing its checkpoint to exactly
# that path, and `get_trainer` is neither defined nor imported in the visible
# part of this notebook — it must already exist in the kernel. Confirm.
for pass_idx in range(2):
    for i in range(5):

        fold = 'CV5{}'.format(i)

        data = JPXdataModule(mode='returns_classification', fold=fold, merge_with_secondary=False)
        name = 'returns_momentum_' + fold
        ckpt_path = './weights/jpx_' + name + '.ckpt'

        if os.path.exists(ckpt_path):

            model = ReturnsDeltaClassifier.load_from_checkpoint(ckpt_path, width=data.x.shape[-1])
            model.eval()
            model.cuda()

            outputs = []
            outputs_sec = []

            # Pure inference: disable autograd so no graph is built (and kept on
            # the GPU) for each forward pass. The stored values are unchanged,
            # the original detached them anyway.
            with torch.no_grad():
                # Only the first two elements of each 6-tuple batch are used.
                for x, x_sec, _, _, _, _ in tqdm(data.all_dataloader()):
                    outputs.append(torch.sigmoid(model(x.cuda())).cpu())
                    outputs_sec.append(torch.sigmoid(model(x_sec.cuda())).cpu())

            outputs = torch.cat(outputs, dim=0)
            outputs_sec = torch.cat(outputs_sec, dim=0)

            # File names are built by plain concatenation, so CACHE_DIR is
            # expected to end with a path separator.
            torch.save(outputs, data.settings['CACHE_DIR'] + 'pred_returns_delta_{}.pt'.format(fold))
            torch.save(outputs_sec, data.settings['CACHE_DIR'] + 'pred_returns_delta_{}_sec.pt'.format(fold))

            del outputs, outputs_sec

            model.cpu()
            data.vamos()
        else:

            model = ReturnsDeltaClassifier(width=data.x.shape[-1])
            trainer, callback0, callback1 = get_trainer(name=name, monitor='val_roc_auc', mode='max')
            trainer.fit(model, data)

            model.cpu()
            data.vamos()

            # Move every tensor held in the optimizer state to the CPU so that
            # nothing keeps CUDA memory alive once the trainer is dropped.
            for optimizer_metrics in trainer.optimizers[0].state.values():
                for metric_name, metric in optimizer_metrics.items():
                    if torch.is_tensor(metric):
                        optimizer_metrics[metric_name] = metric.cpu()

            del callback0, callback1, trainer

        del model, data

        # Release Python-side references first, then the CUDA caching allocator.
        gc.collect()
        torch.cuda.empty_cache()

In [None]:
# Volatility-delta classifier, five CV folds, two passes over the folds.
#   * checkpoint './weights/jpx_volatility_momentum_<fold>.ckpt' missing -> train the fold
#   * checkpoint present -> run the model over data.all_dataloader() and cache
#     the sigmoid outputs under data.settings['CACHE_DIR']
# The fold loop runs twice so that a fold trained during the first pass can be
# scored during the second one.
# NOTE(review): this relies on get_trainer() writing its checkpoint to exactly
# that path, and `get_trainer` is neither defined nor imported in the visible
# part of this notebook — it must already exist in the kernel. Confirm.
for pass_idx in range(2):
    for i in range(5):

        fold = 'CV5{}'.format(i)

        data = JPXdataModule(mode='volatility_classification', fold=fold, merge_with_secondary=False)
        name = 'volatility_momentum_' + fold
        ckpt_path = './weights/jpx_' + name + '.ckpt'

        if os.path.exists(ckpt_path):

            model = VolatilityDeltaClassifier.load_from_checkpoint(ckpt_path, width=data.x.shape[-1])
            model.eval()
            model.cuda()

            outputs = []
            outputs_sec = []

            # Pure inference: disable autograd so no graph is built (and kept on
            # the GPU) for each forward pass. The stored values are unchanged,
            # the original detached them anyway.
            with torch.no_grad():
                # Only the first two elements of each 6-tuple batch are used.
                for x, x_sec, _, _, _, _ in tqdm(data.all_dataloader()):
                    outputs.append(torch.sigmoid(model(x.cuda())).cpu())
                    outputs_sec.append(torch.sigmoid(model(x_sec.cuda())).cpu())

            outputs = torch.cat(outputs, dim=0)
            outputs_sec = torch.cat(outputs_sec, dim=0)

            # File names are built by plain concatenation, so CACHE_DIR is
            # expected to end with a path separator.
            torch.save(outputs, data.settings['CACHE_DIR'] + 'pred_volatility_delta_{}.pt'.format(fold))
            torch.save(outputs_sec, data.settings['CACHE_DIR'] + 'pred_volatility_delta_{}_sec.pt'.format(fold))

            del outputs, outputs_sec

            model.cpu()
            data.vamos()
        else:

            model = VolatilityDeltaClassifier(width=data.x.shape[-1])
            trainer, callback0, callback1 = get_trainer(name=name, monitor='val_roc_auc', mode='max')
            trainer.fit(model, data)

            model.cpu()
            data.vamos()

            # Move every tensor held in the optimizer state to the CPU so that
            # nothing keeps CUDA memory alive once the trainer is dropped.
            for optimizer_metrics in trainer.optimizers[0].state.values():
                for metric_name, metric in optimizer_metrics.items():
                    if torch.is_tensor(metric):
                        optimizer_metrics[metric_name] = metric.cpu()

            del callback0, callback1, trainer

        del model, data

        # Release Python-side references first, then the CUDA caching allocator.
        gc.collect()
        torch.cuda.empty_cache()

In [None]:
# Train one PortfolioOptimizer per CV fold; folds that already have a
# checkpoint under ./weights/ are left alone.
# NOTE(review): `get_trainer` is not defined or imported in the visible part of
# this notebook — it must already exist in the kernel. Confirm.
for i in range(5):

    fold = f'CV5{i}'
    name = f'portfolio_optimization_{fold}'

    # Nothing to do for a fold whose checkpoint is already on disk.
    if os.path.exists(f'./weights/jpx_{name}.ckpt'):
        continue

    data = JPXdataModule(mode='portfolio_optimization', fold=fold, merge_with_secondary=False)
    model = PortfolioOptimizer(width=data.x.shape[-1])

    trainer, callback0, callback1 = get_trainer(name=name, monitor='val_sharpe_ratio', mode='max')
    trainer.fit(model, data)

    model.cpu()
    data.vamos()

    # Replace each tensor in the optimizer state with a CPU copy so the state
    # no longer references CUDA memory.
    for optimizer_metrics in trainer.optimizers[0].state.values():
        optimizer_metrics.update({
            metric_name: metric.cpu()
            for metric_name, metric in optimizer_metrics.items()
            if torch.is_tensor(metric)
        })

    del callback0, callback1, trainer, model, data

    # Collect Python garbage, then hand cached CUDA blocks back to the driver.
    gc.collect()
    torch.cuda.empty_cache()


In [None]:
def infenrence_time():
    """Walk the supplemental price data day by day and rank securities per day.

    For every distinct ``Date`` in ``stock_prices.csv`` the function builds a
    ``sample_prediction`` frame (``SecuritiesCode``, ``Date``, ``Rank``), asks a
    model for per-security weights and converts those weights into ranks
    ``0..n-1`` (rank 0 = largest weight).

    Takes no arguments. Within the lines visible here nothing is returned and
    the per-day ``sample_prediction`` is not stored or submitted anywhere.

    NOTE(review): the name is presumably a typo for ``inference_time``; it is
    left as is because callers may reference it.
    NOTE(review): ``model`` is never bound inside this function, so it is
    resolved as a global at call time. The training cells visible in this
    notebook ``del model`` when they finish, so verify that a model is actually
    defined before this runs.
    """

    print('INFERENCE TIME')

    # The data module supplies process_day_for_inference() and unique_secus below.
    data = JPXdataModule(mode='inference', fold='CV50', merge_with_secondary=False)

    with open('./settings.json') as f:
        settings = json.load(f)

    # iloc[:, 1:] drops the first column of the prices file.
    # NOTE(review): presumably a row-id column — confirm against the CSV.
    prices_csv = pd.read_csv(settings['SUPP_DIR'] + 'stock_prices.csv').iloc[:,1:]
    financials_csv = pd.read_csv(settings['SUPP_DIR'] + 'financials.csv')

    for day in tqdm(np.unique(prices_csv.Date)):

        # Rows of both tables that belong to the current day.
        day_prices_csv = prices_csv[prices_csv.Date == day]
        day_financials_csv = financials_csv[financials_csv.Date == day]

        # One row per security quoted that day; Rank starts at 0 and is filled in below.
        sample_prediction = pd.DataFrame(day_prices_csv.SecuritiesCode, columns=['SecuritiesCode'])
        sample_prediction['Date'] = day_prices_csv.Date.iloc[0]
        sample_prediction['Rank'] = 0

        # NOTE(review): `fold` is computed but never used, and every iteration
        # performs the same call and overwrites `weights`, so only the last of
        # five identical evaluations survives. Presumably a per-fold model
        # ensemble was intended — confirm.
        for i in range(5):

            fold = 'CV5{}'.format(i)

            # `input` shadows the Python builtin of the same name inside this function.
            input = data.process_day_for_inference(day_prices_csv, day_financials_csv)
            weights = model(input)

        # Score per row of sample_prediction; rows without a match keep 0.
        ranking = np.zeros(len(sample_prediction))

        for i in range(len(sample_prediction)):

            # Position(s) of this row's security code inside data.unique_secus.
            sec_id = np.argwhere(data.unique_secus==sample_prediction.iloc[i].SecuritiesCode)

            # Only an unambiguous single match is scored.
            if len(sec_id) == 1:

                # weights is indexed as [0, security_position].
                ranking[i] = weights[0,sec_id.item()].item()

        # Turn scores into ranks: sort by score descending, number the rows
        # 0..n-1, then restore the original row order via the index.
        sample_prediction.Rank = ranking
        sample_prediction.sort_values(by=['Rank'], ascending=False, inplace=True)
        sample_prediction.Rank = np.arange(len(sample_prediction))

        sample_prediction.sort_index(ascending=True, inplace=True)