In [1]:
# Data manipulation
import numpy as np
import pandas as pd

# For saving dictionaries
import pickle

# Data visualization
import matplotlib.pyplot as plt

# For neural networks
import torch

# Computation of asset metrics
from portfolio_stats import PortfolioStats

# Time series splitting for walk forward modeling
from sklearn.model_selection import TimeSeriesSplit

# Neural network portfolio optimizer and objective functions
from workflow.tools.no_leverage_models import LSTMPortOpt_NL, CNNPortOpt_NL, FCNPortOpt_NL
from workflow.tools.metrics import neg_sharpe_ratio

# Tensor transform
from workflow.tools.conversion import to_tensors

from workflow.tools.utils import set_plot_style, strftime

set_plot_style()

In [2]:
# Load data
kwargs = {
    "index_col": 0,
    "parse_dates": True
}

features = pd.read_csv("workflow/data/features.csv", **kwargs)

features.head()

Unnamed: 0_level_0,BONDS,COMMODITIES,STOCKS,VOLATILIITY,BONDS_RET,COMMODITIES_RET,STOCKS_RET,VOLATILIITY_RET
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2006-02-07,58.516796,21.28208,44.861336,13.59,-0.000699,-0.029352,-0.009784,0.041313
2006-02-08,58.487652,21.191517,45.184078,12.83,-0.000498,-0.004264,0.007168,-0.057548
2006-02-09,58.522652,21.390755,45.098019,13.12,0.000598,0.009358,-0.001906,0.022352
2006-02-10,58.399944,21.001339,45.191254,12.87,-0.002099,-0.018373,0.002065,-0.019239
2006-02-13,58.44083,20.675316,44.986858,13.35,0.0007,-0.015646,-0.004533,0.036617


In [3]:
# Define lookback window
lookback = 50
n_samples = len(features) - lookback
n_features = len(features.columns)

# Get columns related to returns, and index names
return_cols = [col for col in features.columns if "RET" in col]
indices = features.columns[~features.columns.isin(return_cols)]

# Initialize input data with zeros
X_ = np.zeros((n_samples, lookback, n_features))

# Extract target values starting from the lookback index
y = features[return_cols].iloc[lookback:].values

# Populate the 'X' tensor using a rolling window of size 'lookback'
for i in range(lookback, len(features)):
    X_[i-lookback] = features.iloc[i-lookback:i]

X_.shape, y.shape

((4403, 50, 8), (4403, 4))

In [4]:
# Set random seed (torch) for reproducibility
torch.manual_seed(42)

# Initialize models dictionary
models = {
    "FCN": FCNPortOpt_NL,
    "CNN": CNNPortOpt_NL,
    "LSTM": LSTMPortOpt_NL,
}

# Initialize empty weights dictionary
weights = {model: [] for model in models.keys()}

# Define number of trading days per year and initialize TimeSeriesSplit
days_in_year = 365
trading_days = 252
retrain_after = 2
tss = TimeSeriesSplit(test_size=trading_days*retrain_after)
val_size = .2 # 20% validation window

# Loop over model
for a, (name, model_) in enumerate(models.items()):
    
    print(f"{a+1}: {name} ---", end="\n")

    # Vary input dimensions based on NN architecture
    if name == "CNN": 
        X = X_.transpose(0, 2, 1)
        input_size = X.shape[1]
    if name == "FCN": 
        X = X_.reshape((X_.shape[0], -1))
        input_size = X.shape[-1]
    if name == "LSTM": 
        X = X_
        input_size = X.shape[-1]

    dates = features.index
    test_start = None

    # Walk forward model training and prediction    
    for e, (train_idx, test_idx) in enumerate(tss.split(X)):
        
        train_start, train_end = strftime(dates[train_idx][0]), strftime(dates[train_idx][-1])
        
        if e == 0:
            test_start = dates[test_idx][lookback+1]
        
        val_idx = int(len(train_idx) * (1 - val_size))
        
        # Split data
        X_train, X_val, X_test = X[:val_idx], X[val_idx:], X[test_idx]
        y_train, y_val, y_test = y[:val_idx], y[val_idx:], y[test_idx]

        # Convert inputs and targets to PyTorch tensors
        X_train, X_val, X_test, y_train, y_val, y_test = to_tensors(
            X_train, X_val, X_test, y_train, y_val, y_test
        )

        # Hidden dimension and output dimension
        hidden_size = 64                       # 64 neurons
        output_size = y_train.shape[-1]        # 4 assets

        # Initialize model and optimizer
        model = model_(input_size, hidden_size, output_size)
        optimizer = torch.optim.Adam(model.parameters())

        # Define number of epochs
        epochs = 100

        # Construct training loop
        for epoch in range(epochs):
            model.train()
            optimizer.zero_grad()
            outputs = model(X_train)
            obj = neg_sharpe_ratio(outputs, y_train)
            obj.backward()
            optimizer.step()

            # Evaluate validation performance
            with torch.no_grad():
                val_allocations = model(X_val)
                val_obj = neg_sharpe_ratio(val_allocations, y_val)
            
            print(
                f"Split {e+1} ({train_start} - {train_end}):",
                f"Epoch {epoch + 1}/{epochs} ({(epoch + 1)/epochs*100:,.2f}%)",
                "|",
                f"T: {-obj.item():,.5f}", 
                f"V: {-val_obj.item():,.5f}" ,
                end="\r"
            )

        print()
        
        # Get the outputs (asset allocations) after training
        with torch.no_grad():
            model_allocations = model(X_test)
            weights[name].append(model_allocations)
    
    print()
    print(f"Test Period: {strftime(test_start)} - {strftime(dates[-1])}", 
          f"({(dates[-1] - test_start).days / days_in_year:,.2f} years)")
    print()

1: FCN ---
Split 1 (02/07/2006 - 07/31/2013): Epoch 100/100 (100.00%) | T: 0.95816 V: 0.20811
Split 2 (02/07/2006 - 07/31/2015): Epoch 100/100 (100.00%) | T: 0.85916 V: 0.19985
Split 3 (02/07/2006 - 08/01/2017): Epoch 100/100 (100.00%) | T: -0.16818 V: 0.24597
Split 4 (02/07/2006 - 08/02/2019): Epoch 100/100 (100.00%) | T: 0.36449 V: 0.54555
Split 5 (02/07/2006 - 08/03/2021): Epoch 100/100 (100.00%) | T: 0.44266 V: 0.41388

Test Period: 10/14/2013 - 10/16/2023 (10.01 years)

2: CNN ---
Split 1 (02/07/2006 - 07/31/2013): Epoch 100/100 (100.00%) | T: 0.08220 V: -0.01064
Split 2 (02/07/2006 - 07/31/2015): Epoch 100/100 (100.00%) | T: 0.45351 V: 0.56373
Split 3 (02/07/2006 - 08/01/2017): Epoch 100/100 (100.00%) | T: 0.85160 V: 0.11811
Split 4 (02/07/2006 - 08/02/2019): Epoch 100/100 (100.00%) | T: 0.82607 V: 0.04248
Split 5 (02/07/2006 - 08/03/2021): Epoch 100/100 (100.00%) | T: 1.51045 V: -0.03195

Test Period: 10/14/2013 - 10/16/2023 (10.01 years)

3: LSTM ---
Split 1 (02/07/2006 - 07/31

In [5]:
# Backtest optimized portfolios
nn_unlev_rets = []
weights_dfs = {model: None for model in models.keys()}

# Set loan variables
loan_term = 2     # 2Y repayment schedule
loan_ir = 0.05    # 5% interest rate
days_in_year = 365

amrtzn = 1 / (days_in_year * loan_term)
ir = loan_ir / days_in_year

# Set transaction variables
trnsc_cr = 0.002  # 0.2% transaction cost rate

for name, weights_ in weights.items():
    weights_concat = np.concatenate(weights_)
    
    # Reindex features to match weights
    reindexed_features = features.iloc[-len(weights_concat):]
    weights_df = pd.DataFrame(weights_concat, index=reindexed_features.index, columns=indices)

    # Shift weights for real time information lag
    weights_df = weights_df.shift().dropna()

    reindexed_returns = reindexed_features[return_cols][1:]
    reindexed_returns.columns = indices

    # Portfolio returns = sum of (weights * returns)
    returns = (weights_df * reindexed_returns).sum(axis=1)

    # Account for transaction costs
    c = weights_df.diff().abs().sum(axis=1)
    tc = c * trnsc_cr
    returns = returns - tc
    
    returns.name = name
    nn_unlev_rets.append(returns)

    weights_dfs[name] = weights_df

nn_unlev_rets = pd.concat(nn_unlev_rets, axis=1)
nn_unlev_rets.loc[nn_unlev_rets.index.min()] = 0

nn_unlev_rets.head()

Unnamed: 0_level_0,FCN,CNN,LSTM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-10-14,0.0,0.0,0.0
2013-10-15,-0.000468,0.149428,0.003144
2013-10-16,0.004297,-0.237855,-0.004907
2013-10-17,0.002793,-0.08732,0.000966
2013-10-18,0.001672,-0.033186,0.0027


In [6]:
nn_unlev_rets.to_csv("workflow/data/returns/nn_unlev_rets.csv")