In [24]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta as rd
import time
import math
from sklearn.metrics import mean_squared_error

In [25]:
# stocks data csv read
df = pd.read_csv('data.csv')
df = df.set_index('Date')

# s&p data csv read
df_sp = pd.read_csv('sp500.csv')
df_sp = df_sp.set_index('Date')

In [26]:
def date_slicer(df, start, duration, rebalancing_period=0):
    start = str(datetime.strptime(start, '%Y-%m-%d').date() + rd(months=rebalancing_period))
    end = str(datetime.strptime(start, '%Y-%m-%d').date() + rd(months=duration) - rd(days=1))
    return df.loc[start:end]

In [27]:
def data_process(df):
    df = df.pct_change()
    df = df.tail(-1)
    df = df + 1
    df = df.cumprod()
    df = df - 1
    df = df.iloc[-1,:]
    df = df.to_numpy()
    df = torch.from_numpy(df).type(torch.Tensor)
    return df

In [28]:
def daily_change(df):
    df = df.pct_change()
    df = df.tail(-1)
    return df

In [29]:
def daily_return(df):
    df = df.pct_change()
    df = df.tail(-1)
    df = df + 1
    return df

In [30]:
# shallow nnf biuld
class shallow_NNF(nn.Module):
    def __init__(self, input_dim, hidden_size, num_classes):
        super(shallow_NNF, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=0)
        
    def reset_parameters(self):
        self.fc1.reset_parameters()
        self.fc2.reset_parameters()
        
    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.softmax(self.fc2(out))
        weights = out
        cumulative_change = sum(out * x)
        return cumulative_change, weights

In [31]:
# deep nnf build
class deep_NNF(nn.Module):
    def __init__(self, input_dim, hidden_size1, hidden_size2, hidden_size3,
                 hidden_size4, hidden_size5, num_classes, dropout_p = 0.2):
        super(deep_NNF, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        self.fc4 = nn.Linear(hidden_size3, hidden_size4)
        self.fc5 = nn.Linear(hidden_size4, hidden_size5)
        self.fc6 = nn.Linear(hidden_size5, num_classes)
    
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_p)
        self.softmax = nn.Softmax(dim=0)
        
    def reset_parameters(self):
        self.fc1.reset_parameters()
        self.fc2.reset_parameters()
        self.fc3.reset_parameters()
        self.fc4.reset_parameters()
        self.fc5.reset_parameters()
        self.fc6.reset_parameters()
        
    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.dropout(out)
        out = self.relu(self.fc2(out))
        out = self.dropout(out)
        out = self.relu(self.fc3(out))
        out = self.dropout(out)
        out = self.relu(self.fc4(out))
        out = self.dropout(out)
        out = self.relu(self.fc5(out))
        out = self.softmax(self.fc6(out))
        weights = out
        cumulative_change = sum(out * x)
        return cumulative_change, weights

In [32]:
# 1/N model build
class equal_w_model():
    def __init__(self, df):
        self.df = df
        self.performance()
        
    def performance(self):
        self.df = np.array(self.df)
        weights = np.ones((len(self.df), 1)) * (1/len(self.df))
        cumulative_change = sum(np.multiply(weights, self.df.reshape(-1,1)))
        return cumulative_change, weights.reshape(-1)

In [33]:
# rebalancing period = one or three months
rbp = 1

# epochs
num_epochs = 100

In [34]:
# shallow_nnf hyperparameters
input_dim = 471
hidden_size = 471
num_classes = 471
lr = 1e-3

In [35]:
# shallow nnf tune
shallow_NNF = shallow_NNF(input_dim=input_dim, hidden_size=hidden_size, num_classes=num_classes)
shallow_NNF_loss_fun = torch.nn.MSELoss(reduction='mean')
shallow_NNF_optimizer = torch.optim.Adam(shallow_NNF.parameters(), lr=lr)

In [36]:
# deep_nnf hyperparameters
input_dim = 471
hidden_size1 = 471
hidden_size2 = 471
hidden_size3 = 471
hidden_size4 = 471
hidden_size5 = 471
num_classes = 471
lr = 1e-3
dropout_p = 0

In [37]:
# deep nnf tune
deep_NNF = deep_NNF(input_dim=input_dim, hidden_size1=hidden_size1, hidden_size2=hidden_size2, 
                    hidden_size3=hidden_size3, hidden_size4=hidden_size4, hidden_size5=hidden_size5,
                    num_classes=num_classes)
deep_NNF_loss_fun = torch.nn.MSELoss(reduction='mean')
deep_NNF_optimizer = torch.optim.Adam(deep_NNF.parameters(), lr=lr)

In [38]:
# RMSE
def RMSE(x, y, weights):
    temp = 0
    for i in range(len(x)):
        temp += (sum(x.iloc[i] * weights) - y.iloc[i]) ** 2
    return math.sqrt(temp/len(x))

In [39]:
# MEAN
def MEAN(x, weights):
    temp = []
    for i in range(len(x)):
        temp.append(sum(x.iloc[i] * weights))
    temp = np.array(temp)
    return temp.mean()

In [40]:
# Volatility
def VOL(x, weights):
    temp = []
    for i in range(len(x)):
        temp.append(sum(x.iloc[i] * weights))
    temp = np.array(temp)
    return temp.std()

In [41]:
def portfolio_return(df, x_test, model, i, temp):    
    x_return = date_slicer(df, '2018-01-01', 1, i)
    x_return =  x_return.pct_change()
    x_return =  x_return.tail(-1)
    x_return =  x_return + 1
    x_return =  x_return.cumprod()
    
    if model == equal_w_model:
        weights = model(x_test).performance()[1]
    else:
        weights = np.array(model(x_test)[1].detach())
    
    for i in range(len(x_return)):
        temp.append(sum(x_return.iloc[i] * weights))
    temp = np.array(temp)
    return temp

In [42]:
def index_return(df_sp, i, temp):
    y_return = date_slicer(df_sp, '2018-01-01', 1, i)
    y_return = y_return.pct_change()
    y_return = y_return.tail(-1)
    y_return = y_return + 1
    y_return = y_return.cumprod()
    
    for i in range(len(y_return)):
        temp.append(sum(y_return.iloc[i]))
    temp = np.array(temp)
    return temp

In [43]:
def valid_fun(x_valid, i, model):
    x_change = daily_change(date_slicer(df, '2017-07-01', 6, i))
    y_change = daily_change(date_slicer(df_sp, '2017-07-01', 6, i))
    # x_return = daily_return(date_slicer(df, '2017-07-01', 6, i))
    # y_return = daily_return(date_slicer(df_sp, '2017-07-01', 6, i))
    
    if model == equal_w_model:
        weights = model(x_valid).performance()[1]
    else:
        weights = np.array(model(x_valid)[1].detach())
    
    valid_rmse = RMSE(x_change, y_change, weights)
    # valid_mean = MEAN(x_return, weights)
    # valid_vol  = VOL(x_return, weights)
    
    print(f'Validation RMSE: {valid_rmse}')
    # print(f'Validation MEAN: {valid_mean}')
    # print(f'Validation VOL: {valid_vol}')
    
    return valid_rmse

In [44]:
def test_fun(x_test, i, model):
    x_change = daily_change(date_slicer(df, '2018-01-01', 1, i))
    y_change = daily_change(date_slicer(df_sp, '2018-01-01', 1, i))
    x_return = daily_return(date_slicer(df, '2018-01-01', 1, i))
    y_return = daily_return(date_slicer(df_sp, '2018-01-01', 1, i))
    
    if model == equal_w_model:
        weights = model(x_test).performance()[1]
    else:
        weights = np.array(model(x_test)[1].detach())
    
    test_rmse = RMSE(x_change, y_change, weights)
    test_mean = MEAN(x_return, weights)
    test_vol  = VOL(x_return, weights)
    test_dic = {'RMSE': test_rmse, 'MEAN': test_mean, 'VOL': test_vol}
    
    print(f'Test RMSE: {test_rmse}')
    print(f'Test MEAN: {test_mean}')
    print(f'Test VOL: {test_vol}')
    
    return test_dic

### **Deep NNF Training**

In [45]:
# deep nnf training function
def train_deep_nnf(x_train, y_train, i):
    start_time_deep_nnf = time.time()
    print(f'\nDeep NNF Training & Results for model {(i/rbp)+1}:')
    
    for epoch in range(num_epochs):
        y_train_pred = deep_NNF(x_train)[0]
        loss_deep_nnf = deep_NNF_loss_fun(y_train_pred, y_train)
        if epoch == 0 or epoch == num_epochs-1:
            print(f'Epoch {epoch+1} of {num_epochs} | MSE: {loss_deep_nnf.item()}')
        deep_NNF_optimizer.zero_grad()
        loss_deep_nnf.backward()
        deep_NNF_optimizer.step()
        
    training_time = format(time.time()-start_time_deep_nnf, '0.2f')
    print(f'Training time: {training_time}')

In [46]:
# deep nnf
deep_nnf_valid_rmse_list = []
deep_nnf_test_results = []
deep_nnf_test_plot = []
index_test_plot = []

for i in range(int(24/rbp)):
    x_train = data_process(date_slicer(df, '2014-07-01', 36, i*rbp))
    y_train = data_process(date_slicer(df_sp, '2014-07-01', 36, i*rbp))
    x_valid = data_process(date_slicer(df, '2017-07-01', 6, i*rbp))
    y_valid = data_process(date_slicer(df_sp, '2017-07-01', 6, i*rbp))
    x_test = data_process(date_slicer(df, '2018-01-01', 1, i*rbp))
    y_test = data_process(date_slicer(df_sp, '2018-01-01', 1, i*rbp))
    train_deep_nnf(x_train, y_train, i*rbp)
    deep_nnf_valid_rmse_list.append(valid_fun(x_valid, i*rbp, deep_NNF))
    deep_nnf_test_results.append(test_fun(x_test, i*rbp, deep_NNF))
    portfolio_return(df, x_test, deep_NNF, i, deep_nnf_test_plot)
    index_return(df_sp, i, index_test_plot)
    deep_NNF.reset_parameters()

print(f'\nMin Valid RMSE is: {min(deep_nnf_valid_rmse_list)} for model i = {deep_nnf_valid_rmse_list.index(min(deep_nnf_valid_rmse_list))+1}')
print('Selected Model Test Results are:')
print('RMSE =', deep_nnf_test_results[deep_nnf_valid_rmse_list.index(min(deep_nnf_valid_rmse_list))]['RMSE'])
print('MEAN =', deep_nnf_test_results[deep_nnf_valid_rmse_list.index(min(deep_nnf_valid_rmse_list))]['MEAN'])
print('VOL =', deep_nnf_test_results[deep_nnf_valid_rmse_list.index(min(deep_nnf_valid_rmse_list))]['VOL'])

deep_best_result_index = deep_nnf_valid_rmse_list.index(min(deep_nnf_valid_rmse_list))
deep_nnf_test_plot = np.array(deep_nnf_test_plot).reshape(-1,1)
index_test_plot = np.array(index_test_plot).reshape(-1,1)


Deep NNF Training & Results for model 1.0:
Epoch 1 of 100 | MSE: 0.04560910910367966


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 100 of 100 | MSE: 4.2345433826085355e-09
Training time: 0.93
Validation RMSE: 0.0017728015957523635
Test RMSE: 0.0015109802660347826
Test MEAN: 1.001734803345647
Test VOL: 0.0053171887531660855

Deep NNF Training & Results for model 2.0:
Epoch 1 of 100 | MSE: 0.05329778045415878
Epoch 100 of 100 | MSE: 7.70110148096137e-07
Training time: 0.89
Validation RMSE: 0.002057817783039222
Test RMSE: 0.002574862189796817
Test MEAN: 0.997816797676421
Test VOL: 0.015116351935162815

Deep NNF Training & Results for model 3.0:
Epoch 1 of 100 | MSE: 0.04562446102499962
Epoch 100 of 100 | MSE: 5.030702610042681e-08
Training time: 0.90
Validation RMSE: 0.001913025321780243
Test RMSE: 0.0023273259604580554
Test MEAN: 1.000206695836756
Test VOL: 0.010884041748593948

Deep NNF Training & Results for model 4.0:
Epoch 1 of 100 | MSE: 0.06030300259590149
Epoch 100 of 100 | MSE: 1.3948465493740514e-06
Training time: 0.89
Validation RMSE: 0.0023355867219195118
Test RMSE: 0.0020490110882120083
Test MEAN: 

### **Shallow NNF Training**

In [47]:
# shallow nnf training function
def train_shallow_nnf(x_train, y_train, i):
    start_time_shallow_nnf = time.time()
    print(f'\nShallow NNF Training & Results for model {(i/rbp)+1}:')
    
    for epoch in range(num_epochs):
        y_train_pred = shallow_NNF(x_train)[0]
        loss_shallow_nnf = shallow_NNF_loss_fun(y_train_pred, y_train)
        if epoch == 0 or epoch == num_epochs-1:
            print(f'Epoch {epoch+1} of {num_epochs} | MSE: {loss_shallow_nnf.item()}')
        shallow_NNF_optimizer.zero_grad()
        loss_shallow_nnf.backward()
        shallow_NNF_optimizer.step()
        
    training_time = format(time.time()-start_time_shallow_nnf, '0.2f')
    print(f'Training time: {training_time}')

In [48]:
#shallow nnf
shallow_nnf_valid_rmse_list = []
shallow_nnf_test_results = []
shallow_nnf_test_plot = []

for i in range(int(24/rbp)):
    x_train = data_process(date_slicer(df, '2014-07-01', 36, i*rbp))
    y_train = data_process(date_slicer(df_sp, '2014-07-01', 36, i*rbp))
    x_valid = data_process(date_slicer(df, '2017-07-01', 6, i*rbp))
    y_valid = data_process(date_slicer(df_sp, '2017-07-01', 6, i*rbp))
    x_test = data_process(date_slicer(df, '2018-01-01', 1, i*rbp))
    y_test = data_process(date_slicer(df_sp, '2018-01-01', 1, i*rbp))
    
    train_shallow_nnf(x_train, y_train, i*rbp)
    shallow_nnf_valid_rmse_list.append(valid_fun(x_valid, i*rbp, shallow_NNF))
    shallow_nnf_test_results.append(test_fun(x_test, i*rbp, shallow_NNF))
    portfolio_return(df, x_test, shallow_NNF, i, shallow_nnf_test_plot)
    shallow_NNF.reset_parameters()
    

# print(f'\nMin Valid RMSE is: {min(valid_rmse_list)} for model i = {(deep_best_result_index)+1}')
print('Selected Model Test Results for model i =', (deep_best_result_index)+1, 'are: ')
print('RMSE =', shallow_nnf_test_results[(deep_best_result_index)]['RMSE'])
print('MEAN =', shallow_nnf_test_results[(deep_best_result_index)]['MEAN'])
print('VOL =', shallow_nnf_test_results[(deep_best_result_index)]['VOL'])

shallow_nnf_test_plot = np.array(shallow_nnf_test_plot).reshape(-1,1)


Shallow NNF Training & Results for model 1.0:
Epoch 1 of 100 | MSE: 0.04763383790850639
Epoch 100 of 100 | MSE: 1.731020375927983e-07
Training time: 0.69
Validation RMSE: 0.0014686670047935465
Test RMSE: 0.0014292690669289773
Test MEAN: 1.0018946458131222
Test VOL: 0.005344484371128251

Shallow NNF Training & Results for model 2.0:
Epoch 1 of 100 | MSE: 0.051557160913944244
Epoch 100 of 100 | MSE: 3.588329633430476e-09
Training time: 0.66
Validation RMSE: 0.0017001807186724856
Test RMSE: 0.0022673565546297786
Test MEAN: 0.9978997607124502
Test VOL: 0.015449192145345887

Shallow NNF Training & Results for model 3.0:
Epoch 1 of 100 | MSE: 0.04535500332713127
Epoch 100 of 100 | MSE: 3.6328646757510796e-08
Training time: 0.67
Validation RMSE: 0.0017613060638160796
Test RMSE: 0.0021662788703087667
Test MEAN: 1.0002432732098172
Test VOL: 0.010999400789633788

Shallow NNF Training & Results for model 4.0:
Epoch 1 of 100 | MSE: 0.06642987579107285
Epoch 100 of 100 | MSE: 2.7364030756871216e-0

### **1/N Model**

In [49]:
equal_w_model_valid_rmse_list = []
equal_w_model_test_results = []
equal_w_model_test_plot = []

for i in range(int(24/rbp)):
    print(f'\nEqual Weights Model Results for model {i+1}:')
    x_train = data_process(date_slicer(df, '2014-07-01', 36, i*rbp))
    y_train = data_process(date_slicer(df_sp, '2014-07-01', 36, i*rbp))
    x_valid = data_process(date_slicer(df, '2017-07-01', 6, i*rbp))
    y_valid = data_process(date_slicer(df_sp, '2017-07-01', 6, i*rbp))
    x_test = data_process(date_slicer(df, '2018-01-01', 1, i*rbp))
    y_test = data_process(date_slicer(df_sp, '2018-01-01', 1, i*rbp))
    
    equal_w_model_valid_rmse_list.append(valid_fun(x_valid, i*rbp, equal_w_model))
    equal_w_model_test_results.append(test_fun(x_test, i*rbp, equal_w_model))
    portfolio_return(df, x_test, equal_w_model, i, equal_w_model_test_plot)
    
print('Selected Model Test Results for model i =', (deep_best_result_index)+1, 'are: ')
print('RMSE =', equal_w_model_test_results[(deep_best_result_index)]['RMSE'])
print('MEAN =', equal_w_model_test_results[(deep_best_result_index)]['MEAN'])
print('VOL =', equal_w_model_test_results[(deep_best_result_index)]['VOL'])

equal_w_model_test_plot = np.array(equal_w_model_test_plot).reshape(-1,1)


Equal Weights Model Results for model 1:
Validation RMSE: 0.0013752466607634818
Test RMSE: 0.00141308068973907
Test MEAN: 1.00194947794337
Test VOL: 0.005348201810361211

Equal Weights Model Results for model 2:
Validation RMSE: 0.0014103582761839522
Test RMSE: 0.002225473810475016
Test MEAN: 0.9979382024584036
Test VOL: 0.015471723202182033

Equal Weights Model Results for model 3:
Validation RMSE: 0.00161101492926301
Test RMSE: 0.0021228799964131116
Test MEAN: 1.0002504247997188
Test VOL: 0.011031703269168955

Equal Weights Model Results for model 4:
Validation RMSE: 0.00178815568268382
Test RMSE: 0.0016167972928903618
Test MEAN: 1.001305977361471
Test VOL: 0.008808539875808678

Equal Weights Model Results for model 5:
Validation RMSE: 0.0017533218637125797
Test RMSE: 0.0012496906685729259
Test MEAN: 1.0007751300633236
Test VOL: 0.0062942423298088844

Equal Weights Model Results for model 6:
Validation RMSE: 0.0017002638599400465
Test RMSE: 0.0012049049143032751
Test MEAN: 1.0001325

In [50]:
# print test results
print(f'Models test results with rebalancing period of {rbp} month(s) are: ')
deep_temp = pd.DataFrame(deep_nnf_test_results)
deep_temp = deep_temp.iloc[deep_best_result_index]
shallow_temp = pd.DataFrame(shallow_nnf_test_results)
shallow_temp = shallow_temp.iloc[deep_best_result_index]
equal_w_temp = pd.DataFrame(equal_w_model_test_results)
equal_w_temp = equal_w_temp.iloc[deep_best_result_index]

sp_temp_rmse = '-'
sp_temp_mean = daily_return(date_slicer(df_sp, '2018-01-01', 6, deep_best_result_index)).mean()[0]
sp_temp_std = daily_return(date_slicer(df_sp, '2018-01-01', 6, deep_best_result_index)).std()[0]
sp_temp = pd.DataFrame([sp_temp_rmse, sp_temp_mean, sp_temp_std], index=deep_temp.index)

final_result = pd.concat([deep_temp, shallow_temp, equal_w_temp, sp_temp], axis=1, join='inner')
final_result.columns = ['Deep NNF', 'Shallow NNF', '1/N Model', 'S&P 500']
final_result

Models test results with rebalancing period of 1 month(s) are: 


Unnamed: 0,Deep NNF,Shallow NNF,1/N Model,S&P 500
RMSE,0.001511,0.001429,0.001413,-
MEAN,1.001735,1.001895,1.001949,1.000121
VOL,0.005317,0.005344,0.005348,0.010367


In [51]:
print(f'Average of test RMSE for each model: ')

deep_nnf_test_rmse_mean = 0
for i in range(int(24/rbp)):
    deep_nnf_test_rmse_mean += deep_nnf_test_results[i]['RMSE']
print(f'Deep NNF: {deep_nnf_test_rmse_mean/int(24/rbp)}')

shallow_nnf_test_rmse_mean = 0
for i in range(int(24/rbp)):
    shallow_nnf_test_rmse_mean += shallow_nnf_test_results[i]['RMSE']
print(f'Shallow NNF: {shallow_nnf_test_rmse_mean/int(24/rbp)}')

equal_w_model_test_rmse_mean = 0
for i in range(int(24/rbp)):
    equal_w_model_test_rmse_mean += equal_w_model_test_results[i]['RMSE']
print(f'Equal weight model: {equal_w_model_test_rmse_mean/int(24/rbp)}')

Average of test RMSE for each model: 
Deep NNF: 0.0028145955760775967
Shallow NNF: 0.0018910813995882847
Equal weight model: 0.001811103162995504


In [52]:
plot_test = pd.concat([pd.DataFrame(deep_nnf_test_plot), pd.DataFrame(shallow_nnf_test_plot),
                       pd.DataFrame(equal_w_model_test_plot), pd.DataFrame(index_test_plot)], axis=1, join='inner')
plot_test.columns = ['Deep NNF', 'Shallow NNF', '1/N Model', 'S&P 500']

In [53]:
import cufflinks as cf
cf.set_config_file(offline = True)

plot_test.iplot()