In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta as rd
import time
import math
from sklearn.metrics import mean_squared_error

In [2]:
# stocks data csv read
df = pd.read_csv('data.csv')
df = df.set_index('Date')

# s&p data csv read
df_sp = pd.read_csv('sp500.csv')
df_sp = df_sp.set_index('Date')

In [3]:
def date_slicer(df, start, duration, rebalancing_period=0):
    start = str(datetime.strptime(start, '%Y-%m-%d').date() + rd(months=rebalancing_period))
    end = str(datetime.strptime(start, '%Y-%m-%d').date() + rd(months=duration) - rd(days=1))
    return df.loc[start:end]

In [4]:
def data_process(df):
    df = df.pct_change()
    df = df.tail(-1)
    df = df + 1
    df = df.cumprod()
    df = df - 1
    df = df.iloc[-1,:]
    df = df.to_numpy()
    df = torch.from_numpy(df).type(torch.Tensor)
    return df

In [5]:
def daily_change(df):
    df = df.pct_change()
    df = df.tail(-1)
    return df

In [6]:
def daily_return(df):
    df = df.pct_change()
    df = df.tail(-1)
    df = df + 1
    return df

In [7]:
# shallow nnf biuld
class shallow_NNF(nn.Module):
    def __init__(self, input_dim, hidden_size, num_classes):
        super(shallow_NNF, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=0)
        
    def reset_parameters(self):
        self.fc1.reset_parameters()
        self.fc2.reset_parameters()
        
    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.softmax(self.fc2(out))
        weights = out
        cumulative_change = sum(out * x)
        return cumulative_change, weights

In [8]:
# deep nnf build
class deep_NNF(nn.Module):
    def __init__(self, input_dim, hidden_size1, hidden_size2, hidden_size3,
                 hidden_size4, hidden_size5, num_classes, dropout_p = 0.2):
        super(deep_NNF, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        self.fc4 = nn.Linear(hidden_size3, hidden_size4)
        self.fc5 = nn.Linear(hidden_size4, hidden_size5)
        self.fc6 = nn.Linear(hidden_size5, num_classes)
    
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_p)
        self.softmax = nn.Softmax(dim=0)
        
    def reset_parameters(self):
        self.fc1.reset_parameters()
        self.fc2.reset_parameters()
        self.fc3.reset_parameters()
        self.fc4.reset_parameters()
        self.fc5.reset_parameters()
        self.fc6.reset_parameters()
        
    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.dropout(out)
        out = self.relu(self.fc2(out))
        out = self.dropout(out)
        out = self.relu(self.fc3(out))
        out = self.dropout(out)
        out = self.relu(self.fc4(out))
        out = self.dropout(out)
        out = self.relu(self.fc5(out))
        out = self.softmax(self.fc6(out))
        weights = out
        cumulative_change = sum(out * x)
        return cumulative_change, weights

In [9]:
# 1/N model build
class equal_w_model():
    def __init__(self, df):
        self.df = df
        self.performance()
        
    def performance(self):
        self.df = np.array(self.df)
        weights = np.ones((len(self.df), 1)) * (1/len(self.df))
        cumulative_change = sum(np.multiply(weights, self.df.reshape(-1,1)))
        return cumulative_change, weights.reshape(-1)

In [22]:
# rebalancing period = one or three months
rbp = 3

# epochs
num_epochs = 200

In [11]:
# shallow_nnf hyperparameters
input_dim = 471
hidden_size = 471
num_classes = 471
lr = 1e-3

In [12]:
# shallow nnf tune
shallow_NNF = shallow_NNF(input_dim=input_dim, hidden_size=hidden_size, num_classes=num_classes)
shallow_NNF_loss_fun = torch.nn.MSELoss(reduction='mean')
shallow_NNF_optimizer = torch.optim.Adam(shallow_NNF.parameters(), lr=lr)

In [13]:
# deep_nnf hyperparameters
input_dim = 471
hidden_size1 = 471
hidden_size2 = 471
hidden_size3 = 471
hidden_size4 = 471
hidden_size5 = 471
num_classes = 471
lr = 1e-10
dropout_p = 0.5

In [14]:
# deep nnf tune
deep_NNF = deep_NNF(input_dim=input_dim, hidden_size1=hidden_size1, hidden_size2=hidden_size2, 
                    hidden_size3=hidden_size3, hidden_size4=hidden_size4, hidden_size5=hidden_size5,
                    num_classes=num_classes)
deep_NNF_loss_fun = torch.nn.MSELoss(reduction='mean')
deep_NNF_optimizer = torch.optim.Adam(deep_NNF.parameters(), lr=lr)

In [15]:
# RMSE
def RMSE(x, y, weights):
    temp = 0
    for i in range(len(x)):
        temp += (sum(x.iloc[i] * weights) - y.iloc[i]) ** 2
    return math.sqrt(temp/len(x))

In [16]:
# MEAN
def MEAN(x, weights):
    temp = []
    for i in range(len(x)):
        temp.append(sum(x.iloc[i] * weights))
    temp = np.array(temp)
    return temp.mean()

In [17]:
# Volatility
def VOL(x, weights):
    temp = []
    for i in range(len(x)):
        temp.append(sum(x.iloc[i] * weights))
    temp = np.array(temp)
    return temp.std()

In [18]:
def valid_fun(x_valid, i, model):
    x_change = daily_change(date_slicer(df, '2017-07-01', 6, i))
    y_change = daily_change(date_slicer(df_sp, '2017-07-01', 6, i))
    # x_return = daily_return(date_slicer(df, '2017-07-01', 6, i))
    # y_return = daily_return(date_slicer(df_sp, '2017-07-01', 6, i))
    
    if model == equal_w_model:
        weights = model(x_valid).performance()[1]
    else:
        weights = np.array(model(x_valid)[1].detach())
    
    valid_rmse = RMSE(x_change, y_change, weights)
    # valid_mean = MEAN(x_return, weights)
    # valid_vol  = VOL(x_return, weights)
    
    print(f'Validation RMSE: {valid_rmse}')
    # print(f'Validation MEAN: {valid_mean}')
    # print(f'Validation VOL: {valid_vol}')
    
    return valid_rmse

In [19]:
def test_fun(x_test, i, model):
    x_change = daily_change(date_slicer(df, '2018-01-01', 6, i))
    y_change = daily_change(date_slicer(df_sp, '2018-01-01', 6, i))
    x_return = daily_return(date_slicer(df, '2018-01-01', 6, i))
    y_return = daily_return(date_slicer(df_sp, '2018-01-01', 6, i))
    
    if model == equal_w_model:
        weights = model(x_test).performance()[1]
    else:
        weights = np.array(model(x_test)[1].detach())
    
    test_rmse = RMSE(x_change, y_change, weights)
    test_mean = MEAN(x_return, weights)
    test_vol  = VOL(x_return, weights)
    test_dic = {'RMSE': test_rmse, 'MEAN': test_mean, 'VOL': test_vol}
    
    print(f'Test RMSE: {test_rmse}')
    print(f'Test MEAN: {test_mean}')
    print(f'Test VOL: {test_vol}')
    
    return test_dic

### **Deep NNF Training**

In [20]:
# deep nnf training function
def train_deep_nnf(x_train, y_train, i):
    start_time_deep_nnf = time.time()
    print(f'\nDeep NNF Training & Results for model {i+1}:')
    
    for epoch in range(num_epochs):
        y_train_pred = deep_NNF(x_train)[0]
        loss_deep_nnf = deep_NNF_loss_fun(y_train_pred, y_train)
        if epoch == 0 or epoch == num_epochs-1:
            print(f'Epoch {epoch+1} of {num_epochs} | MSE: {loss_deep_nnf.item()}')
        deep_NNF_optimizer.zero_grad()
        loss_deep_nnf.backward()
        deep_NNF_optimizer.step()
        
    training_time = format(time.time()-start_time_deep_nnf, '0.2f')
    print(f'Training time: {training_time}')

In [25]:
# deep nnf
deep_nnf_valid_rmse_list = []
deep_nnf_test_results = []

for i in range(rbp):
    x_train = data_process(date_slicer(df, '2014-07-01', 36, i))
    y_train = data_process(date_slicer(df_sp, '2014-07-01', 36, i))
    x_valid = data_process(date_slicer(df, '2017-07-01', 6, i))
    y_valid = data_process(date_slicer(df_sp, '2017-07-01', 6, i))
    x_test = data_process(date_slicer(df, '2018-01-01', 1, i))
    y_test = data_process(date_slicer(df_sp, '2018-01-01', 1, i))
    train_deep_nnf(x_train, y_train, i)
    deep_nnf_valid_rmse_list.append(valid_fun(x_valid, i, deep_NNF))
    deep_nnf_test_results.append(test_fun(x_test, i, deep_NNF))
    deep_NNF.reset_parameters()

print(f'\nMin Valid RMSE is: {min(deep_nnf_valid_rmse_list)} for model i = {deep_nnf_valid_rmse_list.index(min(deep_nnf_valid_rmse_list))+1}')
print('Selected Model Test Results are:')
print('RMSE =', deep_nnf_test_results[deep_nnf_valid_rmse_list.index(min(deep_nnf_valid_rmse_list))]['RMSE'])
print('MEAN =', deep_nnf_test_results[deep_nnf_valid_rmse_list.index(min(deep_nnf_valid_rmse_list))]['MEAN'])
print('VOL =', deep_nnf_test_results[deep_nnf_valid_rmse_list.index(min(deep_nnf_valid_rmse_list))]['VOL'])

deep_best_result_index = deep_nnf_valid_rmse_list.index(min(deep_nnf_valid_rmse_list))


Deep NNF Training & Results for model 1:
Epoch 1 of 200 | MSE: 0.04553482308983803
Epoch 200 of 200 | MSE: 0.04562773182988167
Training time: 3.97
Validation RMSE: 0.0013802198521385384
Test RMSE: 0.0016818025855540072
Test MEAN: 1.000227226117704
Test VOL: 0.009423321828742793

Deep NNF Training & Results for model 4:
Epoch 1 of 200 | MSE: 0.053461603820323944
Epoch 200 of 200 | MSE: 0.05361170694231987
Training time: 3.81
Validation RMSE: 0.0014071480451117075
Test RMSE: 0.001903991995726517
Test MEAN: 1.0001880335315916
Test VOL: 0.009378009200521557

Deep NNF Training & Results for model 7:
Epoch 1 of 200 | MSE: 0.045039452612400055
Epoch 200 of 200 | MSE: 0.045191604644060135
Training time: 4.07
Validation RMSE: 0.0016138310295803484
Test RMSE: 0.0017931928362382033
Test MEAN: 1.0007236508242823
Test VOL: 0.0073404343030933706

Deep NNF Training & Results for model 10:
Epoch 1 of 200 | MSE: 0.059946682304143906
Epoch 200 of 200 | MSE: 0.059902384877204895
Training time: 4.02
Vali

### **Shallow NNF Training**

In [47]:
# shallow nnf training function
def train_shallow_nnf(x_train, y_train, i):
    start_time_shallow_nnf = time.time()
    print(f'\nShallow NNF Training & Results for model {i+1}:')
    
    for epoch in range(num_epochs):
        y_train_pred = shallow_NNF(x_train)[0]
        loss_shallow_nnf = shallow_NNF_loss_fun(y_train_pred, y_train)
        if epoch == 0 or epoch == num_epochs-1:
            print(f'Epoch {epoch+1} of {num_epochs} | MSE: {loss_shallow_nnf.item()}')
        shallow_NNF_optimizer.zero_grad()
        loss_shallow_nnf.backward()
        shallow_NNF_optimizer.step()
        
    training_time = format(time.time()-start_time_shallow_nnf, '0.2f')
    print(f'Training time: {training_time}')

In [48]:
#shallow nnf
shallow_nnf_valid_rmse_list = []
shallow_nnf_test_results = []

for i in range(24):
    x_train = data_process(date_slicer(df, '2014-07-01', 36, i))
    y_train = data_process(date_slicer(df_sp, '2014-07-01', 36, i))
    x_valid = data_process(date_slicer(df, '2017-07-01', 6, i))
    y_valid = data_process(date_slicer(df_sp, '2017-07-01', 6, i))
    x_test = data_process(date_slicer(df, '2018-01-01', 1, i))
    y_test = data_process(date_slicer(df_sp, '2018-01-01', 1, i))
    
    train_shallow_nnf(x_train, y_train, i)
    shallow_nnf_valid_rmse_list.append(valid_fun(x_valid, i, shallow_NNF))
    shallow_nnf_test_results.append(test_fun(x_test, i, shallow_NNF))
    shallow_NNF.reset_parameters()

# print(f'\nMin Valid RMSE is: {min(valid_rmse_list)} for model i = {(deep_best_result_index)+1}')
print('Selected Model Test Results for model i =', (deep_best_result_index)+1, 'are: ')
print('RMSE =', shallow_nnf_test_results[(deep_best_result_index)]['RMSE'])
print('MEAN =', shallow_nnf_test_results[(deep_best_result_index)]['MEAN'])
print('VOL =', shallow_nnf_test_results[(deep_best_result_index)]['VOL'])


Shallow NNF Training & Results for model 1:
Epoch 1 of 200 | MSE: 0.0497044138610363
Epoch 200 of 200 | MSE: 5.197842156690058e-12
Training time: 2.29
Validation RMSE: 0.0015006693957915719
Test RMSE: 0.0017488709620200585
Test MEAN: 1.0001990221639507
Test VOL: 0.009378312741349325

Shallow NNF Training & Results for model 2:
Epoch 1 of 200 | MSE: 0.05137931555509567
Epoch 200 of 200 | MSE: 2.220446049250313e-14
Training time: 2.31
Validation RMSE: 0.0016117845038208908
Test RMSE: 0.0020302161278026288
Test MEAN: 1.000167273009345
Test VOL: 0.009326726056867657

Shallow NNF Training & Results for model 3:
Epoch 1 of 200 | MSE: 0.047346193343400955
Epoch 200 of 200 | MSE: 8.01581023779363e-14
Training time: 7.77
Validation RMSE: 0.001805205446821438
Test RMSE: 0.001828172435966031
Test MEAN: 1.0007125094585745
Test VOL: 0.007334930446856029

Shallow NNF Training & Results for model 4:
Epoch 1 of 200 | MSE: 0.060038890689611435
Epoch 200 of 200 | MSE: 1.4210854715202004e-12
Training ti

### **1/N Model**

In [49]:
equal_w_model_valid_rmse_list = []
equal_w_model_test_results = []

for i in range(24):
    print(f'\nEqual Weights Model Results for model {i+1}:')
    x_train = data_process(date_slicer(df, '2014-07-01', 36, i))
    y_train = data_process(date_slicer(df_sp, '2014-07-01', 36, i))
    x_valid = data_process(date_slicer(df, '2017-07-01', 6, i))
    y_valid = data_process(date_slicer(df_sp, '2017-07-01', 6, i))
    x_test = data_process(date_slicer(df, '2018-01-01', 1, i))
    y_test = data_process(date_slicer(df_sp, '2018-01-01', 1, i))
    
    equal_w_model_valid_rmse_list.append(valid_fun(x_valid, i, equal_w_model))
    equal_w_model_test_results.append(test_fun(x_test, i, equal_w_model))
    
print('Selected Model Test Results for model i =', (deep_best_result_index)+1, 'are: ')
print('RMSE =', equal_w_model_test_results[(deep_best_result_index)]['RMSE'])
print('MEAN =', equal_w_model_test_results[(deep_best_result_index)]['MEAN'])
print('VOL =', equal_w_model_test_results[(deep_best_result_index)]['VOL'])


Equal Weights Model Results for model 1:
Validation RMSE: 0.0013752466607634818
Test RMSE: 0.0016838896697022906
Test MEAN: 1.000223979363228
Test VOL: 0.009419904643453112

Equal Weights Model Results for model 2:
Validation RMSE: 0.0014103582761839522
Test RMSE: 0.0019078407523448037
Test MEAN: 1.0001866853162422
Test VOL: 0.009375762658917808

Equal Weights Model Results for model 3:
Validation RMSE: 0.00161101492926301
Test RMSE: 0.0017918743187162148
Test MEAN: 1.0007247171178195
Test VOL: 0.0073472545924650355

Equal Weights Model Results for model 4:
Validation RMSE: 0.00178815568268382
Test RMSE: 0.0017472761704575702
Test MEAN: 1.0008824136684178
Test VOL: 0.005771463367467524

Equal Weights Model Results for model 5:
Validation RMSE: 0.0017533218637125797
Test RMSE: 0.002080505442953261
Test MEAN: 1.0001083627465603
Test VOL: 0.0071012566865318

Equal Weights Model Results for model 6:
Validation RMSE: 0.0017002638599400465
Test RMSE: 0.00228250529001297
Test MEAN: 1.0001620

In [50]:
# print test results
print(f'Models test results with rebalancing period of {rbp} month(s) are: ')
deep_temp = pd.DataFrame(deep_nnf_test_results)
deep_temp = deep_temp.iloc[deep_best_result_index]
shallow_temp = pd.DataFrame(shallow_nnf_test_results)
shallow_temp = shallow_temp.iloc[deep_best_result_index]
equal_w_temp = pd.DataFrame(equal_w_model_test_results)
equal_w_temp = equal_w_temp.iloc[deep_best_result_index]

sp_temp_rmse = '-'
sp_temp_mean = daily_return(date_slicer(df_sp, '2018-01-01', 6, deep_best_result_index)).mean()[0]
sp_temp_std = daily_return(date_slicer(df_sp, '2018-01-01', 6, deep_best_result_index)).std()[0]
sp_temp = pd.DataFrame([sp_temp_rmse, sp_temp_mean, sp_temp_std], index=deep_temp.index)

final_result = pd.concat([deep_temp, shallow_temp, equal_w_temp, sp_temp], axis=1, join='inner')
final_result.columns = ['Deep NNF', 'Shallow NNF', '1/N Model', 'S&P 500']
final_result

Models test results with rebalancing period of 1 month(s) are: 


Unnamed: 0,Deep NNF,Shallow NNF,1/N Model,S&P 500
RMSE,0.001684,0.001749,0.001684,-
MEAN,1.000226,1.000199,1.000224,1.000121
VOL,0.00942,0.009378,0.00942,0.010367
