In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta as rd
import time
import math
from sklearn.metrics import mean_squared_error

In [2]:
# stocks data csv read
df = pd.read_csv('data.csv')
df = df.set_index('Date')

# s&p data csv read
df_sp = pd.read_csv('sp500.csv')
df_sp = df_sp.set_index('Date')

In [3]:
# stocks data csv read for daily change
df_change = pd.read_csv('data.csv')
df_change = df_change.set_index('Date')

# s&p data csv read for daily change
df_sp_change = pd.read_csv('sp500.csv')
df_sp_change = df_sp_change.set_index('Date')

In [4]:
def date_slicer(df, start, duration, rebalancing_period=0):
    start = str(datetime.strptime(start, '%Y-%m-%d').date() + rd(months=rebalancing_period))
    end = str(datetime.strptime(start, '%Y-%m-%d').date() + rd(months=duration) - rd(days=1))
    return df.loc[start:end]

In [5]:
def data_process(df):
    df = df.pct_change()
    df = df.tail(-1)
    df = df + 1
    df = df.cumprod()
    df = df - 1
    df = df.iloc[-1,:]
    df = df.to_numpy()
    df = torch.from_numpy(df).type(torch.Tensor)
    return df

In [6]:
def daily_change(df):
    df = df - df.shift(1)
    df = df.tail(-1)
    return df

In [7]:
# shallow nnf biuld
class shallow_NNF(nn.Module):
    def __init__(self, input_dim, hidden_size, num_classes):
        super(shallow_NNF, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)
        
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=0)
        
    def reset_parameters(self):
        self.fc1.reset_parameters()
        self.fc2.reset_parameters()
        
    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.softmax(self.fc2(out))
        weights = out
        cumulative_change = sum(out * x)
        return cumulative_change, weights

In [8]:
# deep nnf build
class deep_NNF(nn.Module):
    def __init__(self, input_dim, hidden_size1, hidden_size2, hidden_size3,
                 hidden_size4, hidden_size5, num_classes, dropout_p = 0.2):
        super(deep_NNF, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        self.fc4 = nn.Linear(hidden_size3, hidden_size4)
        self.fc5 = nn.Linear(hidden_size4, hidden_size5)
        self.fc6 = nn.Linear(hidden_size5, num_classes)
    
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_p)
        self.softmax = nn.Softmax(dim=0)
        
    def reset_parameters(self):
        self.fc1.reset_parameters()
        self.fc2.reset_parameters()
        self.fc3.reset_parameters()
        self.fc4.reset_parameters()
        self.fc5.reset_parameters()
        self.fc6.reset_parameters()
        
    def forward(self, x):
        out = self.relu(self.fc1(x))
        out = self.dropout(out)
        out = self.relu(self.fc2(out))
        out = self.dropout(out)
        out = self.relu(self.fc3(out))
        out = self.dropout(out)
        out = self.relu(self.fc4(out))
        out = self.dropout(out)
        out = self.relu(self.fc5(out))
        out = self.softmax(self.fc6(out))
        weights = out
        cumulative_change = sum(out * x)
        return cumulative_change, weights

In [9]:
# 1/N model build
class equal_w_model():
    def __init__(self, df):
        self.df = df
        self.performance()
        
    def performance(self):
        self.df = np.array(self.df)
        weights = np.ones((len(self.df), 1)) * (1/len(self.df))
        out = sum(np.multiply(weights, self.df.reshape(-1,1)))
        return out

In [10]:
# epochs
num_epochs = 100

# shallow_nnf hyperparameters
input_dim = 471
hidden_size = 471
num_classes = 471
lr = 1e-3

In [11]:
# shallow nnf tune
shallow_NNF = shallow_NNF(input_dim=input_dim, hidden_size=hidden_size, num_classes=num_classes)
shallow_NNF_loss_fun = torch.nn.MSELoss(reduction='mean')
shallow_NNF_optimizer = torch.optim.Adam(shallow_NNF.parameters(), lr=lr)

In [12]:
# epochs
num_epochs = 100

# deep_nnf hyperparameters
input_dim = 471
hidden_size1 = 471
hidden_size2 = 471
hidden_size3 = 471
hidden_size4 = 471
hidden_size5 = 471
num_classes = 471
lr = 0.001
dropout_p = 0.2

In [13]:
# deep nnf tune
deep_NNF = deep_NNF(input_dim=input_dim, hidden_size1=hidden_size1, hidden_size2=hidden_size2, 
                    hidden_size3=hidden_size3, hidden_size4=hidden_size4, hidden_size5=hidden_size5,
                    num_classes=num_classes)
deep_NNF_loss_fun = torch.nn.MSELoss(reduction='mean')
deep_NNF_optimizer = torch.optim.Adam(deep_NNF.parameters(), lr=lr)

In [17]:
# RMSE
def RMSE(x, y, weights):
    temp = 0
    for i in range(len(x)):
        temp += (sum(x.iloc[i] * weights) - y.iloc[i]) ** 2
    return math.sqrt(temp/len(x))

In [18]:
# shallow nnf validation function
def valid_fun(x_valid, y_valid, i, model):
    x = daily_change(date_slicer(df_change, '2017-01-01', 12, i))
    y = daily_change(date_slicer(df_sp_change, '2017-01-01', 12, i))
    weights = np.array(model(x_valid)[1].detach())
    valid_rmse = RMSE(x, y, weights)
    return print(f'Validation RMSE: {valid_rmse}')

### **Shallow NNF Training**

In [None]:
# shallow nnf training function
def train_shallow_nnf(x_train, y_train, i):
    start_time_shallow_nnf = time.time()
    print(f'\nShallow NNF Training & Results for model {i+1}:')
    
    for epoch in range(num_epochs):
        y_train_pred = shallow_NNF(x_train)[0]
        loss_shallow_nnf = shallow_NNF_loss_fun(y_train_pred, y_train)
        if epoch == 0 or epoch == num_epochs-1:
            print(f'Epoch {epoch+1} of {num_epochs} | MSE: {loss_shallow_nnf.item()}')
        shallow_NNF_optimizer.zero_grad()
        loss_shallow_nnf.backward()
        shallow_NNF_optimizer.step()
        
    training_time = format(time.time()-start_time_shallow_nnf, '0.2f')
    print(f'Training time: {training_time}')

In [19]:
#shallow nnf
for i in range(24):
    x_train = data_process(date_slicer(df, '2014-07-01', 30, i))
    y_train = data_process(date_slicer(df_sp, '2014-07-01', 30, i))
    x_valid = data_process(date_slicer(df, '2017-01-01', 12, i))
    y_valid = data_process(date_slicer(df_sp, '2017-01-01', 12, i))
    # x_test = data_process(date_slicer(df, '2014-07-01', 30, i))
    # y_test = data_process(date_slicer(df_sp, '2014-07-01', 30, i))
    train_shallow_nnf(x_train, y_train, i)
    valid_fun(x_valid, y_valid, i, shallow_NNF)
    # test computation
    shallow_NNF.reset_parameters()


Shallow NNF Training & Results for model 1:
Epoch 1 of 100 | MSE: 0.023609833791851997


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 100 of 100 | MSE: 1.3682943666992742e-10
Training time: 0.61
Validation RMSE: 9.98002054730571

Shallow NNF Training & Results for model 2:
Epoch 1 of 100 | MSE: 0.028481587767601013
Epoch 100 of 100 | MSE: 1.4893886124411893e-08
Training time: 0.62
Validation RMSE: 10.826153482242791

Shallow NNF Training & Results for model 3:
Epoch 1 of 100 | MSE: 0.0281903687864542
Epoch 100 of 100 | MSE: 2.6983295597915458e-09
Training time: 0.60
Validation RMSE: 16.03049950386817

Shallow NNF Training & Results for model 4:
Epoch 1 of 100 | MSE: 0.03656870871782303
Epoch 100 of 100 | MSE: 1.2982335206856987e-09
Training time: 0.61
Validation RMSE: 18.463641912479574

Shallow NNF Training & Results for model 5:
Epoch 1 of 100 | MSE: 0.026762936264276505
Epoch 100 of 100 | MSE: 6.239586625156335e-09
Training time: 0.59
Validation RMSE: 19.74935393116214

Shallow NNF Training & Results for model 6:
Epoch 1 of 100 | MSE: 0.028765594586730003
Epoch 100 of 100 | MSE: 2.4430433231970028e-09
Traini

### **Deep NNF Training**

In [20]:
# deep nnf training function
def train_deep_nnf(x_train, y_train, i):
    start_time_deep_nnf = time.time()
    print(f'\nDeep NNF Training & Results for model {i+1}:')
    
    for epoch in range(num_epochs):
        y_train_pred = deep_NNF(x_train)[0]
        loss_deep_nnf = deep_NNF_loss_fun(y_train_pred, y_train)
        if epoch == 0 or epoch == num_epochs-1:
            print(f'Epoch {epoch+1} of {num_epochs} | MSE: {loss_deep_nnf.item()}')
        deep_NNF_optimizer.zero_grad()
        loss_deep_nnf.backward()
        deep_NNF_optimizer.step()
        
    training_time = format(time.time()-start_time_deep_nnf, '0.2f')
    print(f'Training time: {training_time}')

In [21]:
#deep nnf
for i in range(24):
    x_train = data_process(date_slicer(df, '2014-07-01', 30, i))
    y_train = data_process(date_slicer(df_sp, '2014-07-01', 30, i))
    x_valid = data_process(date_slicer(df, '2017-01-01', 12, i))
    y_valid = data_process(date_slicer(df_sp, '2017-01-01', 12, i))
    # x_test = data_process(date_slicer(df, '2014-07-01', 30, i))
    # y_test = data_process(date_slicer(df_sp, '2014-07-01', 30, i))
    train_deep_nnf(x_train, y_train, i)
    valid_fun(x_valid, y_valid, i, deep_NNF)
    # test computation
    deep_NNF.reset_parameters()


Deep NNF Training & Results for model 1:
Epoch 1 of 100 | MSE: 0.02248799242079258
Epoch 100 of 100 | MSE: 3.2998104249060134e-09
Training time: 1.30
Validation RMSE: 9.977199195410439

Deep NNF Training & Results for model 2:
Epoch 1 of 100 | MSE: 0.029834376648068428
Epoch 100 of 100 | MSE: 8.860480704697693e-08
Training time: 1.56
Validation RMSE: 10.866355446291402

Deep NNF Training & Results for model 3:
Epoch 1 of 100 | MSE: 0.027851076796650887
Epoch 100 of 100 | MSE: 4.277787013506895e-08
Training time: 1.18
Validation RMSE: 16.283187696319988

Deep NNF Training & Results for model 4:
Epoch 1 of 100 | MSE: 0.03567255660891533
Epoch 100 of 100 | MSE: 4.149808773945551e-06
Training time: 1.20
Validation RMSE: 18.485054295862785

Deep NNF Training & Results for model 5:
Epoch 1 of 100 | MSE: 0.02651566080749035
Epoch 100 of 100 | MSE: 1.3530380783777218e-05
Training time: 1.17
Validation RMSE: 19.843083368304537

Deep NNF Training & Results for model 6:
Epoch 1 of 100 | MSE: 0.0

In [22]:
# def loss_plot(hist_model):   
#     plt.plot(hist_model, color='r')
#     plt.title(f'Loss Plot')
#     plt.xlabel('Epoch')
#     plt.ylabel('Loss')
#     return plt.show()