In [1]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from datetime import datetime
from dateutil.relativedelta import relativedelta as rd
import time
import math
from sklearn.metrics import mean_squared_error

In [42]:
# Read the stocks table and key its rows by the 'Date' column.
df = pd.read_csv('data.csv').set_index('Date')

# Read the S&P table and key its rows by the 'Date' column as well.
df_sp = pd.read_csv('sp500.csv').set_index('Date')

In [3]:
# stocks data csv read for daily change
df_change = pd.read_csv('data.csv').set_index('Date')

# s&p data csv read for daily change
# This must come from the S&P file; reading 'data.csv' here would only
# produce a second copy of the stocks table under an S&P name.
df_sp_change = pd.read_csv('sp500.csv').set_index('Date')

In [None]:
def date_slicer(df, start, duration, rebalancing_period=0):
    """Return the rows of ``df`` that fall inside a month-based date window.

    Args:
        df: Object sliced with ``df.loc[start:end]`` using 'YYYY-MM-DD'
            strings (assumes the index labels are comparable with such
            strings -- TODO confirm against the loaded CSVs).
        start: Base start date as a '%Y-%m-%d' string.
        duration: Window length in months.
        rebalancing_period: Number of months by which the whole window is
            shifted forward from ``start``.

    Returns:
        ``df.loc[first_day:last_day]`` where ``last_day`` is one day before
        ``first_day`` plus ``duration`` months.
    """
    first_day = datetime.strptime(start, '%Y-%m-%d').date() + rd(months=rebalancing_period)
    last_day = first_day + rd(months=duration) - rd(days=1)
    return df.loc[str(first_day):str(last_day)]

In [4]:
def data_process(df):
    """Collapse a table into one compounded percentage change per column.

    Row-to-row percentage changes are computed, the first (undefined) row is
    dropped, and the changes are compounded over the remaining rows.

    Args:
        df: pandas DataFrame; each column is processed independently.

    Returns:
        A tensor of type ``torch.Tensor`` holding, for every column, the
        compounded change at the last row.
    """
    # Growth factors (1 + pct change) for every row except the first.
    growth = df.pct_change().tail(-1) + 1
    # Compound the factors; the last row carries the product over all rows.
    compounded = growth.cumprod().iloc[-1, :] - 1
    return torch.from_numpy(compounded.to_numpy()).type(torch.Tensor)

In [5]:
def daily_change(df):
    """Return row-over-row differences of ``df`` without the first row.

    Args:
        df: pandas object supporting ``shift`` and subtraction.

    Returns:
        ``df`` minus itself shifted down by one row, with the first row
        (which has no predecessor) removed.
    """
    previous_row = df.shift(1)
    return df.sub(previous_row).iloc[1:]

In [23]:
# shallow nnf build
class shallow_NNF(nn.Module):
    """One-hidden-layer network that turns an input vector into softmax weights.

    The forward pass returns both the weights and the weighted sum of the
    input itself, so ``num_classes`` must broadcast against the last
    dimension of the input (in practice ``num_classes == input_dim``).

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_size: Width of the hidden layer.
        num_classes: Number of weights produced.
    """

    def __init__(self, input_dim, hidden_size, num_classes):
        # Zero-argument super(): it does not look the class up by its global
        # name, so construction keeps working even after the notebook
        # rebinds `shallow_NNF` to a model instance.
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

        self.relu = nn.ReLU()
        # Normalise over the last axis, the one nn.Linear operates on.
        # Identical to dim=0 for a 1-D input, and correct for batched input.
        self.softmax = nn.Softmax(dim=-1)

    def reset_parameters(self):
        """Re-initialise both linear layers in place."""
        self.fc1.reset_parameters()
        self.fc2.reset_parameters()

    def forward(self, x):
        """Compute the weights and the weighted sum of ``x``.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            Tuple ``(cumulative_change, weights)``: ``weights`` are
            non-negative and sum to 1 along the last axis;
            ``cumulative_change`` is ``sum(weights * x)`` along that axis
            (a 0-d tensor for a 1-D ``x``).
        """
        hidden = self.relu(self.fc1(x))
        weights = self.softmax(self.fc2(hidden))
        # Single tensor reduction instead of Python's builtin sum(), which
        # would iterate over the tensor element by element.
        cumulative_change = torch.sum(weights * x, dim=-1)
        return cumulative_change, weights

In [7]:
# deep nnf build
class deep_NNF(nn.Module):
    """Five-hidden-layer network that turns an input vector into softmax weights.

    Every hidden layer is followed by ReLU and dropout. The forward pass
    returns both the weights and the weighted sum of the input itself, so
    ``num_classes`` must broadcast against the last dimension of the input
    (in practice ``num_classes == input_dim``).

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_size1 .. hidden_size5: Widths of the five hidden layers.
        num_classes: Number of weights produced.
        dropout_p: Dropout probability applied after each hidden layer.
    """

    def __init__(self, input_dim, hidden_size1, hidden_size2, hidden_size3,
                 hidden_size4, hidden_size5, num_classes, dropout_p = 0.2):
        # Zero-argument super(): independent of what the global name
        # `deep_NNF` is bound to when the constructor runs.
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, hidden_size3)
        self.fc4 = nn.Linear(hidden_size3, hidden_size4)
        self.fc5 = nn.Linear(hidden_size4, hidden_size5)
        self.fc6 = nn.Linear(hidden_size5, num_classes)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_p)
        # Normalise over the last axis, the one nn.Linear operates on.
        # Identical to dim=0 for a 1-D input, and correct for batched input.
        self.softmax = nn.Softmax(dim=-1)

    def reset_parameters(self):
        """Re-initialise all six linear layers in place."""
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5, self.fc6):
            layer.reset_parameters()

    def forward(self, x):
        """Compute the weights and the weighted sum of ``x``.

        Args:
            x: Tensor whose last dimension has size ``input_dim``.

        Returns:
            Tuple ``(cumulative_change, weights)``: ``weights`` are
            non-negative and sum to 1 along the last axis;
            ``cumulative_change`` is ``sum(weights * x)`` along that axis
            (a 0-d tensor for a 1-D ``x``).
        """
        out = x
        # Hidden stack: linear -> ReLU -> dropout, five times.
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            out = self.dropout(self.relu(layer(out)))
        weights = self.softmax(self.fc6(out))
        # Single tensor reduction instead of Python's builtin sum(), which
        # would iterate over the tensor element by element.
        cumulative_change = torch.sum(weights * x, dim=-1)
        return cumulative_change, weights

In [8]:
# 1/N model build
class equal_w_model():
    """Equal-weight (1/N) benchmark over a vector of values.

    Args:
        df: Array-like of values; it is converted to a numpy array. The
            values must form a 1-D sequence or a single column for the
            weights to broadcast against them.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, df):
        self.df = df
        # Keep the value computed at construction time instead of
        # discarding it; `performance()` can still be called again.
        self.result = self.performance()

    def performance(self):
        """Return the equally weighted sum of the stored values.

        Returns:
            numpy array of shape ``(1,)`` holding ``sum(values / N)`` with
            ``N = len(values)``.

        Raises:
            ValueError: If there are no values to weight.
        """
        self.df = np.array(self.df)
        n_values = len(self.df)
        if n_values == 0:
            # Fail with a clear message rather than a ZeroDivisionError from 1/N.
            raise ValueError('equal_w_model needs at least one value')
        weights = np.full((n_values, 1), 1 / n_values)
        # Vectorised column sum in place of Python's builtin sum().
        return np.sum(np.multiply(weights, self.df.reshape(-1, 1)), axis=0)

In [9]:
# Number of training epochs per model.
num_epochs = 100

# Shallow NNF hyperparameters: the input, hidden and output layers all share
# one width (presumably the number of stock columns -- TODO confirm).
input_dim = hidden_size = num_classes = 471
# Learning rate handed to the optimizer of the shallow model.
lr = 1e-8

In [26]:
# shallow nnf tune
# The model instance is stored under the class's own name. After the first
# run `shallow_NNF` is therefore an instance, and calling it with these
# keyword arguments would fail. Restore the class binding first so this cell
# can be re-executed to obtain a fresh model.
if not isinstance(shallow_NNF, type):
    shallow_NNF = type(shallow_NNF)
shallow_NNF = shallow_NNF(input_dim=input_dim, hidden_size=hidden_size, num_classes=num_classes)
loss_fun = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(shallow_NNF.parameters(), lr=lr)

In [11]:
# Number of training epochs per model.
num_epochs = 100

# Deep NNF hyperparameters: the input, all five hidden layers and the output
# share one width.
# NOTE(review): num_epochs, input_dim, num_classes and lr reuse the names of
# the shallow-model settings, so running this cell overwrites those values.
input_dim = num_classes = 471
hidden_size1 = hidden_size2 = hidden_size3 = hidden_size4 = hidden_size5 = 471
lr = 0.001
dropout_p = 0.2

In [12]:
# deep_NNF = deep_NNF(input_dim=input_dim, hidden_size1=hidden_size1, hidden_size2=hidden_size2,
#                     hidden_size3=hidden_size3, hidden_size4=hidden_size4, hidden_size5=hidden_size5,
#                     dropout_p=dropout_p, num_classes=num_classes)
# loss_fun = torch.nn.L1Loss()
# optimizer = torch.optim.Adam()

In [13]:
# test run models
# hist_shallow_nnf = np.zeros(num_epochs)
# start_time_shallow_nnf = time.time()

# print(f'Shallow NNF Training & Results:')
# for epoch in range(num_epochs):
#     y_train_pred = shallow_NNF(x_train)
#     loss_shallow_nnf = loss_fun(y_train_pred, y_train)
#     print(f'Epoch {epoch+1} of {num_epochs} | MSE: {loss_shallow_nnf.item()}')
#     hist_shallow_nnf[epoch] = loss_shallow_nnf.item()
#     optimizer.zero_grad()
#     loss_shallow_nnf.backward()
#     optimizer.step()

# training_time = format(time.time()-start_time_shallow_nnf, '0.2f')
# print(f'Shallow NNF Training time: {training_time}')

In [27]:
# shallow nnf training function
def train_shallow_nnf(x_train, y_train, i):
    """Train the global ``shallow_NNF`` model for ``num_epochs`` epochs.

    Uses the module-level ``shallow_NNF``, ``loss_fun``, ``optimizer`` and
    ``num_epochs``. The loss of the first and the last epoch is printed,
    followed by the elapsed wall-clock time.

    Args:
        x_train: Input passed to the model.
        y_train: Target compared against the first element of the model output.
        i: Zero-based model index, used only in the printed header.
    """
    began_at = time.time()
    print(f'\nShallow NNF Training & Results for model {i+1}:')

    final_epoch = num_epochs - 1
    for epoch in range(num_epochs):
        # The model returns (prediction, weights); only the prediction is trained on.
        y_train_pred, _ = shallow_NNF(x_train)
        loss_shallow_nnf = loss_fun(y_train_pred, y_train)
        if epoch in (0, final_epoch):
            print(f'Epoch {epoch+1} of {num_epochs} | MSE: {loss_shallow_nnf.item()}')
        optimizer.zero_grad()
        loss_shallow_nnf.backward()
        optimizer.step()

    elapsed = time.time() - began_at
    training_time = f'{elapsed:0.2f}'
    print(f'Training time: {training_time}')

In [None]:
# RMSE
# def RMSE(duration, index_return, portfolio_return):
#     RMSE = 0
#     for i in range(duration):
#         RMSE += (index_return[i] - portfolio_return[i]) ** 2
#     return math.sqrt(RMSE/duration)

In [16]:
# shallow nnf validation function
# def valid_shallow_nnf(x_valid, y_valid, i):
#     weights = np.array(shallow_NNF(x_valid)[1].detach())
#     for i in range(len(x_valid)):
#         RMSE()
#     y_valid = np.array(y_valid)
#     valid_rmse = abs(y_valid_pred - y_valid)
#     return print(f'Validation RMSE: {valid_rmse.item()}')

In [None]:
#shallow nnf
# One model per rebalancing step: date_slicer shifts every window below
# forward by i months. The training window covers 30 months from 2014-07-01
# and the validation window covers the 12 months from 2017-01-01.
for i in range(24):
    x_train = data_process(date_slicer(df, '2014-07-01', 30, i))
    y_train = data_process(date_slicer(df_sp, '2014-07-01', 30, i))
    x_valid = data_process(date_slicer(df, '2017-01-01', 12, i))
    y_valid = data_process(date_slicer(df_sp, '2017-01-01', 12, i))
    # x_test = data_process(date_slicer(df, '2014-07-01', 30, i))
    # y_test = data_process(date_slicer(df_sp, '2014-07-01', 30, i))
    train_shallow_nnf(x_train, y_train, i)
    # valid_shallow_nnf(x_valid, y_valid, i)
    # test computation
    # Start the next window from scratch. Resetting the weights alone is not
    # enough: the optimizer still holds the moment estimates and step count
    # accumulated for the previous model, so rebuild it with the same
    # hyperparameters it was created with.
    shallow_NNF.reset_parameters()
    optimizer = type(optimizer)(shallow_NNF.parameters(), **optimizer.defaults)

In [18]:
# def loss_plot(hist_model):   
#     plt.plot(hist_model, color='r')
#     plt.title(f'Loss Plot')
#     plt.xlabel('Epoch')
#     plt.ylabel('Loss')
#     return plt.show()