In [None]:
from google.colab import drive 
# Mount Google Drive at /content/drive (this only works inside Google Colab).
# The CSV paths used further down are relative ('drive/MyDrive/...'), so they
# presumably rely on the working directory being /content -- TODO confirm.
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
class FeedforwardNeuralNetModel(nn.Module):
  """Fully connected regressor: in_features -> 32 -> 16 -> 8 -> 1.

  Every hidden Linear layer is paired with a ReLU and a BatchNorm1d; the last
  layer is a plain Linear with one output unit, so forward() returns a tensor
  of shape (batch, 1).

  Args:
    in_features: Width of each input row. Defaults to 24, the value that was
      previously hard-coded, so existing `FeedforwardNeuralNetModel()` calls
      build exactly the same architecture.
  """

  def __init__(self, in_features: int = 24):
    super().__init__()

    # Attribute names and registration order are unchanged so that state_dict
    # keys (fc1.*, bn1.*, ...) stay compatible with saved weights.
    self.fc1 = nn.Linear(in_features, 32)
    self.relu1 = nn.ReLU()
    self.bn1 = nn.BatchNorm1d(32)

    self.fc2 = nn.Linear(32, 16)
    self.relu2 = nn.ReLU()
    self.bn2 = nn.BatchNorm1d(16)

    self.fc3 = nn.Linear(16, 8)
    self.relu3 = nn.ReLU()
    self.bn3 = nn.BatchNorm1d(8)

    self.fc4 = nn.Linear(8, 1)

  def forward(self, x):
    """Run the network on a batch.

    Args:
      x: 2-D float tensor of shape (batch, in_features).

    Returns:
      Tensor of shape (batch, 1) with one prediction per input row.
    """
    # NOTE(review): block 1 normalises BEFORE the activation, while blocks 2
    # and 3 normalise AFTER it. The ordering is kept exactly as it was because
    # changing it alters the numerical behaviour of the model; confirm which
    # ordering is intended and make the three blocks consistent.
    out = self.fc1(x)
    out = self.bn1(out)
    out = self.relu1(out)

    out = self.fc2(out)
    out = self.relu2(out)
    out = self.bn2(out)

    out = self.fc3(out)
    out = self.relu3(out)
    out = self.bn3(out)

    out = self.fc4(out)
    return out

In [None]:
# Predictor table: the first CSV row supplies the column names and the first
# CSV column becomes the DataFrame index.
X = pd.read_csv(
    'drive/MyDrive/asset_pricing_data/full_predictor_set_bfill.csv',
    header=0,
    index_col=0,
)

In [None]:
# Returns table, read with the same layout as the predictors: header taken
# from the first row, index taken from the first column.
y = pd.read_csv(
    'drive/MyDrive/asset_pricing_data/returns.csv',
    header=0,
    index_col=0,
)

In [None]:
# Network and objective: mean-squared error between prediction and target.
model = FeedforwardNeuralNetModel()
criterion = nn.MSELoss()

# Adam over all model parameters with a starting learning rate of 1e-5. The
# plateau scheduler lowers that rate when the metric handed to
# scheduler.step() has not improved for more than `patience` calls.
optimizer = optim.Adam(model.parameters(), lr=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)

In [None]:
# Rows whose `level_1` value is below the cutoff are used for training; all
# other rows are held out for testing.
# NOTE(review): `level_1` is presumably a YYYYMM-style date, which would make
# this a split at the year 2000 -- confirm against the CSV schema.
SPLIT_CUTOFF = 200000


def _prepare_predictors(frame):
  """Drop the first two columns and fill the remaining missing values.

  Gaps are first filled from the column immediately to the left (forward fill
  along axis=1); anything still missing is set to 0.

  NOTE(review): filling along axis=1 copies values across columns, not down
  rows. If the columns are distinct predictors this borrows one predictor's
  value for another -- confirm that this is intended.
  """
  # DataFrame.ffill replaces the deprecated fillna(method="ffill") spelling
  # and produces the same result.
  return frame.iloc[:, 2:].ffill(axis=1).fillna(0)


is_train_X = X["level_1"] < SPLIT_CUTOFF
is_train_y = y["level_1"] < SPLIT_CUTOFF

df1_train = _prepare_predictors(X[is_train_X])
df1_test = _prepare_predictors(X[~is_train_X])
# The target is the third column of the returns table.
df2_train = y[is_train_y].iloc[:, 2]
df2_test = y[~is_train_y].iloc[:, 2]

tx_train = torch.tensor(df1_train.values.astype(np.float32))
tx_test = torch.tensor(df1_test.values.astype(np.float32))
# 1-D target tensor with one value per training row.
ty_train = torch.tensor(df2_train.values.astype(np.float32))
# The test target deliberately stays a NumPy array in its original dtype.
ty_test = df2_test.to_numpy()

train_tensor = torch.utils.data.TensorDataset(tx_train, ty_train)
train_loader = torch.utils.data.DataLoader(dataset=train_tensor, batch_size=32, shuffle=True)

In [None]:
# Use the first CUDA GPU when one is visible to PyTorch, otherwise the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")

print(device)

In [None]:
# Move the model onto the device selected above. This is left as a bare
# expression so the notebook displays the value returned by .to().
model.to(device)

In [None]:
# Training loop with L1 regularisation, plateau LR scheduling and early
# stopping on the epoch-mean training loss.
#
# The names `j`, `epsilon` and `params` are kept because later cells may read
# them.
MAX_EPOCHS = 10
PATIENCE = 5          # non-improving epochs tolerated before stopping
L1_LAMBDA = 0.000001  # weight of the L1 penalty over all model parameters

j = 0             # consecutive epochs without improvement
epsilon = np.inf  # best (lowest) epoch-mean loss so far, as a plain float
params = None     # copy of the model weights taken at the best epoch

model.train()  # BatchNorm must use per-batch statistics while training

for epoch in range(MAX_EPOCHS):
  running_loss = torch.zeros((), device=device)
  n_batches = 0

  for pred, ret in train_loader:
    # BatchNorm1d raises in training mode on a single-sample batch, which
    # happens when the dataset size leaves a remainder of 1.
    if pred.shape[0] < 2:
      continue

    pred = pred.to(device)
    ret = ret.to(device)

    optimizer.zero_grad()

    # The network emits (batch, 1) while the targets are (batch,). Without
    # this reshape MSELoss broadcasts the pair to (batch, batch) and computes
    # the wrong quantity.
    outputs = model(pred).reshape(ret.shape)

    loss = criterion(outputs, ret)

    l1_norm = sum(p.abs().sum() for p in model.parameters())
    loss = loss + L1_LAMBDA * l1_norm
    loss.backward()

    optimizer.step()

    # Accumulate a detached value so no autograd graph is kept alive.
    running_loss += loss.detach()
    n_batches += 1

  if n_batches == 0:
    raise ValueError("train_loader produced no usable batches")

  # The epoch mean is a far less noisy signal than the loss of whichever
  # batch happened to come last.
  epoch_loss = (running_loss / n_batches).item()

  if epoch_loss < epsilon:
    j = 0
    epsilon = epoch_loss
    # state_dict() returns references to the live tensors; clone them so the
    # snapshot is not overwritten by later optimizer steps.
    params = {name: t.detach().clone() for name, t in model.state_dict().items()}
  else:
    j = j + 1
    if j > PATIENCE:
      # Patience exhausted: stop now rather than iterating over the loader
      # for the remaining epochs without training.
      break

  scheduler.step(epoch_loss)