In [1]:
# Load all the necessary libraries

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

**Loading the data**

In [3]:
from google.colab import drive

# Mount Google Drive so the CSV files stored there are readable from the Colab runtime.
drive.mount('/content/drive')

# Both frames are read from the same Drive folder. Later cells take the row ids
# and features from df_train and the target column from train.
df_train, train = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/Data/df_train.csv',
        '/content/drive/MyDrive/Data/train.csv',
    )
)

Mounted at /content/drive


**Preparing and splitting the data for training**


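*   Here we use the time order (given in time_order.csv) to split our data chronologically.
*   This avoids look-ahead bias (training on data that lies in the future of the test period) and gives a better estimate of how the model generalizes to real-time data.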





In [None]:
# Lookup table joined on time_id below; it supplies the time_id_ordered column
# that the frame is sorted by.
time_order = pd.read_csv('time_order.csv')

# row_id has the form '<stock_id>-<time_id>': split each id once and keep both
# integer parts as separate columns.
row_id_parts = [row_id.split('-') for row_id in df_train['row_id']]
df_train['stock_id'] = [int(parts[0]) for parts in row_id_parts]
df_train['time_id'] = [int(parts[1]) for parts in row_id_parts]

# The target comes from the second frame; the assignment aligns on the row index.
df_train['target'] = train['target']

# Inner-join the ordering on time_id, then sort the rows by time_id_ordered.
df_train = (
    df_train
    .merge(time_order, on='time_id')
    .sort_values('time_id_ordered')
)


In [None]:
df_train.dropna(inplace=True)

# Split threshold: 80% of the largest ordered time id. Rows at or below it are
# used for training, rows above it are held out for testing.
t_train = max(df_train['time_id_ordered'])*0.8

in_train = df_train['time_id_ordered'] <= t_train
in_test = df_train['time_id_ordered'] > t_train

train_rows = df_train[in_train]
test_rows = df_train[in_test]

# Columns that are identifiers or the label, i.e. not numeric model inputs.
non_feature_columns = ['target', 'row_id', 'time_id', 'time_id_ordered', 'stock_id']

X_train = train_rows.drop(columns=non_feature_columns).to_numpy()
X_test = test_rows.drop(columns=non_feature_columns).to_numpy()

# The stock ids are kept separately from the numeric features.
stock_data_train = train_rows['stock_id'].to_numpy()
stock_data_test = test_rows['stock_id'].to_numpy()

y_train = train_rows['target'].to_numpy()
y_test = test_rows['target'].to_numpy()

# Width of the numeric input, used when the model is constructed.
numeric_features = X_train.shape[1]


**Scaling the data**

In [None]:
# Fit the scaler on the training features only, then apply that same transform
# to both splits, so no statistics of the held-out rows enter the scaling.
scaler = StandardScaler()

scaler.fit(X_train)

X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

X_numeric_train = torch.from_numpy(X_scaled_train).float()
X_numeric_test = torch.from_numpy(X_scaled_test).float()

# Stock ids become integer column vectors of shape (N, 1).
X_stock_train = torch.from_numpy(stock_data_train.reshape(-1,1)).long()
X_stock_test = torch.from_numpy(stock_data_test.reshape(-1,1)).long()

# y_train / y_test are rebound from NumPy arrays to tensors here. torch.as_tensor
# accepts either type, so re-running this cell no longer fails the way
# torch.from_numpy does when handed an already-converted tensor.
y_train = torch.as_tensor(y_train).float()
y_test = torch.as_tensor(y_test).float()

**Defining the model**

Our model has an embedding layer (embedding dimension = 50) that learns a representation for each stock_id. In the forward method, we apply a sigmoid to the output of our network, since volatility is positive and all the data points have volatility less than 1. We use eight hidden layers for our network, and there are no batch normalization or dropout layers. The activation function in our model is SELU, which gives us better normalization during training.

In [None]:
# Size of the embedding table: the largest stock id present, plus one.
num_of_diff_stocks = df_train['stock_id'].unique().max() + 1

class Stock_Volatility(nn.Module):
    """Feed-forward regressor combining a stock-id embedding with numeric features.

    The stock id is looked up in an embedding table, concatenated after the
    numeric features, passed through a stack of Linear + SELU layers, projected
    to a single value and squashed into (0, 1) by a sigmoid.

    Args:
        hidden_units: iterable with the width of each hidden Linear layer, in order.
        emd_dim: dimension of the stock-id embedding.
        num_features: number of numeric input features per sample.
        num_stocks: number of rows in the embedding table (largest stock id + 1).
            When omitted, the module-level ``num_of_diff_stocks`` is used, which
            keeps the original three-argument call working.
    """

    def __init__(self, hidden_units, emd_dim, num_features, num_stocks=None):
        super().__init__()
        if num_stocks is None:
            # Backward-compatible fallback to the notebook-level global.
            num_stocks = num_of_diff_stocks
        self.embd = nn.Embedding(num_stocks, emd_dim)
        layers = []
        in_dim = emd_dim + num_features
        self.out = nn.Sigmoid()
        for width in hidden_units:
            layers.append(nn.Linear(in_dim, width))
            layers.append(nn.SELU())
            in_dim = width
        self.hidden = nn.Sequential(*layers)
        self.output = nn.Linear(in_dim, 1)

    def forward(self, stock_id, numeric):
        """Return predictions of shape (N, 1), each strictly between 0 and 1.

        Args:
            stock_id: integer stock ids, shape (N,) or (N, 1).
            numeric: float features, shape (N, num_features).
        """
        # Flatten the ids so that both (N,) and (N, 1) inputs yield an
        # (N, emd_dim) embedding, whatever the embedding dimension is.
        x_stock = self.embd(stock_id.long().reshape(-1))
        # Numeric features come first, the embedding second.
        x = torch.cat([numeric, x_stock], dim=1)
        x = self.hidden(x)
        return self.out(self.output(x))

emd_dim = 50  #Setting the embd dimension

hidden_units = [256, 256, 128, 128, 64, 64, 32, 32] #Setting the hidden layers

# Seed before constructing the model. The embedding weights keep the values
# drawn in the constructor (the LeCun pass further down only re-initialises
# nn.Linear layers), so without a seed here they would differ on every run.
torch.manual_seed(42)

model = Stock_Volatility(hidden_units, emd_dim, numeric_features)

**Weight Initialization**

We use the LeCun normal method for our weight initialization, since our activation function is SELU; it gives a better starting point for training.




In [None]:
# Fix PyTorch's global RNG seed so the random weight initialisation below is repeatable.
torch.manual_seed(42)

# math.sqrt is used by lecun_normal below.
import math

def lecun_normal(tensor):
    """Fill ``tensor`` in place with LeCun-normal values and return it.

    Values are drawn from a normal distribution with mean 0 and standard
    deviation ``1 / sqrt(fan_in)``.

    Implemented with the public ``nn.init.kaiming_normal_``: with
    ``mode='fan_in'`` and ``nonlinearity='linear'`` the gain is 1, so the
    standard deviation is exactly ``1 / sqrt(fan_in)``. This avoids depending
    on the private helper ``nn.init._calculate_correct_fan``.

    Args:
        tensor: a tensor with at least 2 dimensions (e.g. a Linear weight).

    Returns:
        The same tensor, modified in place without recording gradients.
    """
    return nn.init.kaiming_normal_(tensor, mode='fan_in', nonlinearity='linear')


def init_lecun(m):
    """Initialise one module for use with SELU; meant to be passed to ``Module.apply``.

    Linear layers get LeCun-normal weights and, when a bias exists, a zero bias.
    Every other module type is left untouched.
    """
    if not isinstance(m, nn.Linear):
        return
    lecun_normal(m.weight)  # good for SELU
    bias = m.bias
    if bias is not None:
        nn.init.zeros_(bias)

# Module.apply calls init_lecun on every submodule and on the model itself, then
# returns the model, so the cell output is the model's repr.
model.apply(init_lecun)


Stock_Volatility(
  (embd): Embedding(127, 50)
  (out): Sigmoid()
  (hidden): Sequential(
    (0): Linear(in_features=162, out_features=256, bias=True)
    (1): SELU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): SELU()
    (4): Linear(in_features=256, out_features=128, bias=True)
    (5): SELU()
    (6): Linear(in_features=128, out_features=128, bias=True)
    (7): SELU()
    (8): Linear(in_features=128, out_features=64, bias=True)
    (9): SELU()
    (10): Linear(in_features=64, out_features=64, bias=True)
    (11): SELU()
    (12): Linear(in_features=64, out_features=32, bias=True)
    (13): SELU()
    (14): Linear(in_features=32, out_features=32, bias=True)
    (15): SELU()
  )
  (output): Linear(in_features=32, out_features=1, bias=True)
)

**Defining the loss function**

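We use the RMSPE (root mean squared percentage error) loss function for the training loop, defined as follows: $\mathrm{RMSPE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(\frac{\hat{y}_i - y_i}{y_i}\right)^2}$.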

In [None]:
def rmspe(y_true, y_pred):
    """Root mean squared percentage error.

    Computes ``sqrt(mean(((y_pred - y_true) / y_true) ** 2))`` over all elements
    and returns it as a 0-dim tensor. There is no guard against zeros in
    ``y_true``.
    """
    relative_error = torch.div(torch.sub(y_pred, y_true), y_true)
    return relative_error.pow(2).mean().sqrt()

**Preparing the Batches**

Our batch size is 256.

In [None]:
# Each sample is a (numeric features, stock id, target) triple, in that order.
dataset = TensorDataset(X_numeric_train, X_stock_train, y_train)

# Shuffled mini-batches of 256 samples, loaded by 4 worker processes into pinned memory.
loader_options = dict(batch_size=256, shuffle=True, num_workers=4, pin_memory=True)
loader = DataLoader(dataset, **loader_options)

num_batches = len(loader)

print(f'Number of batches: {num_batches}')

Number of batches: 1289


**Setting up the optimizer and scheduler**

In [None]:
# AdamW over all model parameters.
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-4, weight_decay=1e-2)

# Multiply the learning rate by 0.9 after every 5 calls to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.9)

**Training the model**

We train for 21 epochs with batch_size 256. Every 5 epochs we compute our test (validation) error.

In [None]:
loss_fn = rmspe

# The held-out split is evaluated in one forward pass, so move it to the device once.
X_stock_test = X_stock_test.to(device)
X_numeric_test = X_numeric_test.to(device)
y_test = y_test.to(device)

epochs = 21

model.to(device)

total_loss = 0
diverged = False  # set when a batch produces a NaN/inf loss

for e in range(epochs):

    model.train()

    # Training and test losses are reported on every 5th epoch only.
    report_epoch = (e % 5 == 0)

    for batch in tqdm(loader, desc="Batches", leave=True):

        # Order matches the TensorDataset: numeric features, stock ids, targets.
        X_numeric_batch, X_stock_batch, y_batch = batch

        optimizer.zero_grad()

        X_numeric_batch = X_numeric_batch.to(device)
        X_stock_batch = X_stock_batch.to(device)
        y_batch = y_batch.to(device)

        # The model takes the stock ids first and the numeric features second.
        y_label = model(X_stock_batch, X_numeric_batch)

        # Predictions are (N, 1); squeeze to (N,) so they match the targets
        # element-wise instead of broadcasting.
        loss = loss_fn(y_batch, y_label.squeeze(dim=1))

        # Check every batch of every epoch, and before the update, so that a
        # non-finite loss is never back-propagated into the weights.
        if not torch.isfinite(loss):
            diverged = True
            break

        loss.backward()

        optimizer.step()

        if report_epoch:
            total_loss += loss.item()/num_batches

    if diverged:
        # The inner break only leaves the batch loop; stop the epoch loop as well.
        print(f'Epoch: {e} non-finite training loss, stopping training')
        break

    scheduler.step()

    if report_epoch:
        print(f'Epoch: {e} average training loss over all the batches: {total_loss}')
        total_loss = 0
        # Evaluate in eval mode without gradients; model.train() at the top of
        # the next epoch switches back.
        model.eval()
        with torch.no_grad():
            y_label_test = model(X_stock_test, X_numeric_test)
            print(f'test loss: {loss_fn(y_test, y_label_test.squeeze(dim=1))}')

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Epoch: 0 average training loss over all the batches: 5.927576607886529
test loss: 0.4239993691444397


Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Epoch: 5 average training loss over all the batches: 0.23600158592450513
test loss: 0.2648869454860687


Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Epoch: 10 average training loss over all the batches: 0.22778287606242997
test loss: 0.2427450567483902


Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Epoch: 15 average training loss over all the batches: 0.2241960472150585
test loss: 0.24485516548156738


Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Batches:   0%|          | 0/1289 [00:00<?, ?it/s]

Epoch: 20 average training loss over all the batches: 0.2217547926008467
test loss: 0.23769500851631165


**Saving the model**

In [None]:
# Persist only the learned parameters (the state_dict), not the module object itself.
model_state = model.state_dict()
torch.save(model_state, 'NN_model.pth')