<a href="https://colab.research.google.com/github/alins95/kaggle-Optiver-Realized-Volatility-Prediction/blob/main/code/Neural_Net_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Loading all the necessary libraries

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [None]:
# Run on the GPU when PyTorch reports one is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

**Loading the data**

In [None]:
from google.colab import drive

# Mount Google Drive so the CSV files stored there can be read from Colab.
drive.mount('/content/drive')

# df_train.csv is the feature table; train.csv supplies the 'target' column
# that is attached to it in the next cell.
feature_table_path = '/content/drive/MyDrive/Data/df_train.csv'
raw_train_path = '/content/drive/MyDrive/Data/train.csv'

df_train = pd.read_csv(feature_table_path)
train = pd.read_csv(raw_train_path)

**Preparing and splitting the data for training**

In [None]:
# Attach the prediction target taken from the raw training table.
df_train['target'] = train['target']

# The stock id is the integer that precedes the first '-' in each row_id.
df_train['stock_id'] = [
    int(str(rid).split('-')[0]) for rid in df_train['row_id']
]

# Discard every row that contains a missing value.
df_train.dropna(inplace=True)

# Pull the identifier and the target out of the frame; pop() removes each
# column in place, leaving only the stock id and the numeric features.
row_id = df_train.pop('row_id')
target = df_train.pop('target').to_numpy()

# Every remaining column except stock_id is a numeric model input.
numeric_features = df_train.shape[1] - 1

stock_data = df_train['stock_id'].to_numpy()
data = df_train.drop(columns=['stock_id']).to_numpy()

# One 80/20 split applied jointly to features, stock ids and targets so the
# three arrays stay row-aligned.
(
    X_train, X_test,
    stock_data_train, stock_data_test,
    y_train, y_test,
) = train_test_split(
    data,
    stock_data,
    target,
    test_size=0.2,
    random_state=42,
)

**Scaling the data**

In [None]:
# Fit the standardiser on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)

X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

# Numeric inputs become float32 tensors.
X_numeric_train = torch.from_numpy(X_scaled_train).to(torch.float32)
X_numeric_test = torch.from_numpy(X_scaled_test).to(torch.float32)

# Stock ids become int64 column vectors of shape (N, 1) for the embedding.
X_stock_train = torch.from_numpy(stock_data_train).to(torch.int64).unsqueeze(1)
X_stock_test = torch.from_numpy(stock_data_test).to(torch.int64).unsqueeze(1)

# Targets become float32 tensors.
y_train = torch.from_numpy(y_train).to(torch.float32)
y_test = torch.from_numpy(y_test).to(torch.float32)

**Defining the model**

Our model has an embedding layer (embedding dimension = 50) that keeps track of the different stock_id's. In the forward method, we apply a sigmoid to the output of our network, since volatility is positive and all the data points have volatility less than 1. We use eight hidden layers for our network, and there are no batch normalization or dropout layers. The activation function in our model is SELU.

In [None]:
# Size of the embedding table: stock ids are used directly as indices, so the
# table needs one more row than the largest id present.
distinct_stock_ids = df_train['stock_id'].unique()
num_of_diff_stocks = distinct_stock_ids.max() + 1

class Stock_Volatility(nn.Module):
    """Feed-forward network that combines a stock-id embedding with numeric features.

    The stock id is looked up in an embedding table, concatenated with the
    numeric feature vector, passed through a stack of Linear + SELU layers and
    finally through a single-unit Linear layer followed by a sigmoid, so every
    prediction lies in the interval [0, 1].

    Args:
        hidden_units: Iterable with the width of each hidden layer, in order.
        emd_dim: Dimension of the stock-id embedding vectors.
        num_features: Number of numeric input features per sample.
        num_stocks: Number of rows in the embedding table (largest stock id
            plus one). When omitted, the module-level ``num_of_diff_stocks``
            is used, which keeps existing three-argument calls working.
    """

    def __init__(self, hidden_units, emd_dim, num_features, num_stocks=None):
        super().__init__()
        if num_stocks is None:
            # Backward-compatible fallback to the notebook-level global.
            num_stocks = num_of_diff_stocks
        # Sub-modules are created in the same order as before (embedding,
        # hidden layers, output layer) so seeded initialisation is unchanged.
        self.embd = nn.Embedding(int(num_stocks), emd_dim)
        self.out = nn.Sigmoid()

        layers = []
        in_dim = emd_dim + num_features
        for width in hidden_units:
            layers.append(nn.Linear(in_dim, width))
            layers.append(nn.SELU())
            in_dim = width
        self.hidden = nn.Sequential(*layers)
        self.output = nn.Linear(in_dim, 1)

    def forward(self, stock_id, numeric):
        """Return predictions of shape (batch, 1).

        Args:
            stock_id: Integer stock ids, shaped either (batch, 1) or (batch,).
            numeric: Float tensor of shape (batch, num_features).
        """
        # Flatten the ids so both (batch, 1) and (batch,) inputs yield a
        # (batch, emd_dim) embedding.
        x_stock = self.embd(stock_id.long().reshape(-1))
        x = torch.cat([numeric, x_stock], dim=1)
        x = self.hidden(x)
        return self.out(self.output(x))

# Dimension of the learned per-stock embedding vector.
emd_dim = 50

# Eight hidden layers: each width in the tapering schedule is used twice.
hidden_units = [width for width in (256, 128, 64, 32) for _ in range(2)]

# Seed the generator before construction so the initial weights are reproducible.
torch.manual_seed(42)

model = Stock_Volatility(
    hidden_units=hidden_units,
    emd_dim=emd_dim,
    num_features=numeric_features,
)

**Defining the loss function**

We use the root mean squared percentage error (RMSPE) as the loss function for the training loop, defined as follows.

In [None]:
def rmspe(y_true, y_pred):
    """Return the root mean squared percentage error of y_pred against y_true.

    The error of each element is taken relative to its true value, squared,
    averaged over all elements, and the square root of that mean is returned
    as a scalar tensor.
    """
    relative_error = (y_pred - y_true) / y_true
    return relative_error.pow(2).mean().sqrt()

**Preparing the Batches**

Our batch size is 4096.

In [None]:
# Bundle the training tensors so each sample yields
# (numeric features, stock id, target) in that order.
dataset = TensorDataset(X_numeric_train, X_stock_train, y_train)

# Reshuffle every epoch; worker processes and pinned memory are enabled for
# the data loading.
loader_options = dict(
    batch_size=4096,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)
loader = DataLoader(dataset, **loader_options)

# Batches per epoch, used later to average the training loss.
num_batches = len(loader)
print(f'Number of batches: {num_batches}')

**Setting up the optimizer**

In [None]:
# AdamW with learning rate and weight decay both set to 1e-3.
adamw_settings = {'lr': 1e-3, 'weight_decay': 1e-3}
optimizer = torch.optim.AdamW(model.parameters(), **adamw_settings)

# Multiply the learning rate by 0.7 after every 10 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=10,
    gamma=0.7,
)

**Training the model**

In [None]:
# Train the model with the RMSPE loss and report train/test loss every 5th epoch.
loss_fn = rmspe

# The held-out tensors are reused unchanged for every evaluation pass, so
# they are moved to the device once up front.
X_stock_test = X_stock_test.to(device)
X_numeric_test = X_numeric_test.to(device)
y_test = y_test.to(device)

epochs = 91

model.to(device)

# Running average of the batch losses, accumulated only in reporting epochs.
total_loss = 0

# Set when a batch produces a NaN/inf loss, so training stops completely
# instead of only leaving the batch loop.
diverged = False

for e in range(epochs):

    # Evaluation below switches to eval mode, so re-enter train mode each epoch.
    model.train()

    for batch in tqdm(loader, desc="Batches", leave=True):

        X_numeric_batch, X_stock_batch, y_batch = batch

        optimizer.zero_grad()

        X_numeric_batch = X_numeric_batch.to(device)
        X_stock_batch = X_stock_batch.to(device)
        y_batch = y_batch.to(device)

        y_label = model(X_stock_batch, X_numeric_batch)

        loss = loss_fn(y_batch, y_label.squeeze(dim=1))

        # Check on every batch, before the update, so a diverged loss never
        # reaches the weights and the last finite parameters are kept.
        if not torch.isfinite(loss):
            diverged = True
            break

        loss.backward()

        optimizer.step()

        if e % 5 == 0:
            total_loss += loss.item() / num_batches

    if diverged:
        print(f'Stopping at epoch {e}: training loss is not finite')
        break

    scheduler.step()

    if e % 5 == 0:
        print(f'Epoch: {e} average training loss over all the batches: {total_loss}')
        total_loss = 0
        # Evaluate in eval mode without gradient tracking.
        model.eval()
        with torch.no_grad():
            y_label_test = model(X_stock_test, X_numeric_test)
            print(f'test loss: {loss_fn(y_test, y_label_test.squeeze(dim=1))}')

**Saving the model**

In [None]:
# Persist only the learned parameters (the state dict), not the module object.
trained_weights = model.state_dict()
torch.save(trained_weights, 'NN_model.pth')