In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from optiver_features_handler import get_features_map_for_stock, get_row_id

In [2]:
DATA_DIRECTORY = os.path.join("..","input","optiver-realized-volatility-prediction")
TRADE_TRAIN_DIRECTORY = os.path.join(DATA_DIRECTORY,"trade_train.parquet")
TRADE_TEST_DIRECTORY = os.path.join(DATA_DIRECTORY,"trade_test.parquet")
BOOK_TRAIN_DIRECTORY = os.path.join(DATA_DIRECTORY,"book_train.parquet")
BOOK_TEST_DIRECTORY = os.path.join(DATA_DIRECTORY,"book_test.parquet")
OUTPUT_DIRECTORY = os.path.join("..","output")
os.makedirs(OUTPUT_DIRECTORY,exist_ok=True)

In [3]:
train_df = pd.read_csv(os.path.join(DATA_DIRECTORY,"train.csv"))
test_df = pd.read_csv(os.path.join(DATA_DIRECTORY,"test.csv"))

In [4]:
data_interval_seconds = 5
data_intervals_count = int(600/data_interval_seconds)
class OptiverRealizedVolatilityDataset(Dataset):
    def __init__(self, data_directory, mode="train", lazy_load=True):
        """initializes Optiver Competition dataset
        `mode`: train|test
        `data_directory`: the datadirectory of the input data, where there are test.csv, train.csv, and parquet folders for trade_train.parquet and other relevant folders
        """
        print("INIT: OptiverRealizedVolatilityDataset")
        if mode.lower() not in ['train','test']:
            raise Exception("Invalid mode passed for Optiver dataset. Valid values:train|test")
        self.data_directory = data_directory
        self.mode = mode.lower()
        self.main_df = pd.read_csv(os.path.join(self.data_directory,f'{self.mode}.csv'))
#         if self.mode == 'train':
#             self.main_df['row_id'] = self.main_df.apply(lambda x: f"{x['stock_id']:.0f}-{x['time_id']:.0f}", axis=1)
        if self.mode == 'test':
            self.main_df['target'] = 0
        
        self.cache_stocks_done_set = set()
        # this is our final features lookup where we park all our features which can be addressed by row_id
        # which is individual train/test.csv row id using 'stock_id`-`time_id`
        self.cache_rowid_feature_map = {}
        row_id_series = self.main_df['stock_id'].astype(str) + "-" +self.main_df['time_id'].astype(str)
        targets = self.main_df['target'].tolist()
        self.stock_possible_timeids_list = {}
        for idx, row_id in enumerate(row_id_series.tolist()):
            stock_id = int(row_id.split('-')[0])
            time_id = int(row_id.split('-')[1])
            self.cache_rowid_feature_map[row_id] = {'target':targets[idx], 'stock_id':stock_id,'time_id':time_id,'row_id':row_id}
            
            # below code is to make sure what timeids we expect from stock data extractor
            # in case of missing parquet files we'll have to know the keys to fill default values into
            if stock_id not in self.stock_possible_timeids_list:
                self.stock_possible_timeids_list[stock_id] = []
            self.stock_possible_timeids_list[stock_id].append(time_id)
            
        
        if lazy_load == False:
            worker_data = []
            for gkey, gdf in self.main_df.groupby(['stock_id']):
                worker_data.append((self.data_directory, self.mode, gkey))
#             print("---------- CPU COUNG:", multiprocessing.cpu_count())
            # NOTE: this was hell of a hunt; this windows and pytorch and jupyter combination is too tedious
            #       make sure the function that we distribute don't call pytorch
            chunksize = multiprocessing.cpu_count() * 1
            processed = 0
            for worker_data_chunk in [worker_data[i * chunksize:(i + 1) * chunksize] for i in range((len(worker_data) + chunksize - 1) // chunksize )]:
                with Pool(multiprocessing.cpu_count()) as p:
                    
                    feature_set_list = p.starmap(get_features_map_for_stock, worker_data_chunk)
                    
                    for feature_map in feature_set_list:
                        for rowid, features_dict in feature_map.items():
                            for fkey,fval in features_dict.items():
                                self.cache_rowid_feature_map[rowid][fkey] = fval
                            self.cache_rowid_feature_map[rowid]  = OptiverRealizedVolatilityDataset.transform_to_01_realized_volatility_linear_data(self.cache_rowid_feature_map[rowid])
                        # udpate the indications that we've already fetched this stock and the lazy loader code won't fetch this again
                        self.cache_stocks_done_set.add(int(rowid.split('-')[0]))
                    
                    processed += chunksize
                    print(f"Processed and loaded {processed} stocks features.")
    
    def __cache_generate_features(self, main_stock_id, main_time_id):
            
            main_row_id = get_row_id(main_stock_id, main_time_id)
            if main_stock_id not in self.cache_stocks_done_set:
#                 trade_df = pd.read_parquet(os.path.join(self.data_directory, f"trade_{self.mode}.parquet", f"stock_id={stock_id}"))   
                # we'll combine the featureset with the bigger feature set of all stocks
                feature_map = get_features_map_for_stock(self.data_directory, self.mode, main_stock_id)
                # NOTE: sometime we might now have parquet files in that case we'll have 3 entried in .csv while only 1 gets returned in feature map
                # we need to cover for that disparity
                for time_id in self.stock_possible_timeids_list[main_stock_id]:
                    expected_row_id = get_row_id(main_stock_id, time_id)
                    if expected_row_id not in feature_map:
                        feature_map[expected_row_id] = {}
                for rowid, features_dict in feature_map.items():
                    for fkey,fval in features_dict.items():
                        self.cache_rowid_feature_map[rowid][fkey] = fval
                    self.cache_rowid_feature_map[rowid]  = OptiverRealizedVolatilityDataset.transform_to_01_realized_volatility_linear_data(self.cache_rowid_feature_map[rowid])
                self.cache_stocks_done_set.add(main_stock_id)
#             print(self.cache_rowid_feature_map[main_row_id])
#             print(torch.tensor([self.cache_rowid_feature_map[main_row_id].get('book_realized_volatility',0)]))
#             print(torch.tensor(self.cache_rowid_feature_map[main_row_id].get('log_return1_2s', [0]*(int(600/2)))))
#             print(torch.tensor(self.cache_rowid_feature_map.get('book_directional_volume1_2s', [0]*(int(600/2)))))
            return self.cache_rowid_feature_map[main_row_id]
        
    @staticmethod
    def transform_to_01_realized_volatility_linear_data(features_dict):
        return (
                {
                    'row_id':features_dict['row_id'],
                    'stock_id':torch.tensor(features_dict['stock_id'], dtype=torch.float32),
                    'seconds_in_bucket_xs': torch.tensor(np.nan_to_num(features_dict.get('seconds_in_bucket_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
#                     'book_realized_volatility':torch.tensor([features_dict.get('book_realized_volatility',0)]),
                    # TRADE FEATURES
                    'logrett_xs': torch.tensor(np.nan_to_num(features_dict.get('logrett_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'trade_volume_xs': torch.tensor(np.nan_to_num(features_dict.get('trade_volume_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'trade_ordercount_xs': torch.tensor(np.nan_to_num(features_dict.get('trade_ordercount_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'trade_money_turnover_xs': torch.tensor(np.nan_to_num(features_dict.get('trade_money_turnover_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'trade_money_turnover_per_order_xs': torch.tensor(np.nan_to_num(features_dict.get('trade_money_turnover_per_order_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    
#                     'trade_money_turnover_mean': torch.tensor(np.nan_to_num(features_dict.get('trade_money_turnover_mean', 0)), dtype=torch.float32),
#                     'trade_money_turnover_std': torch.tensor(np.nan_to_num(features_dict.get('trade_money_turnover_std', 0)), dtype=torch.float32),
                    # BOOK FEATURES
                    'logret1_xs': torch.tensor(np.nan_to_num(features_dict.get('logret1_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'logret2_xs': torch.tensor(np.nan_to_num(features_dict.get('logret2_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'book_directional_volume1_xs': torch.tensor(np.nan_to_num(features_dict.get('book_directional_volume1_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'book_directional_volume2_xs': torch.tensor(np.nan_to_num(features_dict.get('book_directional_volume2_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'book_price_spread1_xs': torch.tensor(np.nan_to_num(features_dict.get('book_price_spread1_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'book_price_spread2_xs': torch.tensor(np.nan_to_num(features_dict.get('book_price_spread2_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'book_bid_spread_xs': torch.tensor(np.nan_to_num(features_dict.get('book_bid_spread_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'book_ask_spread_xs': torch.tensor(np.nan_to_num(features_dict.get('book_ask_spread_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'book_total_volume_xs': torch.tensor(np.nan_to_num(features_dict.get('book_total_volume_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'book_volume_imbalance_xs': torch.tensor(np.nan_to_num(features_dict.get('book_volume_imbalance_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    'book_money_turnover1_xs': torch.tensor(np.nan_to_num(features_dict.get('book_money_turnover1_xs', [0]*(int(600/data_interval_seconds)))), dtype=torch.float32),
                    
#                     'askp2_1s':torch.tensor(features_dict.get('askp2_1s', [0]*(int(600/1)))),
#                     'book_directional_volume1_1s':torch.tensor(features_dict.get('book_directional_volume1_1s', [0]*(int(600/1)))) 
                },
                torch.tensor([features_dict['target']])
#                 [features_dict['target']]
        )
    
    def __len__(self):
        return len(self.main_df)
    
    def __getitem__(self, idx):
        #TODO: handle for num_workers more than 0
        #      using https://pytorch.org/docs/stable/data.html
        #      using torch.util.data.get_worker_info()
        if torch.is_tensor(idx):
            idx = idx.tolist()
        stock_id = self.main_df.at[idx, 'stock_id']
        time_id = self.main_df.at[idx, 'time_id']
        x,y = self.__cache_generate_features(stock_id,time_id)
#         x, y = self.__transform_to_01_realized_volatility_linear_data(features_dict)
        return x,y

In [5]:
# dataset = OptiverRealizedVolatilityDataset(DATA_DIRECTORY, mode="train")
dataset = OptiverRealizedVolatilityDataset(DATA_DIRECTORY, mode="test", lazy_load=True)

INIT: OptiverRealizedVolatilityDataset


In [6]:
dataset[0]

({'row_id': '0-4',
  'stock_id': tensor(0.),
  'seconds_in_bucket_xs': tensor([  5.,  10.,  15.,  20.,  25.,  30.,  35.,  40.,  45.,  50.,  55.,  60.,
           65.,  70.,  75.,  80.,  85.,  90.,  95., 100., 105., 110., 115., 120.,
          125., 130., 135., 140., 145., 150., 155., 160., 165., 170., 175., 180.,
          185., 190., 195., 200., 205., 210., 215., 220., 225., 230., 235., 240.,
          245., 250., 255., 260., 265., 270., 275., 280., 285., 290., 295., 300.,
          305., 310., 315., 320., 325., 330., 335., 340., 345., 350., 355., 360.,
          365., 370., 375., 380., 385., 390., 395., 400., 405., 410., 415., 420.,
          425., 430., 435., 440., 445., 450., 455., 460., 465., 470., 475., 480.,
          485., 490., 495., 500., 505., 510., 515., 520., 525., 530., 535., 540.,
          545., 550., 555., 560., 565., 570., 575., 580., 585., 590., 595., 600.]),
  'logrett_xs': tensor([ 0.0000e+00,  0.0000e+00, -9.8319e-05, -9.8328e-05, -9.8338e-05,
           9.8938e-0

In [7]:
use_cuda = torch.cuda.is_available()
# use_cuda = False
device = torch.device("cuda:0" if use_cuda else "cpu")
torch.backends.cudnn.benchmark = True
model = None


def loss_fn_mse(y, pred):
    return torch.mean(torch.square((y-pred)))

def loss_fn_mspe(y, pred):
    return torch.mean(torch.square((y-pred)/y))

def loss_fn_orig(y, pred):
    return torch.sqrt(torch.mean(torch.square((y-pred)/y)))

In [56]:
class NeuralNetwork(nn.Module):
    def __init__(self, feature_generator_mode_hidden_size=64, mode='both', use_stock_id = False):
        super(NeuralNetwork, self).__init__()
        self.use_stock_id = use_stock_id
        self.flatten = nn.Flatten()
        self.mode = mode
        self.feature_generator_mode_hidden_size = feature_generator_mode_hidden_size
        self.cnn_stack = nn.Sequential(
            nn.Conv1d(10, 16, kernel_size=4, stride=2, padding=0),
            nn.GELU(),
#             nn.BatchNorm1d(4),
#             nn.Dropout(0.1),
            nn.Conv1d(16, 24, kernel_size=4, stride=2, padding=0),
            nn.GELU(),
#             nn.Conv1d(16, 24, kernel_size=2, stride=1, padding=0),
#             nn.GELU(),
#             nn.Conv1d(24, 32, kernel_size=2, stride=1, padding=0),
#             nn.ReLU(),
#             nn.BatchNorm1d(8),
#             nn.Conv1d(4, 4, kernel_size=6, stride=3, padding=0),
#             nn.GELU(),
#             nn.Conv1d(4, 4, kernel_size=4, stride=2, padding=0),
#             nn.GELU(),
#             nn.BatchNorm1d(4),
#             nn.Conv1d(4, 4, kernel_size=6, stride=2, padding=0),
#             nn.GELU(),
#             nn.Dropout(0.1),
#             nn.Dropout(0.1),
#             nn.Conv1d(8, 8, kernel_size=4, stride=2, padding=0), 
#             nn.ReLU(),
#             nn.Dropout(0.1),
        )
        self.linear_stack = nn.Sequential(
            nn.Linear(672,512),
            nn.GELU(),
#             nn.Dropout(0.3),
            nn.Linear(512, 256),
#             nn.GELU(),
#             nn.Linear(512, 512),
            nn.GELU(),
            nn.Linear(256, self.feature_generator_mode_hidden_size),
            nn.GELU(),
#             nn.Dropout(0.3),
#             nn.Linear(256, 64),
#             nn.ReLU(),
#             nn.Linear(64, 16),
#             nn.ReLU()
        )
        self.linear_hybrid = nn.Sequential(
            nn.Linear(self.feature_generator_mode_hidden_size, 32),
#             nn.GELU(),
#             nn.Linear(256, 256),
#             nn.GELU(),
#             nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(32, 1),
        )
#         self.basic_stack = nn.Sequential(
#             nn.Linear(int(600/2)*1,512),
#             nn.ReLU(),
#             nn.Dropout(0.4),
#             nn.Linear(512,1024),
#             nn.ReLU(),
#             nn.Dropout(0.4),
# #             nn.Linear(2048,1024),
# #             nn.ReLU(),
# #             nn.Dropout(),
#             nn.Linear(1024,512),
#             nn.ReLU(),
#             nn.Dropout(0.3),
#             nn.Linear(512,128),
#             nn.ReLU(),
#             nn.Dropout(0.2),
#             nn.Linear(128,128),
#             nn.ReLU(),
#             nn.Linear(128,1),
#         )

    def get_feature_gen_train_modes(self):
        return []
    
    def set_mode(self,mode):
        self.mode = mode
    
    def forward(self, feature_dict):
#         logits = self.basic_stack(x)
#         x = self.flatten(x)
        x = torch.cat([
                            feature_dict['logrett_xs'].to(device)*10000, 
#                                torch.log(feature_dict['trade_volume_xs'].to(device)+0.001),
#                               torch.log(feature_dict['trade_ordercount_xs'].to(device)+0.001),
#                             feature_dict['trade_volume_xs'].to(device),
#                                         feature_dict['trade_ordercount_xs'].to(device),
#                                         feature_dict['book_total_volume_xs'].to(device),
#                                         feature_dict['book_volume_imbalance_xs'].to(device)
#                             feature_dict['logrett_xs'].to(device)*10000, 
#                                torch.log(feature_dict['trade_volume_xs'].to(device)+0.001),
#                               torch.log(feature_dict['trade_ordercount_xs'].to(device)+0.001),
            
                                feature_dict['logret1_xs'].to(device)*10000,
                            
                                 feature_dict['logret2_xs'].to(device)*10000,
                                    feature_dict['book_price_spread1_xs'].to(device)*10000, 
#                                 feature_dict['book_price_spread2_xs'].to(device)*10000, 
                                feature_dict['book_bid_spread_xs'].to(device)*10000, 
                                feature_dict['book_ask_spread_xs'].to(device)*10000, 
#                                  feature_dict['book_directional_volume1_xs'].to(device),
#                                 feature_dict['book_price_spread1_xs'].to(device)*1000,
                                torch.log(feature_dict['book_total_volume_xs'].to(device)+1),
                                torch.log(feature_dict['book_volume_imbalance_xs'].to(device)+1),
                                torch.log(feature_dict['book_money_turnover1_xs'].to(device)+1),
                                torch.log(feature_dict['trade_money_turnover_xs'].to(device)+1),
# #                              feature_dict['book_dirvolume_xs'],
                          ], 1)

#         x = torch.nan_to_num(feature_dict['logrett_xs']).type(torch.cuda.FloatTensor)
        
        
#         print(x)
#         input()
#         if torch.isnan(x).any():
# #             print(x)
#             print(feature_dict)
#             input()
        x = x.to(device)
        x = x.reshape(-1,10,data_intervals_count)
        
        logits = self.cnn_stack(x)
        logits = self.flatten(logits)
        
       
        
        logits = self.linear_stack(logits)
        
        if self.mode == 'hidden_generator':
            return logits
#         logits = torch.cat( [logits, 
#                              torch.log(feature_dict['trade_money_turnover_mean'].type(torch.cuda.FloatTensor).to(device).reshape(-1,1)+0.001), 
#                                            torch.log(feature_dict['trade_money_turnover_std'].type(torch.cuda.FloatTensor).to(device).reshape(-1,1)+0.001),
#                                            torch.log(feature_dict['trade_price_mean'].type(torch.cuda.FloatTensor).to(device).reshape(-1,1)+0.001),
#                                            torch.log(feature_dict['book_money_turnover_mean'].type(torch.cuda.FloatTensor).to(device).reshape(-1,1)+0.001),
#                                            torch.log(feature_dict['book_money_turnover_std'].type(torch.cuda.FloatTensor).to(device).reshape(-1,1)+0.001),
#                                            torch.log(feature_dict['book_price_mean'].type(torch.cuda.FloatTensor).to(device).reshape(-1,1)+0.001)
#                                       ], 1)
        
        if self.use_stock_id:
            stock_id = torch.tensor(feature_dict['stock_id']).reshape(-1,1)
            stock_id = stock_id.to(device)
            logits = torch.cat([logits, stock_id], 1)
            
        logits = self.linear_hybrid(logits)
        return logits





In [57]:
# class VolatilityGRU(nn.Module):
#     def __init__(self, input_size=1, hidden_size=64, repeated_cells=1):
#         self.input_size = input_size
#         self.hidden_size = hidden_size

class SingleFetGRU(nn.Module):
    def __init__(self, hidden_size=64, layers=1, dropout=0, features_out=32, mode="train"):
        """single feature, feature learner
        `mode`: train|feature_generator
        """
        super(SingleFetGRU, self).__init__()
        self.input_size_ = 1
        self.hidden_size_ = hidden_size
        self.repeated_lstm_cells_ = layers
        self.dropout_ = dropout
        self.features_out = features_out
        
        self.rnn_ = nn.GRU(self.input_size_, self.hidden_size_, self.repeated_lstm_cells_, batch_first=True, dropout=self.dropout_)
        
        self.linear_feature_stack_ = nn.Sequential(
            nn.Linear(self.hidden_size_*self.repeated_lstm_cells_, 128),
            nn.GELU(),
            nn.Linear(128, 128),
            nn.GELU(),
            nn.Linear(128, self.features_out),
        )
        
        self.linear_trainer_stack_ = nn.Sequential(
            nn.Linear(self.features_out, 128),
            nn.GELU(),
            nn.Linear(128, 64),
            nn.GELU(),
            nn.Linear(64, 32),
            nn.GELU(),
            nn.Linear(32, 1),   
        )
        
    def set_mode(self, mode):
        self.mode = mode
        
    def forward(self, feature_tensor, h0_tensor=None):
        if self.mode in ["feature_generator","train"]:
            if h0_tensor is None:
                h_0_ = torch.rand(self.repeated_lstm_cells_, feature_tensor.size(0), self.hidden_size_, device=device) #hidden state
            else:
                h_0_ = h0_tensor
            output_, hn_ = self.rnn_(feature_tensor, h_0_) #lstm with input, hidden, and internal state
            hn_ = hn_.reshape(-1, self.hidden_size_*self.repeated_lstm_cells_) #reshaping the data for Dense layer next  
            
            out_ = self.linear_feature_stack_(hn_)
            
            if self.mode == "train":
                out_ = self.linear_trainer_stack_(out_)
            
            return out_
            
            
            
class VolatilityBSModel(nn.Module):
    def __init__(self, mode="hybrid", use_stock_id=False):
        """various rnn features' fusion with fully connected nn
        `mode`: hybrid|<feature_name>
        """
        super(VolatilityBSModel, self).__init__()
        self.mode = mode
        self.use_stock_id = use_stock_id
#         self.feature_list = ['logrett_xs','trade_volume_xs','trade_ordercount_xs','trade_money_turnover_xs','trade_money_turnover_per_order_xs',
#                              'logret1_xs','logret2_xs','book_directional_volume1_xs','book_directional_volume2_xs',
#                              'book_price_spread1_xs','book_price_spread2_xs','book_bid_spread_xs','book_ask_spread_xs',
#                              'book_total_volume_xs','book_volume_imbalance_xs','book_money_turnover1_xs']
        self.feature_list = ['logrett_xs','trade_volume_xs','trade_ordercount_xs','trade_money_turnover_xs','trade_money_turnover_per_order_xs',
                             'logret1_xs','logret2_xs','book_directional_volume1_xs',
                             'book_price_spread1_xs','book_bid_spread_xs','book_ask_spread_xs',
                             'book_total_volume_xs','book_volume_imbalance_xs']
        self.feature_gen_feature_size = 32
        self.feature_gen_models = {}
        self.rnn_hidden_size = 64
        self.rnn_layers = 2
        self.hidden_generator_network = NeuralNetwork(feature_generator_mode_hidden_size=self.rnn_hidden_size*self.rnn_layers)
        
        for k in self.feature_list:
            self.feature_gen_models[k]=SingleFetGRU(hidden_size=self.rnn_hidden_size, layers=self.rnn_layers, dropout=0.2, features_out=self.feature_gen_feature_size) 
            self.feature_gen_models[k].to(device)
        
        
        self.linear_fusion = nn.Sequential(
            nn.Linear(self.feature_gen_feature_size*len(self.feature_list) + (1 if self.use_stock_id else 0), 512),
            nn.GELU(),
            nn.Linear(512,512),
            nn.GELU(),
            nn.Linear(512,256),
            nn.GELU(),
            nn.Linear(256,128),
            nn.GELU(),
            nn.Linear(128,128),
            nn.ReLU(),
            nn.Linear(128,1)
        )
        self.set_mode(self.mode)
    
    def get_feature_gen_train_modes(self):
        return self.feature_list
    
    def set_mode(self, mode):
        print(f"------- set mode : {mode} -----------")
        self.mode = mode
        for feature_gen_model in self.feature_gen_models.values():
            feature_gen_model.set_mode('feature_generator' if self.mode in ['hybrid','hybrid_feature_out','hidden_generator'] else 'train')
        if self.mode == 'hidden_generator':
            self.hidden_generator_network.set_mode('train')
        else:
            self.hidden_generator_network.set_mode('hidden_generator')
    
    def parameters(self):
        
        generator_sources_map = {k:[v] for k,v in self.feature_gen_models.items()}
        generator_sources_map['hybrid']= [self.linear_fusion]
        generator_sources_map['hidden_generator'] = [self.hidden_generator_network]
        params = []
        if self.mode in generator_sources_map:
            for generator_source in generator_sources_map[self.mode]:
                for param in generator_source.parameters():
                    params.append(param)
        else:
            return super(VolatilityBSModel,self).parameters()
        return params
    
    def feature_transform(self, feature_x, feature_name):
        if feature_name in ['logrett_xs',
                             'logret1_xs','logret2_xs',
                             'book_price_spread1_xs','book_price_spread2_xs','book_bid_spread_xs','book_ask_spread_xs']:
            return feature_x * 10000
        if feature_name in ['trade_ordercount_xs','trade_volume_xs','trade_money_turnover_xs','trade_money_turnover_per_order_xs',
                             'book_total_volume_xs','book_volume_imbalance_xs','book_money_turnover1_xs']:
            return torch.log(feature_x + 1)
        return feature_x
    
    def forward(self, feature_dict):
        
        if self.mode in ['hidden_generator']:
            return self.hidden_generator_network(feature_dict)
        
        h0_tensor = self.hidden_generator_network(feature_dict)
        h0_tensor = h0_tensor.reshape(self.rnn_layers,-1,self.rnn_hidden_size)
        
        
        
        if self.mode in self.feature_list:
            feature_x = feature_dict[self.mode].to(device).reshape(-1, data_intervals_count ,1)
            feature_x = self.feature_transform(feature_x, self.mode)
            
            # pass in some randomness to the initial hidden tensor to force it to learn some stuff on its own
            # otherwise as the initial hidden layer contains solid infor to minimize the loss; it'll just use that hidden layer to minimize and instead
            # learn to not learn and directly bypass initial hidden
            h0_tensor.masked_fill_((torch.rand(h0_tensor.size()) > 0.5).to(device), 0.0)
            
            out = self.feature_gen_models[self.mode](feature_x, h0_tensor=h0_tensor)
            return out
        
        if self.mode in ['hybrid','hybrid_feature_out']:
            generated_features = []
            for feature_name, feature_gen_model in self.feature_gen_models.items():
                feature_x = feature_dict[feature_name].to(device).reshape(-1, data_intervals_count ,1)
                feature_x = self.feature_transform(feature_x, feature_name)
                features_out = feature_gen_model(feature_x, h0_tensor=h0_tensor)
                generated_features.append(features_out)
                
                
            combined_features = torch.cat(generated_features, 1)#.reshape(-1, self.feature_gen_feature_size*len(self.feature_list))
            
            if self.use_stock_id:
                stock_id = feature_dict['stock_id'].to(device).reshape(-1,1)
                combined_features = torch.cat([combined_features, stock_id], 1)
                
            if self.mode == 'hybrid_feature_out':
                return combined_features
            
            out = self.linear_fusion(combined_features)
            return out
        
#         input("--- out got")
        


        

In [58]:
model = VolatilityBSModel(use_stock_id=False)
model.set_mode('hybrid')
modelpath = "../input/optiver-realized-volatility-binarysentient-pytorch/07_1s_logret1n2_cnn_epoch_400_tloss_0.2393.pth"
modelpath = "../output/models/16_ultimate_hybridEXP14_CNN_5s_StkFalse_1e-05_4_epoch_57_tloss_0.2282.pth"
checkpoint = torch.load(modelpath)
model.load_state_dict(checkpoint['base'])
for k,v in model.feature_gen_models.items():
    v.load_state_dict(checkpoint[k])
model.to(device)
model.eval()

------- set mode : hybrid -----------
------- set mode : hybrid -----------


VolatilityBSModel(
  (hidden_generator_network): NeuralNetwork(
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (cnn_stack): Sequential(
      (0): Conv1d(10, 16, kernel_size=(4,), stride=(2,))
      (1): GELU()
      (2): Conv1d(16, 24, kernel_size=(4,), stride=(2,))
      (3): GELU()
    )
    (linear_stack): Sequential(
      (0): Linear(in_features=672, out_features=512, bias=True)
      (1): GELU()
      (2): Linear(in_features=512, out_features=256, bias=True)
      (3): GELU()
      (4): Linear(in_features=256, out_features=128, bias=True)
      (5): GELU()
    )
    (linear_hybrid): Sequential(
      (0): Linear(in_features=128, out_features=32, bias=True)
      (1): ReLU()
      (2): Linear(in_features=32, out_features=1, bias=True)
    )
  )
  (linear_fusion): Sequential(
    (0): Linear(in_features=416, out_features=512, bias=True)
    (1): GELU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): GELU()
    (4): Linear(in_features=512, out_features=2

In [83]:
dataloader = DataLoader(dataset, batch_size=3,
                        shuffle=True, num_workers=0, pin_memory=True)
size = len(dataloader.dataset)
submission_data = []
data_interval_len = int(600/1)
output_scaling = 10000
data_ohlc_sample_len = 1 # 1 for each of open high low close
for batch, (Feature_X, feature_y) in enumerate(dataloader):
    row_ids = Feature_X['row_id']
    y = feature_y.to(device) * output_scaling 
    
    pred = model(Feature_X) 
#     print(pred)
    predicted_volatility = (pred/output_scaling).tolist()
    for idx, row_id in enumerate(row_ids):
        submission_data.append({'row_id':row_id, 'target':predicted_volatility[idx][0]})
submission_df = pd.DataFrame(submission_data)
submission_df = dataset.main_df.merge(submission_df,on='row_id',how='left')
submission_df = submission_df.rename(columns={'target_y':'target'})
# submission_df
# print(submission_df.columns)
submission_df[['row_id','target']].to_csv("submission.csv", index=False)
# for idx, (X,y) in enumerate(dataset):
#     print(idx, X)

In [84]:
pd.read_csv("submission.csv")

Unnamed: 0,row_id,target
0,0-4,0.001254
1,0-32,0.000647
2,0-34,0.000531


In [13]:
import torch.version

In [14]:
torch.__version__

'1.9.0+cu111'