In [1]:
import multiprocessing
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

from optiver_features_handler import get_features_map_for_stock, get_row_id

In [2]:
# Root folder holding the competition input files (train.csv, test.csv and the parquet folders).
DATA_DIRECTORY = os.path.join("..", "input", "optiver-realized-volatility-prediction")

# Trade / order-book parquet locations for each split, all relative to DATA_DIRECTORY.
(
    TRADE_TRAIN_DIRECTORY,
    TRADE_TEST_DIRECTORY,
    BOOK_TRAIN_DIRECTORY,
    BOOK_TEST_DIRECTORY,
) = [
    os.path.join(DATA_DIRECTORY, parquet_name)
    for parquet_name in (
        "trade_train.parquet",
        "trade_test.parquet",
        "book_train.parquet",
        "book_test.parquet",
    )
]

# Folder for generated artefacts; exist_ok keeps this cell safe to re-run.
OUTPUT_DIRECTORY = os.path.join("..", "output")
os.makedirs(OUTPUT_DIRECTORY, exist_ok=True)

In [3]:
# Load the per-row index tables (train first, then test) from the input folder.
train_df, test_df = (
    pd.read_csv(os.path.join(DATA_DIRECTORY, csv_name))
    for csv_name in ("train.csv", "test.csv")
)

In [4]:
# Width (in seconds) of one aggregation interval and the number of such
# intervals in the 600-second window the features are bucketed over.
data_interval_seconds = 24
data_intervals_count = int(600/data_interval_seconds)


class OptiverRealizedVolatilityDataset(Dataset):
    """Map-style dataset over the rows of the competition's train.csv / test.csv.

    Each item is a pair ``(x, y)``:

    * ``x`` - dict with ``row_id`` (str), ``stock_id`` (scalar tensor),
      ``book_realized_volatility`` (1-element tensor) and one float32 tensor per
      per-interval series feature;
    * ``y`` - dict with ``target_realized_volatility`` (1-element float32 tensor).

    Features are produced by ``get_features_map_for_stock`` one stock at a time,
    either lazily on first access of any row of that stock or eagerly in
    ``__init__`` through a process pool (``lazy_load=False``).
    """

    # Per-interval series features copied into `x`, in insertion order.
    _SERIES_FEATURE_NAMES = (
        # TRADE FEATURES
        'trade_logrett_sum_xs',
        'trade_logrett_realized_volatility_xs',
        'trade_logrett_std_xs',
        'trade_logrett_mean_xs',
        'trade_size_sum_xs',
        'trade_size_std_xs',
        'trade_order_count_sum_xs',
        'trade_order_count_std_xs',
        'trade_trade_money_turnover_sum_xs',
        'trade_trade_money_turnover_std_xs',
        # BOOK FEATURES
        'book_logret1_sum_xs',
        'book_logret1_realized_volatility_xs',
        'book_logret1_std_xs',
        'book_logret1_mean_xs',
        'book_logret2_sum_xs',
        'book_logret2_realized_volatility_xs',
        'book_logret2_std_xs',
        'book_logret2_mean_xs',
        'book_price_spread1_sum_xs',
        'book_price_spread1_std_xs',
        'book_bid_spread_sum_xs',
        'book_bid_spread_std_xs',
        'book_ask_spread_sum_xs',
        'book_ask_spread_std_xs',
        'book_total_volume_sum_xs',
        'book_total_volume_std_xs',
        'book_volume_imbalance_sum_xs',
        'book_volume_imbalance_std_xs',
    )

    def __init__(self, data_directory, mode="train", lazy_load=True):
        """Initializes the Optiver competition dataset.

        `data_directory`: directory of the input data, containing test.csv, train.csv
            and the parquet folders (trade_train.parquet and the other relevant folders).
        `mode`: train|test (case-insensitive); selects which csv is read.
        `lazy_load`: when False, features of every stock are extracted up front
            with a multiprocessing pool instead of on first access.

        Raises ValueError when `mode` is not one of the valid values.
        """
        print("INIT: OptiverRealizedVolatilityDataset")
        if mode.lower() not in ['train','test']:
            # ValueError is a subclass of Exception, so existing `except Exception` callers still work.
            raise ValueError("Invalid mode passed for Optiver dataset. Valid values:train|test")
        self.data_directory = data_directory
        self.mode = mode.lower()
        self.main_df = pd.read_csv(os.path.join(self.data_directory,f'{self.mode}.csv'))
        if self.mode == 'test':
            # A placeholder target keeps the train and test code paths identical.
            self.main_df['target'] = 0

        # Stocks whose features have already been extracted and merged into the cache.
        self.cache_stocks_done_set = set()
        # Final feature lookup, addressed by row id ('stock_id'-'time_id') of the csv row.
        # Holds a raw dict until the stock is processed, then the (x, y) tensor pair.
        self.cache_rowid_feature_map = {}
        # stock_id -> time ids listed in the csv; needed to fill default values
        # for rows the feature extractor does not return (e.g. missing parquet files).
        self.stock_possible_timeids_list = {}

        for stock_id, time_id, target in zip(
            self.main_df['stock_id'].tolist(),
            self.main_df['time_id'].tolist(),
            self.main_df['target'].tolist(),
        ):
            stock_id, time_id = int(stock_id), int(time_id)
            row_id = f"{stock_id}-{time_id}"
            self.cache_rowid_feature_map[row_id] = {
                'target_realized_volatility': target,
                'stock_id': stock_id,
                'time_id': time_id,
                'row_id': row_id,
            }
            self.stock_possible_timeids_list.setdefault(stock_id, []).append(time_id)

        if not lazy_load:
            self._load_all_stocks()

    def _load_all_stocks(self):
        """Extract and cache the features of every stock using a process pool."""
        worker_data = [
            (self.data_directory, self.mode, stock_id)
            for stock_id in self.stock_possible_timeids_list
        ]
        if not worker_data:
            return
        worker_count = multiprocessing.cpu_count()
        processed = 0
        # NOTE: this was hell of a hunt; the windows + pytorch + jupyter combination is tedious.
        #       Make sure the function that gets distributed to the workers doesn't call pytorch.
        with multiprocessing.Pool(worker_count) as pool:
            # Work is submitted one chunk at a time so progress can be reported.
            for start in range(0, len(worker_data), worker_count):
                worker_data_chunk = worker_data[start:start + worker_count]
                feature_set_list = pool.starmap(get_features_map_for_stock, worker_data_chunk)
                for (_, _, stock_id), feature_map in zip(worker_data_chunk, feature_set_list):
                    self._merge_stock_features(stock_id, feature_map)
                processed += len(worker_data_chunk)
                print(f"Processed and loaded {processed} stocks features.")

    def _merge_stock_features(self, stock_id, feature_map):
        """Merge one stock's extracted features into the cache and convert its rows to tensors."""
        # NOTE: sometimes parquet files are missing; the csv may then list e.g. 3 rows
        # while only 1 comes back in the feature map. Give the missing rows an empty
        # feature dict so they are still transformed (with default values).
        for time_id in self.stock_possible_timeids_list[stock_id]:
            feature_map.setdefault(get_row_id(stock_id, time_id), {})
        for rowid, features_dict in feature_map.items():
            merged = self.cache_rowid_feature_map[rowid]
            merged.update(features_dict)
            self.cache_rowid_feature_map[rowid] = OptiverRealizedVolatilityDataset.transform_to_01_realized_volatility_linear_data(merged)
        # Mark the stock as fetched so the lazy loader does not extract it again.
        self.cache_stocks_done_set.add(int(stock_id))

    def __cache_generate_features(self, main_stock_id, main_time_id):
        """Return the cached (x, y) pair of a row, extracting its stock's features on first use."""
        main_row_id = get_row_id(main_stock_id, main_time_id)
        if main_stock_id not in self.cache_stocks_done_set:
            feature_map = get_features_map_for_stock(self.data_directory, self.mode, main_stock_id)
            self._merge_stock_features(main_stock_id, feature_map)
        return self.cache_rowid_feature_map[main_row_id]

    @staticmethod
    def transform_to_01_realized_volatility_linear_data(features_dict):
        """Convert a raw feature dict into the ``(x, y)`` pair of tensor dicts.

        `features_dict` must contain 'row_id', 'stock_id' and
        'target_realized_volatility'. Every other feature is optional: a missing
        series defaults to zeros (one value per interval), a missing
        'seconds_in_bucket_xs' defaults to the interval end times, and a missing
        'book_realized_volatility' defaults to 0. NaNs inside series are replaced
        via ``np.nan_to_num``.
        """
        zero_series = [0]*(int(600/data_interval_seconds))
        default_seconds = [(idx*data_interval_seconds)+data_interval_seconds for idx in range(0,int(data_intervals_count))]

        def series_tensor(feature_name, default):
            return torch.tensor(np.nan_to_num(features_dict.get(feature_name, default)), dtype=torch.float32)

        x = {
            'row_id': features_dict['row_id'],
            'stock_id': torch.tensor(features_dict['stock_id'], dtype=torch.float32),
            'seconds_in_bucket_xs': series_tensor('seconds_in_bucket_xs', default_seconds),
            'book_realized_volatility': torch.tensor([features_dict.get('book_realized_volatility',0)], dtype=torch.float32),
        }
        for feature_name in OptiverRealizedVolatilityDataset._SERIES_FEATURE_NAMES:
            x[feature_name] = series_tensor(feature_name, zero_series)

        # Explicit dtype: the test-mode placeholder target is the int 0, which would
        # otherwise produce an int64 tensor while train targets are float32.
        y = {'target_realized_volatility': torch.tensor([features_dict['target_realized_volatility']], dtype=torch.float32)}
        return x, y

    def __len__(self):
        """Number of rows in the csv of the selected mode."""
        return len(self.main_df)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair for csv row `idx` (int or scalar tensor)."""
        #TODO: handle for num_workers more than 0
        #      using https://pytorch.org/docs/stable/data.html
        #      using torch.util.data.get_worker_info()
        if torch.is_tensor(idx):
            idx = idx.tolist()
        stock_id = self.main_df.at[idx, 'stock_id']
        time_id = self.main_df.at[idx, 'time_id']
        x, y = self.__cache_generate_features(stock_id, time_id)
        return x, y

In [5]:
# Build the dataset over test.csv with lazy (on first access) feature extraction.
# Pass mode="train" instead to iterate over train.csv.
dataset = OptiverRealizedVolatilityDataset(
    data_directory=DATA_DIRECTORY,
    mode="test",
    lazy_load=True,
)

INIT: OptiverRealizedVolatilityDataset


In [6]:
# Sanity check: display the first (x, y) sample returned by the dataset.
dataset[0]

({'row_id': '0-4',
  'stock_id': tensor(0.),
  'seconds_in_bucket_xs': tensor([ 24.,  48.,  72.,  96., 120., 144., 168., 192., 216., 240., 264., 288.,
          312., 336., 360., 384., 408., 432., 456., 480., 504., 528., 552., 576.,
          600.]),
  'book_realized_volatility': tensor([0.0003]),
  'trade_logrett_sum_xs': tensor([ 0.0000, -0.0003,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000]),
  'trade_logrett_realized_volatility_xs': tensor([0.0000, 0.0003, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]),
  'trade_logrett_std_xs': tensor([0.0000, 0.0002, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000

In [7]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
# (Override use_cuda with False here to force CPU execution.)
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Enable the cuDNN benchmark mode.
torch.backends.cudnn.benchmark = True

# Placeholder binding; no model has been built at this point.
model = None


def loss_fn_mse(y, pred):
    """Mean squared error between targets `y` and predictions `pred`."""
    residual = y - pred
    return residual.square().mean()

def loss_fn_mspe(y, pred):
    """Mean squared percentage error: mean of ((y - pred) / y) squared.

    The error is divided by `y`, so zero targets yield inf/nan.
    """
    relative_error = (y - pred) / y
    return relative_error.square().mean()

def loss_fn_orig(y, pred):
    """Root mean squared percentage error: sqrt(mean(((y - pred) / y) ** 2)).

    The error is divided by `y`, so zero targets yield inf/nan.
    """
    relative_error = (y - pred) / y
    mean_squared = relative_error.square().mean()
    return mean_squared.sqrt()

In [8]:
# NOTE(review): not read by scale_optiver_feature; kept because other cells may use it.
realize_volatility_scale_factor = 1000

# Per-feature mean/std used for standard scaling. Built once at module level
# instead of on every call. NOTE(review): presumably computed on the training
# set -- confirm where these statistics come from.
_STANDARD_SCALING_FEATURE_MAP = {
    'seconds_in_bucket_xs': {'mean': 312.0, 'std': 173.06646728515625},
    'book_realized_volatility': {'mean': 0.005850940477102995,
                                 'std': 0.004778958857059479},
    'trade_logrett_sum_xs': {'mean': -1.1226925167306945e-08,
                             'std': 0.0012633935548365116},
    'trade_logrett_realized_volatility_xs': {'mean': 0.0005547573091462255,
                                             'std': 0.0011005407432094216},
    'trade_logrett_std_xs': {'mean': 0.0002125719329342246,
                             'std': 0.000520365487318486},
    'trade_logrett_mean_xs': {'mean': 2.2360411549016135e-06,
                              'std': 0.0006506441859528422},
    'trade_size_sum_xs': {'mean': 1274.408447265625, 'std': 3957.27392578125},
    'trade_size_std_xs': {'mean': 201.2566680908203, 'std': 809.9418334960938},
    'trade_order_count_sum_xs': {'mean': 14.93747329711914,
                                 'std': 31.093172073364258},
    'trade_order_count_std_xs': {'mean': 1.9885706901550293,
                                 'std': 3.986957311630249},
    'trade_trade_money_turnover_sum_xs': {'mean': 1274.364990234375,
                                          'std': 3957.194580078125},
    'trade_trade_money_turnover_std_xs': {'mean': 201.25364685058594,
                                          'std': 809.8814697265625},
    'book_logret1_sum_xs': {'mean': -9.877491713439213e-09,
                            'std': 0.0013098383788019419},
    'book_logret1_realized_volatility_xs': {'mean': 0.0008679572492837906,
                                            'std': 0.001236740150488913},
    'book_logret1_std_xs': {'mean': 0.00023386265092995018,
                            'std': 0.0003532674163579941},
    'book_logret1_mean_xs': {'mean': 9.141682255631167e-08,
                             'std': 0.00013150273298379034},
    'book_logret2_sum_xs': {'mean': -9.768747588623228e-09,
                            'std': 0.0013809562660753727},
    'book_logret2_realized_volatility_xs': {'mean': 0.0011465881252661347,
                                            'std': 0.00145239126868546},
    'book_logret2_std_xs': {'mean': 0.00031500737532041967,
                            'std': 0.00042466234299354255},
    'book_logret2_mean_xs': {'mean': 9.769648556812172e-08,
                             'std': 0.0001457101752748713},
    'book_price_spread1_sum_xs': {'mean': 0.008905505761504173,
                                  'std': 0.00932735949754715},
    'book_price_spread1_std_xs': {'mean': 0.00013038843462709337,
                                  'std': 0.00018358806846663356},
    'book_bid_spread_sum_xs': {'mean': 0.0030224656220525503,
                               'std': 0.0032615496311336756},
    'book_bid_spread_std_xs': {'mean': 7.655585795873776e-05,
                               'std': 0.0001423279318260029},
    'book_ask_spread_sum_xs': {'mean': 0.0030542113818228245,
                               'std': 0.003295465372502804},
    'book_ask_spread_std_xs': {'mean': 7.811487739672884e-05,
                               'std': 0.00014516572991851717},
    'book_total_volume_sum_xs': {'mean': 65197.703125, 'std': 372891.21875},
    'book_total_volume_std_xs': {'mean': 394.8986511230469,
                                 'std': 1568.0399169921875},
    'book_volume_imbalance_sum_xs': {'mean': 15863.017578125,
                                     'std': 113432.6953125},
    'book_volume_imbalance_std_xs': {'mean': 385.7958984375,
                                     'std': 1644.90966796875},
}

# Features that are not standard-scaled but have their infinities zeroed.
_INF_MASKED_FEATURE_NAMES = ('trade_price_local_standardized_xs', 'book_wap1_local_standardized_xs')


def scale_optiver_feature(feature_name, feature_tensor):
    """Scale one feature tensor according to its name.

    * Names present in the statistics table are standardised:
      ``(feature_tensor - mean) / std``.
    * The two ``*_local_standardized_xs`` features are returned with +/-inf
      replaced by 0 (NaNs are left untouched).
    * Any other feature is returned unchanged.

    `feature_name`: key identifying the feature.
    `feature_tensor`: torch tensor holding the feature values.
    Returns a tensor of the same shape as `feature_tensor`.
    """
    stats = _STANDARD_SCALING_FEATURE_MAP.get(feature_name)
    if stats is not None:
        return (feature_tensor - stats['mean'])/stats['std']
    if feature_name in _INF_MASKED_FEATURE_NAMES:
        #TODO: the kaggle version of pytorch dont have nan_to_num, do something here!
        # masked_fill is used instead; it only handles infinities, not NaN.
        feature_tensor = torch.masked_fill(feature_tensor, torch.isinf(feature_tensor),0)
    return feature_tensor

In [12]:


    
class StockIdEmbedding(nn.Module):
    """Learned embedding of the stock id with a small regression head.

    In mode ``'stock_id_embedding'`` the forward pass returns the raw embedding
    vectors; in any other mode it returns the output of the linear head
    (one value per sample).
    """

    def __init__(self,number_of_stock_embeddings=126+10, number_of_stock_embedding_dimention=2, mode='train'):
        """
        `number_of_stock_embeddings`: size of the embedding table; stock ids are
            clamped into ``[0, number_of_stock_embeddings - 1]`` before lookup.
        `number_of_stock_embedding_dimention`: width of each embedding vector.
        `mode`: 'stock_id_embedding' to output embeddings, anything else to
            output the linear head's prediction.
        """
        super(StockIdEmbedding, self).__init__()

        self.number_of_stock_embeddings = number_of_stock_embeddings
        self.number_of_stock_embedding_dimention = number_of_stock_embedding_dimention
        self.stock_embedding = nn.Embedding(self.number_of_stock_embeddings, self.number_of_stock_embedding_dimention)
        self.mode = mode
        self.linear_stack = nn.Sequential(
            nn.Linear(self.number_of_stock_embedding_dimention, 32),
            nn.Hardswish(),
            nn.Linear(32, 16),
            nn.Hardswish(),
            nn.Linear(16, 16),
            nn.ReLU(),
            nn.Linear(16, 1)
        )

    def get_feature_gen_train_modes(self):
        """This module defines no feature-generator training modes."""
        return []

    def set_mode(self,mode):
        """Switch between embedding output and linear-head output (see class docstring)."""
        self.mode = mode

    def forward(self, feature_dict):
        """Look up the embeddings for ``feature_dict['stock_id']``.

        `feature_dict['stock_id']` is a tensor of stock ids (any shape; it is
        flattened). Returns ``(N, embedding_dim)`` in 'stock_id_embedding' mode,
        otherwise ``(N, 1)``.
        """
        stock_id_clamped = torch.clamp(feature_dict['stock_id'],0,self.number_of_stock_embeddings-1)
        # Convert to integer indices on whatever device the embedding table lives on.
        # (A hard-coded torch.cuda.IntTensor cast fails on CPU-only hosts.)
        stock_index = stock_id_clamped.to(
            device=self.stock_embedding.weight.device, dtype=torch.long
        ).reshape(-1)
        embedding_logits = self.stock_embedding(stock_index)
        embedding_logits = embedding_logits.reshape(-1,self.number_of_stock_embedding_dimention)

        if self.mode == 'stock_id_embedding':
            return embedding_logits

        logits = self.linear_stack(embedding_logits)
        return logits

class NeuralNetwork(nn.Module):
    """1-D CNN over the per-interval feature series, followed by linear heads.

    The forward pass scales 28 feature series, stacks them as channels of a
    ``(batch, 28, data_intervals_count)`` tensor, runs the convolutional stack,
    projects to a hidden vector and (unless in 'hidden_generator' mode) maps
    that vector to a single output value.

    A ``StockIdEmbedding`` sub-module is instantiated but is not used by the
    current forward pass.
    """

    # Channel order of the CNN input. The names are keys of `feature_dict`;
    # the order defines which channel each feature occupies.
    _INPUT_FEATURE_NAMES = (
        'trade_logrett_sum_xs',
        'trade_logrett_realized_volatility_xs',
        'trade_logrett_std_xs',
        'trade_logrett_mean_xs',

        'book_logret1_sum_xs',
        'book_logret1_realized_volatility_xs',
        'book_logret1_std_xs',
        'book_logret1_mean_xs',

        'book_logret2_sum_xs',
        'book_logret2_realized_volatility_xs',
        'book_logret2_std_xs',
        'book_logret2_mean_xs',

        'trade_size_sum_xs',
        'trade_size_std_xs',
        'trade_order_count_sum_xs',
        'trade_order_count_std_xs',
        'trade_trade_money_turnover_sum_xs',
        'trade_trade_money_turnover_std_xs',
        'book_price_spread1_sum_xs',
        'book_price_spread1_std_xs',
        'book_bid_spread_sum_xs',
        'book_bid_spread_std_xs',
        'book_ask_spread_sum_xs',
        'book_ask_spread_std_xs',
        'book_total_volume_sum_xs',
        'book_total_volume_std_xs',
        'book_volume_imbalance_sum_xs',
        'book_volume_imbalance_std_xs',
    )

    def __init__(self, feature_generator_mode_hidden_size=64, mode='train'):
        """
        `feature_generator_mode_hidden_size`: width of the hidden vector produced
            by `linear_stack` (and returned as-is in 'hidden_generator' mode).
        `mode`: 'hidden_generator' makes forward return the hidden vector;
            any other value returns the final single-value prediction.
        """
        super(NeuralNetwork, self).__init__()

        self.flatten = nn.Flatten()
        self.mode = mode
        self.feature_generator_mode_hidden_size = feature_generator_mode_hidden_size
        self.stock_id_embedding = StockIdEmbedding(number_of_stock_embedding_dimention=2, mode='stock_id_embedding')

        conv_layers = [
            nn.Dropout(0.1),
            nn.Conv1d(28, 14, kernel_size=10, stride=5, padding=0),
            nn.Hardswish(),
            nn.Conv1d(14, 7, kernel_size=2, stride=1, padding=0),
            nn.Hardswish(),
        ]
        self.cnn_stack = nn.Sequential(*conv_layers)

        # LazyLinear infers its input width from the flattened CNN output on first use.
        self.linear_stack = nn.Sequential(
            nn.LazyLinear(self.feature_generator_mode_hidden_size),
        )

        head_layers = [
            nn.Linear(self.feature_generator_mode_hidden_size, 128),
            nn.GELU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        ]
        self.linear_hybrid = nn.Sequential(*head_layers)

    def get_feature_gen_train_modes(self):
        """This module defines no feature-generator training modes."""
        return []

    def set_mode(self,mode):
        """Set the forward mode and keep the stock-id sub-module's mode in sync."""
        self.mode = mode
        embedding_mode = 'train' if self.mode == 'train_stock_id_embedding' else 'stock_id_embedding'
        self.stock_id_embedding.set_mode(embedding_mode)

    def forward(self, feature_dict):
        """Run the network on a batch of features.

        `feature_dict` maps each name in `_INPUT_FEATURE_NAMES` to a 2-D tensor;
        the tensors are concatenated along dim 1 and reshaped to
        ``(-1, 28, data_intervals_count)``.
        Returns the hidden vector in 'hidden_generator' mode, else a
        ``(batch, 1)`` prediction.
        """
        scaled_series = [
            scale_optiver_feature(feature_name, feature_dict[feature_name]).to(device)
            for feature_name in self._INPUT_FEATURE_NAMES
        ]
        x = torch.cat(scaled_series, 1).to(device)
        x = x.reshape(-1,28,data_intervals_count)

        conv_out = self.flatten(self.cnn_stack(x))
        hidden = self.linear_stack(conv_out)

        if self.mode == 'hidden_generator':
            return hidden

        return self.linear_hybrid(hidden)


    





In [13]:
# class VolatilityGRU(nn.Module):
#     def __init__(self, input_size=1, hidden_size=64, repeated_cells=1):
#         self.input_size = input_size
#         self.hidden_size = hidden_size

class MultiFetGRU(nn.Module):
    """GRU feature learner over one or more feature sequences.

    Every named feature is read from the batch dict, scaled with
    `scale_optiver_feature`, and stacked as one input channel of the GRU.
    The GRU output at the last time step is projected to `features_out` values.

    `feature_names`: a single feature name or a list of feature names
    `hidden_size`: hidden size of the GRU
    `layers`: number of stacked GRU layers
    `dropout`: dropout value passed to nn.GRU
    `features_out`: width of the generated feature vector
    `mode`: train|feature_generator
        feature_generator -> forward returns the `features_out`-wide vector
        train             -> forward additionally applies the trainer head (one value per sample)
    """

    _SUPPORTED_MODES = ("feature_generator", "train")

    # feature names whose input tensor is stored under a different key of the batch dict
    _FEATURE_SOURCE_KEYS = {
        'book_realized_volatility_xs': 'logret1_xs',
        'trade_realized_volatility_xs': 'logrett_xs',
    }

    def __init__(self,feature_names, hidden_size=64, layers=1, dropout=0, features_out=32, mode="train"):
        super(MultiFetGRU,self).__init__()
        if isinstance(feature_names, str):
            feature_names = [feature_names]
        self.feature_names = feature_names
        # one GRU input channel per feature
        self.input_size_ = len(self.feature_names)

        self.hidden_size_ = hidden_size
        self.repeated_lstm_cells_ = layers
        self.dropout_ = dropout
        self.features_out = features_out
        # Store the requested mode; forward() reads self.mode, so without this the
        # module fails with AttributeError until set_mode() has been called.
        self.mode = mode

        self.initial_dropout_ = nn.Dropout(0.1)
        self.rnn_ = nn.GRU(self.input_size_, self.hidden_size_, self.repeated_lstm_cells_, batch_first=True, dropout=self.dropout_)

        # projects the last GRU output to the generated feature vector
        self.linear_feature_stack_ = nn.Sequential(
            nn.Linear(self.hidden_size_, self.features_out),
        )

        # head used only in 'train' mode: feature vector -> single value
        self.linear_trainer_stack_ = nn.Sequential(
            nn.Linear(self.features_out, 128),
            nn.Hardswish(),
            nn.Linear(128, 1),
        )

    def set_mode(self, mode):
        """switches the output of forward; `mode`: train|feature_generator"""
        self.mode = mode

    def forward(self, feature_dict, h0_tensor=None):
        """runs the GRU over the configured features of one batch

        `feature_dict`: batch dict holding one tensor per feature name
            (presumably each of shape (batch, intervals) - TODO confirm against the dataset)
        `h0_tensor`: accepted for call compatibility but not used; the GRU is
            called without an explicit initial hidden state
        returns the generated features in 'feature_generator' mode, or the
        trainer-head output in 'train' mode
        raises ValueError when the current mode is not train|feature_generator
        """
        if self.mode not in self._SUPPORTED_MODES:
            raise ValueError(
                f"Unsupported MultiFetGRU mode {self.mode!r}. Valid values:{'|'.join(self._SUPPORTED_MODES)}"
            )

        scaled_features = []
        for feature_name in self.feature_names:
            source_key = self._FEATURE_SOURCE_KEYS.get(feature_name, feature_name)
            # scaling is looked up by the feature name, not by the source key
            scaled = scale_optiver_feature(feature_name, feature_dict[source_key])
            scaled_features.append(scaled.to(device))

        # features become the last axis, which is the GRU input axis (batch_first=True)
        feature_x = torch.stack(scaled_features, dim=2)
        feature_x = self.initial_dropout_(feature_x)

        output_, hn_ = self.rnn_(feature_x)

        # only the output of the last time step feeds the dense layers
        out_ = self.linear_feature_stack_(output_[:,-1])
        if self.mode == "train":
            out_ = self.linear_trainer_stack_(out_)
        return out_
            
            
            
class VolatilityBSModel(nn.Module):
    """various rnn features' fusion with fully connected nn

    `mode`: hybrid|hybrid_feature_out|hidden_generator|ultimate|<feature group>
        <feature group>    -> one entry of `feature_list`; forward runs only that group's GRU model
        hidden_generator   -> forward runs only `hidden_generator_network`
        hybrid / ultimate  -> forward fuses GRU features, hidden generator features,
                              stock id embedding and the scaled book realized volatility
        hybrid_feature_out -> same as hybrid but returns the fused feature vector itself

    The GRU models live in the plain dict `feature_gen_models` (keyed by the str()
    of their feature group), so they are NOT registered sub-modules: they are not
    part of `state_dict()` and are moved to `device` explicitly in `__init__`.
    """

    # modes in which the GRU models act as feature generators and forward fuses everything
    _FUSION_MODES = ['hybrid','hybrid_feature_out','ultimate']

    def __init__(self, mode="hybrid"):
        super(VolatilityBSModel, self).__init__()
        self.mode = mode

        # One GRU model is built per inner list.
        # NOTE(review): 'trade_order_count_std_xs' is listed twice. It is kept because
        # removing it changes the GRU input width and breaks existing checkpoints - confirm intent.
        self.feature_list = [
            [
                'book_logret1_sum_xs','book_logret1_realized_volatility_xs','book_logret1_std_xs','book_logret1_mean_xs',
                'book_logret2_sum_xs','book_logret2_realized_volatility_xs','book_logret2_std_xs','book_logret2_mean_xs',
                'trade_size_sum_xs','trade_size_std_xs','trade_order_count_sum_xs','trade_order_count_std_xs','trade_order_count_std_xs','trade_trade_money_turnover_sum_xs','trade_trade_money_turnover_std_xs',
                'trade_logrett_sum_xs','trade_logrett_realized_volatility_xs','trade_logrett_std_xs','trade_logrett_mean_xs',
                'book_price_spread1_sum_xs','book_price_spread1_std_xs','book_bid_spread_sum_xs','book_bid_spread_std_xs','book_ask_spread_sum_xs',
                'book_ask_spread_std_xs','book_total_volume_sum_xs','book_total_volume_std_xs','book_volume_imbalance_sum_xs','book_volume_imbalance_std_xs',
            ]
        ]

        self.feature_gen_feature_size = 128
        self.feature_gen_models = {}
        self.rnn_hidden_size = 128
        self.rnn_layers = 4
        self.stock_embedding_dimention = 6
        self.stock_id_embedding = StockIdEmbedding(number_of_stock_embedding_dimention=self.stock_embedding_dimention, mode='stock_id_embedding')
        self.hidden_generator_network = NeuralNetwork(feature_generator_mode_hidden_size=self.feature_gen_feature_size)

        for feature_group in self.feature_list:
            generator = MultiFetGRU(feature_group, hidden_size=self.rnn_hidden_size, layers=self.rnn_layers, dropout=0.0, features_out=self.feature_gen_feature_size)
            # not a registered sub-module, so it has to be moved to the device by hand
            generator.to(device)
            self.feature_gen_models[str(feature_group)] = generator

        # LazyLinear infers its input width from the first fused feature vector it sees
        self.linear_fusion = nn.Sequential(
            nn.LazyLinear(256),
            nn.Hardswish(),
            nn.Dropout(0.1),
            nn.Linear(256,64),
            nn.ReLU(),
            nn.Linear(64,1)
        )
        self.set_mode(self.mode)

    def get_feature_gen_train_modes(self):
        """returns the feature groups; each one is a valid mode that trains its own GRU model"""
        return self.feature_list

    def set_mode(self, mode):
        """sets the mode of this model and the matching mode of every sub-model"""
        print(f"------- set mode : {mode} -----------")
        self.mode = mode
        if self.mode in ['hybrid','hybrid_feature_out','hidden_generator','ultimate']:
            generator_mode = 'feature_generator'
        else:
            generator_mode = 'train'
        for feature_gen_model in self.feature_gen_models.values():
            feature_gen_model.set_mode(generator_mode)

        if self.mode == 'hidden_generator':
            self.hidden_generator_network.set_mode('train')
        else:
            self.hidden_generator_network.set_mode('hidden_generator')
        self.stock_id_embedding.set_mode('stock_id_embedding')

    def train(self, mode=True):
        """sets training/evaluation mode, including the GRU feature generators

        `feature_gen_models` is a plain dict, so nn.Module.train()/eval() does not
        reach those models on its own; without this override their dropout layers
        stay active after `model.eval()`.
        `mode`: True for training, False for evaluation (this is what eval() passes)
        """
        super(VolatilityBSModel, self).train(mode)
        # getattr keeps this safe if train() runs before __init__ created the dict
        for feature_gen_model in getattr(self, 'feature_gen_models', {}).values():
            feature_gen_model.train(mode)
        return self

    def parameters(self):
        """returns only the parameters that should be optimized in the current mode

        A feature-group mode yields that group's GRU model, 'hybrid' the fusion head
        and stock embedding, 'hidden_generator' the hidden generator network, and
        'ultimate' all of them. Any other mode falls back to nn.Module.parameters(),
        which does not include the GRU models (they are not registered sub-modules).
        """
        # keys for the feature groups are the str() of the list of feature names,
        # e.g. "['logret1_xs', 'volume_xs']"
        generator_sources_map = {str(k):[v] for k,v in self.feature_gen_models.items()}
        generator_sources_map['hybrid']= [self.linear_fusion, self.stock_id_embedding]
        generator_sources_map['hidden_generator'] = [self.hidden_generator_network]
        generator_sources_map['ultimate']= [self.linear_fusion, self.hidden_generator_network, self.stock_id_embedding] + list(self.feature_gen_models.values())

        mode_key = str(self.mode)
        if mode_key not in generator_sources_map:
            return super(VolatilityBSModel,self).parameters()
        return [
            param
            for generator_source in generator_sources_map[mode_key]
            for param in generator_source.parameters()
        ]

    def forward(self, feature_dict):
        """computes the output for one batch according to the current mode

        `feature_dict`: batch dict of feature tensors; fusion modes also read
            'book_realized_volatility' from it
        raises ValueError when the current mode is not handled
        """
        if self.mode in self.feature_list:
            # the initial hidden state is left to the GRU default (h0_tensor=None)
            return self.feature_gen_models[str(self.mode)](feature_dict, h0_tensor=None)

        if self.mode in ['hidden_generator']:
            return self.hidden_generator_network(feature_dict)

        if self.mode in self._FUSION_MODES:
            generated_features = [
                feature_gen_model(feature_dict, h0_tensor=None)
                for feature_gen_model in self.feature_gen_models.values()
            ]
            cnn_features = self.hidden_generator_network(feature_dict)
            embedding_logits = self.stock_id_embedding(feature_dict)
            realized_volatility_logits = feature_dict['book_realized_volatility'].to(device).reshape(-1,1) * realize_volatility_scale_factor

            # concatenation order defines the input layout of linear_fusion
            combined_features = torch.cat(
                generated_features + [cnn_features, embedding_logits, realized_volatility_logits], 1
            )

            if self.mode == 'hybrid_feature_out':
                return combined_features
            return self.linear_fusion(combined_features)

        raise ValueError(f"Unsupported VolatilityBSModel mode: {self.mode!r}")
        
#         input("--- out got")
        


        

In [14]:
# Build the model in 'ultimate' mode and restore trained weights for inference.
model = VolatilityBSModel()
model.set_mode('ultimate')
# Earlier checkpoint, kept for reference (it was overwritten by the assignment below anyway):
# modelpath = "../input/optiver-realized-volatility-binarysentient-pytorch/07_1s_logret1n2_cnn_epoch_400_tloss_0.2393.pth"
# NOTE(review): the file name ends in "tloss_nan" - presumably the training loss was NaN
# when this checkpoint was written; verify it is the intended one.
modelpath = "../output/models/17_gg_hybridEXP47_CNNRNN_5s_StkFalse_0.001_256_epoch_49_tloss_nan.pth"
# map_location lets a checkpoint saved on GPU load on whatever `device` this kernel uses
checkpoint = torch.load(modelpath, map_location=device)
# NOTE(review): the recorded run of this cell failed here with missing/unexpected keys for
# linear_fusion and hidden_generator_network.cnn_stack - the checkpoint appears to come from
# a different layer layout than the classes defined above; confirm before relying on it.
model.load_state_dict(checkpoint['base'])
# The GRU feature generators are stored in a plain dict and are not part of the
# 'base' state dict, so each one is restored from its own checkpoint entry.
for k,v in model.feature_gen_models.items():
    v.load_state_dict(checkpoint[k])
    # plain-dict members are not reached by model.eval(); switch them explicitly
    v.eval()
model.to(device)
model.eval()



------- set mode : hybrid -----------
------- set mode : ultimate -----------


RuntimeError: Error(s) in loading state_dict for VolatilityBSModel:
	Missing key(s) in state_dict: "hidden_generator_network.cnn_stack.3.weight", "hidden_generator_network.cnn_stack.3.bias", "linear_fusion.0.weight", "linear_fusion.0.bias", "linear_fusion.3.weight", "linear_fusion.3.bias", "linear_fusion.5.weight", "linear_fusion.5.bias". 
	Unexpected key(s) in state_dict: "hidden_generator_network.cnn_stack.4.weight", "hidden_generator_network.cnn_stack.4.bias", "linear_fusion.6.weight", "linear_fusion.6.bias", "linear_fusion.1.weight", "linear_fusion.1.bias", "linear_fusion.4.weight", "linear_fusion.4.bias". 

In [None]:
# Run the model over the dataset and write the predictions to submission.csv.
dataloader = DataLoader(dataset, batch_size=3,
                        shuffle=False, num_workers=0, pin_memory=True)
submission_data = []

# predictions are divided by this factor to get back to the target scale
output_scaling = realize_volatility_scale_factor

# Inference only: no_grad avoids building an autograd graph for every batch.
with torch.no_grad():
    for batch, (Feature_X, feature_y) in enumerate(dataloader):
        row_ids = Feature_X['row_id']
        pred = model(Feature_X)
        predicted_volatility = (pred / output_scaling).tolist()
        # each prediction row is a one-element list; take its single value
        for row_id, row_prediction in zip(row_ids, predicted_volatility):
            submission_data.append({'row_id':row_id, 'target':row_prediction[0]})

submission_df = pd.DataFrame(submission_data)
# Left-merge onto the dataset's main frame so every row of it is kept; both frames
# carry a 'target' column, so pandas suffixes them and the predicted one is 'target_y'.
submission_df = dataset.main_df.merge(submission_df,on='row_id',how='left')
submission_df = submission_df.rename(columns={'target_y':'target'})
submission_df[['row_id','target']].to_csv("submission.csv", index=False)

In [None]:
# Sanity check: read back the submission file written by the previous cell and display it.
pd.read_csv("submission.csv")

In [None]:
import torch.version

In [None]:
# Display the installed PyTorch version.
torch.__version__