In [8]:
SEED=42
import os
import glob
from joblib import Parallel, delayed
import pandas as pd
import numpy as np
import scipy as sc
from sklearn.model_selection import KFold, GroupKFold
import lightgbm as lgb
import warnings
from utils import get_feature_groups
import itertools
warnings.filterwarnings('ignore')
pd.set_option('max_columns', 300)
pd.set_option('max_rows', 300)

# Function to read our base train and test set
def read_train_test():
    train = pd.read_csv('../input/optiver-realized-volatility-prediction/train.csv')
    test = pd.read_csv('../input/optiver-realized-volatility-prediction/test.csv')
    # Create a key to merge with book and trade data
    train['row_id'] = train['stock_id'].astype(str) + '-' + train['time_id'].astype(str)
    test['row_id'] = test['stock_id'].astype(str) + '-' + test['time_id'].astype(str)
    print(f'Our training set has {train.shape[0]} rows')
    return train, test

# Function to get group stats for the stock_id and time_id
def get_time_stock(df):
    # Get realized volatility columns
    vol_cols = ['log_return1_realized_volatility', 'log_return2_realized_volatility', 'log_return1_realized_volatility_450', 'log_return2_realized_volatility_450', 
                'log_return1_realized_volatility_300', 'log_return2_realized_volatility_300', 'log_return1_realized_volatility_150', 'log_return2_realized_volatility_150', 
                'trade_log_return_realized_volatility', 'trade_log_return_realized_volatility_450', 'trade_log_return_realized_volatility_300', 'trade_log_return_realized_volatility_150']

    # Group by the stock id
    df_stock_id = df.groupby(['stock_id'])[vol_cols].agg(['mean', 'std', 'max', 'min', 'median']).reset_index()
    # Rename columns joining suffix
    df_stock_id.columns = ['_'.join(col) for col in df_stock_id.columns]
    df_stock_id = df_stock_id.add_suffix('_' + 'stock')

    # Group by the time id
    df_time_id = df.groupby(['time_id'])[vol_cols].agg(['mean', 'std', 'max', 'min', 'median']).reset_index()
    # Rename columns joining suffix
    df_time_id.columns = ['_'.join(col) for col in df_time_id.columns]
    df_time_id = df_time_id.add_suffix('_' + 'time')
    
    # Merge with original dataframe
    df = df.merge(df_stock_id, how = 'left', left_on = ['stock_id'], right_on = ['stock_id__stock'])
    df = df.merge(df_time_id, how = 'left', left_on = ['time_id'], right_on = ['time_id__time'])
    df.drop(['stock_id__stock', 'time_id__time'], axis = 1, inplace = True)
    return df

# Function to calculate the root mean squared percentage error
def rmspe(y_true, y_pred):
    return np.sqrt(np.mean(np.square((y_true - y_pred) / y_true)))

In [9]:
train, _ = read_train_test()
df_book = pd.read_csv('../input/processed-book-ffill/df_book.csv')
print(df_book.shape)
df_trade = pd.read_csv('../input/processed-book-ffill/df_trade.csv')
print(df_trade.shape)
train_ = df_book.merge(df_trade, on = ['row_id'], how = 'left')
train = train.merge(train_, on = ['row_id'], how = 'left')

# Get group stats of time_id and stock_id
train = get_time_stock(train)
train['logtarget'] = train['target'].apply(np.log)
train = train.sample(frac=1, random_state=SEED).reset_index(drop=True)
train

Our training set has 428932 rows
(428932, 169)
(428913, 41)


Unnamed: 0,stock_id,time_id,target,row_id,wap1_sum,wap1_mean,wap1_std,wap1_median,wap2_sum,wap2_mean,wap2_std,wap2_median,log_return1_sum,log_return1_realized_volatility,log_return1_mean,log_return1_std,log_return1_median,log_return2_sum,log_return2_realized_volatility,log_return2_mean,log_return2_std,log_return2_median,wap_balance_sum,wap_balance_mean,wap_balance_std,wap_balance_median,price_spread_sum,price_spread_mean,price_spread_std,price_spread_median,bid_spread_sum,bid_spread_mean,bid_spread_std,bid_spread_median,ask_spread_sum,ask_spread_mean,ask_spread_std,ask_spread_median,total_volume_sum,total_volume_mean,total_volume_std,total_volume_median,volume_imbalance_sum,volume_imbalance_mean,volume_imbalance_std,volume_imbalance_median,wap1_sum_450,wap1_mean_450,wap1_std_450,wap1_median_450,wap2_sum_450,wap2_mean_450,wap2_std_450,wap2_median_450,log_return1_sum_450,log_return1_realized_volatility_450,log_return1_mean_450,log_return1_std_450,log_return1_median_450,log_return2_sum_450,log_return2_realized_volatility_450,log_return2_mean_450,log_return2_std_450,log_return2_median_450,wap_balance_sum_450,wap_balance_mean_450,wap_balance_std_450,wap_balance_median_450,price_spread_sum_450,price_spread_mean_450,price_spread_std_450,price_spread_median_450,bid_spread_sum_450,bid_spread_mean_450,bid_spread_std_450,bid_spread_median_450,ask_spread_sum_450,ask_spread_mean_450,ask_spread_std_450,ask_spread_median_450,total_volume_sum_450,total_volume_mean_450,total_volume_std_450,total_volume_median_450,volume_imbalance_sum_450,volume_imbalance_mean_450,volume_imbalance_std_450,volume_imbalance_median_450,wap1_sum_300,wap1_mean_300,wap1_std_300,wap1_median_300,wap2_sum_300,wap2_mean_300,wap2_std_300,wap2_median_300,log_return1_sum_300,log_return1_realized_volatility_300,log_return1_mean_300,log_return1_std_300,log_return1_median_300,log_return2_sum_300,log_return2_realized_volatility_300,log_return2_mean_300,log_return2_std_300,log_return2_median_300,wap_balance_sum_300,wap_balance_mean_300,wap_balance_std_300,wap_balance_median_300,price_spread_sum_300,price_spread_mean_300,price_spread_std_300,price_spread_median_300,bid_spread_sum_300,bid_spread_mean_300,bid_spread_std_300,bid_spread_median_300,ask_spread_sum_300,ask_spread_mean_300,ask_spread_std_300,ask_spread_median_300,total_volume_sum_300,total_volume_mean_300,total_volume_std_300,total_volume_median_300,volume_imbalance_sum_300,volume_imbalance_mean_300,volume_imbalance_std_300,volume_imbalance_median_300,wap1_sum_150,wap1_mean_150,wap1_std_150,wap1_median_150,wap2_sum_150,wap2_mean_150,wap2_std_150,wap2_median_150,log_return1_sum_150,log_return1_realized_volatility_150,log_return1_mean_150,log_return1_std_150,log_return1_median_150,log_return2_sum_150,log_return2_realized_volatility_150,log_return2_mean_150,log_return2_std_150,log_return2_median_150,wap_balance_sum_150,wap_balance_mean_150,...,trade_seconds_in_bucket_count_unique_450,trade_size_mean_450,trade_size_median_450,trade_size_std_450,trade_size_sum_450,trade_order_count_mean_450,trade_order_count_median_450,trade_order_count_std_450,trade_order_count_sum_450,trade_log_return_realized_volatility_300,trade_seconds_in_bucket_count_unique_300,trade_size_mean_300,trade_size_median_300,trade_size_std_300,trade_size_sum_300,trade_order_count_mean_300,trade_order_count_median_300,trade_order_count_std_300,trade_order_count_sum_300,trade_log_return_realized_volatility_150,trade_seconds_in_bucket_count_unique_150,trade_size_mean_150,trade_size_median_150,trade_size_std_150,trade_size_sum_150,trade_order_count_mean_150,trade_order_count_median_150,trade_order_count_std_150,trade_order_count_sum_150,log_return1_realized_volatility_mean_stock,log_return1_realized_volatility_std_stock,log_return1_realized_volatility_max_stock,log_return1_realized_volatility_min_stock,log_return1_realized_volatility_median_stock,log_return2_realized_volatility_mean_stock,log_return2_realized_volatility_std_stock,log_return2_realized_volatility_max_stock,log_return2_realized_volatility_min_stock,log_return2_realized_volatility_median_stock,log_return1_realized_volatility_450_mean_stock,log_return1_realized_volatility_450_std_stock,log_return1_realized_volatility_450_max_stock,log_return1_realized_volatility_450_min_stock,log_return1_realized_volatility_450_median_stock,log_return2_realized_volatility_450_mean_stock,log_return2_realized_volatility_450_std_stock,log_return2_realized_volatility_450_max_stock,log_return2_realized_volatility_450_min_stock,log_return2_realized_volatility_450_median_stock,log_return1_realized_volatility_300_mean_stock,log_return1_realized_volatility_300_std_stock,log_return1_realized_volatility_300_max_stock,log_return1_realized_volatility_300_min_stock,log_return1_realized_volatility_300_median_stock,log_return2_realized_volatility_300_mean_stock,log_return2_realized_volatility_300_std_stock,log_return2_realized_volatility_300_max_stock,log_return2_realized_volatility_300_min_stock,log_return2_realized_volatility_300_median_stock,log_return1_realized_volatility_150_mean_stock,log_return1_realized_volatility_150_std_stock,log_return1_realized_volatility_150_max_stock,log_return1_realized_volatility_150_min_stock,log_return1_realized_volatility_150_median_stock,log_return2_realized_volatility_150_mean_stock,log_return2_realized_volatility_150_std_stock,log_return2_realized_volatility_150_max_stock,log_return2_realized_volatility_150_min_stock,log_return2_realized_volatility_150_median_stock,trade_log_return_realized_volatility_mean_stock,trade_log_return_realized_volatility_std_stock,trade_log_return_realized_volatility_max_stock,trade_log_return_realized_volatility_min_stock,trade_log_return_realized_volatility_median_stock,trade_log_return_realized_volatility_450_mean_stock,trade_log_return_realized_volatility_450_std_stock,trade_log_return_realized_volatility_450_max_stock,trade_log_return_realized_volatility_450_min_stock,trade_log_return_realized_volatility_450_median_stock,trade_log_return_realized_volatility_300_mean_stock,trade_log_return_realized_volatility_300_std_stock,trade_log_return_realized_volatility_300_max_stock,trade_log_return_realized_volatility_300_min_stock,trade_log_return_realized_volatility_300_median_stock,trade_log_return_realized_volatility_150_mean_stock,trade_log_return_realized_volatility_150_std_stock,trade_log_return_realized_volatility_150_max_stock,trade_log_return_realized_volatility_150_min_stock,trade_log_return_realized_volatility_150_median_stock,log_return1_realized_volatility_mean_time,log_return1_realized_volatility_std_time,log_return1_realized_volatility_max_time,log_return1_realized_volatility_min_time,log_return1_realized_volatility_median_time,log_return2_realized_volatility_mean_time,log_return2_realized_volatility_std_time,log_return2_realized_volatility_max_time,log_return2_realized_volatility_min_time,log_return2_realized_volatility_median_time,log_return1_realized_volatility_450_mean_time,log_return1_realized_volatility_450_std_time,log_return1_realized_volatility_450_max_time,log_return1_realized_volatility_450_min_time,log_return1_realized_volatility_450_median_time,log_return2_realized_volatility_450_mean_time,log_return2_realized_volatility_450_std_time,log_return2_realized_volatility_450_max_time,log_return2_realized_volatility_450_min_time,log_return2_realized_volatility_450_median_time,log_return1_realized_volatility_300_mean_time,log_return1_realized_volatility_300_std_time,log_return1_realized_volatility_300_max_time,log_return1_realized_volatility_300_min_time,log_return1_realized_volatility_300_median_time,log_return2_realized_volatility_300_mean_time,log_return2_realized_volatility_300_std_time,log_return2_realized_volatility_300_max_time,log_return2_realized_volatility_300_min_time,log_return2_realized_volatility_300_median_time,log_return1_realized_volatility_150_mean_time,log_return1_realized_volatility_150_std_time,log_return1_realized_volatility_150_max_time,log_return1_realized_volatility_150_min_time,log_return1_realized_volatility_150_median_time,log_return2_realized_volatility_150_mean_time,log_return2_realized_volatility_150_std_time,log_return2_realized_volatility_150_max_time,log_return2_realized_volatility_150_min_time,log_return2_realized_volatility_150_median_time,trade_log_return_realized_volatility_mean_time,trade_log_return_realized_volatility_std_time,trade_log_return_realized_volatility_max_time,trade_log_return_realized_volatility_min_time,trade_log_return_realized_volatility_median_time,trade_log_return_realized_volatility_450_mean_time,trade_log_return_realized_volatility_450_std_time,trade_log_return_realized_volatility_450_max_time,trade_log_return_realized_volatility_450_min_time,trade_log_return_realized_volatility_450_median_time,trade_log_return_realized_volatility_300_mean_time,trade_log_return_realized_volatility_300_std_time,trade_log_return_realized_volatility_300_max_time,trade_log_return_realized_volatility_300_min_time,trade_log_return_realized_volatility_300_median_time,trade_log_return_realized_volatility_150_mean_time,trade_log_return_realized_volatility_150_std_time,trade_log_return_realized_volatility_150_max_time,trade_log_return_realized_volatility_150_min_time,trade_log_return_realized_volatility_150_median_time,logtarget
0,16,9787,0.005983,16-9787,600.47920,1.000799,0.001474,1.001224,600.47980,1.000800,0.001451,1.000853,3.412304e-03,0.007143,5.696667e-06,0.000292,0.0,0.002679,0.009541,4.473089e-06,0.000390,0.0,0.301854,0.000503,0.000389,0.000380,0.766263,0.001277,3.961763e-04,0.001222,0.206112,0.000344,3.413202e-04,0.000102,-0.208047,-0.000347,2.339496e-04,-0.000306,205938,343.230000,125.163414,343.5,50934,84.890000,80.327489,61.0,150.23227,1.001548,0.000609,1.001588,150.22621,1.001508,0.000777,1.001582,-0.000668,0.002511,-0.000004,0.000206,0.000000,-0.001603,0.004331,-1.068959e-05,0.000355,0.0,0.059297,0.000395,0.000262,0.000321,0.157611,0.001051,2.933580e-04,0.001019,0.042936,0.000286,3.438091e-04,0.000102,-0.061496,-0.000410,2.819893e-04,-0.000408,54328,362.186667,101.039589,376.5,13944,92.960000,80.218149,68.0,300.06528,1.000218,0.001532,1.000459,300.09310,1.000310,0.001450,1.000318,0.001536,0.004749,5.118334e-06,0.000275,0.0,0.001796,0.007158,5.988251e-06,0.000414,0.0,0.146709,0.000489,0.000392,0.000365,0.367893,0.001226,4.177645e-04,0.001123,0.110348,0.000368,3.335068e-04,0.000204,-0.102086,-0.000340,2.486433e-04,-0.000306,104820,349.400000,106.187409,355.0,25510,85.033333,70.961520,65.0,450.28293,1.000629,0.001511,1.000776,450.26680,1.000593,0.001429,1.000638,-0.000837,0.006348,-1.860856e-06,0.000300,0.0,-0.001758,0.008621,-3.907312e-06,0.000407,0.0,0.228190,0.000507,...,15.0,122.133333,100.0,108.509951,1832.0,2.066667,1.0,1.387015,31.0,0.002793,21.0,133.285714,100.0,129.576673,2799.0,2.476190,2.0,1.965173,52.0,0.003539,31.0,116.032258,100.0,115.595987,3597.0,2.290323,2.0,1.773763,71.0,0.004612,0.003989,0.054206,0.000541,0.003416,0.006776,0.005683,0.094116,0.000949,0.005156,0.002079,0.001809,0.031666,0.000039,0.001561,0.003016,0.002335,0.035454,0.000000,0.002393,0.003059,0.002609,0.044278,0.000356,0.002311,0.004432,0.003394,0.054119,0.000547,0.003515,0.003853,0.003236,0.052743,0.000404,0.002905,0.005609,0.004375,0.066829,0.000796,0.004370,0.002533,0.001900,0.030371,0.000335,0.001972,0.001180,0.000938,0.020232,0.000000,0.000924,0.001746,0.001333,0.023597,0.000143,0.001370,0.002185,0.001642,0.026871,0.000024,0.001703,0.006039,0.002132,0.013142,0.002440,0.005639,0.008308,0.003377,0.017820,0.002770,0.007545,0.002739,0.001128,0.008007,0.000805,0.002543,0.003962,0.002027,0.013407,0.001198,0.003420,0.004215,0.001588,0.011059,0.001270,0.004037,0.005889,0.002655,0.015861,0.001820,0.005391,0.005294,0.001917,0.012067,0.002230,0.005023,0.007287,0.003069,0.016192,0.002477,0.006576,0.003645,0.000970,0.007297,0.001789,0.003490,0.001548,0.000587,0.004070,0.000294,0.001452,0.002483,0.000756,0.004954,0.001089,0.002451,0.003181,0.000859,0.006486,0.001580,0.003066,-5.118799
1,105,5773,0.002415,105-5773,600.64197,1.001070,0.000766,1.000880,600.63720,1.001062,0.000767,1.000869,9.610265e-04,0.002404,1.604385e-06,0.000098,0.0,0.000862,0.002987,1.439038e-06,0.000122,0.0,0.037653,0.000063,0.000045,0.000059,0.136220,0.000227,2.992114e-05,0.000223,0.133913,0.000223,4.916321e-08,0.000223,-0.133915,-0.000223,5.276032e-08,-0.000223,1345594,2242.656667,355.292645,2261.5,249858,416.430000,301.443309,397.5,150.27628,1.001842,0.000232,1.001783,150.26897,1.001793,0.000239,1.001701,-0.001076,0.000935,-0.000007,0.000076,0.000000,-0.001014,0.000984,-6.759636e-06,0.000080,0.0,0.009523,0.000063,0.000037,0.000064,0.034974,0.000233,4.717714e-05,0.000223,0.033477,0.000223,4.723490e-08,0.000223,-0.033479,-0.000223,5.370415e-08,-0.000223,365155,2434.366667,331.188817,2402.5,94345,628.966667,315.955596,658.5,300.50732,1.001691,0.000554,1.001777,300.50260,1.001675,0.000565,1.001686,0.000965,0.001549,3.217323e-06,0.000090,0.0,0.001028,0.002124,3.428133e-06,0.000123,0.0,0.021125,0.000070,0.000046,0.000068,0.068846,0.000229,3.809096e-05,0.000223,0.066958,0.000223,5.361427e-08,0.000223,-0.066957,-0.000223,5.123754e-08,-0.000223,685614,2285.380000,387.177518,2296.5,148816,496.053333,325.825780,463.5,450.56537,1.001256,0.000792,1.001388,450.55984,1.001244,0.000796,1.001450,0.001215,0.002048,2.700762e-06,0.000097,0.0,0.001197,0.002503,2.659889e-06,0.000118,0.0,0.028468,0.000063,...,16.0,166.062500,110.0,148.108280,2657.0,2.500000,2.0,1.549193,40.0,0.001400,42.0,178.166667,119.5,163.577224,7483.0,2.642857,2.0,2.228654,111.0,0.001684,68.0,184.926471,104.5,165.604124,12575.0,2.676471,2.0,2.235675,182.0,0.002903,0.002619,0.046933,0.000416,0.002131,0.003796,0.003375,0.062174,0.000605,0.002765,0.001340,0.001094,0.014084,0.000057,0.001025,0.001747,0.001415,0.018163,0.000045,0.001333,0.001941,0.001567,0.024346,0.000200,0.001462,0.002533,0.002039,0.034762,0.000154,0.001927,0.002428,0.002012,0.035214,0.000367,0.001816,0.003174,0.002619,0.044624,0.000400,0.002363,0.002252,0.001882,0.028099,0.000292,0.001659,0.001057,0.000856,0.012821,0.000000,0.000814,0.001533,0.001210,0.017172,0.000000,0.001164,0.001915,0.001528,0.022168,0.000206,0.001439,0.003812,0.001515,0.008950,0.001842,0.003445,0.005569,0.002612,0.015949,0.002197,0.004947,0.001800,0.000757,0.004375,0.000912,0.001592,0.002656,0.001331,0.007022,0.000816,0.002192,0.002618,0.001056,0.006323,0.001220,0.002515,0.003834,0.001907,0.013032,0.001509,0.003281,0.003262,0.001305,0.007827,0.001638,0.002965,0.004800,0.002281,0.014448,0.001934,0.004258,0.002515,0.000834,0.006272,0.001413,0.002284,0.001176,0.000500,0.003769,0.000000,0.001091,0.001704,0.000593,0.004029,0.000843,0.001570,0.002196,0.000715,0.005209,0.001141,0.002004,-6.026106
2,58,22076,0.002863,58-22076,598.30756,0.997179,0.000907,0.996946,598.41360,0.997356,0.000832,0.997271,-2.216200e-03,0.002623,-3.699834e-06,0.000107,0.0,-0.002213,0.004488,-3.694307e-06,0.000183,0.0,0.214987,0.000358,0.000263,0.000281,0.429021,0.000715,2.510883e-04,0.000681,0.078867,0.000131,8.000458e-05,0.000097,-0.202357,-0.000337,2.384041e-04,-0.000291,276551,460.918333,185.923068,430.0,81439,135.731667,144.914780,95.0,149.43927,0.996262,0.000364,0.996238,149.47705,0.996514,0.000483,0.996605,-0.000479,0.001350,-0.000003,0.000111,0.000000,-0.000676,0.003176,-4.509830e-06,0.000260,0.0,0.052095,0.000347,0.000276,0.000237,0.094242,0.000628,2.469873e-04,0.000584,0.018627,0.000124,5.852864e-05,0.000097,-0.073240,-0.000488,2.536689e-04,-0.000582,73626,490.840000,140.650220,508.0,25184,167.893333,155.208937,96.0,298.93270,0.996442,0.000369,0.996369,299.01505,0.996717,0.000500,0.996605,-0.001064,0.001750,-3.547048e-06,0.000101,0.0,-0.001163,0.003562,-3.876705e-06,0.000206,0.0,0.126108,0.000420,0.000247,0.000484,0.192645,0.000642,2.118420e-04,0.000584,0.034149,0.000114,4.591428e-05,0.000097,-0.128534,-0.000428,2.232002e-04,-0.000388,127843,426.143333,134.610113,412.0,39235,130.783333,130.744497,95.0,448.54175,0.996759,0.000572,0.996722,448.66577,0.997035,0.000681,0.996880,-0.001837,0.002440,-4.082175e-06,0.000115,0.0,-0.001795,0.004289,-3.989488e-06,0.000202,0.0,0.192981,0.000429,...,10.0,200.000000,132.5,190.117508,2000.0,3.900000,4.0,2.726414,39.0,0.001050,14.0,168.714286,113.5,168.786815,2362.0,3.714286,4.0,2.524604,52.0,0.001286,19.0,167.631579,113.0,167.329087,3185.0,3.947368,4.0,3.045464,75.0,0.004224,0.003572,0.041124,0.000373,0.003158,0.006102,0.005002,0.067929,0.000493,0.004704,0.001929,0.001585,0.020951,0.000125,0.001482,0.002807,0.002198,0.022311,0.000197,0.002169,0.002831,0.002277,0.023949,0.000232,0.002168,0.004104,0.003169,0.029909,0.000403,0.003190,0.003555,0.002924,0.035527,0.000323,0.002710,0.005138,0.004028,0.043294,0.000455,0.003984,0.002628,0.002023,0.021194,0.000394,0.002060,0.001233,0.000984,0.014402,0.000000,0.000968,0.001807,0.001396,0.018886,0.000248,0.001425,0.002261,0.001729,0.019871,0.000355,0.001784,0.002035,0.001872,0.018706,0.000503,0.001735,0.002879,0.002309,0.020866,0.000553,0.002457,0.001050,0.001474,0.015257,0.000192,0.000786,0.001375,0.001559,0.015508,0.000198,0.001054,0.001474,0.001654,0.016968,0.000296,0.001188,0.002035,0.001992,0.019575,0.000365,0.001655,0.001774,0.001698,0.017135,0.000401,0.001470,0.002498,0.002108,0.020027,0.000470,0.002108,0.001165,0.000610,0.004684,0.000407,0.000997,0.000591,0.000381,0.002469,0.000138,0.000500,0.000838,0.000478,0.003798,0.000237,0.000727,0.001039,0.000544,0.004101,0.000335,0.000903,-5.855742
3,77,5817,0.005279,77-5817,601.05817,1.001764,0.002921,1.001746,601.09640,1.001827,0.002950,1.001847,3.857260e-03,0.006993,6.439499e-06,0.000286,0.0,0.003651,0.009757,6.094700e-06,0.000399,0.0,0.126732,0.000211,0.000145,0.000183,0.243257,0.000405,9.510022e-05,0.000380,0.228444,0.000381,3.469868e-08,0.000381,-0.228436,-0.000381,4.818484e-08,-0.000381,2227382,3712.303333,960.696486,3510.5,503860,839.766667,738.489271,700.0,150.06807,1.000454,0.000727,1.000512,150.06671,1.000445,0.000898,1.000448,0.000180,0.003105,0.000001,0.000254,0.000006,-0.000076,0.005661,-5.054132e-07,0.000464,0.0,0.035303,0.000235,0.000157,0.000186,0.060510,0.000403,9.066640e-05,0.000381,0.057111,0.000381,3.302441e-08,0.000381,-0.057109,-0.000381,5.061392e-08,-0.000381,560983,3739.886667,798.023267,3700.0,104961,699.740000,589.194108,500.0,300.67102,1.002237,0.002305,1.001224,300.66970,1.002232,0.002362,1.001224,-0.005484,0.005275,-1.827947e-05,0.000305,0.0,-0.005791,0.007743,-1.930346e-05,0.000447,0.0,0.068302,0.000228,0.000150,0.000200,0.120806,0.000403,9.043028e-05,0.000380,0.114223,0.000381,3.342015e-08,0.000381,-0.114217,-0.000381,5.231611e-08,-0.000381,1116434,3721.446667,813.080328,3600.0,218108,727.026667,549.027166,625.5,451.28336,1.002852,0.002193,1.002919,451.30206,1.002893,0.002273,1.003037,-0.002539,0.006182,-5.641985e-06,0.000292,0.0,-0.002425,0.008701,-5.387934e-06,0.000411,0.0,0.099115,0.000220,...,58.0,714.344828,300.0,1466.614499,41432.0,4.879310,3.0,8.130682,283.0,0.004610,112.0,682.857143,300.0,1172.768495,76480.0,4.437500,3.0,6.513716,497.0,0.005418,151.0,624.788079,300.0,1039.558513,94343.0,4.145695,3.0,5.706601,626.0,0.003366,0.002558,0.035552,0.000700,0.002649,0.004712,0.003120,0.042911,0.001081,0.003910,0.001614,0.001287,0.026178,0.000064,0.001293,0.002272,0.001577,0.031902,0.000011,0.001919,0.002320,0.001804,0.034769,0.000252,0.001853,0.003260,0.002180,0.041658,0.000414,0.002721,0.002877,0.002206,0.035307,0.000614,0.002284,0.004037,0.002690,0.042607,0.000827,0.003368,0.002755,0.002188,0.026274,0.000000,0.002100,0.001314,0.001091,0.017820,0.000000,0.001036,0.001898,0.001536,0.025383,0.000000,0.001469,0.002362,0.001880,0.026047,0.000000,0.001823,0.009777,0.004200,0.024979,0.003297,0.008878,0.013158,0.006552,0.038566,0.003549,0.011594,0.005003,0.002328,0.015763,0.001717,0.004437,0.006671,0.004012,0.028888,0.001896,0.005535,0.006854,0.003120,0.018998,0.002273,0.006056,0.009058,0.004685,0.031326,0.002470,0.007990,0.008396,0.003615,0.021787,0.002864,0.007503,0.011277,0.005539,0.033449,0.003090,0.010024,0.006122,0.001858,0.013466,0.003014,0.005873,0.003191,0.001223,0.009058,0.001113,0.003018,0.004393,0.001471,0.009818,0.001796,0.004206,0.005367,0.001700,0.011079,0.002390,0.005120,-5.243951
4,56,14572,0.002328,56-14572,598.78840,0.997981,0.000927,0.997882,598.76544,0.997942,0.000961,0.997819,-1.671476e-03,0.002564,-2.790444e-06,0.000105,0.0,-0.001691,0.003622,-2.823096e-06,0.000148,0.0,0.074003,0.000123,0.000095,0.000108,0.166596,0.000278,8.049258e-05,0.000261,0.042784,0.000071,6.605039e-05,0.000043,-0.048897,-0.000081,6.813075e-05,-0.000065,201007,335.011667,156.600216,321.0,72007,120.011667,94.411798,99.0,149.60847,0.997390,0.000361,0.997394,149.59203,0.997280,0.000412,0.997257,0.001371,0.001495,0.000009,0.000122,0.000000,0.001544,0.002001,1.029398e-05,0.000164,0.0,0.022062,0.000147,0.000112,0.000138,0.043701,0.000291,9.093613e-05,0.000282,0.018575,0.000124,8.508687e-05,0.000087,-0.009558,-0.000064,6.090475e-05,-0.000043,48883,325.886667,116.885967,321.0,14875,99.166667,81.427731,96.5,299.17014,0.997234,0.000510,0.997321,299.14703,0.997157,0.000513,0.997187,0.000498,0.002086,1.661003e-06,0.000121,0.0,0.000439,0.003080,1.461875e-06,0.000178,0.0,0.041381,0.000138,0.000103,0.000120,0.086588,0.000289,9.205237e-05,0.000282,0.029108,0.000097,7.898205e-05,0.000065,-0.024188,-0.000081,7.010426e-05,-0.000043,93614,312.046667,115.815061,310.0,29882,99.606667,80.920862,91.0,448.92505,0.997611,0.000732,0.997507,448.90010,0.997556,0.000762,0.997468,-0.000640,0.002358,-1.421429e-06,0.000111,0.0,-0.000676,0.003320,-1.502127e-06,0.000157,0.0,0.055772,0.000124,...,43.0,96.255814,100.0,78.813551,4139.0,2.302326,2.0,1.439823,99.0,0.001192,101.0,97.594059,100.0,116.953852,9857.0,2.950495,2.0,5.534214,298.0,0.001373,123.0,94.666667,97.0,117.439881,11644.0,2.991870,2.0,5.193779,368.0,0.004845,0.002790,0.030041,0.000988,0.004028,0.006085,0.003376,0.037915,0.001576,0.005096,0.002321,0.001305,0.011015,0.000308,0.001975,0.002918,0.001580,0.018216,0.000562,0.002488,0.003318,0.001850,0.019398,0.000627,0.002793,0.004174,0.002240,0.025303,0.000967,0.003548,0.004125,0.002332,0.026912,0.000823,0.003439,0.005185,0.002822,0.034495,0.001291,0.004373,0.003389,0.002171,0.020474,0.000686,0.002687,0.001620,0.001017,0.008734,0.000231,0.001312,0.002324,0.001445,0.013968,0.000461,0.001865,0.002892,0.001817,0.019497,0.000608,0.002310,0.001889,0.000964,0.006077,0.000576,0.001648,0.002897,0.001591,0.008706,0.000714,0.002395,0.000907,0.000507,0.002648,0.000203,0.000752,0.001394,0.000904,0.006202,0.000018,0.001085,0.001300,0.000626,0.002994,0.000260,0.001122,0.002006,0.001147,0.006834,0.000067,0.001633,0.001625,0.000805,0.004466,0.000288,0.001443,0.002479,0.001403,0.008010,0.000070,0.002047,0.001267,0.000550,0.003294,0.000000,0.001189,0.000620,0.000313,0.001484,0.000128,0.000568,0.000915,0.000401,0.002313,0.000221,0.000834,0.001135,0.000482,0.002866,0.000348,0.001030,-6.062848
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
428927,76,21439,0.002647,76-21439,600.85600,1.001427,0.000982,1.001238,600.83440,1.001391,0.000968,1.001338,2.942843e-03,0.003456,4.912926e-06,0.000141,0.0,0.003319,0.004683,5.540259e-06,0.000191,0.0,0.097441,0.000162,0.000117,0.000146,0.213385,0.000356,1.394403e-04,0.000345,0.104993,0.000175,1.198182e-04,0.000148,-0.122059,-0.000203,1.454425e-04,-0.000198,319671,532.785000,188.145076,501.0,108369,180.615000,120.773592,169.0,150.42587,1.002839,0.000632,1.002918,150.41370,1.002758,0.000646,1.002840,0.001639,0.001829,0.000011,0.000149,0.000000,0.001876,0.002308,1.250727e-05,0.000189,0.0,0.022604,0.000151,0.000116,0.000123,0.054624,0.000364,1.195822e-04,0.000345,0.028350,0.000189,1.403048e-04,0.000148,-0.024490,-0.000163,1.056552e-04,-0.000198,96118,640.786667,204.102277,600.0,29646,197.640000,141.300018,171.0,300.64368,1.002146,0.000847,1.001750,300.62393,1.002080,0.000841,1.001654,0.002298,0.002261,7.660316e-06,0.000131,0.0,0.002537,0.002880,8.455152e-06,0.000166,0.0,0.040573,0.000135,0.000104,0.000117,0.107396,0.000358,1.280518e-04,0.000345,0.058281,0.000194,1.394809e-04,0.000149,-0.050761,-0.000169,1.127547e-04,-0.000148,175274,584.246667,198.864601,568.0,52754,175.846667,122.822968,156.5,450.79474,1.001766,0.000888,1.001509,450.77580,1.001724,0.000867,1.001458,0.002481,0.002861,5.513764e-06,0.000135,0.0,0.002609,0.003722,5.798256e-06,0.000176,0.0,0.071520,0.000159,...,27.0,153.259259,100.0,145.237363,4138.0,3.037037,2.0,2.328444,82.0,0.001431,41.0,143.487805,100.0,141.423499,5883.0,2.707317,2.0,2.347806,111.0,0.001656,61.0,123.098361,100.0,128.331953,7509.0,2.557377,2.0,2.061752,156.0,0.004540,0.003100,0.038771,0.000880,0.003643,0.005778,0.003884,0.055342,0.001048,0.004723,0.002154,0.001408,0.017263,0.000338,0.001766,0.002754,0.001732,0.022490,0.000381,0.002320,0.003093,0.002034,0.025409,0.000497,0.002536,0.003952,0.002546,0.036109,0.000560,0.003305,0.003847,0.002545,0.030346,0.000648,0.003119,0.004908,0.003188,0.047050,0.000764,0.004065,0.002735,0.001948,0.022090,0.000485,0.002136,0.001303,0.000939,0.014751,0.000154,0.001020,0.001871,0.001312,0.019345,0.000287,0.001474,0.002336,0.001648,0.019607,0.000453,0.001842,0.002818,0.001335,0.007531,0.000284,0.002492,0.004039,0.002142,0.011330,0.000068,0.003553,0.001380,0.000721,0.004571,0.000158,0.001195,0.002009,0.001109,0.007004,0.000061,0.001671,0.001969,0.001024,0.006053,0.000174,0.001695,0.002815,0.001572,0.008584,0.000062,0.002383,0.002403,0.001156,0.006412,0.000234,0.002181,0.003446,0.001866,0.010201,0.000063,0.002935,0.001909,0.000811,0.004795,0.000650,0.001692,0.000981,0.000478,0.002818,0.000000,0.000866,0.001346,0.000629,0.003775,0.000000,0.001217,0.001646,0.000698,0.004178,0.000520,0.001442,-5.934183
428928,108,16733,0.001089,108-16733,599.87500,0.999792,0.000372,0.999724,599.80200,0.999670,0.000471,0.999634,-1.170027e-03,0.001024,-1.953301e-06,0.000042,0.0,-0.001522,0.001667,-2.540422e-06,0.000068,0.0,0.098845,0.000165,0.000118,0.000126,0.244101,0.000407,1.585902e-07,0.000407,0.244052,0.000407,2.316365e-08,0.000407,-0.244045,-0.000407,0.000000e+00,-0.000407,14477849,24129.748333,2932.225114,23654.0,2080539,3467.565000,2420.802655,3165.5,149.93121,0.999541,0.000154,0.999547,149.92207,0.999481,0.000248,0.999576,-0.000289,0.000406,-0.000002,0.000033,0.000000,-0.000653,0.000614,-4.350038e-06,0.000050,0.0,0.017084,0.000114,0.000105,0.000086,0.061040,0.000407,6.290679e-08,0.000407,0.061013,0.000407,2.263804e-08,0.000407,-0.061011,-0.000407,0.000000e+00,-0.000407,3705074,24700.493333,1749.543725,24668.5,593902,3959.346667,1983.670031,3574.0,299.85287,0.999510,0.000253,0.999528,299.81332,0.999378,0.000357,0.999572,-0.000916,0.000812,-3.054283e-06,0.000047,0.0,-0.001098,0.001236,-3.660867e-06,0.000071,0.0,0.047890,0.000160,0.000122,0.000104,0.122086,0.000407,1.024651e-07,0.000407,0.122029,0.000407,2.882546e-08,0.000407,-0.122023,-0.000407,0.000000e+00,-0.000407,7164477,23881.590000,3119.925592,23894.0,1128809,3762.696667,2613.769338,3359.0,449.83868,0.999642,0.000294,0.999615,449.77167,0.999493,0.000375,0.999576,-0.000892,0.000950,-1.982151e-06,0.000045,0.0,-0.000885,0.001563,-1.966084e-06,0.000074,0.0,0.077214,0.000172,...,4.0,1462.000000,1046.0,1766.039637,5848.0,7.500000,7.0,7.047458,30.0,0.000910,9.0,1340.444444,233.0,1554.064036,12064.0,7.111111,7.0,5.710614,64.0,0.000997,12.0,1419.583333,525.0,1573.548766,17035.0,7.666667,9.0,5.532274,92.0,0.003337,0.002545,0.035924,0.000368,0.002642,0.004335,0.003075,0.038709,0.000376,0.003548,0.001568,0.001191,0.017611,0.000062,0.001274,0.002023,0.001410,0.018896,0.000031,0.001705,0.002261,0.001660,0.022615,0.000261,0.001824,0.002926,0.001957,0.024179,0.000081,0.002458,0.002820,0.002097,0.029733,0.000319,0.002260,0.003658,0.002491,0.032411,0.000113,0.003030,0.002683,0.002127,0.027750,0.000208,0.002072,0.001259,0.001024,0.015276,0.000000,0.000998,0.001826,0.001420,0.020301,0.000000,0.001436,0.002284,0.001775,0.024155,0.000000,0.001780,0.001949,0.001015,0.007754,0.000115,0.001798,0.002858,0.001561,0.010666,0.000008,0.002547,0.000901,0.000463,0.002852,0.000075,0.000811,0.001314,0.000737,0.003958,0.000002,0.001102,0.001298,0.000639,0.004554,0.000080,0.001186,0.001904,0.001077,0.007826,0.000003,0.001551,0.001633,0.000869,0.006988,0.000088,0.001503,0.002394,0.001340,0.009858,0.000008,0.002068,0.001322,0.000531,0.003180,0.000000,0.001261,0.000622,0.000273,0.001351,0.000000,0.000579,0.000877,0.000352,0.002123,0.000000,0.000857,0.001113,0.000443,0.002451,0.000000,0.001026,-6.822463
428929,37,14273,0.002531,37-14273,601.11096,1.001852,0.000367,1.001833,601.35460,1.002258,0.000556,1.002294,-1.316365e-03,0.002124,-2.197604e-06,0.000087,0.0,0.000596,0.005234,9.953217e-07,0.000214,0.0,0.404903,0.000675,0.000359,0.000584,0.765416,0.001276,3.093680e-04,0.001303,0.093891,0.000156,1.623810e-04,0.000030,-0.036006,-0.000060,1.071243e-04,-0.000006,103454,172.423333,100.354185,125.5,47972,79.953333,61.444013,89.0,150.27243,1.001816,0.000176,1.001920,150.35689,1.002379,0.000251,1.002554,-0.000238,0.001060,-0.000002,0.000087,0.000000,-0.000248,0.001680,-1.655800e-06,0.000138,0.0,0.089504,0.000597,0.000148,0.000634,0.140031,0.000934,1.211388e-04,0.000877,0.044171,0.000294,1.026674e-04,0.000338,-0.017555,-0.000117,1.033026e-04,-0.000214,9660,64.400000,51.013618,16.0,6910,46.066667,42.313573,6.0,300.50052,1.001668,0.000206,1.001709,300.65912,1.002197,0.000318,1.002294,-0.000041,0.001089,-1.364900e-07,0.000063,0.0,-0.000411,0.002053,-1.369017e-06,0.000119,0.0,0.163670,0.000546,0.000193,0.000584,0.303233,0.001011,1.745227e-04,0.000877,0.058808,0.000196,1.522390e-04,0.000338,-0.018448,-0.000061,9.172004e-05,-0.000006,27635,92.116667,45.863434,114.0,20883,69.610000,38.465176,89.0,450.78230,1.001738,0.000273,1.001709,450.98755,1.002195,0.000483,1.002281,-0.000759,0.001754,-1.686999e-06,0.000083,0.0,-0.000725,0.004075,-1.612065e-06,0.000192,0.0,0.253397,0.000563,...,2.0,19.000000,19.0,25.455844,38.0,1.500000,1.5,0.707107,3.0,0.000884,2.0,19.000000,19.0,25.455844,38.0,1.500000,1.5,0.707107,3.0,0.000884,2.0,19.000000,19.0,25.455844,38.0,1.500000,1.5,0.707107,3.0,0.007163,0.004996,0.053782,0.000603,0.005614,0.009888,0.007433,0.096552,0.000927,0.007564,0.003281,0.002495,0.034143,0.000000,0.002588,0.004529,0.003564,0.046615,0.000000,0.003491,0.004827,0.003404,0.042128,0.000000,0.003835,0.006654,0.004968,0.064391,0.000248,0.005171,0.006061,0.004196,0.044557,0.000458,0.004740,0.008354,0.006137,0.068750,0.000572,0.006446,0.003000,0.002200,0.041908,0.000000,0.002386,0.001409,0.001079,0.011524,0.000000,0.001149,0.002076,0.001536,0.028799,0.000000,0.001673,0.002614,0.001863,0.029389,0.000000,0.002122,0.002076,0.000836,0.004408,0.000767,0.001833,0.003012,0.001376,0.007255,0.000411,0.002610,0.001016,0.000454,0.002424,0.000245,0.000888,0.001463,0.000757,0.004608,0.000005,0.001290,0.001439,0.000609,0.003302,0.000519,0.001245,0.002101,0.001045,0.005407,0.000052,0.001750,0.001787,0.000754,0.003910,0.000629,0.001564,0.002585,0.001225,0.006322,0.000090,0.002270,0.001414,0.000568,0.003279,0.000000,0.001259,0.000712,0.000352,0.001986,0.000000,0.000645,0.000989,0.000426,0.002689,0.000000,0.000896,0.001216,0.000498,0.002931,0.000000,0.001113,-5.979000
428930,41,11351,0.001864,41-11351,600.30000,1.000500,0.000194,1.000479,600.28180,1.000470,0.000254,1.000469,-1.884784e-04,0.001252,-3.146550e-07,0.000051,0.0,-0.000210,0.002172,-3.510817e-07,0.000089,0.0,0.047055,0.000078,0.000048,0.000073,0.136816,0.000228,9.300933e-06,0.000228,0.136676,0.000228,3.183415e-08,0.000228,-0.136652,-0.000228,5.952166e-08,-0.000228,16360223,27267.038333,3021.952883,27049.5,3003171,5005.285000,3831.373475,3972.0,150.07831,1.000522,0.000111,1.000487,150.07579,1.000505,0.000171,1.000411,0.000309,0.000536,0.000002,0.000044,0.000000,0.000271,0.000930,1.809912e-06,0.000076,0.0,0.012084,0.000081,0.000027,0.000074,0.034148,0.000228,8.138696e-08,0.000228,0.034171,0.000228,9.634912e-09,0.000228,-0.034160,-0.000228,5.692277e-08,-0.000228,4212732,28084.880000,2638.984259,28636.5,442000,2946.666667,2254.018329,2656.0,300.14554,1.000485,0.000178,1.000487,300.13773,1.000459,0.000241,1.000412,-0.000110,0.000897,-3.665666e-07,0.000052,0.0,-0.000189,0.001603,-6.291133e-07,0.000093,0.0,0.024750,0.000082,0.000039,0.000076,0.068523,0.000228,1.315304e-05,0.000228,0.068339,0.000228,2.342203e-08,0.000228,-0.068327,-0.000228,5.927281e-08,-0.000228,8415807,28052.690000,2858.524197,27979.0,1379997,4599.990000,3494.697989,3637.5,450.20566,1.000457,0.000181,1.000462,450.19034,1.000423,0.000242,1.000412,0.000033,0.001111,7.334507e-08,0.000052,0.0,0.000078,0.001882,1.726586e-07,0.000089,0.0,0.036140,0.000080,...,19.0,1726.263158,250.0,2475.416774,32799.0,6.210526,3.0,5.883866,118.0,0.000864,42.0,1592.547619,621.0,2131.626188,66887.0,6.404762,5.0,5.468578,269.0,0.001093,64.0,1504.265625,621.0,1988.283109,96273.0,6.828125,5.0,5.722567,437.0,0.002252,0.001799,0.020220,0.000304,0.001681,0.002925,0.002118,0.023693,0.000322,0.002292,0.001051,0.000827,0.008286,0.000051,0.000824,0.001364,0.000978,0.008742,0.000012,0.001122,0.001520,0.001175,0.012973,0.000132,0.001173,0.001978,0.001370,0.013610,0.000035,0.001599,0.001895,0.001479,0.015995,0.000190,0.001444,0.002468,0.001731,0.018022,0.000285,0.001968,0.001919,0.001580,0.017448,0.000223,0.001423,0.000900,0.000737,0.007570,0.000000,0.000698,0.001301,0.001043,0.012015,0.000000,0.000995,0.001623,0.001306,0.013876,0.000000,0.001220,0.002390,0.001159,0.006707,0.000825,0.002102,0.003412,0.001626,0.009134,0.000956,0.002970,0.001214,0.000648,0.003261,0.000400,0.001035,0.001728,0.000844,0.004467,0.000439,0.001528,0.001701,0.000877,0.005591,0.000577,0.001453,0.002417,0.001213,0.006826,0.000653,0.002030,0.002083,0.001053,0.006372,0.000717,0.001765,0.002958,0.001462,0.008596,0.000834,0.002470,0.001623,0.000693,0.004710,0.000694,0.001442,0.000828,0.000416,0.002878,0.000000,0.000747,0.001173,0.000534,0.003743,0.000489,0.001038,0.001426,0.000638,0.004545,0.000618,0.001287,-6.284911


In [18]:
feature_groups = get_feature_groups(train)
pruned_groups = ["timeagg_450_log_return1",
    "trade_seconds_in_bucket_450",
    "wap1_300",
    "timeagg_450_trade_log_return",
    "trade_seconds_in_bucket",
    "ask_spread_450"]
feature_cols = list(itertools.chain.from_iterable([c for g, c in feature_groups.items() if g not in pruned_groups]))
print(f"# features: {len(feature_cols)}")
target_col = 'target'
# {'reg_alpha': 9.594283186777315e-06, 'reg_lambda': 0.00030328360027000927, 'n_estimators': 3178, 'learning_rate': 0.019233318286949023, 'num_leaves': 1251, 'feature_fraction': 0.26088472222018966, 'bagging_fraction': 0.20674682558678448, 'bagging_freq': 25, 'min_child_samples': 263}
params = {
    'random_state': SEED,
    'objective': 'rmse',  
    'boosting_type': 'gbdt',
#     'boosting_type': 'dart',
#     'num_leaves': 100,
#     'learning_rate': 0.1,
#     'colsample_bytree': 0.8,
#     'subsample': 0.8,
#     'n_estimators': 10000,
    'min_child_samples': int(0.01*train.shape[0])
}


oof_predictions = np.zeros(train.shape[0])
kfold = GroupKFold(n_splits=5)
for fold, (trn_ind, val_ind) in enumerate(kfold.split(train, groups=train.time_id)):
    print(f'Training fold {fold}')
    x_train, x_val = train.loc[trn_ind, feature_cols], train.loc[val_ind, feature_cols]
    y_train, y_val = train.loc[trn_ind, target_col], train.loc[val_ind, target_col]

    # Root mean squared percentage error weights
    if target_col = 'logtarget':
        train_weights = 1 / np.square(np.exp(y_train))
        val_weights = 1 / np.square(np.exp(y_val))
    else:
        train_weights = 1 / np.exp(y_train)
        val_weights = 1 / np.exp(y_val)

    # Fit with sklearn API
    model = lgb.LGBMRegressor(**params)
    model.fit(x_train, 
              y_train, 
              sample_weight=train_weights,
              eval_set=[(x_val, y_val)],
              eval_sample_weight=[val_weights],
              eval_metric='rmse',
              early_stopping_rounds=100,
              verbose=False)

    # Add predictions to the out of folds array
    oof_predictions[val_ind] = model.predict(x_val)
if target_col == 'logtarget':
    oof_predictions = np.exp(oof_predictions)
oof_score = rmspe(train['target'], oof_predictions)

print(f"oof rmspe: {oof_score}") # 0.2260 # dart 0.2465 #

# features: 328
# features: 308
Training fold 0
Training fold 1
Training fold 2
Training fold 3
Training fold 4
oof rmspe: 0.233855153861511


In [8]:
df_importance = pd.DataFrame({"feature": model.feature_name_, "importance": model.feature_importances_})
df_importance = df_importance.sort_values(by=["importance"], ascending=False).reset_index(drop=True)
df_importance

Unnamed: 0,feature,importance
0,log_return1_realized_volatility,138
1,trade_log_return_realized_volatility_450_media...,133
2,log_return1_realized_volatility_450_median_stock,109
3,log_return2_realized_volatility_450_min_time,106
4,trade_log_return_realized_volatility_450_min_time,92
...,...,...
323,wap_balance_std_450,0
324,wap_balance_sum_450,0
325,log_return2_median_450,0
326,log_return1_median_450,0
