# Competition

In [1]:
import os
import glob
from joblib import Parallel, delayed
import pandas as pd
import numpy as np
import scipy as sc
from sklearn.model_selection import KFold
import xgboost as xgb
import warnings
# Silence library warnings so they do not clutter the cell outputs.
warnings.filterwarnings('ignore')
# Use the fully qualified option name: the abbreviated 'max_columns' pattern
# matches more than one option in newer pandas releases
# ('display.max_columns' and 'styler.render.max_columns') and raises
# OptionError there.
pd.set_option('display.max_columns', 300)

In [20]:
class Config:
    """Notebook-wide settings: seed, data location, feature specs, model params.

    The feature dictionaries reference ``realized_volatility`` and
    ``count_unique`` while the class body executes, so both helpers must
    already be defined when this cell runs.
    """

    seed = 1234
    data_dir = '../data/'

    # Order-book columns mapped to their aggregation functions. The two
    # log-return columns additionally get realized_volatility; every other
    # column uses the same three summary statistics.
    feature_dict_book = {
        column: (
            [np.sum, realized_volatility, np.mean, np.std]
            if column.startswith('log_return')
            else [np.sum, np.mean, np.std]
        )
        for column in (
            'wap1',
            'wap2',
            'log_return1',
            'log_return2',
            'wap_balance',
            'price_spread',
            'price_spread2',
            'bid_spread',
            'ask_spread',
            'total_volume',
            'volume_imbalance',
            'bid_ask_spread',
        )
    }

    # Trade columns mapped to their aggregation functions.
    feature_dict_trade = {
        'log_return': [realized_volatility],
        'seconds_in_bucket': [count_unique],
        'size': [np.sum, realized_volatility, np.mean, np.std, np.max, np.min],
        'order_count': [np.mean, np.sum, np.max],
    }

    # Hyper-parameters keyed by model name.
    model_params = {
        'xgb_bl': {
            'objective': 'reg:squarederror',
            'booster': 'gbtree',
            'nthread': -1,
            'eta': 0.3,
            'max_depth': 8,
            'min_child_weight': 1,
            # NOTE(review): the XGBoost parameter docs describe
            # "gradient_based" sampling as a GPU-only feature; confirm it is
            # valid while the tree_method line below stays disabled.
            'sampling_method': 'gradient_based',
            # 'tree_method': 'gpu_hist'  # turn it on for GPU
        }
    }

In [21]:
# Bind the Config class itself (no instance is created); settings are read as class attributes.
config = Config

## Preprocessing

### Preprocessing Utilities

In [4]:
def calc_wap1(df):
    """Return the level-1 weighted average price for every row of ``df``.

    Each price is weighted by the size quoted on the opposite side of the
    book, and the total is divided by the combined level-1 size.
    """
    cross_weighted = df['bid_price1'] * df['ask_size1'] + df['ask_price1'] * df['bid_size1']
    combined_size = df['bid_size1'] + df['ask_size1']
    return cross_weighted / combined_size

def calc_wap2(df):
    """Return the level-2 weighted average price for every row of ``df``.

    Each price is weighted by the size quoted on the opposite side of the
    book, and the total is divided by the combined level-2 size.
    """
    cross_weighted = df['bid_price2'] * df['ask_size2'] + df['ask_price2'] * df['bid_size2']
    combined_size = df['bid_size2'] + df['ask_size2']
    return cross_weighted / combined_size

In [5]:
def log_return(wap):
    """Return the first difference of the natural log of ``wap``.

    The first element of the result is NaN because it has no predecessor.
    """
    log_prices = np.log(wap)
    return log_prices.diff()

In [6]:
def realized_volatility(series):
    """Return the square root of the sum of squared values in ``series``."""
    squared = series ** 2
    total = np.sum(squared)
    return np.sqrt(total)

In [7]:
def count_unique(series):
    """Return how many distinct values ``series`` contains."""
    distinct_values = np.unique(series)
    return distinct_values.size

In [19]:
# Load the base train and test tables and tag each row with a merge key
def read_train_test():
    """Read ``train.csv`` and ``test.csv`` from ``config.data_dir``.

    Both frames gain a ``row_id`` column of the form ``"<stock_id>-<time_id>"``
    so they can be joined with the book and trade features.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)``.
    """
    train = pd.read_csv(config.data_dir + 'train.csv')
    test = pd.read_csv(config.data_dir + 'test.csv')

    # Same key construction for both frames.
    for frame in (train, test):
        frame['row_id'] = frame['stock_id'].astype(str) + '-' + frame['time_id'].astype(str)

    print(f'Our training set has {train.shape[0]} rows')
    return train, test

### Preprocessing of Book

In [8]:
from functools import reduce

def book_preprocessor(file_path):
    """Build per-time_id order-book features for a single stock.

    Reads the parquet data at ``file_path``, derives weighted average prices,
    log returns, spread and volume columns, then aggregates them per
    ``time_id`` over the whole bucket and over the windows
    ``seconds_in_bucket >= t`` for each window start ``t``.

    Parameters
    ----------
    file_path : str
        Location of the book data for one stock. The text after the last
        ``=`` is used as the stock id when building ``row_id``.

    Returns
    -------
    pandas.DataFrame
        One row per ``time_id`` found in the file, holding the aggregated
        features plus a ``row_id`` column of the form
        ``"<stock_id>-<time_id>"``. Window features are NaN for a time_id
        that has no rows inside that window.
    """
    df = pd.read_parquet(file_path)

    # Weighted average prices for both book levels
    df['wap1'] = calc_wap1(df)
    df['wap2'] = calc_wap2(df)

    # Log returns within each time_id. transform() always returns a result
    # aligned with df's own index; groupby().apply() prepends the group key
    # to the index in newer pandas releases, which makes the column
    # assignment fail.
    df['log_return1'] = df.groupby(['time_id'])['wap1'].transform(log_return)
    df['log_return2'] = df.groupby(['time_id'])['wap2'].transform(log_return)

    # Absolute gap between the two weighted average prices
    df['wap_balance'] = abs(df['wap1'] - df['wap2'])

    # Spread and volume columns
    df['price_spread'] = (df['ask_price1'] - df['bid_price1']) / ((df['ask_price1'] + df['bid_price1']) / 2)
    df['price_spread2'] = (df['ask_price2'] - df['bid_price2']) / ((df['ask_price2'] + df['bid_price2']) / 2)
    df['bid_spread'] = df['bid_price1'] - df['bid_price2']
    df['ask_spread'] = df['ask_price1'] - df['ask_price2']
    df["bid_ask_spread"] = abs(df['bid_spread'] - df['ask_spread'])
    df['total_volume'] = (df['ask_size1'] + df['ask_size2']) + (df['bid_size1'] + df['bid_size2'])
    df['volume_imbalance'] = abs((df['ask_size1'] + df['ask_size2']) - (df['bid_size1'] + df['bid_size2']))

    # Column -> aggregation functions applied per time_id
    create_feature_dict = {
        'wap1': [np.sum, np.mean, np.std],
        'wap2': [np.sum, np.mean, np.std],
        'log_return1': [np.sum, realized_volatility, np.mean, np.std],
        'log_return2': [np.sum, realized_volatility, np.mean, np.std],
        'wap_balance': [np.sum, np.mean, np.std],
        'price_spread': [np.sum, np.mean, np.std],
        'price_spread2': [np.sum, np.mean, np.std],
        'bid_spread': [np.sum, np.mean, np.std],
        'ask_spread': [np.sum, np.mean, np.std],
        'total_volume': [np.sum, np.mean, np.std],
        'volume_imbalance': [np.sum, np.mean, np.std],
        "bid_ask_spread": [np.sum, np.mean, np.std],
    }

    def get_stats_window(seconds_in_bucket, add_suffix = False):
        """Aggregate rows with ``seconds_in_bucket >= seconds_in_bucket`` per time_id.

        The key column is always named ``time_id_`` in the result. With
        ``add_suffix`` every feature column is suffixed with the window start.
        """
        in_window = df[df['seconds_in_bucket'] >= seconds_in_bucket]
        df_feature = in_window.groupby(['time_id']).agg(create_feature_dict).reset_index()

        # Flatten the (column, statistic) pairs; the key becomes 'time_id_'
        df_feature.columns = ['_'.join(col) for col in df_feature.columns]

        if add_suffix:
            df_feature = df_feature.add_suffix('_' + str(seconds_in_bucket))
            # Undo the suffix on the key so every window shares the same join column
            df_feature = df_feature.rename(columns={f'time_id__{seconds_in_bucket}': 'time_id_'})
        return df_feature

    # Full-bucket statistics first: this frame contains every time_id
    df_list = [get_stats_window(seconds_in_bucket = 0, add_suffix = False)]

    # Window starts are derived from time_gap rather than a second hard-coded 100.
    # NOTE(review): the first window starts at 0, so its "_0" columns repeat
    # the unsuffixed full-bucket columns; kept to preserve the output schema.
    time_gap = 100
    time_slices = list(range(0, 600, time_gap))
    for t in time_slices:
        df_list += [get_stats_window(seconds_in_bucket = t, add_suffix = True)]

    # Left join onto the full-bucket frame: with an inner join a time_id that
    # has no rows in some late window would vanish from the result entirely.
    df_feature = reduce(lambda left, right: pd.merge(left, right, on='time_id_', how='left'), df_list)

    # Create row_id so we can merge with the train/test tables
    stock_id = file_path.rsplit('=', 1)[-1]
    df_feature['row_id'] = df_feature['time_id_'].apply(lambda x: f'{stock_id}-{x}')
    df_feature = df_feature.drop(columns=['time_id_'])

    return df_feature

### Preprocessing of Trade

In [9]:
def trade_preprocessor(file_path):
    """Build per-time_id trade features for a single stock.

    Reads the parquet data at ``file_path``, computes log returns of the
    trade price, aggregates per ``time_id`` over the whole bucket and over the
    windows ``seconds_in_bucket >= t`` for each window start ``t``, and adds a
    set of hand-crafted price/size statistics.

    Parameters
    ----------
    file_path : str
        Location of the trade data for one stock. The text after the last
        ``=`` is used as the stock id when building ``row_id``.

    Returns
    -------
    pandas.DataFrame
        One row per ``time_id`` found in the file. Every feature column is
        prefixed with ``trade_``; ``row_id`` has the form
        ``"<stock_id>-<time_id>"``. Window features are NaN for a time_id
        that has no trades inside that window.
    """
    df = pd.read_parquet(file_path)

    # transform() keeps the result aligned with df's own index;
    # groupby().apply() prepends the group key to the index in newer pandas
    # releases, which makes the column assignment fail.
    df['log_return'] = df.groupby('time_id')['price'].transform(log_return)

    # Column -> aggregation functions applied per time_id
    create_feature_dict = {
        'log_return': [realized_volatility],
        'seconds_in_bucket': [count_unique],
        'size': [np.sum, realized_volatility, np.mean, np.std, np.max, np.min],
        'order_count': [np.mean, np.sum, np.max],
    }

    def get_stats_window(seconds_in_bucket, add_suffix = False):
        """Aggregate rows with ``seconds_in_bucket >= seconds_in_bucket`` per time_id.

        The key column is always named ``time_id_`` in the result. With
        ``add_suffix`` every feature column is suffixed with the window start.
        """
        in_window = df[df['seconds_in_bucket'] >= seconds_in_bucket]
        df_feature = in_window.groupby(['time_id']).agg(create_feature_dict).reset_index()

        # Flatten the (column, statistic) pairs; the key becomes 'time_id_'
        df_feature.columns = ['_'.join(col) for col in df_feature.columns]

        if add_suffix:
            df_feature = df_feature.add_suffix('_' + str(seconds_in_bucket))
            # Undo the suffix on the key so every window shares the same join column
            df_feature = df_feature.rename(columns={f'time_id__{seconds_in_bucket}': 'time_id_'})
        return df_feature

    def tendency(price, vol):
        """Sum of percentage price changes, each weighted by the traded size."""
        df_diff = np.diff(price)
        val = (df_diff / price[1:]) * 100
        power = np.sum(val * vol[1:])
        return power

    def process_trade_features(df):
        """Compute the hand-crafted price/size statistics, one row per time_id."""
        records = []
        # A single groupby pass replaces re-filtering the whole frame for every
        # time_id; sort=False keeps the first-appearance order of the ids.
        for n_time_id, df_id in df.groupby('time_id', sort=False):
            price = df_id['price'].values
            size = df_id['size'].values
            price_mean = np.mean(price)
            size_mean = np.mean(size)
            price_steps = np.diff(price)

            records.append({
                'time_id': n_time_id,
                'tendency': tendency(price, size),
                # Number of trades above / below the mean price
                'f_max': np.sum(price > price_mean),
                'f_min': np.sum(price < price_mean),
                # Number of up-ticks / down-ticks between consecutive trades
                'df_max': np.sum(price_steps > 0),
                'df_min': np.sum(price_steps < 0),
                'abs_diff': np.median(np.abs(price - price_mean)),
                'energy': np.mean(price**2),
                'iqr_p': np.percentile(price, 75) - np.percentile(price, 25),
                'abs_diff_v': np.median(np.abs(size - size_mean)),
                'energy_v': np.sum(size**2),
                'iqr_p_v': np.percentile(size, 75) - np.percentile(size, 25),
            })

        return pd.DataFrame(records)

    # Full-bucket statistics first: this frame contains every time_id
    df_list = [get_stats_window(seconds_in_bucket = 0, add_suffix = False)]

    # Window starts are derived from time_gap rather than a second hard-coded 100.
    # NOTE(review): the first window starts at 0, so its "_0" columns repeat
    # the unsuffixed full-bucket columns; kept to preserve the output schema.
    time_gap = 100
    time_slices = list(range(0, 600, time_gap))
    for t in time_slices:
        df_list += [get_stats_window(seconds_in_bucket = t, add_suffix = True)]

    # Left join onto the full-bucket frame: with an inner join a time_id that
    # has no trades in some late window would vanish from the result entirely.
    df_feature = reduce(lambda left, right: pd.merge(left, right, on='time_id_', how='left'), df_list)
    df_lr = process_trade_features(df)
    # NOTE(review): this merge keeps df_lr's 'time_id' key, which ends up as a
    # 'trade_time_id' column in the output; it is left in place because code
    # outside this function may reference it.
    df_feature = df_feature.merge(df_lr, how = 'left', left_on = 'time_id_', right_on = 'time_id')

    df_feature = df_feature.add_prefix('trade_')
    stock_id = file_path.rsplit('=', 1)[-1]
    df_feature['row_id'] = df_feature['trade_time_id_'].apply(lambda x: f'{stock_id}-{x}')
    df_feature = df_feature.drop(columns=['trade_time_id_'])

    return df_feature

### Preprocessing of Time and Stock Aggregates

In [10]:
# Add per-stock_id and per-time_id summary statistics of the volatility columns
def get_time_stock(df):
    """Append group statistics of the realized-volatility columns to ``df``.

    Every column whose name contains ``'realized_volatility'`` is summarised
    (mean, std, max, min) once per ``stock_id`` and once per ``time_id``. The
    results are left-joined back as ``<column>_<stat>_stock`` and
    ``<column>_<stat>_time`` columns. A new frame is returned.
    """
    vol_cols = [name for name in df.columns if 'realized_volatility' in name]
    stat_names = ['mean', 'std', 'max', 'min']

    def summarize(key, tag):
        # Aggregate per key, flatten the (column, stat) pairs and tag every
        # column, so the key itself becomes '<key>__<tag>'.
        stats = df.groupby([key])[vol_cols].agg(stat_names).reset_index()
        stats.columns = ['_'.join(parts) for parts in stats.columns]
        return stats.add_suffix('_' + tag)

    by_stock = summarize('stock_id', 'stock')
    by_time = summarize('time_id', 'time')

    # Attach the stock-level statistics first, then the time-level ones
    merged = df.merge(by_stock, how='left', left_on=['stock_id'], right_on=['stock_id__stock'])
    merged = merged.merge(by_time, how='left', left_on=['time_id'], right_on=['time_id__time'])

    # The suffixed key copies were only needed for the joins
    return merged.drop(['stock_id__stock', 'time_id__time'], axis=1)

### Overall Preprocessing

In [12]:
# Function to run the per-stock preprocessing in parallel (one task per stock id)
def preprocessor(list_stock_ids, is_train = True):
    """Build the combined book and trade feature table for the given stocks.

    Parameters
    ----------
    list_stock_ids : iterable
        Stock ids to process; each one becomes a separate joblib task.
    is_train : bool, default True
        Read the ``*_train.parquet`` data when true, otherwise the
        ``*_test.parquet`` data.

    Returns
    -------
    pandas.DataFrame
        The per-stock frames stacked together with a fresh integer index.
    """
    split = 'train' if is_train else 'test'

    def build_stock_features(stock_id):
        # Locate this stock's partition in the book and trade data
        partition = "stock_id=" + str(stock_id)
        file_path_book = config.data_dir + "book_" + split + ".parquet/" + partition
        file_path_trade = config.data_dir + "trade_" + split + ".parquet/" + partition

        # Book features are the left side, so every book row_id is kept
        book_features = book_preprocessor(file_path_book)
        trade_features = trade_preprocessor(file_path_trade)
        return pd.merge(book_features, trade_features, on = 'row_id', how = 'left')

    # Fan the stocks out over all available cores
    tasks = (delayed(build_stock_features)(stock_id) for stock_id in list_stock_ids)
    per_stock_frames = Parallel(n_jobs = -1, verbose = 1)(tasks)

    # Stack the per-stock results into one frame
    return pd.concat(per_stock_frames, ignore_index = True)

In [13]:
# Run the pipeline on stock 0 only (training files, since is_train defaults to True).
df = preprocessor([0])

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   30.9s finished


In [14]:
# Display the feature table built for stock 0.
df

Unnamed: 0,wap1_sum,wap1_mean,wap1_std,wap2_sum,wap2_mean,wap2_std,log_return1_sum,log_return1_realized_volatility,log_return1_mean,log_return1_std,log_return2_sum,log_return2_realized_volatility,log_return2_mean,log_return2_std,wap_balance_sum,wap_balance_mean,wap_balance_std,price_spread_sum,price_spread_mean,price_spread_std,price_spread2_sum,price_spread2_mean,price_spread2_std,bid_spread_sum,bid_spread_mean,bid_spread_std,ask_spread_sum,ask_spread_mean,ask_spread_std,total_volume_sum,total_volume_mean,total_volume_std,volume_imbalance_sum,volume_imbalance_mean,volume_imbalance_std,bid_ask_spread_sum,bid_ask_spread_mean,bid_ask_spread_std,wap1_sum_0,wap1_mean_0,wap1_std_0,wap2_sum_0,wap2_mean_0,wap2_std_0,log_return1_sum_0,log_return1_realized_volatility_0,log_return1_mean_0,log_return1_std_0,log_return2_sum_0,log_return2_realized_volatility_0,log_return2_mean_0,log_return2_std_0,wap_balance_sum_0,wap_balance_mean_0,wap_balance_std_0,price_spread_sum_0,price_spread_mean_0,price_spread_std_0,price_spread2_sum_0,price_spread2_mean_0,price_spread2_std_0,bid_spread_sum_0,bid_spread_mean_0,bid_spread_std_0,ask_spread_sum_0,ask_spread_mean_0,ask_spread_std_0,total_volume_sum_0,total_volume_mean_0,total_volume_std_0,volume_imbalance_sum_0,volume_imbalance_mean_0,volume_imbalance_std_0,bid_ask_spread_sum_0,bid_ask_spread_mean_0,bid_ask_spread_std_0,wap1_sum_100,wap1_mean_100,wap1_std_100,wap2_sum_100,wap2_mean_100,wap2_std_100,log_return1_sum_100,log_return1_realized_volatility_100,log_return1_mean_100,log_return1_std_100,log_return2_sum_100,log_return2_realized_volatility_100,log_return2_mean_100,log_return2_std_100,wap_balance_sum_100,wap_balance_mean_100,wap_balance_std_100,price_spread_sum_100,price_spread_mean_100,price_spread_std_100,price_spread2_sum_100,price_spread2_mean_100,price_spread2_std_100,bid_spread_sum_100,bid_spread_mean_100,bid_spread_std_100,ask_spread_sum_100,ask_spread_mean_100,ask_spread_std_100,total_volume_sum_100,total_volume_mean_100,total_vo
lume_std_100,volume_imbalance_sum_100,volume_imbalance_mean_100,volume_imbalance_std_100,bid_ask_spread_sum_100,bid_ask_spread_mean_100,bid_ask_spread_std_100,wap1_sum_200,wap1_mean_200,wap1_std_200,wap2_sum_200,wap2_mean_200,wap2_std_200,log_return1_sum_200,log_return1_realized_volatility_200,log_return1_mean_200,log_return1_std_200,log_return2_sum_200,log_return2_realized_volatility_200,log_return2_mean_200,log_return2_std_200,wap_balance_sum_200,wap_balance_mean_200,wap_balance_std_200,price_spread_sum_200,price_spread_mean_200,price_spread_std_200,price_spread2_sum_200,price_spread2_mean_200,price_spread2_std_200,bid_spread_sum_200,bid_spread_mean_200,bid_spread_std_200,ask_spread_sum_200,ask_spread_mean_200,ask_spread_std_200,total_volume_sum_200,total_volume_mean_200,total_volume_std_200,volume_imbalance_sum_200,volume_imbalance_mean_200,volume_imbalance_std_200,bid_ask_spread_sum_200,...,wap_balance_std_400,price_spread_sum_400,price_spread_mean_400,price_spread_std_400,price_spread2_sum_400,price_spread2_mean_400,price_spread2_std_400,bid_spread_sum_400,bid_spread_mean_400,bid_spread_std_400,ask_spread_sum_400,ask_spread_mean_400,ask_spread_std_400,total_volume_sum_400,total_volume_mean_400,total_volume_std_400,volume_imbalance_sum_400,volume_imbalance_mean_400,volume_imbalance_std_400,bid_ask_spread_sum_400,bid_ask_spread_mean_400,bid_ask_spread_std_400,wap1_sum_500,wap1_mean_500,wap1_std_500,wap2_sum_500,wap2_mean_500,wap2_std_500,log_return1_sum_500,log_return1_realized_volatility_500,log_return1_mean_500,log_return1_std_500,log_return2_sum_500,log_return2_realized_volatility_500,log_return2_mean_500,log_return2_std_500,wap_balance_sum_500,wap_balance_mean_500,wap_balance_std_500,price_spread_sum_500,price_spread_mean_500,price_spread_std_500,price_spread2_sum_500,price_spread2_mean_500,price_spread2_std_500,bid_spread_sum_500,bid_spread_mean_500,bid_spread_std_500,ask_spread_sum_500,ask_spread_mean_500,ask_spread_std_500,total_volume_sum_500,total_volume
_mean_500,total_volume_std_500,volume_imbalance_sum_500,volume_imbalance_mean_500,volume_imbalance_std_500,bid_ask_spread_sum_500,bid_ask_spread_mean_500,bid_ask_spread_std_500,row_id,trade_log_return_realized_volatility,trade_seconds_in_bucket_count_unique,trade_size_sum,trade_size_realized_volatility,trade_size_mean,trade_size_std,trade_size_amax,trade_size_amin,trade_order_count_mean,trade_order_count_sum,trade_order_count_amax,trade_log_return_realized_volatility_0,trade_seconds_in_bucket_count_unique_0,trade_size_sum_0,trade_size_realized_volatility_0,trade_size_mean_0,trade_size_std_0,trade_size_amax_0,trade_size_amin_0,trade_order_count_mean_0,trade_order_count_sum_0,trade_order_count_amax_0,trade_log_return_realized_volatility_100,trade_seconds_in_bucket_count_unique_100,trade_size_sum_100,trade_size_realized_volatility_100,trade_size_mean_100,trade_size_std_100,trade_size_amax_100,trade_size_amin_100,trade_order_count_mean_100,trade_order_count_sum_100,trade_order_count_amax_100,trade_log_return_realized_volatility_200,trade_seconds_in_bucket_count_unique_200,trade_size_sum_200,trade_size_realized_volatility_200,trade_size_mean_200,trade_size_std_200,trade_size_amax_200,trade_size_amin_200,trade_order_count_mean_200,trade_order_count_sum_200,trade_order_count_amax_200,trade_log_return_realized_volatility_300,trade_seconds_in_bucket_count_unique_300,trade_size_sum_300,trade_size_realized_volatility_300,trade_size_mean_300,trade_size_std_300,trade_size_amax_300,trade_size_amin_300,trade_order_count_mean_300,trade_order_count_sum_300,trade_order_count_amax_300,trade_log_return_realized_volatility_400,trade_seconds_in_bucket_count_unique_400,trade_size_sum_400,trade_size_realized_volatility_400,trade_size_mean_400,trade_size_std_400,trade_size_amax_400,trade_size_amin_400,trade_order_count_mean_400,trade_order_count_sum_400,trade_order_count_amax_400,trade_log_return_realized_volatility_500,trade_seconds_in_bucket_count_unique_500,trade_size_sum_500,trade_size_
realized_volatility_500,trade_size_mean_500,trade_size_std_500,trade_size_amax_500,trade_size_amin_500,trade_order_count_mean_500,trade_order_count_sum_500,trade_order_count_amax_500,trade_time_id,trade_tendency,trade_f_max,trade_f_min,trade_df_max,trade_df_min,trade_abs_diff,trade_energy,trade_iqr_p,trade_abs_diff_v,trade_energy_v,trade_iqr_p_v
0,303.125061,1.003725,0.000693,303.105539,1.003661,0.000781,0.002292,0.004499,7.613599e-06,0.000260,0.002325,0.006999,0.000008,0.000404,0.117051,0.000388,0.000295,0.257255,0.000852,0.000211,0.355451,0.001177,0.000213,0.053006,0.000176,0.000162,-0.045557,-0.000151,0.000126,97696,323.496689,138.101214,40738,134.894040,107.260583,0.098563,0.000326,0.000222,303.125061,1.003725,0.000693,303.105539,1.003661,0.000781,0.002292,0.004499,7.613599e-06,0.000260,0.002325,0.006999,0.000008,0.000404,0.117051,0.000388,0.000295,0.257255,0.000852,0.000211,0.355451,0.001177,0.000213,0.053006,0.000176,0.000162,-0.045557,-0.000151,0.000126,97696,323.496689,138.101214,40738,134.894040,107.260583,0.098563,0.000326,0.000222,263.015170,1.003875,0.000453,263.002863,1.003828,0.000558,-0.000396,0.004041,-0.000002,0.000250,-0.000086,0.006566,-3.291241e-07,0.000406,0.105931,0.000404,0.000289,0.222287,0.000848,0.000214,0.309706,0.001182,0.000215,0.048196,0.000184,0.000164,-0.039558,-0.000151,0.000127,87028,332.167939,141.270190,34284,130.854962,107.857691,0.087754,0.000335,0.000217,193.740261,1.003836,0.000466,193.720078,1.003731,0.000547,-0.000269,0.003402,-1.392656e-06,0.000246,0.000260,0.005802,0.000001,0.000419,0.073062,0.000379,0.000278,0.167016,0.000865,0.000238,0.236153,0.001224,0.000206,0.039560,0.000205,0.000174,-0.029837,-0.000155,0.000124,62041,321.455959,151.899636,26011,134.772021,106.866300,0.069397,...,0.000291,0.076778,0.000783,0.000181,0.116351,0.001187,0.000226,0.020996,0.000214,0.000168,-0.018721,-0.000191,0.000140,25724,262.489796,118.188932,12184,124.326531,82.090066,0.039717,0.000405,0.000228,50.168442,1.003369,0.000546,50.163233,1.003265,0.000602,-0.000263,0.001459,-5.253940e-06,0.000208,-0.000272,0.003018,-0.000005,0.000431,0.017729,0.000355,0.000247,0.039065,0.000781,0.000179,0.060658,0.001213,0.000243,0.012877,0.000258,0.000158,-0.008792,-0.000176,0.000131,12485,249.700000,100.559709,7241,144.820000,71.123833,0.021669,0.000433,0.000252,0-5,0.002006,40.0,3179.0,893.949104
,79.475000,118.375107,499.0,1.0,2.750000,110.0,12.0,0.002006,40.0,3179.0,893.949104,79.475000,118.375107,499.0,1.0,2.750000,110.0,12.0,0.001852,34.0,2411.0,800.529200,70.911765,119.326232,499.0,1.0,2.411765,82.0,9.0,0.001666,27.0,1901.0,740.869084,70.407407,126.345454,499.0,1.0,2.555556,69.0,9.0,0.001308,21.0,1587.0,722.061632,75.571429,141.675888,499.0,1.0,2.571429,54.0,9.0,0.001121,16.0,1045.0,569.827167,65.312500,130.754589,499.0,1.0,2.437500,39.0,7.0,0.000959,10.0,737.0,523.916978,73.700000,156.408333,499.0,1.0,2.600000,26.0,6.0,5.0,19.265160,24.0,16.0,22.0,16.0,0.000391,1.007459,0.000651,75.975000,799145.0,107.75
1,200.047768,1.000239,0.000262,200.041171,1.000206,0.000272,0.000360,0.001204,1.810239e-06,0.000086,0.000801,0.002476,0.000004,0.000176,0.042312,0.000212,0.000155,0.078836,0.000394,0.000157,0.134182,0.000671,0.000200,0.028358,0.000142,0.000148,-0.027001,-0.000135,0.000065,82290,411.450000,172.263581,28410,142.050000,102.139758,0.055359,0.000277,0.000158,200.047768,1.000239,0.000262,200.041171,1.000206,0.000272,0.000360,0.001204,1.810239e-06,0.000086,0.000801,0.002476,0.000004,0.000176,0.042312,0.000212,0.000155,0.078836,0.000394,0.000157,0.134182,0.000671,0.000200,0.028358,0.000142,0.000148,-0.027001,-0.000135,0.000065,82290,411.450000,172.263581,28410,142.050000,102.139758,0.055359,0.000277,0.000158,187.049275,1.000264,0.000251,187.042180,1.000226,0.000269,0.000561,0.001140,0.000003,0.000084,0.000710,0.002432,3.798911e-06,0.000178,0.039800,0.000213,0.000157,0.070603,0.000378,0.000143,0.122035,0.000653,0.000194,0.027304,0.000146,0.000151,-0.024140,-0.000129,0.000056,78561,420.112299,173.976587,27473,146.914439,102.961696,0.051445,0.000275,0.000160,152.050502,1.000332,0.000215,152.040762,1.000268,0.000266,0.000331,0.001014,2.180828e-06,0.000083,0.000950,0.002105,0.000006,0.000171,0.031864,0.000210,0.000165,0.052886,0.000348,0.000115,0.092774,0.000610,0.000168,0.021131,0.000139,0.000163,-0.018770,-0.000123,0.000059,66716,438.921053,178.623008,23014,151.407895,108.481656,0.039901,...,0.000170,0.026790,0.000367,0.000129,0.050068,0.000686,0.000192,0.013552,0.000186,0.000220,-0.009736,-0.000133,0.000063,35040,480.000000,167.075582,7018,96.136986,79.708203,0.023288,0.000319,0.000226,37.022563,1.000610,0.000230,37.019386,1.000524,0.000231,-0.000013,0.000857,-3.608732e-07,0.000143,0.000575,0.001435,0.000016,0.000239,0.010265,0.000277,0.000186,0.012540,0.000339,0.000163,0.028945,0.000782,0.000207,0.010942,0.000296,0.000255,-0.005470,-0.000148,0.000063,15755,425.810811,180.407846,4059,109.702703,97.631639,0.016412,0.000444,0.000239,0-11,0.000901,30.0,1289.0,480.608989,42.96666
7,77.815203,280.0,1.0,1.900000,57.0,6.0,0.000901,30.0,1289.0,480.608989,42.966667,77.815203,280.0,1.0,1.900000,57.0,6.0,0.000819,25.0,1174.0,467.843991,46.960000,82.600182,280.0,1.0,2.000000,50.0,6.0,0.000802,22.0,1124.0,466.514737,51.090909,87.344370,280.0,1.0,2.045455,45.0,6.0,0.000587,16.0,900.0,416.521308,56.250000,90.504144,280.0,1.0,2.250000,36.0,6.0,0.000510,11.0,829.0,414.819238,75.363636,104.689324,280.0,1.0,2.090909,23.0,6.0,0.000451,7.0,546.0,306.058818,78.000000,92.271339,206.0,1.0,2.000000,14.0,6.0,11.0,5.799148,15.0,15.0,14.0,11.0,0.000176,1.000413,0.000397,41.966667,230985.0,26.75
2,187.913849,0.999542,0.000864,187.939824,0.999680,0.000862,-0.002074,0.002369,-1.109201e-05,0.000173,-0.001493,0.004801,-0.000008,0.000352,0.062228,0.000331,0.000246,0.136330,0.000725,0.000164,0.210563,0.001120,0.000295,0.036955,0.000197,0.000170,-0.037243,-0.000198,0.000171,78274,416.351064,138.433034,26586,141.414894,108.891243,0.074198,0.000395,0.000220,187.913849,0.999542,0.000864,187.939824,0.999680,0.000862,-0.002074,0.002369,-1.109201e-05,0.000173,-0.001493,0.004801,-0.000008,0.000352,0.062228,0.000331,0.000246,0.136330,0.000725,0.000164,0.210563,0.001120,0.000295,0.036955,0.000197,0.000170,-0.037243,-0.000198,0.000171,78274,416.351064,138.433034,26586,141.414894,108.891243,0.074198,0.000395,0.000220,144.907030,0.999359,0.000902,144.932538,0.999535,0.000915,-0.002715,0.002205,-0.000019,0.000183,-0.002671,0.004106,-1.841958e-05,0.000342,0.050096,0.000345,0.000260,0.098615,0.000680,0.000149,0.152930,0.001055,0.000285,0.023552,0.000162,0.000154,-0.030732,-0.000212,0.000187,58090,400.620690,140.906641,19334,133.337931,106.949574,0.054284,0.000374,0.000231,100.893294,0.998944,0.000757,100.915397,0.999162,0.000829,-0.002240,0.001940,-2.217922e-05,0.000193,-0.002434,0.003900,-0.000024,0.000389,0.039989,0.000396,0.000286,0.068989,0.000683,0.000174,0.110331,0.001092,0.000322,0.014792,0.000146,0.000158,-0.026519,-0.000263,0.000201,44495,440.544554,138.584092,14495,143.514851,116.258558,0.041311,...,0.000299,0.031877,0.000625,0.000120,0.050816,0.000996,0.000307,0.008521,0.000167,0.000207,-0.010387,-0.000204,0.000164,23154,454.000000,115.120632,7778,152.509804,100.093231,0.018908,0.000371,0.000276,18.957882,0.997783,0.000365,18.961707,0.997985,0.000632,-0.000882,0.000640,-4.641993e-05,0.000143,-0.000495,0.002509,-0.000026,0.000591,0.007657,0.000403,0.000258,0.010505,0.000553,0.000092,0.021587,0.001136,0.000366,0.006367,0.000335,0.000264,-0.004691,-0.000247,0.000125,8558,450.421053,130.228140,3080,162.105263,131.846668,0.011058,0.000582,0.000293,0-16,0.001961,25.0,2161.0
,704.588532,86.440000,113.587000,391.0,1.0,2.720000,68.0,8.0,0.001961,25.0,2161.0,704.588532,86.440000,113.587000,391.0,1.0,2.720000,68.0,8.0,0.001875,23.0,2032.0,693.045453,88.347826,116.928342,391.0,1.0,2.739130,63.0,8.0,0.001575,18.0,1691.0,628.597646,93.944444,117.892011,391.0,1.0,2.833333,51.0,8.0,0.001137,12.0,1189.0,552.864359,99.083333,130.679663,391.0,1.0,3.166667,38.0,8.0,0.001048,10.0,1087.0,543.559564,108.700000,140.356728,391.0,2.0,3.400000,34.0,8.0,0.000723,4.0,661.0,377.164420,165.250000,104.917031,246.0,11.0,3.500000,14.0,6.0,16.0,-49.863100,15.0,10.0,7.0,17.0,0.000820,0.998409,0.001628,81.440000,496445.0,122.00
3,119.859781,0.998832,0.000757,119.835941,0.998633,0.000656,-0.002828,0.002574,-2.376661e-05,0.000236,-0.002053,0.003637,-0.000017,0.000334,0.045611,0.000380,0.000248,0.103252,0.000860,0.000280,0.139066,0.001159,0.000366,0.022764,0.000190,0.000199,-0.013001,-0.000108,0.000091,52232,435.266667,156.120334,17546,146.216667,121.533215,0.035765,0.000298,0.000214,119.859781,0.998832,0.000757,119.835941,0.998633,0.000656,-0.002828,0.002574,-2.376661e-05,0.000236,-0.002053,0.003637,-0.000017,0.000334,0.045611,0.000380,0.000248,0.103252,0.000860,0.000280,0.139066,0.001159,0.000366,0.022764,0.000190,0.000199,-0.013001,-0.000108,0.000091,52232,435.266667,156.120334,17546,146.216667,121.533215,0.035765,0.000298,0.000214,102.854482,0.998587,0.000491,102.838843,0.998435,0.000472,-0.002525,0.002552,-0.000025,0.000251,-0.002102,0.003580,-2.040528e-05,0.000354,0.037411,0.000363,0.000258,0.092658,0.000900,0.000283,0.126345,0.001227,0.000351,0.021932,0.000213,0.000206,-0.011706,-0.000114,0.000097,44707,434.048544,158.774929,15969,155.038835,124.004263,0.033637,0.000327,0.000218,68.894597,0.998472,0.000452,68.890314,0.998410,0.000466,-0.000866,0.001855,-1.255555e-05,0.000225,-0.000456,0.002880,-0.000007,0.000349,0.023407,0.000339,0.000225,0.059033,0.000856,0.000269,0.076087,0.001103,0.000301,0.009162,0.000133,0.000152,-0.007865,-0.000114,0.000088,29310,424.782609,140.057013,9468,137.217391,101.947632,0.017027,...,0.000250,0.024143,0.001050,0.000067,0.028826,0.001253,0.000166,0.003563,0.000155,0.000147,-0.001111,-0.000048,0.000010,11476,498.956522,156.965682,3538,153.826087,95.507569,0.004673,0.000203,0.000146,11.976885,0.998074,0.000534,11.979078,0.998256,0.000599,-0.000635,0.000987,-5.288889e-05,0.000292,-0.000857,0.001360,-0.000071,0.000403,0.002419,0.000202,0.000091,0.012745,0.001062,0.000090,0.014924,0.001244,0.000134,0.001573,0.000131,0.000048,-0.000601,-0.000050,0.000013,6470,539.166667,176.489934,2140,178.333333,112.091954,0.002174,0.000181,0.000046,0-31,0.001561,15.0,1962.0,741
.811297,130.800000,144.828569,450.0,5.0,3.933333,59.0,15.0,0.001561,15.0,1962.0,741.811297,130.800000,144.828569,450.0,5.0,3.933333,59.0,15.0,0.001561,13.0,1933.0,741.524780,148.692308,147.884180,450.0,5.0,4.384615,57.0,15.0,0.001090,10.0,1561.0,702.961592,156.100000,166.827556,450.0,5.0,4.700000,47.0,15.0,0.001089,9.0,1556.0,702.943810,172.888889,167.747909,450.0,5.0,5.111111,46.0,15.0,0.000802,3.0,514.0,453.878838,171.333333,242.838080,450.0,5.0,3.666667,11.0,8.0,0.000327,2.0,509.0,453.851297,254.500000,276.478751,450.0,59.0,5.000000,10.0,8.0,31.0,-14.963219,8.0,7.0,5.0,8.0,0.000593,0.998041,0.001038,82.800000,550284.0,131.50
4,175.932865,0.999619,0.000258,175.934256,0.999626,0.000317,-0.000002,0.001894,-1.057099e-08,0.000144,-0.000281,0.003257,-0.000002,0.000247,0.044783,0.000254,0.000188,0.069901,0.000397,0.000130,0.122698,0.000697,0.000185,0.033565,0.000191,0.000083,-0.019206,-0.000109,0.000076,60407,343.221591,158.054066,21797,123.846591,102.407501,0.052771,0.000300,0.000131,175.932865,0.999619,0.000258,175.934256,0.999626,0.000317,-0.000002,0.001894,-1.057099e-08,0.000144,-0.000281,0.003257,-0.000002,0.000247,0.044783,0.000254,0.000188,0.069901,0.000397,0.000130,0.122698,0.000697,0.000185,0.033565,0.000191,0.000083,-0.019206,-0.000109,0.000076,60407,343.221591,158.054066,21797,123.846591,102.407501,0.052771,0.000300,0.000131,149.941716,0.999611,0.000246,149.944699,0.999631,0.000306,0.000533,0.001617,0.000004,0.000132,0.000605,0.002975,4.033350e-06,0.000244,0.037210,0.000248,0.000187,0.058384,0.000389,0.000133,0.102832,0.000686,0.000193,0.027691,0.000185,0.000086,-0.016736,-0.000112,0.000079,54615,364.100000,156.327343,19003,126.686667,105.347118,0.044427,0.000296,0.000139,113.951545,0.999575,0.000259,113.961458,0.999662,0.000277,0.000274,0.001571,2.401551e-06,0.000148,-0.000310,0.002461,-0.000003,0.000231,0.026236,0.000230,0.000186,0.047192,0.000414,0.000137,0.079329,0.000696,0.000189,0.019859,0.000174,0.000076,-0.012260,-0.000108,0.000073,44277,388.394737,164.649352,13333,116.956140,102.595726,0.032120,...,0.000208,0.025789,0.000469,0.000143,0.044261,0.000805,0.000164,0.011515,0.000209,0.000071,-0.006946,-0.000126,0.000083,18697,339.945455,129.296590,4709,85.618182,93.493413,0.018461,0.000336,0.000113,16.990718,0.999454,0.000307,16.993885,0.999640,0.000180,0.000573,0.001124,3.369865e-05,0.000279,-0.000173,0.000780,-0.000010,0.000195,0.006128,0.000360,0.000175,0.007042,0.000414,0.000072,0.012732,0.000749,0.000075,0.003310,0.000195,0.000018,-0.002377,-0.000140,0.000066,5939,349.352941,147.725227,1851,108.882353,112.990532,0.005687,0.000335,0.000055,0-62,0.000871,22.0,1791.0,661.65323
2,81.409091,117.914682,341.0,1.0,4.045455,89.0,17.0,0.000871,22.0,1791.0,661.653232,81.409091,117.914682,341.0,1.0,4.045455,89.0,17.0,0.000551,18.0,1574.0,629.928567,87.444444,123.472418,341.0,1.0,4.166667,75.0,17.0,0.000498,14.0,1458.0,620.580374,104.142857,133.957431,341.0,1.0,4.428571,62.0,17.0,0.000453,11.0,1219.0,575.551040,110.818182,140.061285,341.0,1.0,4.909091,54.0,17.0,0.000395,6.0,162.0,122.000000,27.000000,45.847574,117.0,1.0,3.666667,22.0,10.0,0.000348,3.0,40.0,34.380227,13.333333,18.009257,34.0,1.0,4.000000,12.0,10.0,62.0,-10.535369,9.0,13.0,9.0,11.0,0.000108,0.999237,0.000228,78.909091,437785.0,112.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3825,296.387479,0.997938,0.000747,296.365481,0.997864,0.000769,-0.002136,0.002579,-7.215157e-06,0.000150,-0.002055,0.003821,-0.000007,0.000222,0.062912,0.000212,0.000159,0.163999,0.000552,0.000202,0.242920,0.000818,0.000188,0.024713,0.000083,0.000072,-0.054055,-0.000182,0.000125,111148,374.235690,165.473374,58552,197.144781,129.451712,0.078768,0.000265,0.000128,296.387479,0.997938,0.000747,296.365481,0.997864,0.000769,-0.002136,0.002579,-7.215157e-06,0.000150,-0.002055,0.003821,-0.000007,0.000222,0.062912,0.000212,0.000159,0.163999,0.000552,0.000202,0.242920,0.000818,0.000188,0.024713,0.000083,0.000072,-0.054055,-0.000182,0.000125,111148,374.235690,165.473374,58552,197.144781,129.451712,0.078768,0.000265,0.000128,249.465819,0.997863,0.000783,249.441403,0.997766,0.000786,-0.002480,0.002400,-0.000010,0.000152,-0.002259,0.003628,-9.036732e-06,0.000230,0.054467,0.000218,0.000164,0.139559,0.000558,0.000217,0.200349,0.000801,0.000196,0.017846,0.000071,0.000048,-0.042817,-0.000171,0.000117,91355,365.420000,169.946376,50465,201.860000,134.492533,0.060664,0.000243,0.000125,203.535859,0.997725,0.000796,203.512783,0.997612,0.000770,-0.001620,0.002247,-7.942849e-06,0.000158,-0.001614,0.003328,-0.000008,0.000233,0.045543,0.000223,0.000171,0.113766,0.000558,0.000193,0.163406,0.000801,0.000183,0.013580,0.000067,0.000031,-0.035950,-0.000176,0.000120,69744,341.882353,148.976759,42334,207.519608,137.125251,0.049530,...,0.000160,0.040700,0.000447,0.000102,0.064909,0.000713,0.000125,0.006191,0.000068,0.000034,-0.017949,-0.000197,0.000118,28874,317.296703,145.085683,20098,220.857143,151.670298,0.024140,0.000265,0.000121,38.868268,0.996622,0.000233,38.863364,0.996497,0.000279,-0.000205,0.001019,-5.243988e-06,0.000165,-0.000207,0.001348,-0.000005,0.000219,0.006996,0.000179,0.000111,0.016705,0.000428,0.000085,0.026726,0.000685,0.000126,0.003381,0.000087,0.000045,-0.006607,-0.000169,0.000079,9340,239.487179,94.308528,5520,141.538462,94.836937,0.009989,0.000256,0.000093,0-32751,0.001519,52.0
,3450.0,974.903072,66.346154,118.944852,503.0,1.0,3.057692,159.0,14.0,0.001519,52.0,3450.0,974.903072,66.346154,118.944852,503.0,1.0,3.057692,159.0,14.0,0.001448,45.0,2969.0,875.292523,65.977778,113.842892,503.0,1.0,3.111111,140.0,14.0,0.001257,39.0,2407.0,782.764971,61.717949,110.521003,503.0,1.0,3.128205,122.0,14.0,0.001162,35.0,2365.0,782.124670,67.571429,115.288968,503.0,1.0,3.257143,114.0,14.0,0.000911,28.0,1856.0,689.511421,66.285714,114.244655,503.0,1.0,3.142857,88.0,12.0,0.000672,9.0,872.0,392.469107,96.888889,93.236855,213.0,1.0,2.777778,25.0,8.0,32751.0,7.276162,28.0,24.0,18.0,29.0,0.000724,0.995119,0.001409,63.346154,950436.0,49.25
3826,206.063903,1.000310,0.000551,206.100395,1.000487,0.000599,0.000403,0.002206,1.966770e-06,0.000154,0.000959,0.002847,0.000005,0.000199,0.055028,0.000267,0.000193,0.111732,0.000542,0.000147,0.166022,0.000806,0.000128,0.018874,0.000092,0.000088,-0.035445,-0.000172,0.000083,127953,621.131068,266.019708,48159,233.781553,153.128340,0.054319,0.000264,0.000122,206.063903,1.000310,0.000551,206.100395,1.000487,0.000599,0.000403,0.002206,1.966770e-06,0.000154,0.000959,0.002847,0.000005,0.000199,0.055028,0.000267,0.000193,0.111732,0.000542,0.000147,0.166022,0.000806,0.000128,0.018874,0.000092,0.000088,-0.035445,-0.000172,0.000083,127953,621.131068,266.019708,48159,233.781553,153.128340,0.054319,0.000264,0.000122,167.082500,1.000494,0.000435,167.114511,1.000686,0.000473,0.000803,0.001900,0.000005,0.000147,0.000817,0.002532,4.890253e-06,0.000196,0.045444,0.000272,0.000192,0.095557,0.000572,0.000133,0.137879,0.000826,0.000119,0.012667,0.000076,0.000069,-0.029688,-0.000178,0.000088,112057,671.000000,262.442443,41641,249.347305,156.099016,0.042354,0.000254,0.000117,135.073360,1.000543,0.000446,135.106496,1.000789,0.000446,0.000169,0.001496,1.254316e-06,0.000129,0.000595,0.002315,0.000004,0.000200,0.039555,0.000293,0.000191,0.076890,0.000570,0.000117,0.112205,0.000831,0.000102,0.011014,0.000082,0.000075,-0.024330,-0.000180,0.000088,93823,694.985185,253.288903,35479,262.807407,160.655397,0.035345,...,0.000170,0.032162,0.000555,0.000084,0.048064,0.000829,0.000108,0.004806,0.000083,0.000075,-0.011113,-0.000192,0.000109,32829,566.017241,215.469255,14539,250.672414,152.685480,0.015919,0.000274,0.000139,31.017879,1.000577,0.000268,31.022971,1.000741,0.000332,-0.000486,0.000493,-1.568289e-05,0.000089,-0.000290,0.001155,-0.000009,0.000211,0.007242,0.000234,0.000186,0.017208,0.000555,0.000015,0.025609,0.000826,0.000125,0.002103,0.000068,0.000057,-0.006307,-0.000203,0.000113,17774,573.354839,156.098590,7788,251.225806,155.965532,0.008410,0.000271,0.000121,0-32753,0.001411,28.0,4547.0,1423
.656208,162.392857,218.445406,772.0,1.0,3.892857,109.0,19.0,0.001411,28.0,4547.0,1423.656208,162.392857,218.445406,772.0,1.0,3.892857,109.0,19.0,0.001342,22.0,3285.0,1186.404231,149.318182,208.970808,772.0,1.0,3.454545,76.0,12.0,0.001235,18.0,2493.0,1083.998616,138.500000,220.930186,772.0,1.0,3.555556,64.0,12.0,0.001066,12.0,2161.0,1035.056037,180.083333,249.031016,772.0,1.0,4.250000,51.0,12.0,0.000765,6.0,1401.0,852.912070,233.500000,282.957771,772.0,1.0,5.166667,31.0,12.0,0.000268,2.0,201.0,200.002500,100.500000,140.714249,200.0,1.0,2.000000,4.0,3.0,32753.0,36.888451,16.0,12.0,13.0,13.0,0.000327,1.000651,0.000595,159.500000,2026797.0,213.50
3827,187.915689,0.999552,0.000743,187.897700,0.999456,0.000736,0.001663,0.002913,8.895445e-06,0.000213,0.002077,0.003266,0.000011,0.000239,0.044629,0.000237,0.000188,0.098700,0.000525,0.000244,0.152395,0.000811,0.000307,0.038039,0.000202,0.000147,-0.015621,-0.000083,0.000060,64622,343.734043,140.150429,21776,115.829787,105.146411,0.053660,0.000285,0.000155,187.915689,0.999552,0.000743,187.897700,0.999456,0.000736,0.001663,0.002913,8.895445e-06,0.000213,0.002077,0.003266,0.000011,0.000239,0.044629,0.000237,0.000188,0.098700,0.000525,0.000244,0.152395,0.000811,0.000307,0.038039,0.000202,0.000147,-0.015621,-0.000083,0.000060,64622,343.734043,140.150429,21776,115.829787,105.146411,0.053660,0.000285,0.000155,142.973458,0.999814,0.000648,142.961349,0.999730,0.000621,0.001972,0.002633,0.000014,0.000221,0.002033,0.003124,1.421911e-05,0.000262,0.036384,0.000254,0.000201,0.077764,0.000544,0.000251,0.119754,0.000837,0.000322,0.030230,0.000211,0.000152,-0.011741,-0.000082,0.000065,45907,321.027972,139.420225,15497,108.370629,100.688033,0.041971,0.000294,0.000158,116.000109,1.000001,0.000540,115.991152,0.999924,0.000511,0.001382,0.001963,1.191725e-05,0.000183,0.001253,0.002842,0.000011,0.000265,0.025847,0.000223,0.000157,0.053102,0.000458,0.000173,0.086162,0.000743,0.000260,0.024688,0.000213,0.000141,-0.008365,-0.000072,0.000044,36906,318.155172,123.184740,12884,111.068966,84.957455,0.033053,...,0.000158,0.036216,0.000470,0.000176,0.056117,0.000729,0.000265,0.013604,0.000177,0.000138,-0.006299,-0.000082,0.000050,25816,335.272727,117.648074,8412,109.246753,88.450872,0.019902,0.000258,0.000140,40.013864,1.000347,0.000154,40.007487,1.000187,0.000180,0.000007,0.000969,1.724415e-07,0.000155,0.000238,0.001231,0.000006,0.000197,0.010303,0.000258,0.000141,0.019794,0.000495,0.000110,0.029112,0.000728,0.000099,0.005391,0.000135,0.000078,-0.003930,-0.000098,0.000062,14699,367.475000,121.926625,4791,119.775000,81.662058,0.009321,0.000233,0.000077,0-32758,0.001521,36.0,4250.0,1095.818416,118
.055556,141.328583,601.0,1.0,3.500000,126.0,18.0,0.001521,36.0,4250.0,1095.818416,118.055556,141.328583,601.0,1.0,3.500000,126.0,18.0,0.001398,28.0,3337.0,995.747458,119.178571,148.300759,601.0,1.0,3.607143,101.0,18.0,0.001243,23.0,2295.0,740.532916,99.782609,120.489138,432.0,1.0,3.608696,83.0,18.0,0.001242,22.0,2294.0,740.532241,104.272727,121.339030,432.0,1.0,3.727273,82.0,18.0,0.000875,13.0,1149.0,515.461929,88.384615,116.956928,432.0,3.0,2.692308,35.0,11.0,0.000349,2.0,15.0,12.369317,7.500000,6.363961,12.0,3.0,2.000000,4.0,3.0,32758.0,52.053475,17.0,19.0,20.0,13.0,0.000652,0.999267,0.001315,98.055556,1200818.0,161.00
3828,307.723687,1.002357,0.000356,307.732623,1.002386,0.000424,0.000520,0.003046,1.698933e-06,0.000174,0.000614,0.005105,0.000002,0.000292,0.075224,0.000245,0.000187,0.147444,0.000480,0.000145,0.232856,0.000758,0.000206,0.034710,0.000113,0.000104,-0.050912,-0.000166,0.000149,118327,385.429967,140.552333,40547,132.074919,95.735325,0.085622,0.000279,0.000172,307.723687,1.002357,0.000356,307.732623,1.002386,0.000424,0.000520,0.003046,1.698933e-06,0.000174,0.000614,0.005105,0.000002,0.000292,0.075224,0.000245,0.000187,0.147444,0.000480,0.000145,0.232856,0.000758,0.000206,0.034710,0.000113,0.000104,-0.050912,-0.000166,0.000149,118327,385.429967,140.552333,40547,132.074919,95.735325,0.085622,0.000279,0.000172,247.577120,1.002337,0.000302,247.589044,1.002385,0.000389,-0.000597,0.002712,-0.000002,0.000173,-0.000717,0.004583,-2.902686e-06,0.000292,0.062746,0.000254,0.000184,0.122121,0.000494,0.000148,0.182358,0.000738,0.000198,0.023701,0.000096,0.000072,-0.036682,-0.000149,0.000138,96045,388.846154,142.010537,32243,130.538462,97.623830,0.060383,0.000244,0.000152,206.464721,1.002256,0.000248,206.476506,1.002313,0.000366,-0.000064,0.002550,-3.105771e-07,0.000178,0.000244,0.004430,0.000001,0.000309,0.055414,0.000269,0.000193,0.107737,0.000523,0.000135,0.158385,0.000769,0.000199,0.019230,0.000093,0.000063,-0.031538,-0.000153,0.000147,79728,387.029126,147.183142,24424,118.563107,89.135979,0.050768,...,0.000165,0.051850,0.000503,0.000114,0.079906,0.000776,0.000207,0.010672,0.000104,0.000078,-0.017453,-0.000169,0.000172,44822,435.165049,165.821490,11920,115.728155,89.854042,0.028125,0.000273,0.000191,50.110131,1.002203,0.000235,50.104044,1.002081,0.000333,-0.000554,0.001212,-1.107497e-05,0.000173,-0.000514,0.001893,-0.000010,0.000270,0.011932,0.000239,0.000150,0.024656,0.000493,0.000109,0.038901,0.000778,0.000247,0.005527,0.000111,0.000094,-0.008751,-0.000175,0.000185,20500,410.000000,184.443122,6250,125.000000,94.481399,0.014278,0.000286,0.000203,0-32763,0.001794,53.0,3217.0,732.8
66291,60.698113,81.077737,383.0,1.0,2.150943,114.0,10.0,0.001794,53.0,3217.0,732.866291,60.698113,81.077737,383.0,1.0,2.150943,114.0,10.0,0.001686,41.0,2422.0,623.538291,59.073171,78.378055,383.0,1.0,2.097561,86.0,8.0,0.001622,33.0,2171.0,603.157525,65.787879,83.098871,383.0,1.0,2.030303,67.0,8.0,0.001404,25.0,1627.0,514.380210,65.080000,81.317751,383.0,1.0,1.920000,48.0,8.0,0.001070,16.0,1463.0,503.639752,91.437500,89.397963,383.0,3.0,2.312500,37.0,8.0,0.000601,7.0,932.0,451.457639,133.142857,115.271316,383.0,20.0,2.857143,20.0,8.0,32763.0,-15.091280,23.0,30.0,24.0,23.0,0.000262,1.004616,0.000529,53.698113,537093.0,96.00


  self.obj[key] = value


## Training

### Utilities

In [31]:
def rmspe(y_true, y_pred):
    """Return the root mean squared percentage error (RMSPE).

    Evaluates ``sqrt(mean(((y_true - y_pred) / y_true) ** 2))``.

    Args:
        y_true: Ground-truth values. They are used as the denominator and
            no zero check is performed.
        y_pred: Predicted values, element-wise compatible with ``y_true``.

    Returns:
        The RMSPE as a scalar.
    """
    relative_error = (y_true - y_pred) / y_true
    mean_squared_relative_error = np.mean(np.square(relative_error))
    return np.sqrt(mean_squared_relative_error)

def feval_rmspe(y_pred, xgb_train):
    """Custom XGBoost evaluation metric reporting RMSPE.

    Args:
        y_pred: Predictions for the rows held by ``xgb_train``.
        xgb_train: Object exposing ``get_label()`` (an ``xgb.DMatrix`` when
            this function is passed as ``feval`` to ``xgb.train``).

    Returns:
        A ``(metric_name, metric_value)`` tuple: ``('RMSPE', score)``.
    """
    labels = xgb_train.get_label()
    score = rmspe(labels, y_pred)
    return 'RMSPE', score

### Modeling

In [32]:
def train_and_evaluate(train, test, n_splits=5, num_boost_round=100):
    """Cross-validate an XGBoost regressor and return fold-averaged test predictions.

    For each shuffled K-fold split a booster is trained on the training part,
    monitored on the held-out part, and used to predict the test set. Every
    row is weighted by ``1 / target**2``, so the squared-error objective
    minimises squared *relative* error, which is what RMSPE measures.
    Progress and the final out-of-fold RMSPE are printed.

    Args:
        train: DataFrame holding the features plus the ``row_id``,
            ``time_id``, ``stock_id`` and ``target`` columns.
        test: DataFrame holding the ``row_id``, ``time_id`` and ``stock_id``
            columns plus features (expected to match the training features).
        n_splits: Number of cross-validation folds. Also the divisor used to
            average the per-fold test predictions.
        num_boost_round: Number of boosting rounds per fold. No early
            stopping is configured, so every fold trains for all rounds.

    Returns:
        1-D numpy array with one prediction per row of ``test`` (the mean of
        the per-fold predictions).
    """
    # Hyperparameters come from Config.model_params so they are defined in
    # exactly one place; copy so the shared dict is never mutated from here.
    params = dict(config.model_params["xgb_bl"])

    # Split features and target
    x = train.drop(['row_id', 'target', 'time_id'], axis = 1)
    y = train['target']
    x_test = test.drop(['row_id', 'time_id'], axis = 1)
    # Transform stock id to a numeric value. Done once on the full frames;
    # the per-fold slices below inherit the dtype, so no re-cast is needed
    # (re-casting on an .iloc slice assigns into a copy).
    x['stock_id'] = x['stock_id'].astype(int)
    x_test['stock_id'] = x_test['stock_id'].astype(int)
    dtest = xgb.DMatrix(x_test)

    # Out-of-fold predictions: each training row is predicted exactly once,
    # by the model that did not see it.
    oof_predictions = np.zeros(x.shape[0])
    # Running mean of the per-fold test predictions
    test_predictions = np.zeros(x_test.shape[0])
    kfold = KFold(n_splits = n_splits, random_state = config.seed, shuffle = True)
    for fold, (train_idx, val_idx) in enumerate(kfold.split(x)):
        print(f'Training fold {fold + 1}')
        x_train, y_train = x.iloc[train_idx], y.iloc[train_idx]
        x_val, y_val = x.iloc[val_idx], y.iloc[val_idx]

        # weight = 1 / y^2 turns squared error into squared percentage error
        dtrain = xgb.DMatrix(x_train, label=y_train, weight=1/np.square(y_train), enable_categorical=True)
        dval = xgb.DMatrix(x_val, label=y_val, weight=1/np.square(y_val), enable_categorical=True)

        model = xgb.train(params,
                          dtrain=dtrain,
                          num_boost_round=num_boost_round,
                          evals=[(dtrain, "dtrain"), (dval, "dval")],
                          verbose_eval=10,
                          feval=feval_rmspe)

        oof_predictions[val_idx] = model.predict(dval)
        # Divide by the actual fold count so the average stays correct when
        # n_splits is changed.
        test_predictions += model.predict(dtest) / n_splits

    rmspe_score = rmspe(y, oof_predictions)
    print(f'Our out of folds RMSPE is {rmspe_score}')
    # Return test predictions
    return test_predictions

In [None]:
def objective(trial):
    """Optuna objective: train an XGBoost classifier and return validation accuracy.

    NOTE(review): this is a template that tunes a binary classifier on
    scikit-learn's breast-cancer dataset, not on the notebook's own
    train/test data. It prunes on the ``validation-auc`` metric but returns
    accuracy as the value to optimise.

    Args:
        trial: Optuna trial used to sample hyperparameters and to report
            intermediate values for pruning.

    Returns:
        Accuracy of the rounded predictions on the 25% validation split.
    """
    # Imported locally: the notebook's import cell does not bring in these
    # names, so the function would otherwise raise NameError on a fresh kernel.
    import optuna
    import sklearn.datasets
    import sklearn.metrics
    from sklearn.model_selection import train_test_split

    data, target = sklearn.datasets.load_breast_cancer(return_X_y=True)
    train_x, valid_x, train_y, valid_y = train_test_split(data, target, test_size=0.25)
    dtrain = xgb.DMatrix(train_x, label=train_y)
    dvalid = xgb.DMatrix(valid_x, label=valid_y)

    param = {
        "verbosity": 0,
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
    }

    # Tree-specific parameters are only sampled for the tree-based boosters.
    if param["booster"] in ("gbtree", "dart"):
        param["max_depth"] = trial.suggest_int("max_depth", 1, 9)
        param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
        param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
        param["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])
    # Dropout parameters are only sampled for DART.
    if param["booster"] == "dart":
        param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
        param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
        param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
        param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)

    # Add a callback for pruning. The observation key "validation-auc" is the
    # eval-set name passed to xgb.train below joined with the eval_metric.
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "validation-auc")
    bst = xgb.train(param, dtrain, evals=[(dvalid, "validation")], callbacks=[pruning_callback])
    preds = bst.predict(dvalid)
    # Round the predictions to the nearest integer to get class labels.
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(valid_y, pred_labels)
    return accuracy

### Running

In [23]:
# Load the raw train and test frames (read_train_test is defined earlier in the notebook)
train, test = read_train_test()

# Stock ids present in the training frame
train_stock_ids = train['stock_id'].unique()
# Build the per-stock feature table, then left-join it onto train by row_id
# so every original training row is kept even if it has no features.
train_ = preprocessor(train_stock_ids, is_train = True)
train = train.merge(train_, on = ['row_id'], how = 'left')

# Stock ids present in the test frame
test_stock_ids = test['stock_id'].unique()
# Same feature pipeline for the test set (is_train = False), joined the same way
test_ = preprocessor(test_stock_ids, is_train = False)
test = test.merge(test_, on = ['row_id'], how = 'left')

# Get group stats of time_id and stock_id
# NOTE: train and test are rebound here, so from this point on they hold the
# fully-featured frames rather than the raw ones.
train = get_time_stock(train)
test = get_time_stock(test)

Our training set has 428932 rows


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   30.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.8s finished


In [33]:
# Train and evaluate with cross-validation; returns the test-set predictions
test_predictions = train_and_evaluate(train, test)
# Save test predictions: adds a 'target' column to test in place, then writes
# only row_id and target to submission.csv in the working directory.
test['target'] = test_predictions
test[['row_id', 'target']].to_csv('submission.csv',index = False)

Training fold 1
[0]	dtrain-rmse:0.34874	dtrain-RMSPE:161.45158	dval-rmse:0.34874	dval-RMSPE:161.20300
[10]	dtrain-rmse:0.00988	dtrain-RMSPE:4.57245	dval-rmse:0.00989	dval-RMSPE:4.57050
[20]	dtrain-rmse:0.00069	dtrain-RMSPE:0.32170	dval-rmse:0.00073	dval-RMSPE:0.33865
[30]	dtrain-rmse:0.00060	dtrain-RMSPE:0.27676	dval-rmse:0.00064	dval-RMSPE:0.29700
[40]	dtrain-rmse:0.00057	dtrain-RMSPE:0.26397	dval-rmse:0.00062	dval-RMSPE:0.28652
[50]	dtrain-rmse:0.00055	dtrain-RMSPE:0.25597	dval-rmse:0.00061	dval-RMSPE:0.28077
[60]	dtrain-rmse:0.00054	dtrain-RMSPE:0.24883	dval-rmse:0.00060	dval-RMSPE:0.27531
[70]	dtrain-rmse:0.00053	dtrain-RMSPE:0.24414	dval-rmse:0.00059	dval-RMSPE:0.27218
[80]	dtrain-rmse:0.00052	dtrain-RMSPE:0.24038	dval-rmse:0.00058	dval-RMSPE:0.26984
[90]	dtrain-rmse:0.00051	dtrain-RMSPE:0.23700	dval-rmse:0.00058	dval-RMSPE:0.26851
[99]	dtrain-rmse:0.00051	dtrain-RMSPE:0.23450	dval-rmse:0.00058	dval-RMSPE:0.26739
Training fold 2
[0]	dtrain-rmse:0.34874	dtrain-RMSPE:161.40588	dval-

KeyboardInterrupt: 