In [4]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
import warnings
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')


In [9]:
def log_return(list_stock_prices):
    return np.log(list_stock_prices).diff()
def realized_volatility(series):
    return np.sqrt(np.sum(series**2))
def flatten_name(prefix, src_names):
    ret = []
    for c in src_names:
        if c[0] in ['time_id', 'stock_id']:
            ret.append(c[0])
        else:
            ret.append('.'.join([prefix] + list(c)))
    return ret

In [10]:
def preprocessor_book(stock_id):
        
        book_dtypes = {
            'stock_id':np.uint16, 
            'time_id': np.uint16,
            'seconds_in_bucket': np.uint16,
            'bid_price1': np.float32,
            'ask_price1': np.float32,
            'bid_price2': np.float32,
            'ask_price2': np.float32,
            'bid_size1': np.uint32,
            'ask_size1': np.uint32,
            'bid_size2': np.uint32,
            'ask_size2': np.uint32,
        }

        dir_path=f"individual_book_train/stock_{stock_id}.csv"
        data=pd.read_csv(dir_path)
        for column, dtype in book_dtypes.items():
            data[column] = data[column].astype(dtype)
        

        return data

In [20]:
def feature_generator(stock_id,forecast_period=60):
    
    def difference(series):
        return np.max(series)-np.min(series)
    
    data=preprocessor_book(stock_id=stock_id)
    data['wap1'] =(data['bid_price1'] * data['ask_size1'] + data['ask_price1'] * data['bid_size1'])/(data['bid_size1'] + data['ask_size1'])
    data['log_return1'] = data.groupby('time_id')['wap1'].transform(log_return)
    data['wap2'] = (data['bid_price2'] * data['ask_size2'] + data['ask_price2'] * data['bid_size2'])/(data['bid_size2'] + data['ask_size2'])
    data['log_return2'] = data.groupby('time_id')['wap2'].transform(log_return)

    data['wap_balance'] = abs(data['wap1'] - data['wap2'])
    data["BidAskSpread"]=(data['ask_price1'] / data['bid_price1'])-1
    
    data['bid_spread'] = data['bid_price1'] - data['bid_price2']
    data['ask_spread'] = data['ask_price1'] - data['ask_price2']
    data['total_volume'] = (data['ask_size1'] + data['ask_size2']) + (data['bid_size1'] + data['bid_size2'])
    data['volume_imbalance'] = abs((data['ask_size1'] + data['ask_size2']) - (data['bid_size1'] + data['bid_size2']))
    data['log_return_ask1'] = data.groupby(['time_id'])['ask_price1'].transform(log_return)
    data['log_return_ask2'] = data.groupby(['time_id'])['ask_price2'].transform(log_return)
    data['log_return_bid1'] = data.groupby(['time_id'])['bid_price1'].transform(log_return)
    data['log_return_bid2'] = data.groupby(['time_id'])['bid_price2'].transform(log_return)

    data_test=data[data["seconds_in_bucket"]>=600-forecast_period]
    data_train=data[data["seconds_in_bucket"]<600-forecast_period]
    #dict for aggregate
    create_feature_dict = {
    'seconds_in_bucket': ['count'],
    'wap1': [np.nanmean, np.nanstd,difference],
    'wap2': [np.nanmean, np.nanstd,difference],
    'log_return1': [np.nansum, realized_volatility],
    'log_return2': [np.nansum, realized_volatility],
    'log_return_ask1': [np.nansum, realized_volatility],
    'log_return_ask2': [np.nansum, realized_volatility],
    'log_return_bid1': [np.nansum, realized_volatility],
    'log_return_bid2': [np.nansum, realized_volatility],
    'wap_balance': [np.nanmean, np.nanstd],
    'BidAskSpread': [np.nanmax, np.nanmean,difference],
    'bid_spread': [np.nanmax, np.nanmean,difference],
    'ask_spread': [np.nanmax, np.nanmean,difference],
    'total_volume': [np.nansum, np.nanmean,difference],
    'volume_imbalance': [np.nansum, np.nanmean,np.nanmax]
}


    df_feature = pd.DataFrame(data_train.groupby(['time_id'],sort=False).agg(create_feature_dict)).reset_index()
    df_feature.columns = flatten_name('book', df_feature.columns)
    df_feature["missing_values"]=pd.DataFrame(data_train.groupby(['time_id'],sort=False).apply(lambda x: 540-len(x))).reset_index().iloc[:,1]
    for start_time in [180,300,420]:
        d = data_train[(data_train['seconds_in_bucket'] > start_time)].groupby('time_id').agg(create_feature_dict).reset_index(drop=False)
        d.columns = flatten_name(f'book_{int(start_time/60)}to_9', d.columns)
        df_feature = pd.merge(df_feature, d, on='time_id', how='left')
    create_target_dict = {
        'log_return1':[realized_volatility],
            }
    target = pd.DataFrame(data_test.dropna(subset=["log_return1"]).groupby(['time_id'],sort=False).agg(create_target_dict)).reset_index()
    target.columns = flatten_name('book', target.columns)
    df_feature = pd.merge(df_feature,target,how='left',left_on='time_id',right_on=f'time_id')
    df_feature.rename(columns={'book.log_return1.realized_volatility_y': 'target','book.log_return1.realized_volatility_x': 'book.log_return1.realized_volatility'}, inplace=True)
    df_feature["stock_id"]=stock_id
    return df_feature

In [21]:
df_feat=feature_generator(1)
df_feat

Unnamed: 0,time_id,book.seconds_in_bucket.count,book.wap1.nanmean,book.wap1.nanstd,book.wap1.difference,book.wap2.nanmean,book.wap2.nanstd,book.wap2.difference,book.log_return1.nansum,book.log_return1.realized_volatility,book.log_return2.nansum,book.log_return2.realized_volatility,book.log_return_ask1.nansum,book.log_return_ask1.realized_volatility,book.log_return_ask2.nansum,book.log_return_ask2.realized_volatility,book.log_return_bid1.nansum,book.log_return_bid1.realized_volatility,book.log_return_bid2.nansum,book.log_return_bid2.realized_volatility,book.wap_balance.nanmean,book.wap_balance.nanstd,book.BidAskSpread.nanmax,book.BidAskSpread.nanmean,book.BidAskSpread.difference,book.bid_spread.nanmax,book.bid_spread.nanmean,book.bid_spread.difference,book.ask_spread.nanmax,book.ask_spread.nanmean,book.ask_spread.difference,book.total_volume.nansum,book.total_volume.nanmean,book.total_volume.difference,book.volume_imbalance.nansum,book.volume_imbalance.nanmean,book.volume_imbalance.nanmax,missing_values,book_3to_9.seconds_in_bucket.count,book_3to_9.wap1.nanmean,book_3to_9.wap1.nanstd,book_3to_9.wap1.difference,book_3to_9.wap2.nanmean,book_3to_9.wap2.nanstd,book_3to_9.wap2.difference,book_3to_9.log_return1.nansum,book_3to_9.log_return1.realized_volatility,book_3to_9.log_return2.nansum,book_3to_9.log_return2.realized_volatility,book_3to_9.log_return_ask1.nansum,book_3to_9.log_return_ask1.realized_volatility,book_3to_9.log_return_ask2.nansum,book_3to_9.log_return_ask2.realized_volatility,book_3to_9.log_return_bid1.nansum,book_3to_9.log_return_bid1.realized_volatility,book_3to_9.log_return_bid2.nansum,book_3to_9.log_return_bid2.realized_volatility,book_3to_9.wap_balance.nanmean,book_3to_9.wap_balance.nanstd,book_3to_9.BidAskSpread.nanmax,book_3to_9.BidAskSpread.nanmean,book_3to_9.BidAskSpread.difference,book_3to_9.bid_spread.nanmax,book_3to_9.bid_spread.nanmean,book_3to_9.bid_spread.difference,book_3to_9.ask_spread.nanmax,book_3to_9.ask_spread.nanmean,book_3to_9.ask_spread.difference,book_3to_9.total_volume.nansum,book_3to_9.total_volume.nanmean,book_3to_9.total_volume.difference,book_3to_9.volume_imbalance.nansum,book_3to_9.volume_imbalance.nanmean,book_3to_9.volume_imbalance.nanmax,book_5to_9.seconds_in_bucket.count,book_5to_9.wap1.nanmean,book_5to_9.wap1.nanstd,book_5to_9.wap1.difference,book_5to_9.wap2.nanmean,book_5to_9.wap2.nanstd,book_5to_9.wap2.difference,book_5to_9.log_return1.nansum,book_5to_9.log_return1.realized_volatility,book_5to_9.log_return2.nansum,book_5to_9.log_return2.realized_volatility,book_5to_9.log_return_ask1.nansum,book_5to_9.log_return_ask1.realized_volatility,book_5to_9.log_return_ask2.nansum,book_5to_9.log_return_ask2.realized_volatility,book_5to_9.log_return_bid1.nansum,book_5to_9.log_return_bid1.realized_volatility,book_5to_9.log_return_bid2.nansum,book_5to_9.log_return_bid2.realized_volatility,book_5to_9.wap_balance.nanmean,book_5to_9.wap_balance.nanstd,book_5to_9.BidAskSpread.nanmax,book_5to_9.BidAskSpread.nanmean,book_5to_9.BidAskSpread.difference,book_5to_9.bid_spread.nanmax,book_5to_9.bid_spread.nanmean,book_5to_9.bid_spread.difference,book_5to_9.ask_spread.nanmax,book_5to_9.ask_spread.nanmean,book_5to_9.ask_spread.difference,book_5to_9.total_volume.nansum,book_5to_9.total_volume.nanmean,book_5to_9.total_volume.difference,book_5to_9.volume_imbalance.nansum,book_5to_9.volume_imbalance.nanmean,book_5to_9.volume_imbalance.nanmax,book_7to_9.seconds_in_bucket.count,book_7to_9.wap1.nanmean,book_7to_9.wap1.nanstd,book_7to_9.wap1.difference,book_7to_9.wap2.nanmean,book_7to_9.wap2.nanstd,book_7to_9.wap2.difference,book_7to_9.log_return1.nansum,book_7to_9.log_return1.realized_volatility,book_7to_9.log_return2.nansum,book_7to_9.log_return2.realized_volatility,book_7to_9.log_return_ask1.nansum,book_7to_9.log_return_ask1.realized_volatility,book_7to_9.log_return_ask2.nansum,book_7to_9.log_return_ask2.realized_volatility,book_7to_9.log_return_bid1.nansum,book_7to_9.log_return_bid1.realized_volatility,book_7to_9.log_return_bid2.nansum,book_7to_9.log_return_bid2.realized_volatility,book_7to_9.wap_balance.nanmean,book_7to_9.wap_balance.nanstd,book_7to_9.BidAskSpread.nanmax,book_7to_9.BidAskSpread.nanmean,book_7to_9.BidAskSpread.difference,book_7to_9.bid_spread.nanmax,book_7to_9.bid_spread.nanmean,book_7to_9.bid_spread.difference,book_7to_9.ask_spread.nanmax,book_7to_9.ask_spread.nanmean,book_7to_9.ask_spread.difference,book_7to_9.total_volume.nansum,book_7to_9.total_volume.nanmean,book_7to_9.total_volume.difference,book_7to_9.volume_imbalance.nansum,book_7to_9.volume_imbalance.nanmean,book_7to_9.volume_imbalance.nanmax,target,stock_id
0,5,520,1.003659,0.001203,0.005658,1.003692,0.001242,0.005632,0.002215,0.006001,0.002142,0.007367,0.001898,0.003313,0.002029,0.003248,0.002227,0.003361,2.226747e-03,0.003455,0.000271,0.000204,0.001176,0.000675,0.000915,0.000525,0.000114,0.000459,-0.000066,-0.000105,0.000328,152873,293.986538,1983,858993489841,1.651911e+09,4294967295,20,347,1.004248,0.000954,0.003827,1.004313,0.000958,0.003815,-0.000251,0.004828,-0.000645,0.005947,-0.000327,0.002832,-0.000196,0.002767,-0.000131,0.002850,-0.000131,0.002898,0.000255,0.000195,0.001176,0.000655,0.000915,0.000394,0.000103,0.000328,-0.000066,-0.000104,0.000328,94117,271.230548,846,459561523021,1.324385e+09,4294967293,228,1.003725,0.000565,0.002332,1.003811,0.000635,0.002361,-0.001818,0.003681,-0.001535,0.004584,-0.001699,0.002169,-0.001568,0.002208,-0.001503,0.002338,-0.001504,0.002402,0.000266,0.000201,0.001176,0.000636,0.000915,0.000394,0.000101,0.000328,-0.000066,-0.000102,0.000328,58637,257.179825,663,240518188635,1.054904e+09,4294967293,110,1.003919,0.000450,0.001939,1.004089,0.000539,0.001933,-0.000578,0.002545,-0.000816,0.003076,-0.000654,0.001395,-0.000719,0.001547,-0.000458,0.001373,-0.000392,0.001541,0.000307,0.000235,0.001176,0.000682,0.000784,0.000394,0.000091,0.000328,-0.000066,-0.000093,0.000262,30196,274.509091,632,141733926800,1.288490e+09,4294967291,0.001727,1
1,11,338,1.001762,0.000370,0.001784,1.001766,0.000403,0.001941,-0.000298,0.002148,-0.000549,0.002916,0.000067,0.001544,0.000067,0.001491,-0.000134,0.001583,-1.335472e-04,0.001407,0.000230,0.000167,0.000802,0.000481,0.000602,0.000334,0.000117,0.000268,-0.000067,-0.000107,0.000268,108429,320.795858,830,652835027215,1.931465e+09,4294967295,202,213,1.001776,0.000324,0.001673,1.001795,0.000371,0.001725,0.000491,0.001811,-0.000204,0.002550,0.000334,0.001243,0.000334,0.001203,0.000534,0.001274,0.000534,0.001110,0.000234,0.000159,0.000802,0.000477,0.000602,0.000334,0.000126,0.000268,-0.000067,-0.000111,0.000268,64534,302.976526,755,412316861710,1.935760e+09,4294967295,142,1.001871,0.000317,0.001588,1.001869,0.000412,0.001680,-0.000069,0.001543,-0.000082,0.001900,0.000267,0.001137,0.000200,0.001023,-0.000067,0.001070,0.000067,0.001006,0.000218,0.000142,0.000801,0.000447,0.000601,0.000334,0.000140,0.000268,-0.000067,-0.000097,0.000268,45259,318.725352,755,214748370915,1.512312e+09,4294967295,77,1.001993,0.000326,0.001187,1.002016,0.000413,0.001334,-0.000399,0.001297,-0.000691,0.001156,-0.000133,0.000812,-0.000134,0.000773,-0.000334,0.000703,-0.000267,0.000647,0.000224,0.000160,0.000801,0.000496,0.000534,0.000334,0.000139,0.000268,-0.000067,-0.000098,0.000268,25049,325.311688,755,115964120459,1.506028e+09,4294967276,0.000837,1
2,16,312,1.000972,0.000893,0.003273,1.000947,0.000900,0.003458,0.001881,0.002356,0.001916,0.002840,0.001810,0.002264,0.001810,0.002350,0.002001,0.002085,2.001286e-03,0.002268,0.000087,0.000073,0.000667,0.000293,0.000572,0.000286,0.000117,0.000191,-0.000095,-0.000118,0.000191,188854,605.301282,1148,648540056198,2.078654e+09,4294967291,228,209,1.001475,0.000622,0.002548,1.001450,0.000634,0.002868,0.001148,0.002060,0.001235,0.002369,0.001238,0.001953,0.001238,0.002035,0.001620,0.001814,0.001620,0.001977,0.000088,0.000074,0.000572,0.000261,0.000477,0.000286,0.000116,0.000191,-0.000095,-0.000119,0.000191,126723,606.330144,1148,481036333471,2.301609e+09,4294967290,150,1.001735,0.000480,0.002074,1.001705,0.000505,0.002325,0.000156,0.001871,0.000269,0.002152,0.000286,0.001788,0.000285,0.001858,0.000286,0.001721,0.000286,0.001858,0.000083,0.000064,0.000572,0.000270,0.000476,0.000286,0.000119,0.000191,-0.000095,-0.000118,0.000095,95252,635.013333,1111,326417514374,2.176117e+09,4294967290,77,1.002056,0.000240,0.001068,1.002037,0.000273,0.001329,-0.000053,0.001148,-0.000127,0.001369,0.000000,0.001059,0.000000,0.001117,0.000000,0.001007,0.000095,0.001097,0.000083,0.000068,0.000381,0.000246,0.000286,0.000191,0.000118,0.000095,-0.000095,-0.000126,0.000095,47634,618.623377,579,176093657716,2.286931e+09,4294967290,0.000886,1
3,31,152,0.997548,0.000609,0.002284,0.997471,0.000639,0.002682,-0.002105,0.003376,-0.002041,0.004092,-0.002108,0.001678,-0.002035,0.001777,-0.002109,0.002886,-2.036914e-03,0.002939,0.000341,0.000238,0.001676,0.000796,0.001240,0.000726,0.000188,0.000653,-0.000073,-0.000154,0.000508,65168,428.736842,1272,292057778338,1.921433e+09,4294967295,388,83,0.997198,0.000539,0.001919,0.997080,0.000508,0.001980,-0.000693,0.002456,0.000163,0.002658,-0.000655,0.001215,-0.000873,0.001213,-0.000364,0.002416,-0.000364,0.002425,0.000328,0.000249,0.001676,0.000863,0.001167,0.000726,0.000191,0.000653,-0.000073,-0.000144,0.000435,37238,448.650602,1240,171798689864,2.069864e+09,4294967295,48,0.996879,0.000458,0.001568,0.996728,0.000268,0.001447,-0.001335,0.002261,-0.000601,0.002101,-0.001091,0.001062,-0.001018,0.001074,-0.001092,0.002178,-0.001092,0.002257,0.000370,0.000284,0.001676,0.000971,0.001167,0.000726,0.000248,0.000653,-0.000073,-0.000184,0.000435,22642,471.708333,1205,111669148512,2.326441e+09,4294967295,25,0.996884,0.000553,0.001435,0.996676,0.000235,0.000985,-0.000006,0.001313,-0.000357,0.001427,-0.000218,0.000724,-0.000364,0.000621,0.000000,0.000763,0.000291,0.000868,0.000433,0.000296,0.000728,0.000684,0.000219,0.000726,0.000351,0.000653,-0.000073,-0.000148,0.000218,9493,379.720000,491,42949675089,1.717987e+09,4294967295,0.001161,1
4,62,201,1.000698,0.000379,0.001978,1.000654,0.000466,0.002938,0.000467,0.002834,-0.000034,0.004271,0.000924,0.001670,0.001065,0.002064,0.000782,0.000755,7.823546e-04,0.000982,0.000324,0.000251,0.001137,0.000725,0.000853,0.000427,0.000118,0.000356,-0.000071,-0.000206,0.000569,54268,269.990050,811,382252092924,1.901752e+09,4294967295,339,137,1.000781,0.000332,0.001341,1.000712,0.000506,0.002161,-0.000606,0.002256,-0.000854,0.003880,0.000000,0.001466,0.000071,0.001783,-0.000284,0.000619,-0.000213,0.000819,0.000328,0.000247,0.001137,0.000716,0.000782,0.000284,0.000116,0.000213,-0.000071,-0.000221,0.000569,33612,245.343066,811,158913801294,1.159955e+09,4294967295,90,1.000687,0.000326,0.001216,1.000526,0.000483,0.002161,-0.000644,0.001834,-0.000718,0.003534,0.000142,0.001250,0.000071,0.001521,-0.000355,0.000455,-0.000426,0.000619,0.000364,0.000262,0.001137,0.000777,0.000782,0.000142,0.000116,0.000071,-0.000071,-0.000252,0.000569,20153,223.922222,617,68719486823,7.635499e+08,4294967293,47,1.000491,0.000195,0.000686,1.000249,0.000206,0.001116,-0.000221,0.000956,-0.000725,0.001973,-0.000142,0.000904,0.000000,0.000948,-0.000142,0.000201,-0.000213,0.000355,0.000293,0.000216,0.001137,0.000765,0.000782,0.000142,0.000130,0.000071,-0.000071,-0.000172,0.000569,9107,193.765957,468,8589940397,1.827647e+08,4294967278,0.000629,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3825,32751,285,1.000159,0.000403,0.001515,1.000012,0.000379,0.001596,-0.000363,0.003685,-0.000377,0.004903,0.000067,0.001482,0.000000,0.001528,0.000067,0.001811,-2.910383e-11,0.002243,0.000343,0.000266,0.001075,0.000605,0.000941,0.000470,0.000159,0.000403,-0.000067,-0.000118,0.000336,84393,296.115789,691,605590388609,2.124879e+09,4294967295,255,192,1.000039,0.000364,0.001267,0.999951,0.000370,0.001456,-0.000488,0.003094,0.000283,0.003857,-0.000067,0.001225,-0.000067,0.001085,-0.000067,0.001605,0.000067,0.001714,0.000306,0.000233,0.001075,0.000611,0.000941,0.000470,0.000155,0.000403,-0.000067,-0.000118,0.000269,55784,290.541667,691,326417522040,1.700091e+09,4294967293,107,1.000175,0.000279,0.001267,1.000046,0.000308,0.001309,0.000429,0.002122,-0.000285,0.002794,0.000336,0.000961,0.000269,0.000691,0.000605,0.001110,0.000470,0.001122,0.000306,0.000199,0.000941,0.000638,0.000672,0.000403,0.000173,0.000336,-0.000067,-0.000099,0.000269,35023,327.317757,682,201863464173,1.886574e+09,4294967287,46,1.000245,0.000269,0.001260,1.000138,0.000269,0.001285,0.000007,0.001488,0.000402,0.001689,0.000403,0.000570,0.000403,0.000529,0.000537,0.000921,0.000470,0.000967,0.000263,0.000188,0.000941,0.000615,0.000672,0.000403,0.000197,0.000336,-0.000067,-0.000098,0.000202,16683,362.673913,515,55834578021,1.213795e+09,4294967282,0.000533,1
3826,32753,441,1.007107,0.006508,0.021141,1.006926,0.006420,0.021146,0.016325,0.010446,0.016294,0.011507,0.016293,0.008475,0.016292,0.008679,0.016517,0.008397,1.651777e-02,0.008242,0.000400,0.000344,0.001999,0.000929,0.001860,0.000627,0.000148,0.000557,-0.000070,-0.000127,0.000627,251528,570.358277,1892,622770310860,1.412178e+09,4294967295,99,284,1.011591,0.002421,0.011724,1.011347,0.002390,0.011993,0.008154,0.009681,0.008790,0.010542,0.008703,0.007776,0.008702,0.007945,0.008502,0.007451,0.008502,0.007389,0.000469,0.000383,0.001999,0.001044,0.001793,0.000627,0.000161,0.000557,-0.000070,-0.000139,0.000627,149883,527.757042,1341,425201789013,1.497189e+09,4294967295,187,1.012778,0.001220,0.006201,1.012542,0.001192,0.006854,-0.000405,0.008005,0.000182,0.009152,0.000138,0.005948,0.000069,0.006135,0.000138,0.006106,0.000207,0.006211,0.000498,0.000369,0.001860,0.001144,0.001654,0.000627,0.000182,0.000557,-0.000070,-0.000133,0.000627,87495,467.887701,1341,287762827231,1.538839e+09,4294967295,91,1.012445,0.000656,0.002993,1.012204,0.000691,0.002845,0.001106,0.005323,0.000317,0.005938,0.000069,0.003870,0.000069,0.004080,0.000895,0.004217,0.000895,0.004267,0.000559,0.000381,0.001655,0.001172,0.001241,0.000557,0.000206,0.000488,-0.000070,-0.000110,0.000348,48668,534.813187,1264,171798700556,1.887898e+09,4294967293,0.002855,1
3827,32758,283,1.000810,0.000571,0.002151,1.000815,0.000587,0.002377,0.001492,0.002914,0.001112,0.004106,0.001071,0.001416,0.001160,0.001886,0.001250,0.001703,1.249984e-03,0.001754,0.000240,0.000168,0.000983,0.000660,0.000626,0.000268,0.000145,0.000179,-0.000089,-0.000124,0.000357,123753,437.289753,797,949187739097,3.354020e+09,4294967289,257,196,1.001130,0.000324,0.001576,1.001123,0.000385,0.001582,0.001020,0.002121,0.000725,0.003471,0.001071,0.001100,0.001070,0.001544,0.001161,0.001380,0.001250,0.001371,0.000256,0.000179,0.000982,0.000639,0.000625,0.000268,0.000150,0.000179,-0.000089,-0.000130,0.000268,82278,419.785714,797,674309843790,3.440356e+09,4294967289,118,1.001247,0.000250,0.000988,1.001240,0.000241,0.001155,0.000347,0.001760,-0.000181,0.002661,0.000268,0.000782,0.000178,0.001196,0.000536,0.001150,0.000536,0.001122,0.000256,0.000175,0.000893,0.000646,0.000447,0.000268,0.000158,0.000179,-0.000089,-0.000132,0.000179,53519,453.550847,732,360777244443,3.057434e+09,4294967289,54,1.001168,0.000279,0.000988,1.001299,0.000217,0.000964,-0.000032,0.001278,-0.000292,0.001772,-0.000089,0.000428,0.000000,0.000535,-0.000089,0.000557,0.000000,0.000691,0.000299,0.000186,0.000892,0.000689,0.000357,0.000268,0.000147,0.000179,-0.000089,-0.000116,0.000179,27320,505.925926,732,111669150160,2.067947e+09,4294967281,0.001155,1
3828,32763,388,1.002958,0.001747,0.006537,1.002969,0.001777,0.006824,0.004229,0.003468,0.004146,0.005344,0.004174,0.003845,0.004418,0.004539,0.004175,0.002620,4.298436e-03,0.003102,0.000201,0.000158,0.001227,0.000423,0.001105,0.000615,0.000194,0.000492,-0.000123,-0.000228,0.000615,205326,529.190722,945,627065240314,1.616148e+09,4294967295,152,265,1.003979,0.000885,0.004127,1.003993,0.000939,0.004059,0.001749,0.002776,0.001723,0.004818,0.001717,0.003434,0.001593,0.003936,0.001717,0.002073,0.001717,0.002452,0.000201,0.000158,0.001227,0.000442,0.001105,0.000615,0.000189,0.000492,-0.000123,-0.000233,0.000615,140690,530.905660,945,433791707148,1.636950e+09,4294967294,176,1.004466,0.000495,0.002118,1.004479,0.000587,0.002336,0.000177,0.002325,0.000510,0.003859,0.000000,0.002699,0.000367,0.003142,0.000368,0.001620,0.000368,0.001910,0.000200,0.000158,0.000858,0.000410,0.000736,0.000492,0.000168,0.000369,-0.000123,-0.000236,0.000615,92251,524.153409,945,261993015567,1.488597e+09,4294967294,81,1.004276,0.000324,0.001214,1.004184,0.000395,0.001696,-0.000497,0.001545,0.000248,0.003059,-0.000490,0.001296,-0.000122,0.001947,-0.000245,0.001249,-0.000245,0.001569,0.000203,0.000155,0.000613,0.000364,0.000490,0.000492,0.000173,0.000369,-0.000123,-0.000223,0.000492,44532,549.777778,893,94489286846,1.166534e+09,4294967290,0.001426,1


In [22]:
df_feat["target"].isna().sum()

0

In [13]:
from tqdm import tqdm
non_tradable=[]
for i in tqdm(range(0,127)):
    try:
        dir_path=f"individual_book_train/stock_{i}.csv"
        data=pd.read_csv(dir_path)
    except:
        non_tradable.append(i)

tradable_id=[i for i in range(0,127) if i not in non_tradable]
tradable_id[:15]

100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 127/127 [02:39<00:00,  1.25s/it]


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15]

In [23]:
def preprocessor(list_stock_ids):
    from joblib import Parallel, delayed 
    df = pd.DataFrame()
    
    def for_joblib(stock_id):
    
            
        df_tmp = feature_generator(stock_id=stock_id)
     
        return pd.concat([df,df_tmp])
    
    df = Parallel(n_jobs=-1, verbose=1)(
        delayed(for_joblib)(stock_id) for stock_id in list_stock_ids
        )

    df =  pd.concat(df,ignore_index = True)
    return df

In [24]:
final_df=preprocessor(tradable_id)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   37.6s
[Parallel(n_jobs=-1)]: Done 112 out of 112 | elapsed:  3.9min finished


In [25]:
final_df.isna().sum().value_counts()

0     144
5       3
78      1
Name: count, dtype: int64

In [26]:
final_df

Unnamed: 0,time_id,book.seconds_in_bucket.count,book.wap1.nanmean,book.wap1.nanstd,book.wap1.difference,book.wap2.nanmean,book.wap2.nanstd,book.wap2.difference,book.log_return1.nansum,book.log_return1.realized_volatility,book.log_return2.nansum,book.log_return2.realized_volatility,book.log_return_ask1.nansum,book.log_return_ask1.realized_volatility,book.log_return_ask2.nansum,book.log_return_ask2.realized_volatility,book.log_return_bid1.nansum,book.log_return_bid1.realized_volatility,book.log_return_bid2.nansum,book.log_return_bid2.realized_volatility,book.wap_balance.nanmean,book.wap_balance.nanstd,book.BidAskSpread.nanmax,book.BidAskSpread.nanmean,book.BidAskSpread.difference,book.bid_spread.nanmax,book.bid_spread.nanmean,book.bid_spread.difference,book.ask_spread.nanmax,book.ask_spread.nanmean,book.ask_spread.difference,book.total_volume.nansum,book.total_volume.nanmean,book.total_volume.difference,book.volume_imbalance.nansum,book.volume_imbalance.nanmean,book.volume_imbalance.nanmax,missing_values,book_3to_9.seconds_in_bucket.count,book_3to_9.wap1.nanmean,book_3to_9.wap1.nanstd,book_3to_9.wap1.difference,book_3to_9.wap2.nanmean,book_3to_9.wap2.nanstd,book_3to_9.wap2.difference,book_3to_9.log_return1.nansum,book_3to_9.log_return1.realized_volatility,book_3to_9.log_return2.nansum,book_3to_9.log_return2.realized_volatility,book_3to_9.log_return_ask1.nansum,book_3to_9.log_return_ask1.realized_volatility,book_3to_9.log_return_ask2.nansum,book_3to_9.log_return_ask2.realized_volatility,book_3to_9.log_return_bid1.nansum,book_3to_9.log_return_bid1.realized_volatility,book_3to_9.log_return_bid2.nansum,book_3to_9.log_return_bid2.realized_volatility,book_3to_9.wap_balance.nanmean,book_3to_9.wap_balance.nanstd,book_3to_9.BidAskSpread.nanmax,book_3to_9.BidAskSpread.nanmean,book_3to_9.BidAskSpread.difference,book_3to_9.bid_spread.nanmax,book_3to_9.bid_spread.nanmean,book_3to_9.bid_spread.difference,book_3to_9.ask_spread.nanmax,book_3to_9.ask_spread.nanmean,book_3to_9.ask_spread.difference,book_3to_9.total_volume.nansum,book_3to_9.total_volume.nanmean,book_3to_9.total_volume.difference,book_3to_9.volume_imbalance.nansum,book_3to_9.volume_imbalance.nanmean,book_3to_9.volume_imbalance.nanmax,book_5to_9.seconds_in_bucket.count,book_5to_9.wap1.nanmean,book_5to_9.wap1.nanstd,book_5to_9.wap1.difference,book_5to_9.wap2.nanmean,book_5to_9.wap2.nanstd,book_5to_9.wap2.difference,book_5to_9.log_return1.nansum,book_5to_9.log_return1.realized_volatility,book_5to_9.log_return2.nansum,book_5to_9.log_return2.realized_volatility,book_5to_9.log_return_ask1.nansum,book_5to_9.log_return_ask1.realized_volatility,book_5to_9.log_return_ask2.nansum,book_5to_9.log_return_ask2.realized_volatility,book_5to_9.log_return_bid1.nansum,book_5to_9.log_return_bid1.realized_volatility,book_5to_9.log_return_bid2.nansum,book_5to_9.log_return_bid2.realized_volatility,book_5to_9.wap_balance.nanmean,book_5to_9.wap_balance.nanstd,book_5to_9.BidAskSpread.nanmax,book_5to_9.BidAskSpread.nanmean,book_5to_9.BidAskSpread.difference,book_5to_9.bid_spread.nanmax,book_5to_9.bid_spread.nanmean,book_5to_9.bid_spread.difference,book_5to_9.ask_spread.nanmax,book_5to_9.ask_spread.nanmean,book_5to_9.ask_spread.difference,book_5to_9.total_volume.nansum,book_5to_9.total_volume.nanmean,book_5to_9.total_volume.difference,book_5to_9.volume_imbalance.nansum,book_5to_9.volume_imbalance.nanmean,book_5to_9.volume_imbalance.nanmax,book_7to_9.seconds_in_bucket.count,book_7to_9.wap1.nanmean,book_7to_9.wap1.nanstd,book_7to_9.wap1.difference,book_7to_9.wap2.nanmean,book_7to_9.wap2.nanstd,book_7to_9.wap2.difference,book_7to_9.log_return1.nansum,book_7to_9.log_return1.realized_volatility,book_7to_9.log_return2.nansum,book_7to_9.log_return2.realized_volatility,book_7to_9.log_return_ask1.nansum,book_7to_9.log_return_ask1.realized_volatility,book_7to_9.log_return_ask2.nansum,book_7to_9.log_return_ask2.realized_volatility,book_7to_9.log_return_bid1.nansum,book_7to_9.log_return_bid1.realized_volatility,book_7to_9.log_return_bid2.nansum,book_7to_9.log_return_bid2.realized_volatility,book_7to_9.wap_balance.nanmean,book_7to_9.wap_balance.nanstd,book_7to_9.BidAskSpread.nanmax,book_7to_9.BidAskSpread.nanmean,book_7to_9.BidAskSpread.difference,book_7to_9.bid_spread.nanmax,book_7to_9.bid_spread.nanmean,book_7to_9.bid_spread.difference,book_7to_9.ask_spread.nanmax,book_7to_9.ask_spread.nanmean,book_7to_9.ask_spread.difference,book_7to_9.total_volume.nansum,book_7to_9.total_volume.nanmean,book_7to_9.total_volume.difference,book_7to_9.volume_imbalance.nansum,book_7to_9.volume_imbalance.nanmean,book_7to_9.volume_imbalance.nanmax,target,stock_id
0,5,274,1.003792,0.000671,0.003486,1.003724,0.000767,0.003735,0.002150,0.004323,0.001810,0.006440,0.002113,0.002230,0.002319,0.002469,2.115126e-03,0.002391,0.001806,0.002729,0.000392,0.000298,0.001394,0.000864,0.001034,0.000672,0.000173,0.000621,-0.000052,-0.000153,0.000517,91752,334.861314,728,644245095236,2.351259e+09,4294967295,266,183,1.003975,0.000288,0.001341,1.003834,0.000423,0.002073,-0.000226,0.003266,-0.001104,0.005280,0.000155,0.001843,0.000154,0.001724,0.000000,0.001790,-0.000309,0.002267,0.000393,0.000278,0.001392,0.000885,0.001031,0.000672,0.000198,0.000621,-0.000052,-0.000151,0.000466,62230,340.054645,693,408021897280,2.229628e+09,4294967295,111,1.003924,0.000270,0.001341,1.003825,0.000449,0.002061,0.000015,0.002678,-0.000242,0.004019,-0.000309,0.001425,-0.000154,0.001279,0.000000,0.001328,-0.000258,0.001953,0.000379,0.000274,0.001340,0.000844,0.000980,0.000672,0.000228,0.000621,-0.000052,-0.000170,0.000414,35051,315.774775,663,270582941381,2.437684e+09,4294967294,61,1.003836,0.000184,0.000858,1.003744,0.000474,0.001956,-0.000738,0.001344,-0.000351,0.003385,0.000052,0.001267,0.000154,0.001191,0.000206,0.000866,-0.000052,0.001805,0.000355,0.000277,0.001135,0.000775,0.000774,0.000620,0.000237,0.000569,-0.000052,-0.000226,0.000414,18764,307.606557,525,163208756066,2.675553e+09,4294967294,0.001246,0
1,11,178,1.000179,0.000199,0.001040,1.000170,0.000257,0.001491,0.000720,0.000918,0.001262,0.002110,0.000702,0.000920,0.000802,0.000706,1.154305e-03,0.000795,0.001054,0.001080,0.000194,0.000141,0.000904,0.000397,0.000753,0.000401,0.000106,0.000351,-0.000050,-0.000136,0.000301,71571,402.084270,830,485331289793,2.726580e+09,4294967292,362,138,1.000257,0.000135,0.000766,1.000217,0.000257,0.001412,0.000565,0.000694,0.000807,0.001746,0.000652,0.000637,0.000853,0.000652,0.000753,0.000592,0.000753,0.001020,0.000187,0.000145,0.000552,0.000347,0.000402,0.000401,0.000093,0.000351,-0.000050,-0.000127,0.000251,58446,423.521739,830,399431944078,2.894434e+09,4294967270,93,1.000320,0.000113,0.000595,1.000311,0.000234,0.001366,0.000457,0.000596,0.000874,0.001534,0.000502,0.000507,0.000652,0.000591,0.000452,0.000463,0.000351,0.000946,0.000212,0.000137,0.000552,0.000349,0.000402,0.000401,0.000100,0.000351,-0.000050,-0.000122,0.000251,45001,483.881720,830,270582931041,2.909494e+09,4294967270,38,1.000390,0.000125,0.000582,1.000293,0.000301,0.001366,0.000194,0.000497,0.001009,0.001366,0.000401,0.000449,0.000652,0.000434,0.000552,0.000398,0.000452,0.000844,0.000239,0.000143,0.000552,0.000345,0.000301,0.000401,0.000088,0.000351,-0.000050,-0.000139,0.000251,16775,441.447368,499,111669147367,2.938662e+09,4294967270,0.000779,0
2,16,180,0.999636,0.000756,0.003423,0.999773,0.000750,0.003979,-0.002223,0.002346,-0.002469,0.004511,-0.002347,0.001677,-0.002347,0.002671,-2.205301e-03,0.001457,-0.002542,0.001961,0.000326,0.000244,0.001150,0.000732,0.000719,0.000670,0.000189,0.000622,-0.000048,-0.000195,0.000670,74847,415.816667,626,429496727291,2.386093e+09,4294967295,360,97,0.999109,0.000647,0.002736,0.999327,0.000703,0.003788,-0.002530,0.001916,-0.003256,0.003584,-0.002252,0.001515,-0.002251,0.002540,-0.002301,0.001335,-0.002925,0.001658,0.000384,0.000286,0.001150,0.000689,0.000719,0.000670,0.000133,0.000622,-0.000048,-0.000256,0.000670,42636,439.546392,616,133143993544,1.372618e+09,4294967295,60,0.998853,0.000667,0.002736,0.999114,0.000770,0.003414,-0.002740,0.001254,-0.002525,0.002741,-0.002347,0.001157,-0.002634,0.001715,-0.002109,0.001133,-0.002686,0.001410,0.000429,0.000298,0.000958,0.000705,0.000479,0.000670,0.000112,0.000622,-0.000048,-0.000247,0.000622,27529,458.816667,509,77309415265,1.288490e+09,4294967279,39,0.998467,0.000439,0.001629,0.998757,0.000698,0.003323,-0.001629,0.001112,-0.002558,0.002489,-0.001677,0.001002,-0.001533,0.001365,-0.001678,0.001047,-0.002303,0.001357,0.000368,0.000283,0.000815,0.000616,0.000336,0.000670,0.000139,0.000622,-0.000048,-0.000187,0.000622,18089,463.820513,476,30064776045,7.708917e+08,4294967279,0.000323,0
3,31,113,0.998899,0.000720,0.002397,0.998677,0.000637,0.002609,-0.001717,0.002399,-0.001039,0.003378,-0.001435,0.001133,-0.001388,0.001273,-1.806316e-03,0.001880,-0.001807,0.002504,0.000391,0.000250,0.001624,0.000846,0.001299,0.000694,0.000192,0.000648,-0.000046,-0.000112,0.000416,48457,428.823009,737,214748371689,1.900428e+09,4294967294,427,67,0.998560,0.000370,0.001413,0.998425,0.000438,0.002152,0.000348,0.001927,0.000320,0.002744,-0.000232,0.000932,-0.000185,0.001078,0.000000,0.001584,0.000464,0.001994,0.000381,0.000255,0.001624,0.000859,0.001299,0.000648,0.000139,0.000602,-0.000046,-0.000121,0.000278,27359,408.343284,497,154618824081,2.307744e+09,4294967294,46,0.998541,0.000423,0.001413,0.998537,0.000456,0.002065,-0.000068,0.001512,0.000575,0.002354,0.000324,0.000863,0.000324,0.000952,-0.000046,0.001583,-0.000046,0.001926,0.000350,0.000235,0.001624,0.000792,0.001299,0.000648,0.000158,0.000602,-0.000046,-0.000103,0.000231,18388,399.739130,497,115964116860,2.520959e+09,4294967286,14,0.998239,0.000234,0.000585,0.998708,0.000109,0.000331,0.000477,0.000361,0.000614,0.000663,0.000000,0.000113,0.000046,0.000139,0.000000,0.000000,0.000510,0.000514,0.000469,0.000241,0.001066,0.001026,0.000093,0.000093,0.000089,0.000046,-0.000046,-0.000050,0.000046,7044,503.142857,365,34359738486,2.454267e+09,4294967286,0.000931,0
4,62,165,0.999628,0.000248,0.000918,0.999624,0.000323,0.001147,-0.000845,0.001587,-0.000403,0.003193,-0.000466,0.000935,-0.000373,0.000978,-2.798411e-04,0.001231,-0.000233,0.001284,0.000245,0.000185,0.000793,0.000397,0.000700,0.000466,0.000190,0.000420,-0.000047,-0.000107,0.000373,57293,347.230303,711,459561489111,2.785221e+09,4294967295,375,112,0.999609,0.000249,0.000791,0.999689,0.000291,0.000947,-0.000598,0.001208,-0.000270,0.002399,-0.000186,0.000746,-0.000233,0.000727,-0.000280,0.000963,-0.000233,0.001063,0.000209,0.000171,0.000793,0.000403,0.000700,0.000466,0.000171,0.000420,-0.000047,-0.000100,0.000373,43085,384.687500,632,309237637671,2.761050e+09,4294967293,78,0.999490,0.000175,0.000783,0.999608,0.000281,0.000947,-0.000197,0.001113,-0.000324,0.002093,-0.000186,0.000666,-0.000140,0.000690,0.000000,0.000808,0.000000,0.000850,0.000232,0.000180,0.000793,0.000429,0.000607,0.000466,0.000190,0.000420,-0.000047,-0.000118,0.000373,33161,425.141026,628,197568493119,2.532929e+09,4294967293,34,0.999535,0.000195,0.000779,0.999771,0.000256,0.000893,-0.000043,0.000987,0.000323,0.001254,0.000140,0.000573,0.000280,0.000604,0.000187,0.000412,0.000187,0.000462,0.000315,0.000208,0.000793,0.000521,0.000560,0.000466,0.000219,0.000420,-0.000047,-0.000145,0.000373,13302,391.235294,479,115964114084,3.410709e+09,4294967293,0.001035,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
428927,32751,278,0.999618,0.000491,0.002485,0.999648,0.000610,0.002838,-0.000841,0.003425,-0.000905,0.005712,-0.001306,0.002077,-0.001371,0.001926,-1.176235e-03,0.002112,-0.000980,0.002334,0.000366,0.000277,0.001570,0.000870,0.001178,0.000686,0.000088,0.000653,-0.000033,-0.000212,0.000555,115411,415.147482,871,790273953089,2.842712e+09,4294967295,262,172,0.999346,0.000353,0.001551,0.999376,0.000547,0.002459,-0.000271,0.003090,-0.001255,0.004904,-0.000359,0.001723,-0.000686,0.001657,-0.000360,0.001660,-0.000392,0.001829,0.000393,0.000298,0.001570,0.000893,0.001145,0.000686,0.000088,0.000653,-0.000033,-0.000200,0.000522,74252,431.697674,695,502511154538,2.921576e+09,4294967294,109,0.999407,0.000307,0.001551,0.999329,0.000319,0.001515,0.000023,0.002553,-0.000394,0.003515,0.000457,0.001302,0.000490,0.001085,0.000687,0.001296,0.000654,0.001375,0.000292,0.000248,0.001570,0.000871,0.001145,0.000686,0.000085,0.000653,-0.000033,-0.000176,0.000425,42700,391.743119,695,339302403882,3.112866e+09,4294967294,56,0.999358,0.000244,0.000988,0.999228,0.000316,0.001484,-0.000508,0.001131,-0.000570,0.002152,-0.000065,0.000957,-0.000229,0.000713,-0.000033,0.000626,-0.000065,0.000916,0.000321,0.000224,0.001210,0.000755,0.000785,0.000359,0.000076,0.000327,-0.000033,-0.000162,0.000425,21663,386.839286,695,188978555453,3.374617e+09,4294967294,0.001374,126
428928,32753,191,1.002089,0.000853,0.003050,1.002215,0.000929,0.003032,0.002199,0.003394,0.002444,0.004442,0.002407,0.001988,0.002441,0.002185,2.614460e-03,0.001886,0.002614,0.002213,0.000301,0.000226,0.001135,0.000694,0.000859,0.000724,0.000132,0.000689,-0.000034,-0.000131,0.000483,45680,239.162304,649,631360166596,3.305551e+09,4294967295,349,118,1.002649,0.000557,0.002479,1.002799,0.000671,0.003023,0.002214,0.002980,0.002755,0.003654,0.002372,0.001821,0.002200,0.001998,0.002614,0.001752,0.002614,0.002101,0.000311,0.000236,0.001135,0.000698,0.000791,0.000724,0.000156,0.000689,-0.000034,-0.000140,0.000483,24314,206.050847,649,399431942970,3.385016e+09,4294967295,73,1.002978,0.000348,0.001625,1.003181,0.000311,0.001226,0.001101,0.002570,0.000623,0.002528,0.000859,0.001672,0.000893,0.001567,0.001203,0.001447,0.001203,0.001927,0.000288,0.000238,0.001135,0.000709,0.000791,0.000724,0.000170,0.000689,-0.000034,-0.000151,0.000483,16888,231.342466,649,236223190040,3.235934e+09,4294967295,26,1.003253,0.000290,0.000817,1.003375,0.000225,0.000771,0.000332,0.001229,0.000098,0.001449,0.000515,0.000632,0.000446,0.000446,0.000997,0.000778,0.001375,0.001079,0.000284,0.000264,0.000859,0.000595,0.000516,0.000483,0.000143,0.000448,-0.000034,-0.000135,0.000310,4072,156.615385,634,60129539998,2.312675e+09,4294967288,0.002307,126
428929,32758,224,1.000989,0.000398,0.002092,1.000916,0.000512,0.002298,0.000237,0.002919,0.001449,0.005683,0.000345,0.001505,0.000591,0.001816,1.085473e-03,0.002543,0.000888,0.002733,0.000374,0.000288,0.001283,0.000737,0.001135,0.000790,0.000175,0.000741,-0.000049,-0.000181,0.000444,76979,343.656250,925,880468243061,3.930662e+09,4294967295,316,164,1.001056,0.000411,0.001662,1.001014,0.000513,0.002298,0.001746,0.002668,0.002098,0.005190,0.001134,0.001246,0.001183,0.001693,0.001530,0.002184,0.001333,0.002446,0.000369,0.000297,0.001283,0.000706,0.001135,0.000790,0.000201,0.000741,-0.000049,-0.000187,0.000444,55151,336.286585,925,635655123137,3.875946e+09,4294967295,135,1.001174,0.000344,0.001588,1.001109,0.000490,0.001709,0.000966,0.002568,0.000656,0.005008,0.000641,0.001202,0.000641,0.001531,0.001085,0.001879,0.000888,0.002178,0.000405,0.000303,0.001283,0.000692,0.001135,0.000790,0.000234,0.000741,-0.000049,-0.000183,0.000395,45247,335.162963,925,519691012831,3.849563e+09,4294967295,71,1.001364,0.000196,0.000838,1.001272,0.000453,0.001390,0.000478,0.001417,0.000643,0.002991,0.000592,0.000623,0.000641,0.000916,0.000395,0.001381,0.000197,0.001415,0.000385,0.000312,0.001283,0.000613,0.000987,0.000790,0.000291,0.000741,-0.000049,-0.000207,0.000346,23157,326.154930,902,292057756531,4.113490e+09,4294967295,0.001096,126
428930,32763,362,1.001871,0.000430,0.002401,1.001862,0.000472,0.002472,0.000914,0.003602,0.000812,0.005224,0.000658,0.002666,0.000394,0.003036,7.235296e-04,0.002592,0.000658,0.002723,0.000233,0.000180,0.001117,0.000540,0.000985,0.000790,0.000143,0.000724,-0.000066,-0.000134,0.000461,153026,422.723757,983,476741396028,1.316965e+09,4294967295,178,235,1.001892,0.000403,0.001903,1.001879,0.000432,0.001985,-0.000881,0.002948,-0.000920,0.004016,-0.000854,0.002424,-0.000854,0.002496,-0.000723,0.002147,-0.000460,0.002073,0.000208,0.000169,0.001117,0.000529,0.000985,0.000790,0.000150,0.000724,-0.000066,-0.000120,0.000329,103929,442.251064,969,304942699451,1.297629e+09,4294967295,154,1.001952,0.000402,0.001903,1.001939,0.000424,0.001985,-0.001101,0.002287,-0.001371,0.002944,-0.001117,0.001953,-0.001117,0.002116,-0.000854,0.001791,-0.000526,0.001850,0.000197,0.000159,0.001117,0.000507,0.000985,0.000790,0.000153,0.000724,-0.000066,-0.000115,0.000329,72882,473.259740,757,154618838782,1.004018e+09,4294967295,76,1.001825,0.000393,0.001903,1.001796,0.000422,0.001863,0.000028,0.001671,0.000255,0.001618,-0.000131,0.001480,-0.000131,0.001622,0.000197,0.001110,0.000131,0.001039,0.000175,0.000132,0.001117,0.000494,0.000985,0.000461,0.000132,0.000395,-0.000066,-0.000125,0.000329,35960,473.157895,678,42949684592,5.651274e+08,4294967295,0.000654,126


In [14]:
final_df.to_csv("merge_regression_data.csv")

In [16]:
final_df.to_pickle("merge_regression_data.pkl")