In [2]:
import polars as pl

In [3]:
lf = pl.scan_parquet('~/Interning/Kaggle/jane_street_kaggle/jane-street-real-time-market-data-forecasting/train.parquet/').fill_null(3)

In [15]:
schema = lf.collect_schema()

In [16]:
schema

Schema([('date_id', Int16),
        ('time_id', Int16),
        ('symbol_id', Int8),
        ('weight', Float32),
        ('feature_00', Float32),
        ('feature_01', Float32),
        ('feature_02', Float32),
        ('feature_03', Float32),
        ('feature_04', Float32),
        ('feature_05', Float32),
        ('feature_06', Float32),
        ('feature_07', Float32),
        ('feature_08', Float32),
        ('feature_09', Int8),
        ('feature_10', Int8),
        ('feature_11', Int16),
        ('feature_12', Float32),
        ('feature_13', Float32),
        ('feature_14', Float32),
        ('feature_15', Float32),
        ('feature_16', Float32),
        ('feature_17', Float32),
        ('feature_18', Float32),
        ('feature_19', Float32),
        ('feature_20', Float32),
        ('feature_21', Float32),
        ('feature_22', Float32),
        ('feature_23', Float32),
        ('feature_24', Float32),
        ('feature_25', Float32),
        ('feature_26', Float32),
   

In [5]:
assert 'date_id' in schema, "Missing date_id column"

In [7]:
for i in range(79):
    col_name = f'feature_{i:02d}'
    assert col_name in schema, "Missing {col_name} column"
    assert schema[col_name] == pl.Float32, f"{type(col_name)} should be {pl.Float32}"

AssertionError: <class 'str'> should be Float32

In [4]:
feature_cols = [f"feature_{i:02d}" for i in range(79)]

In [7]:
top_5 = lf.select([pl.col(col).quantile(.95) for col in feature_cols[:5]]).collect()

In [8]:
top_5

feature_00,feature_01,feature_02,feature_03,feature_04
f32,f32,f32,f32,f32
3.159119,1.810017,3.156065,3.154454,1.703309


In [6]:
query = lf.select([
    pl.col('date_id'),
    pl.col('responder_6').alias('resp'),
    pl.col('weight'),
    *[pl.col(f) for f in feature_cols]
]).limit(1000000).sort('date_id')

In [26]:
query.

In [27]:
stats = query.select([
    *[pl.col(col).quantile(.95).alias(f"{col}_q95") for col in feature_cols],
    *[pl.col(col).quantile(.05).alias(f"{col}_q05") for col in feature_cols],
    *[pl.col(col).std().alias(f"{col}_std") for col in feature_cols],
    pl.col('resp').quantile(.95).alias("resp_q95"),
    pl.col('resp').quantile(.05).alias("resp_q05"),
    pl.col('resp').std().alias("resp_std"),
]
).collect()

In [15]:
stats

feature_00_q95,feature_01_q95,feature_02_q95,feature_03_q95,feature_04_q95,feature_05_q95,feature_06_q95,feature_07_q95,feature_08_q95,feature_09_q95,feature_10_q95,feature_11_q95,feature_12_q95,feature_13_q95,feature_14_q95,feature_15_q95,feature_16_q95,feature_17_q95,feature_18_q95,feature_19_q95,feature_20_q95,feature_21_q95,feature_22_q95,feature_23_q95,feature_24_q95,feature_25_q95,feature_26_q95,feature_27_q95,feature_28_q95,feature_29_q95,feature_30_q95,feature_31_q95,feature_32_q95,feature_33_q95,feature_34_q95,feature_35_q95,feature_36_q95,…,feature_45_std,feature_46_std,feature_47_std,feature_48_std,feature_49_std,feature_50_std,feature_51_std,feature_52_std,feature_53_std,feature_54_std,feature_55_std,feature_56_std,feature_57_std,feature_58_std,feature_59_std,feature_60_std,feature_61_std,feature_62_std,feature_63_std,feature_64_std,feature_65_std,feature_66_std,feature_67_std,feature_68_std,feature_69_std,feature_70_std,feature_71_std,feature_72_std,feature_73_std,feature_74_std,feature_75_std,feature_76_std,feature_77_std,feature_78_std,resp_q95,resp_q05,resp_std
f32,f32,f32,f32,f32,f32,f32,f32,f32,f64,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
3.0,3.0,3.0,3.0,3.0,1.306166,1.48272,0.828503,-0.496455,44.0,7.0,150.0,-0.550296,2.976024,0.49229,3.0,1.341418,3.0,-0.717197,-0.879003,1.412757,3.0,1.636431,1.522133,-0.573883,0.964037,3.0,3.0,1.378301,0.580907,1.128879,3.0,3.0,3.0,1.078662,1.637528,3.114953,…,0.0,0.0,9.415174,54.008766,16.022173,0.0,0.700603,0.0,0.0,1.467331,0.0,0.743412,0.809627,0.0,2.377194,1.426087,0.0,0.0,0.0,0.0,0.0,0.0,0.17661,0.596004,0.236987,0.27884,1.285636,0.543738,0.0,0.0,0.753187,0.744525,0.997021,0.98731,2.109352,-2.848184,1.437894


In [28]:
normalized_features_and_resp = []
for col in feature_cols + ['resp']:
    q05 = stats.get_column(f'{col}_q05')[0]
    q95 = stats.get_column(f'{col}_q95')[0]
    std = stats.get_column(f'{col}_std')[0]

    center = (q95 + q05) / 2
    scale = (q95 - q05) / 2 if abs(q95 - q05) > 1e-10 else std if std > 1e-10 else 1.0
    normalized_features_and_resp.append(
        pl.when(pl.col(col) > q95)
        .then(1.0)
        .when(pl.col(col) < q05)
        .then(-1.0)
        .otherwise((pl.col(col) - center) / scale)
        .alias(f"{col}_normalized")
    )

temp_lf = query.select([pl.col("date_id"),*normalized_features_and_resp])

In [28]:
q05 = stats.get_column(f'{"feature_00_q05"}')

In [30]:
q05[0]

3.0

In [29]:
temp_lf.collect()

date_id,feature_00_normalized,feature_01_normalized,feature_02_normalized,feature_03_normalized,feature_04_normalized,feature_05_normalized,feature_06_normalized,feature_07_normalized,feature_08_normalized,feature_09_normalized,feature_10_normalized,feature_11_normalized,feature_12_normalized,feature_13_normalized,feature_14_normalized,feature_15_normalized,feature_16_normalized,feature_17_normalized,feature_18_normalized,feature_19_normalized,feature_20_normalized,feature_21_normalized,feature_22_normalized,feature_23_normalized,feature_24_normalized,feature_25_normalized,feature_26_normalized,feature_27_normalized,feature_28_normalized,feature_29_normalized,feature_30_normalized,feature_31_normalized,feature_32_normalized,feature_33_normalized,feature_34_normalized,feature_35_normalized,…,feature_43_normalized,feature_44_normalized,feature_45_normalized,feature_46_normalized,feature_47_normalized,feature_48_normalized,feature_49_normalized,feature_50_normalized,feature_51_normalized,feature_52_normalized,feature_53_normalized,feature_54_normalized,feature_55_normalized,feature_56_normalized,feature_57_normalized,feature_58_normalized,feature_59_normalized,feature_60_normalized,feature_61_normalized,feature_62_normalized,feature_63_normalized,feature_64_normalized,feature_65_normalized,feature_66_normalized,feature_67_normalized,feature_68_normalized,feature_69_normalized,feature_70_normalized,feature_71_normalized,feature_72_normalized,feature_73_normalized,feature_74_normalized,feature_75_normalized,feature_76_normalized,feature_77_normalized,feature_78_normalized,resp_normalized
i16,f32,f32,f32,f32,f32,f32,f32,f32,f32,f64,f64,f64,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,…,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
0,0.0,0.0,0.0,0.0,0.0,0.662582,0.195808,0.222406,-0.81331,-0.787879,1.0,-0.745597,-1.0,-0.520421,-1.0,1.0,-0.63652,1.0,-1.0,-1.0,0.316823,0.0,0.587119,0.726357,-0.863244,-0.419099,0.0,0.0,0.81877,-0.763899,-0.585566,0.0,1.0,1.0,0.524081,0.520284,…,-0.113731,1.0,1.0,1.0,0.469799,1.0,0.602127,1.0,0.169913,1.0,1.0,-0.446845,1.0,-1.0,0.454158,1.0,-0.870966,-0.239611,-0.738164,1.0,1.0,1.0,1.0,1.0,-1.0,-0.658768,-0.857611,-1.0,-0.396898,-0.921999,1.0,1.0,-0.717852,-0.556947,-0.835269,-0.647074,0.627418
0,0.0,0.0,0.0,0.0,0.0,0.537033,0.126725,0.169875,-0.520382,-0.787879,1.0,-0.745597,-1.0,-0.721275,-0.885144,1.0,-0.353748,1.0,-0.961537,-1.0,0.597138,0.0,-0.117227,0.172723,-0.232123,0.088127,0.0,0.0,0.149996,-0.213859,0.009058,0.0,1.0,1.0,-0.7367,-0.661644,…,1.0,1.0,1.0,1.0,-1.0,0.016934,-0.388681,1.0,0.652369,1.0,1.0,-0.865944,1.0,-0.754701,0.655693,1.0,0.805321,0.391549,-0.738164,1.0,1.0,1.0,1.0,1.0,-1.0,-0.534556,-0.937508,-1.0,-0.904844,-0.982865,1.0,1.0,-0.754196,-0.50603,-0.638358,-0.691581,0.567851
0,0.0,0.0,0.0,0.0,0.0,0.81062,0.153484,0.21241,-0.719375,-0.787879,1.0,-0.745597,-0.87778,-0.698243,-0.888689,1.0,-1.0,1.0,-0.990797,-1.0,0.165942,0.0,0.56246,0.111086,-0.696721,-0.073436,0.0,0.0,0.007683,-0.785305,-0.730766,0.0,1.0,1.0,0.100812,-0.341318,…,1.0,1.0,1.0,1.0,-0.668662,-1.0,-1.0,1.0,0.334928,1.0,1.0,-0.404471,1.0,-1.0,1.0,1.0,-0.095685,-0.02093,-0.738164,1.0,1.0,1.0,1.0,1.0,-1.0,-0.620804,-0.957211,-0.975708,-0.763583,-0.959271,1.0,1.0,0.454557,0.355209,-0.334816,-0.254527,1.0
0,0.0,0.0,0.0,0.0,0.0,0.870543,0.218856,0.25436,-1.0,0.151515,0.333333,-0.455969,-0.890666,1.0,-0.352086,1.0,-0.684299,1.0,-1.0,-0.62975,-0.05626,0.0,-0.69326,-0.516707,-1.0,-0.783282,0.0,0.0,0.252797,-0.951213,-0.59591,0.0,1.0,1.0,0.708937,0.270173,…,1.0,1.0,1.0,1.0,0.365428,1.0,0.451984,1.0,1.0,1.0,1.0,0.640315,1.0,-0.386004,1.0,1.0,1.0,1.0,-0.738164,1.0,1.0,1.0,1.0,1.0,-0.921635,0.531913,-0.634312,-0.741905,1.0,-0.005038,1.0,1.0,-0.654469,-0.628048,-0.572619,-0.678412,0.905961
0,0.0,0.0,0.0,0.0,0.0,0.737713,0.210562,0.282432,-0.593293,0.212121,-0.333333,-0.980431,-1.0,-0.549479,-0.824124,1.0,-0.16296,1.0,-1.0,-0.98605,-0.292273,0.0,-1.0,-0.8363,-0.640442,-1.0,0.0,0.0,0.043452,-0.787127,-0.677158,0.0,1.0,1.0,0.264257,-0.367233,…,1.0,1.0,1.0,1.0,-1.0,-1.0,-1.0,1.0,0.692771,1.0,1.0,0.211352,1.0,-0.362408,1.0,1.0,1.0,0.347956,-0.738164,1.0,1.0,1.0,1.0,1.0,-1.0,-0.685468,-0.823473,-1.0,-0.445844,-0.923536,1.0,1.0,1.0,1.0,1.0,1.0,-1.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
101,0.0,0.0,0.0,0.0,0.0,-0.062818,0.107517,-0.084064,-0.350803,0.393939,-1.0,1.0,-0.92372,-0.919166,-0.899961,-0.703061,-0.606383,-0.730807,0.337261,0.783803,0.15387,0.0,-0.531845,-0.208822,-0.141404,0.228736,0.0,0.0,0.381483,-0.660589,-0.628209,0.0,0.799041,-0.675174,0.977674,1.0,…,-0.770957,-0.79237,-0.066509,-0.245036,-0.010647,0.016861,-0.028769,-0.908731,0.270999,-0.536186,-0.240265,-0.056335,-0.064077,-0.018911,-0.099126,-0.11281,0.015428,-0.085991,-0.537113,-0.838565,-0.893049,-0.941961,0.192434,0.699424,-0.919577,-0.887098,-0.808844,-0.706996,-0.884357,-1.0,-0.600202,-0.423371,-0.506333,-0.720926,-0.564162,-0.656408,-0.031448
101,0.0,0.0,0.0,0.0,0.0,-0.132548,0.033207,-0.067455,-0.466325,-0.787879,1.0,-0.745597,-0.644037,-0.846917,-0.679286,-0.738759,-1.0,-0.840606,-0.176956,1.0,0.282871,0.0,0.213043,0.023845,-0.625307,-0.322064,0.0,0.0,0.460934,-0.852751,-0.74378,0.0,0.879945,-0.365823,1.0,1.0,…,-0.55997,-0.34837,0.008467,-0.617918,-0.035365,-0.09153,-0.271346,-0.598209,-0.526855,-0.073771,-0.287951,0.265399,-0.360761,0.932877,0.088061,-0.094963,0.092336,-0.006648,-0.537113,-0.437952,-0.296328,-0.420755,-0.536533,0.289696,-0.637279,-0.852743,-0.716734,-0.495004,-0.592973,-0.555326,-0.68503,-0.610083,-0.680137,-0.599191,-0.635256,-0.688079,-0.204388
101,0.0,0.0,0.0,0.0,0.0,-0.149836,0.039117,-0.143782,-0.588131,-0.787879,1.0,-0.745597,-0.487319,-0.929525,-0.62532,-0.736228,-0.776881,-0.762555,-0.203827,1.0,0.191422,0.0,0.738954,0.598304,-0.818206,-0.600016,0.0,0.0,1.0,-0.813294,-0.729428,0.0,0.597222,0.258456,0.777929,1.0,…,-0.190366,-0.105578,0.277578,0.248418,0.374707,0.044783,0.162714,-0.462769,-0.799879,0.124075,-0.383061,-0.479723,0.03335,0.784185,0.598739,0.202148,0.040602,0.004238,-0.537113,-0.586795,-0.923582,-0.752832,-0.770719,0.713245,-0.505111,-0.827044,-0.678651,-0.435477,-0.808931,-0.643841,-0.84629,-0.616799,-0.761623,-0.543518,-0.776484,-0.585873,-0.83371
101,0.0,0.0,0.0,0.0,0.0,-0.036972,0.028305,-0.051021,-0.390288,1.0,-0.666667,-0.812133,-0.331163,-0.839461,0.282202,-0.865456,-0.58134,-0.830028,1.0,1.0,-0.623401,0.0,0.27885,0.125745,-0.517641,0.631302,0.0,0.0,0.575724,-0.994528,-0.907418,0.0,-1.0,0.741402,-1.0,-1.0,…,0.525137,0.078575,-0.295641,-0.454207,-0.097651,0.009677,-0.293467,-0.818865,-0.04311,-0.193071,-0.910164,-0.272983,-0.505921,-0.577237,-0.090461,-0.9389,-0.040299,-0.514789,-0.537113,0.161249,-0.093395,0.385974,0.7426,-0.559925,-0.103385,-0.712681,0.949995,-0.598914,-0.741091,-0.318975,-0.420779,-0.479158,-0.592035,-0.680423,-0.552032,-0.481016,0.730099


In [24]:
temp_lf.describe()

statistic,date_id,feature_00_normalized,feature_01_normalized,feature_02_normalized,feature_03_normalized,feature_04_normalized,feature_05_normalized,feature_06_normalized,feature_07_normalized,feature_08_normalized,feature_09_normalized,feature_10_normalized,feature_11_normalized,feature_12_normalized,feature_13_normalized,feature_14_normalized,feature_15_normalized,feature_16_normalized,feature_17_normalized,feature_18_normalized,feature_19_normalized,feature_20_normalized,feature_21_normalized,feature_22_normalized,feature_23_normalized,feature_24_normalized,feature_25_normalized,feature_26_normalized,feature_27_normalized,feature_28_normalized,feature_29_normalized,feature_30_normalized,feature_31_normalized,feature_32_normalized,feature_33_normalized,feature_34_normalized,…,feature_43_normalized,feature_44_normalized,feature_45_normalized,feature_46_normalized,feature_47_normalized,feature_48_normalized,feature_49_normalized,feature_50_normalized,feature_51_normalized,feature_52_normalized,feature_53_normalized,feature_54_normalized,feature_55_normalized,feature_56_normalized,feature_57_normalized,feature_58_normalized,feature_59_normalized,feature_60_normalized,feature_61_normalized,feature_62_normalized,feature_63_normalized,feature_64_normalized,feature_65_normalized,feature_66_normalized,feature_67_normalized,feature_68_normalized,feature_69_normalized,feature_70_normalized,feature_71_normalized,feature_72_normalized,feature_73_normalized,feature_74_normalized,feature_75_normalized,feature_76_normalized,feature_77_normalized,feature_78_normalized,resp_normalized
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""count""",100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,…,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
"""null_count""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""mean""",5.13341,0.0,0.0,0.0,0.0,0.0,0.132316,0.035556,0.067053,-0.394021,-0.488853,0.305923,-0.535338,-0.30697,-0.580529,-0.396917,-0.52672,-0.273254,-0.33771,-0.04208,-0.049747,0.154009,0.0,-0.108384,0.067751,-0.203608,0.016681,0.0,0.0,0.03612,-0.258544,-0.308417,0.0,0.006044,0.003837,0.077464,…,-0.020518,0.045668,-0.039242,0.097693,0.096893,0.029725,0.024623,-0.099372,0.138103,-0.014765,-0.143791,-0.132638,-0.219894,0.030249,0.057312,0.053441,0.014738,0.041865,0.053897,-0.204777,-0.255601,-0.209524,-0.134801,-0.094335,-0.351819,-0.631954,-0.442002,-0.295942,-0.624703,-0.404703,-0.546486,-0.477002,-0.621585,-0.52557,-0.557405,-0.478127,-0.015313
"""std""",3.259245,0.0,0.0,0.0,0.0,0.0,0.494506,0.476305,0.487268,0.44502,0.566982,0.805181,0.60956,0.543822,0.539291,0.543115,0.476189,0.521386,0.501276,0.538931,0.565342,0.527538,0.0,0.50836,0.513694,0.474212,0.561775,0.0,0.0,0.484326,0.593327,0.534745,0.0,0.483461,0.568833,0.530373,…,0.628656,0.635019,0.61223,0.643033,0.462094,0.392066,0.460709,0.604403,0.596597,0.519582,0.604124,0.603423,0.499063,0.564216,0.538677,0.487904,0.420025,0.47641,0.682926,0.71537,0.694217,0.702859,0.610044,0.592895,0.540792,0.528233,0.541067,0.544276,0.529762,0.548147,0.502727,0.492871,0.453109,0.436659,0.488351,0.474851,0.496543
"""min""",0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,…,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
"""25%""",2.0,0.0,0.0,0.0,0.0,0.0,-0.116327,-0.199098,-0.189823,-0.656405,-0.818182,-0.333333,-0.800391,-0.731537,-0.926303,-0.809594,-0.819075,-0.661923,-0.686891,-0.445803,-0.455914,-0.097302,0.0,-0.495822,-0.207388,-0.571148,-0.384515,0.0,0.0,-0.22949,-0.713595,-0.717146,0.0,-0.263896,-0.45552,-0.276071,…,-0.594545,-0.520952,-0.531823,-0.429366,-0.125018,0.025471,-0.175943,-0.550109,-0.314496,-0.37027,-0.580749,-0.597953,-0.573574,-0.387344,-0.323737,-0.215725,-0.076735,-0.191966,-0.590772,-0.743591,-0.891796,-0.7553,-0.566181,-0.540375,-0.762491,-0.93994,-0.843047,-0.723653,-0.937356,-0.821572,-0.837716,-0.760315,-0.831108,-0.726723,-0.830052,-0.741378,-0.313377
"""50%""",5.0,0.0,0.0,0.0,0.0,0.0,0.186212,0.054868,0.098761,-0.439255,-0.818182,1.0,-0.745597,-0.425128,-0.821479,-0.559495,-0.666316,-0.358621,-0.451189,-0.044575,-0.126796,0.159247,0.0,-0.096019,0.054162,-0.204529,0.010658,0.0,0.0,0.039424,-0.279228,-0.479328,0.0,0.022931,0.020917,0.101935,…,0.016345,0.135333,-0.066969,0.045865,0.133171,0.040712,0.047084,-0.196396,0.289301,-0.039386,-0.270343,-0.287881,-0.257584,0.04697,0.08237,0.086421,0.024332,0.067556,0.19146,-0.53275,-0.511674,-0.486805,-0.28857,-0.206783,-0.497184,-0.868366,-0.621326,-0.40578,-0.862454,-0.576515,-0.717445,-0.626906,-0.745763,-0.622379,-0.713454,-0.608878,-0.013859
"""75%""",8.0,0.0,0.0,0.0,0.0,0.0,0.440291,0.283317,0.350812,-0.280234,-0.012987,1.0,-0.745597,-0.017608,-0.468273,-0.144039,-0.427529,-0.001546,-0.120407,0.306369,0.40069,0.542126,0.0,0.174235,0.417382,0.02292,0.504567,0.0,0.0,0.327529,0.003479,-0.04934,0.0,0.294426,0.454271,0.461755,…,0.507109,0.560287,0.333523,0.68753,0.346502,0.052131,0.22374,0.21499,0.588746,0.338038,0.125828,0.3647,0.034617,0.460624,0.450711,0.33771,0.112044,0.288199,0.679095,0.359738,0.41281,0.41593,0.193869,0.214457,-0.07672,-0.591517,-0.224768,-0.0002,-0.573956,-0.151256,-0.508577,-0.429148,-0.644174,-0.516195,-0.541434,-0.436461,0.278907
"""max""",11.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,…,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [7]:
query.select('weight').describe()

statistic,weight
str,f64
"""count""",1000000.0
"""null_count""",0.0
"""mean""",2.072165
"""std""",1.06575
"""min""",0.44057
"""25%""",1.295378
"""50%""",1.799324
"""75%""",2.608444
"""max""",6.011999
