In [1]:
import warnings
warnings.simplefilter('ignore')

import gc

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd
pd.set_option('max_columns', 500)
pd.set_option('max_rows', 200)
pd.set_option('float_format', lambda x: '%.6f' % x)

from tqdm import tqdm
tqdm.pandas()

In [2]:
train = pd.read_csv('./jet_complex_data/complex_train_R04_jet.csv')
test = pd.read_csv('./jet_complex_data/complex_test_R04_jet.csv')

df = pd.concat([train, test], axis=0)

In [3]:
def l2_dict(x, y, z): 
    return np.sqrt(np.square(x)+np.square(y)+np.square(z))

df['jet_distance'] = df.progress_apply(lambda row: l2_dict(row.jet_px, row.jet_py, row.jet_pz), axis=1)

100%|██████████| 1672504/1672504 [01:44<00:00, 16009.63it/s]


In [4]:
df['xy_dis'] = df.progress_apply(lambda row: l2_dict(row.jet_px, row.jet_py, 0), axis=1)
df['yz_dis'] = df.progress_apply(lambda row: l2_dict(0, row.jet_py, row.jet_pz), axis=1)
df['zx_dis'] = df.progress_apply(lambda row: l2_dict(row.jet_px, 0, row.jet_pz), axis=1)

100%|██████████| 1672504/1672504 [01:23<00:00, 20143.21it/s]
100%|██████████| 1672504/1672504 [01:32<00:00, 17999.05it/s]
100%|██████████| 1672504/1672504 [01:22<00:00, 20185.05it/s]


In [6]:
df['x_div_dist'] = df['jet_px'] / df['jet_distance']
df['y_div_dist'] = df['jet_py'] / df['jet_distance']
df['z_div_dist'] = df['jet_pz'] / df['jet_distance']
df['xy_div_dist'] = df['xy_dis'] / df['jet_distance']
df['yz_div_dist'] = df['yz_dis'] / df['jet_distance']
df['zx_div_dist'] = df['zx_dis'] / df['jet_distance']

In [7]:
df['energy_x'] = df['jet_energy'] * df['x_div_dist']
df['energy_y'] = df['jet_energy'] * df['y_div_dist']
df['energy_z'] = df['jet_energy'] * df['z_div_dist']
df['energy_xy'] = df['jet_energy'] * df['xy_div_dist']
df['energy_yz'] = df['jet_energy'] * df['yz_div_dist']
df['energy_zx'] = df['jet_energy'] * df['zx_div_dist']

In [8]:
def fill_zero(x):
    return 0 if x < 0 else x

df.jet_mass = df.jet_mass.progress_apply(lambda x: fill_zero(x))

100%|██████████| 1672504/1672504 [00:01<00:00, 972572.61it/s] 


In [9]:
df['mass_x'] = df['jet_mass'] * df['x_div_dist']
df['mass_y'] = df['jet_mass'] * df['y_div_dist']
df['mass_z'] = df['jet_mass'] * df['z_div_dist']
df['mass_xy'] = df['jet_mass'] * df['xy_div_dist']
df['mass_yz'] = df['jet_mass'] * df['yz_div_dist']
df['mass_zx'] = df['jet_mass'] * df['zx_div_dist']

In [10]:
def angle(x, y):
    return np.degrees(np.math.atan(x / y)) if y != 0 else 0

df['angle_xy'] = df.progress_apply(lambda row: angle(row.jet_px, row.jet_py), axis=1)
df['angle_yx'] = df.progress_apply(lambda row: angle(row.jet_py, row.jet_px), axis=1)
df['angle_yz'] = df.progress_apply(lambda row: angle(row.jet_py, row.jet_pz), axis=1)
df['angle_zy'] = df.progress_apply(lambda row: angle(row.jet_pz, row.jet_py), axis=1)
df['angle_zx'] = df.progress_apply(lambda row: angle(row.jet_pz, row.jet_px), axis=1)
df['angle_xz'] = df.progress_apply(lambda row: angle(row.jet_px, row.jet_pz), axis=1)

100%|██████████| 1672504/1672504 [01:15<00:00, 22211.61it/s]
100%|██████████| 1672504/1672504 [01:14<00:00, 22358.73it/s]
100%|██████████| 1672504/1672504 [01:15<00:00, 22288.38it/s]
100%|██████████| 1672504/1672504 [01:15<00:00, 22209.42it/s]
100%|██████████| 1672504/1672504 [01:15<00:00, 22297.72it/s]
100%|██████████| 1672504/1672504 [01:15<00:00, 22259.43it/s]


In [11]:
df['mean_particles_mass'] = df['jet_mass'] / df['number_of_particles_in_this_jet']
df['mean_particles_energy'] = df['jet_energy'] / df['number_of_particles_in_this_jet']

In [12]:
def calculate_speed(e, m):
    return np.sqrt(2*e/m) if m > 0 else 0

df['jet_speed'] = df.progress_apply(lambda row: calculate_speed(row.jet_energy, row.jet_mass), axis=1)

100%|██████████| 1672504/1672504 [01:15<00:00, 22181.95it/s]


In [13]:
df['speed_x'] = df['jet_speed'] * df['x_div_dist']
df['speed_y'] = df['jet_speed'] * df['y_div_dist']
df['speed_z'] = df['jet_speed'] * df['z_div_dist']
df['speed_xy'] = df['jet_speed'] * df['xy_div_dist']
df['speed_yz'] = df['jet_speed'] * df['yz_div_dist']
df['speed_zx'] = df['jet_speed'] * df['zx_div_dist']

In [14]:
def calculate_travel_time(d, v):
    return np.abs(d) / v if v > 0 else 0

df['time_dis'] = df.progress_apply(lambda row: calculate_travel_time(row.jet_distance, row.jet_speed), axis=1)
df['time_x'] = df.progress_apply(lambda row: calculate_travel_time(row.jet_px, row.speed_x), axis=1)
df['time_y'] = df.progress_apply(lambda row: calculate_travel_time(row.jet_py, row.speed_y), axis=1)
df['time_z'] = df.progress_apply(lambda row: calculate_travel_time(row.jet_pz, row.speed_z), axis=1)
df['time_xy'] = df.progress_apply(lambda row: calculate_travel_time(row.xy_dis, row.speed_xy), axis=1)
df['time_yz'] = df.progress_apply(lambda row: calculate_travel_time(row.yz_dis, row.speed_yz), axis=1)
df['time_zx'] = df.progress_apply(lambda row: calculate_travel_time(row.zx_dis, row.speed_zx), axis=1)

100%|██████████| 1672504/1672504 [01:16<00:00, 22005.40it/s]
100%|██████████| 1672504/1672504 [01:12<00:00, 23149.62it/s]
100%|██████████| 1672504/1672504 [01:12<00:00, 22959.55it/s]
100%|██████████| 1672504/1672504 [01:12<00:00, 22945.76it/s]
100%|██████████| 1672504/1672504 [01:16<00:00, 21947.89it/s]
100%|██████████| 1672504/1672504 [01:15<00:00, 22036.73it/s]
100%|██████████| 1672504/1672504 [01:15<00:00, 22061.94it/s]


In [15]:
df['jet_mv'] = df['jet_mass'] * df['jet_speed']
df['mv_x'] = df['jet_mv'] * df['x_div_dist']
df['mv_y'] = df['jet_mv'] * df['y_div_dist']
df['mv_z'] = df['jet_mv'] * df['z_div_dist']
df['mv_xy'] = df['jet_mv'] * df['xy_div_dist']
df['mv_yz'] = df['jet_mv'] * df['yz_div_dist']
df['mv_zx'] = df['jet_mv'] * df['zx_div_dist']

df['particle_mv'] = df['jet_mv'] / df['number_of_particles_in_this_jet']
df['particle_mv_x'] = df['particle_mv'] * df['x_div_dist']
df['particle_mv_y'] = df['particle_mv'] * df['y_div_dist']
df['particle_mv_z'] = df['particle_mv'] * df['z_div_dist']
df['particle_mv_xy'] = df['particle_mv'] * df['xy_div_dist']
df['particle_mv_yz'] = df['particle_mv'] * df['yz_div_dist']
df['particle_mv_zx'] = df['particle_mv'] * df['zx_div_dist']

In [18]:
def brute_force(df):
    
    df['event_id_count'] = df.groupby('event_id')['jet_id'].transform('count')
    
    df['event_id_number_particles_max'] = df.groupby('event_id')['number_of_particles_in_this_jet'].transform('max')
    df['event_id_number_particles_mean'] = df.groupby('event_id')['number_of_particles_in_this_jet'].transform('mean')
    df['event_id_number_particles_min'] = df.groupby('event_id')['number_of_particles_in_this_jet'].transform('min')
    df['event_id_number_particles_std'] = df.groupby('event_id')['number_of_particles_in_this_jet'].transform('std')
    
    df['event_id_mass_max'] = df.groupby('event_id')['jet_mass'].transform('max')
    df['event_id_mass_mean'] = df.groupby('event_id')['jet_mass'].transform('mean')
    df['event_id_mass_min'] = df.groupby('event_id')['jet_mass'].transform('min')
    df['event_id_mass_std'] = df.groupby('event_id')['jet_mass'].transform('std')
    
    df['event_id_energy_max'] = df.groupby('event_id')['jet_energy'].transform('max')
    df['event_id_energy_mean'] = df.groupby('event_id')['jet_energy'].transform('mean')
    df['event_id_energy_min'] = df.groupby('event_id')['jet_energy'].transform('min')
    df['event_id_energy_std'] = df.groupby('event_id')['jet_energy'].transform('std')
    
    df['event_id_mass_x_max'] = df.groupby('event_id')['mass_x'].transform('max')
    df['event_id_mass_x_mean'] = df.groupby('event_id')['mass_x'].transform('mean')
    df['event_id_mass_x_min'] = df.groupby('event_id')['mass_x'].transform('min')
    df['event_id_mass_x_std'] = df.groupby('event_id')['mass_x'].transform('std')
    
    df['event_id_mass_y_max'] = df.groupby('event_id')['mass_y'].transform('max')
    df['event_id_mass_y_mean'] = df.groupby('event_id')['mass_y'].transform('mean')
    df['event_id_mass_y_min'] = df.groupby('event_id')['mass_y'].transform('min')
    df['event_id_mass_y_std'] = df.groupby('event_id')['mass_y'].transform('std')
    
    df['event_id_mass_z_max'] = df.groupby('event_id')['mass_z'].transform('max')
    df['event_id_mass_z_mean'] = df.groupby('event_id')['mass_z'].transform('mean')
    df['event_id_mass_z_min'] = df.groupby('event_id')['mass_z'].transform('min')
    df['event_id_mass_z_std'] = df.groupby('event_id')['mass_z'].transform('std')
    
    df['event_id_mass_xy_max'] = df.groupby('event_id')['mass_xy'].transform('max')
    df['event_id_mass_xy_mean'] = df.groupby('event_id')['mass_xy'].transform('mean')
    df['event_id_mass_xy_min'] = df.groupby('event_id')['mass_xy'].transform('min')
    df['event_id_mass_xy_std'] = df.groupby('event_id')['mass_xy'].transform('std')
    
    df['event_id_mass_yz_max'] = df.groupby('event_id')['mass_yz'].transform('max')
    df['event_id_mass_yz_mean'] = df.groupby('event_id')['mass_yz'].transform('mean')
    df['event_id_mass_yz_min'] = df.groupby('event_id')['mass_yz'].transform('min')
    df['event_id_mass_yz_std'] = df.groupby('event_id')['mass_yz'].transform('std')
    
    df['event_id_mass_zx_max'] = df.groupby('event_id')['mass_zx'].transform('max')
    df['event_id_mass_zx_mean'] = df.groupby('event_id')['mass_zx'].transform('mean')
    df['event_id_mass_zx_min'] = df.groupby('event_id')['mass_zx'].transform('min')
    df['event_id_mass_zx_std'] = df.groupby('event_id')['mass_zx'].transform('std')
    
    df['event_id_energy_x_max'] = df.groupby('event_id')['energy_x'].transform('max')
    df['event_id_energy_x_mean'] = df.groupby('event_id')['energy_x'].transform('mean')
    df['event_id_energy_x_min'] = df.groupby('event_id')['energy_x'].transform('min')
    df['event_id_energy_x_std'] = df.groupby('event_id')['energy_x'].transform('std')
    
    df['event_id_energy_y_max'] = df.groupby('event_id')['energy_y'].transform('max')
    df['event_id_energy_y_mean'] = df.groupby('event_id')['energy_y'].transform('mean')
    df['event_id_energy_y_min'] = df.groupby('event_id')['energy_y'].transform('min')
    df['event_id_energy_y_std'] = df.groupby('event_id')['energy_y'].transform('std')
    
    df['event_id_energy_z_max'] = df.groupby('event_id')['energy_z'].transform('max')
    df['event_id_energy_z_mean'] = df.groupby('event_id')['energy_z'].transform('mean')
    df['event_id_energy_z_min'] = df.groupby('event_id')['energy_z'].transform('min')
    df['event_id_energy_z_std'] = df.groupby('event_id')['energy_z'].transform('std')
    
    df['event_id_energy_xy_max'] = df.groupby('event_id')['energy_xy'].transform('max')
    df['event_id_energy_xy_mean'] = df.groupby('event_id')['energy_xy'].transform('mean')
    df['event_id_energy_xy_min'] = df.groupby('event_id')['energy_xy'].transform('min')
    df['event_id_energy_xy_std'] = df.groupby('event_id')['energy_xy'].transform('std')
    
    df['event_id_energy_yz_max'] = df.groupby('event_id')['energy_yz'].transform('max')
    df['event_id_energy_yz_mean'] = df.groupby('event_id')['energy_yz'].transform('mean')
    df['event_id_energy_yz_min'] = df.groupby('event_id')['energy_yz'].transform('min')
    df['event_id_energy_yz_std'] = df.groupby('event_id')['energy_yz'].transform('std')
    
    df['event_id_energy_zx_max'] = df.groupby('event_id')['energy_zx'].transform('max')
    df['event_id_energy_zx_mean'] = df.groupby('event_id')['energy_zx'].transform('mean')
    df['event_id_energy_zx_min'] = df.groupby('event_id')['energy_zx'].transform('min')
    df['event_id_energy_zx_std'] = df.groupby('event_id')['energy_zx'].transform('std')
    
    df['event_id_particles_mass_max'] = df.groupby('event_id')['mean_particles_mass'].transform('max')
    df['event_id_particles_mass_mean'] = df.groupby('event_id')['mean_particles_mass'].transform('mean')
    df['event_id_particles_mass_min'] = df.groupby('event_id')['mean_particles_mass'].transform('min')
    df['event_id_particles_mass_std'] = df.groupby('event_id')['mean_particles_mass'].transform('std')
    
    df['event_id_particles_energy_max'] = df.groupby('event_id')['mean_particles_energy'].transform('max')
    df['event_id_particles_energy_mean'] = df.groupby('event_id')['mean_particles_energy'].transform('mean')
    df['event_id_particles_energy_min'] = df.groupby('event_id')['mean_particles_energy'].transform('min')
    df['event_id_particles_energy_std'] = df.groupby('event_id')['mean_particles_energy'].transform('std')
    
    df['event_id_distance_max'] = df.groupby('event_id')['jet_distance'].transform('max')
    df['event_id_distance_mean'] = df.groupby('event_id')['jet_distance'].transform('mean')
    df['event_id_distance_min'] = df.groupby('event_id')['jet_distance'].transform('min')
    df['event_id_distance_std'] = df.groupby('event_id')['jet_distance'].transform('std')
    
    df['event_id_xy_dis_max'] = df.groupby('event_id')['xy_dis'].transform('max')
    df['event_id_xy_dis_mean'] = df.groupby('event_id')['xy_dis'].transform('mean')
    df['event_id_xy_dis_min'] = df.groupby('event_id')['xy_dis'].transform('min')
    df['event_id_xy_dis_std'] = df.groupby('event_id')['xy_dis'].transform('std')
    
    df['event_id_yz_dis_max'] = df.groupby('event_id')['yz_dis'].transform('max')
    df['event_id_yz_dis_mean'] = df.groupby('event_id')['yz_dis'].transform('mean')
    df['event_id_yz_dis_min'] = df.groupby('event_id')['yz_dis'].transform('min')
    df['event_id_yz_dis_std'] = df.groupby('event_id')['yz_dis'].transform('std')
    
    df['event_id_zx_dis_max'] = df.groupby('event_id')['zx_dis'].transform('max')
    df['event_id_zx_dis_mean'] = df.groupby('event_id')['zx_dis'].transform('mean')
    df['event_id_zx_dis_min'] = df.groupby('event_id')['zx_dis'].transform('min')
    df['event_id_zx_dis_std'] = df.groupby('event_id')['zx_dis'].transform('std')
    
    df['event_id_x_div_dist_max'] = df.groupby('event_id')['x_div_dist'].transform('max')
    df['event_id_x_div_dist_mean'] = df.groupby('event_id')['x_div_dist'].transform('mean')
    df['event_id_x_div_dist_min'] = df.groupby('event_id')['x_div_dist'].transform('min')
    df['event_id_x_div_dist_std'] = df.groupby('event_id')['x_div_dist'].transform('std')
    
    df['event_id_y_div_dist_max'] = df.groupby('event_id')['y_div_dist'].transform('max')
    df['event_id_y_div_dist_mean'] = df.groupby('event_id')['y_div_dist'].transform('mean')
    df['event_id_y_div_dist_min'] = df.groupby('event_id')['y_div_dist'].transform('min')
    df['event_id_y_div_dist_std'] = df.groupby('event_id')['y_div_dist'].transform('std')
    
    df['event_id_z_div_dist_max'] = df.groupby('event_id')['z_div_dist'].transform('max')
    df['event_id_z_div_dist_mean'] = df.groupby('event_id')['z_div_dist'].transform('mean')
    df['event_id_z_div_dist_min'] = df.groupby('event_id')['z_div_dist'].transform('min')
    df['event_id_z_div_dist_std'] = df.groupby('event_id')['z_div_dist'].transform('std')
    
    df['event_id_xy_div_dist_max'] = df.groupby('event_id')['xy_div_dist'].transform('max')
    df['event_id_xy_div_dist_mean'] = df.groupby('event_id')['xy_div_dist'].transform('mean')
    df['event_id_xy_div_dist_min'] = df.groupby('event_id')['xy_div_dist'].transform('min')
    df['event_id_xy_div_dist_std'] = df.groupby('event_id')['xy_div_dist'].transform('std')
    
    df['event_id_yz_div_dist_max'] = df.groupby('event_id')['yz_div_dist'].transform('max')
    df['event_id_yz_div_dist_mean'] = df.groupby('event_id')['yz_div_dist'].transform('mean')
    df['event_id_yz_div_dist_min'] = df.groupby('event_id')['yz_div_dist'].transform('min')
    df['event_id_yz_div_dist_std'] = df.groupby('event_id')['yz_div_dist'].transform('std')
    
    df['event_id_zx_div_dist_max'] = df.groupby('event_id')['zx_div_dist'].transform('max')
    df['event_id_zx_div_dist_mean'] = df.groupby('event_id')['zx_div_dist'].transform('mean')
    df['event_id_zx_div_dist_min'] = df.groupby('event_id')['zx_div_dist'].transform('min')
    df['event_id_zx_div_dist_std'] = df.groupby('event_id')['zx_div_dist'].transform('std')
    
    df['event_id_speed_max'] = df.groupby('event_id')['jet_speed'].transform('max')
    df['event_id_speed_mean'] = df.groupby('event_id')['jet_speed'].transform('mean')
    df['event_id_speed_min'] = df.groupby('event_id')['jet_speed'].transform('min')
    df['event_id_speed_std'] = df.groupby('event_id')['jet_speed'].transform('std')
    
    df['event_id_speed_x_max'] = df.groupby('event_id')['speed_x'].transform('max')
    df['event_id_speed_x_mean'] = df.groupby('event_id')['speed_x'].transform('mean')
    df['event_id_speed_x_min'] = df.groupby('event_id')['speed_x'].transform('min')
    df['event_id_speed_x_std'] = df.groupby('event_id')['speed_x'].transform('std')
    
    df['event_id_speed_y_max'] = df.groupby('event_id')['speed_y'].transform('max')
    df['event_id_speed_y_mean'] = df.groupby('event_id')['speed_y'].transform('mean')
    df['event_id_speed_y_min'] = df.groupby('event_id')['speed_y'].transform('min')
    df['event_id_speed_y_std'] = df.groupby('event_id')['speed_y'].transform('std')
    
    df['event_id_speed_z_max'] = df.groupby('event_id')['speed_z'].transform('max')
    df['event_id_speed_z_mean'] = df.groupby('event_id')['speed_z'].transform('mean')
    df['event_id_speed_z_min'] = df.groupby('event_id')['speed_z'].transform('min')
    df['event_id_speed_z_std'] = df.groupby('event_id')['speed_z'].transform('std')
    
    df['event_id_speed_xy_max'] = df.groupby('event_id')['speed_xy'].transform('max')
    df['event_id_speed_xy_mean'] = df.groupby('event_id')['speed_xy'].transform('mean')
    df['event_id_speed_xy_min'] = df.groupby('event_id')['speed_xy'].transform('min')
    df['event_id_speed_xy_std'] = df.groupby('event_id')['speed_xy'].transform('std')
    
    df['event_id_speed_yz_max'] = df.groupby('event_id')['speed_yz'].transform('max')
    df['event_id_speed_yz_mean'] = df.groupby('event_id')['speed_yz'].transform('mean')
    df['event_id_speed_yz_min'] = df.groupby('event_id')['speed_yz'].transform('min')
    df['event_id_speed_yz_std'] = df.groupby('event_id')['speed_yz'].transform('std')
    
    df['event_id_speed_zx_max'] = df.groupby('event_id')['speed_zx'].transform('max')
    df['event_id_speed_zx_mean'] = df.groupby('event_id')['speed_zx'].transform('mean')
    df['event_id_speed_zx_min'] = df.groupby('event_id')['speed_zx'].transform('min')
    df['event_id_speed_zx_std'] = df.groupby('event_id')['speed_zx'].transform('std')    
    
    df['event_id_px_max'] = df.groupby('event_id')['jet_px'].transform('max')
    df['event_id_px_mean'] = df.groupby('event_id')['jet_px'].transform('mean')
    df['event_id_px_min'] = df.groupby('event_id')['jet_px'].transform('min')
    df['event_id_px_std'] = df.groupby('event_id')['jet_px'].transform('std')
    
    df['event_id_py_max'] = df.groupby('event_id')['jet_py'].transform('max')
    df['event_id_py_mean'] = df.groupby('event_id')['jet_py'].transform('mean')
    df['event_id_py_min'] = df.groupby('event_id')['jet_py'].transform('min')
    df['event_id_py_std'] = df.groupby('event_id')['jet_py'].transform('std')
    
    df['event_id_pz_max'] = df.groupby('event_id')['jet_pz'].transform('max')
    df['event_id_pz_mean'] = df.groupby('event_id')['jet_pz'].transform('mean')
    df['event_id_pz_min'] = df.groupby('event_id')['jet_pz'].transform('min')
    df['event_id_pz_std'] = df.groupby('event_id')['jet_pz'].transform('std')
    
    df['event_id_angle_xy_max'] = df.groupby('event_id')['angle_xy'].transform('max')
    df['event_id_angle_xy_mean'] = df.groupby('event_id')['angle_xy'].transform('mean')
    df['event_id_angle_xy_min'] = df.groupby('event_id')['angle_xy'].transform('min')
    df['event_id_angle_xy_std'] = df.groupby('event_id')['angle_xy'].transform('std')
    
    df['event_id_angle_xz_max'] = df.groupby('event_id')['angle_xz'].transform('max')
    df['event_id_angle_xz_mean'] = df.groupby('event_id')['angle_xz'].transform('mean')
    df['event_id_angle_xz_min'] = df.groupby('event_id')['angle_xz'].transform('min')
    df['event_id_angle_xz_std'] = df.groupby('event_id')['angle_xz'].transform('std')
    
    df['event_id_angle_yx_max'] = df.groupby('event_id')['angle_yx'].transform('max')
    df['event_id_angle_yx_mean'] = df.groupby('event_id')['angle_yx'].transform('mean')
    df['event_id_angle_yx_min'] = df.groupby('event_id')['angle_yx'].transform('min')
    df['event_id_angle_yx_std'] = df.groupby('event_id')['angle_yx'].transform('std')
    
    df['event_id_angle_yz_max'] = df.groupby('event_id')['angle_yz'].transform('max')
    df['event_id_angle_yz_mean'] = df.groupby('event_id')['angle_yz'].transform('mean')
    df['event_id_angle_yz_min'] = df.groupby('event_id')['angle_yz'].transform('min')
    df['event_id_angle_yz_std'] = df.groupby('event_id')['angle_yz'].transform('std')
    
    df['event_id_angle_zy_max'] = df.groupby('event_id')['angle_zy'].transform('max')
    df['event_id_angle_zy_mean'] = df.groupby('event_id')['angle_zy'].transform('mean')
    df['event_id_angle_zy_min'] = df.groupby('event_id')['angle_zy'].transform('min')
    df['event_id_angle_zy_std'] = df.groupby('event_id')['angle_zy'].transform('std')
    
    df['event_id_angle_zx_max'] = df.groupby('event_id')['angle_zx'].transform('max')
    df['event_id_angle_zx_mean'] = df.groupby('event_id')['angle_zx'].transform('mean')
    df['event_id_angle_zx_min'] = df.groupby('event_id')['angle_zx'].transform('min')
    df['event_id_angle_zx_std'] = df.groupby('event_id')['angle_zx'].transform('std')
    
    df['event_id_time_dis_max'] = df.groupby('event_id')['time_dis'].transform('max')
    df['event_id_time_dis_mean'] = df.groupby('event_id')['time_dis'].transform('mean')
    df['event_id_time_dis_min'] = df.groupby('event_id')['time_dis'].transform('min')
    df['event_id_time_dis_std'] = df.groupby('event_id')['time_dis'].transform('std')
    
    df['event_id_time_x_max'] = df.groupby('event_id')['time_x'].transform('max')
    df['event_id_time_x_mean'] = df.groupby('event_id')['time_x'].transform('mean')
    df['event_id_time_x_min'] = df.groupby('event_id')['time_x'].transform('min')
    df['event_id_time_x_std'] = df.groupby('event_id')['time_x'].transform('std')
    
    df['event_id_time_y_max'] = df.groupby('event_id')['time_y'].transform('max')
    df['event_id_time_y_mean'] = df.groupby('event_id')['time_y'].transform('mean')
    df['event_id_time_y_min'] = df.groupby('event_id')['time_y'].transform('min')
    df['event_id_time_y_std'] = df.groupby('event_id')['time_y'].transform('std')
    
    df['event_id_time_z_max'] = df.groupby('event_id')['time_z'].transform('max')
    df['event_id_time_z_mean'] = df.groupby('event_id')['time_z'].transform('mean')
    df['event_id_time_z_min'] = df.groupby('event_id')['time_z'].transform('min')
    df['event_id_time_z_std'] = df.groupby('event_id')['time_z'].transform('std')
    
    df['event_id_time_xy_max'] = df.groupby('event_id')['time_xy'].transform('max')
    df['event_id_time_xy_mean'] = df.groupby('event_id')['time_xy'].transform('mean')
    df['event_id_time_xy_min'] = df.groupby('event_id')['time_xy'].transform('min')
    df['event_id_time_xy_std'] = df.groupby('event_id')['time_xy'].transform('std')
    
    df['event_id_time_yz_max'] = df.groupby('event_id')['time_yz'].transform('max')
    df['event_id_time_yz_mean'] = df.groupby('event_id')['time_yz'].transform('mean')
    df['event_id_time_yz_min'] = df.groupby('event_id')['time_yz'].transform('min')
    df['event_id_time_yz_std'] = df.groupby('event_id')['time_yz'].transform('std')
    
    df['event_id_time_zx_max'] = df.groupby('event_id')['time_zx'].transform('max')
    df['event_id_time_zx_mean'] = df.groupby('event_id')['time_zx'].transform('mean')
    df['event_id_time_zx_min'] = df.groupby('event_id')['time_zx'].transform('min')
    df['event_id_time_zx_std'] = df.groupby('event_id')['time_zx'].transform('std')
    
    df['event_id_mv_max'] = df.groupby('event_id')['jet_mv'].transform('max')
    df['event_id_mv_mean'] = df.groupby('event_id')['jet_mv'].transform('mean')
    df['event_id_mv_min'] = df.groupby('event_id')['jet_mv'].transform('min')
    df['event_id_mv_std'] = df.groupby('event_id')['jet_mv'].transform('std')
    
    df['event_id_mv_x_max'] = df.groupby('event_id')['mv_x'].transform('max')
    df['event_id_mv_x_mean'] = df.groupby('event_id')['mv_x'].transform('mean')
    df['event_id_mv_x_min'] = df.groupby('event_id')['mv_x'].transform('min')
    df['event_id_mv_x_std'] = df.groupby('event_id')['mv_x'].transform('std')
    
    df['event_id_mv_y_max'] = df.groupby('event_id')['mv_y'].transform('max')
    df['event_id_mv_y_mean'] = df.groupby('event_id')['mv_y'].transform('mean')
    df['event_id_mv_y_min'] = df.groupby('event_id')['mv_y'].transform('min')
    df['event_id_mv_y_std'] = df.groupby('event_id')['mv_y'].transform('std')
    
    df['event_id_mv_z_max'] = df.groupby('event_id')['mv_z'].transform('max')
    df['event_id_mv_z_mean'] = df.groupby('event_id')['mv_z'].transform('mean')
    df['event_id_mv_z_min'] = df.groupby('event_id')['mv_z'].transform('min')
    df['event_id_mv_z_std'] = df.groupby('event_id')['mv_z'].transform('std')
    
    df['event_id_mv_xy_max'] = df.groupby('event_id')['mv_xy'].transform('max')
    df['event_id_mv_xy_mean'] = df.groupby('event_id')['mv_xy'].transform('mean')
    df['event_id_mv_xy_min'] = df.groupby('event_id')['mv_xy'].transform('min')
    df['event_id_mv_xy_std'] = df.groupby('event_id')['mv_xy'].transform('std')
    
    df['event_id_mv_yz_max'] = df.groupby('event_id')['mv_yz'].transform('max')
    df['event_id_mv_yz_mean'] = df.groupby('event_id')['mv_yz'].transform('mean')
    df['event_id_mv_yz_min'] = df.groupby('event_id')['mv_yz'].transform('min')
    df['event_id_mv_yz_std'] = df.groupby('event_id')['mv_yz'].transform('std')
    
    df['event_id_mv_zx_max'] = df.groupby('event_id')['mv_zx'].transform('max')
    df['event_id_mv_zx_mean'] = df.groupby('event_id')['mv_zx'].transform('mean')
    df['event_id_mv_zx_min'] = df.groupby('event_id')['mv_zx'].transform('min')
    df['event_id_mv_zx_std'] = df.groupby('event_id')['mv_zx'].transform('std')
    
    df['event_id_particle_mv_max'] = df.groupby('event_id')['particle_mv'].transform('max')
    df['event_id_particle_mv_mean'] = df.groupby('event_id')['particle_mv'].transform('mean')
    df['event_id_particle_mv_min'] = df.groupby('event_id')['particle_mv'].transform('min')
    df['event_id_particle_mv_std'] = df.groupby('event_id')['particle_mv'].transform('std')
    
    df['event_id_particle_mv_x_max'] = df.groupby('event_id')['particle_mv_x'].transform('max')
    df['event_id_particle_mv_x_mean'] = df.groupby('event_id')['particle_mv_x'].transform('mean')
    df['event_id_particle_mv_x_min'] = df.groupby('event_id')['particle_mv_x'].transform('min')
    df['event_id_particle_mv_x_std'] = df.groupby('event_id')['particle_mv_x'].transform('std')
    
    df['event_id_particle_mv_y_max'] = df.groupby('event_id')['particle_mv_y'].transform('max')
    df['event_id_particle_mv_y_mean'] = df.groupby('event_id')['particle_mv_y'].transform('mean')
    df['event_id_particle_mv_y_min'] = df.groupby('event_id')['particle_mv_y'].transform('min')
    df['event_id_particle_mv_y_std'] = df.groupby('event_id')['particle_mv_y'].transform('std')
    
    df['event_id_particle_mv_z_max'] = df.groupby('event_id')['particle_mv_z'].transform('max')
    df['event_id_particle_mv_z_mean'] = df.groupby('event_id')['particle_mv_z'].transform('mean')
    df['event_id_particle_mv_z_min'] = df.groupby('event_id')['particle_mv_z'].transform('min')
    df['event_id_particle_mv_z_std'] = df.groupby('event_id')['particle_mv_z'].transform('std')
    
    df['event_id_particle_mv_xy_max'] = df.groupby('event_id')['particle_mv_xy'].transform('max')
    df['event_id_particle_mv_xy_mean'] = df.groupby('event_id')['particle_mv_xy'].transform('mean')
    df['event_id_particle_mv_xy_min'] = df.groupby('event_id')['particle_mv_xy'].transform('min')
    df['event_id_particle_mv_xy_std'] = df.groupby('event_id')['particle_mv_xy'].transform('std')
    
    df['event_id_particle_mv_yz_max'] = df.groupby('event_id')['particle_mv_yz'].transform('max')
    df['event_id_particle_mv_yz_mean'] = df.groupby('event_id')['particle_mv_yz'].transform('mean')
    df['event_id_particle_mv_yz_min'] = df.groupby('event_id')['particle_mv_yz'].transform('min')
    df['event_id_particle_mv_yz_std'] = df.groupby('event_id')['particle_mv_yz'].transform('std')
    
    df['event_id_particle_mv_zx_max'] = df.groupby('event_id')['particle_mv_zx'].transform('max')
    df['event_id_particle_mv_zx_mean'] = df.groupby('event_id')['particle_mv_zx'].transform('mean')
    df['event_id_particle_mv_zx_min'] = df.groupby('event_id')['particle_mv_zx'].transform('min')
    df['event_id_particle_mv_zx_std'] = df.groupby('event_id')['particle_mv_zx'].transform('std')
     
    return df

In [19]:
df = brute_force(df)

In [20]:
df['event_id_energy_sum'] = df.groupby('event_id')['jet_energy'].transform('sum')
df['event_id_mass_sum'] = df.groupby('event_id')['jet_mass'].transform('sum')
df['event_id_speed'] = df.progress_apply(lambda row: calculate_speed(row.event_id_energy_sum, row.event_id_mass_sum), axis=1)

100%|██████████| 1672504/1672504 [02:13<00:00, 12561.75it/s]


In [23]:
df.shape

(1672504, 327)

In [24]:
train_jet = df[:len(train)]
test_jet = df[len(train):]

In [25]:
del df

gc.collect()

791

In [26]:
train_jet.to_pickle('train_jet.pickle')
test_jet.to_pickle('test_jet.pickle')