In [4]:
import h5py    
import numpy as np 
import matplotlib.pyplot as plt
from pyjet import cluster,DTYPE_PTEPM
import pandas as pd

import sys
# Add ../data to the module search path.
# NOTE(review): none of the imports in the visible cells rely on this entry, and
# the data file below is opened via the relative path "data/...", not "../data";
# confirm whether this line is still needed.
sys.path.append("../data")

In [6]:
# Read the events file into memory with pandas.
# Presumably a DataFrame with one row per event (it is transposed and indexed
# per event in the following cells) -- TODO confirm against the dataset docs.
f = pd.read_hdf("data/events_anomalydetection.h5")

In [7]:
# Transpose so that events_combined[i] selects a single event as a column;
# the clustering cell below indexes events this way.
events_combined = f.T

In [8]:
# Cluster the particles of each event into jets, split by truth label.
# Adapted from the boilerplate code on the LHC Olympics webpage.
#
# Layout relied on below: events_combined[i] is one event, entries 0..2099 hold
# 700 (pT, eta, phi) triplets (unused slots are assumed to have pT == 0) and
# entry 2100 is the signal flag.
N_PARTICLES = 700                 # (pT, eta, phi) slots per event
LABEL_INDEX = 3 * N_PARTICLES     # position of the signal flag (2100)
# Only the first 10 events are processed. For a full run use
# events_combined.shape[1] (events are columns after the transpose), not
# len(events_combined), which is the number of rows.
N_EVENTS = min(10, events_combined.shape[1])

leadpT = {}    # label -> leading-jet pT, one entry per kept event
alljets = {}   # label -> list of jets, one entry per kept event
for mytype in ['background', 'signal']:
    leadpT[mytype] = []
    alljets[mytype] = []
    for i in range(N_EVENTS):
        if i % 10000 == 0:
            print(mytype, i)

        # Pull the event out of the DataFrame once; repeated pandas scalar
        # lookups inside the particle loop are very slow.
        event = np.asarray(events_combined[i])

        # Keep only events whose truth label matches the pass we are in.
        issignal = event[LABEL_INDEX]
        if (mytype == 'signal') != bool(issignal):
            continue

        # View the particle entries as (N_PARTICLES, 3). Slicing off the label
        # first matters: a plain [::3] stride also hits index 2100, so signal
        # events used to get one spurious all-zero pseudojet.
        particles = event[:LABEL_INDEX].reshape(N_PARTICLES, 3)
        has_pt = particles[:, 0] > 0

        # Pack the real particles with a boolean mask instead of writing at
        # position j, which overflowed the array whenever an empty slot
        # preceded a filled one.
        pseudojets_input = np.zeros(np.count_nonzero(has_pt), dtype=DTYPE_PTEPM)
        pseudojets_input['pT'] = particles[has_pt, 0]
        pseudojets_input['eta'] = particles[has_pt, 1]
        pseudojets_input['phi'] = particles[has_pt, 2]

        sequence = cluster(pseudojets_input, R=1.0, p=-1)
        jets = sequence.inclusive_jets(ptmin=20)
        if len(jets) == 0:
            # No jet passed the pT cut: skip the event instead of failing on
            # jets[0]; leadpT and alljets stay aligned entry-for-entry.
            continue
        leadpT[mytype].append(jets[0].pt)
        alljets[mytype].append(jets)

background 0
signal 0


In [9]:
# Field layout of the numpy record array that stores one row per jet.
# A record array converts easily into a pandas DataFrame, a csv file, etc.
#   id     -- identifies which cluster the jet is in
#   signal -- True for signal, False for background
# Every field except 'signal' is stored as a 64-bit float.
pseudojet_dtype = [
    (field, np.float64)
    for field in ('id', 'px', 'py', 'pz', 'eta', 'phi', 'mass')
] + [('signal', bool)]

In [10]:
# Flatten the per-event jet lists into one record array, one row per jet.

# How many clustered events, and how many jets in total?
num_clusters = len(alljets['background']) + len(alljets['signal'])
num_total_jets = sum(
    len(event_jets)
    for jet_type in ['background', 'signal']
    for event_jets in alljets[jet_type]
)

# Collect the jet data
jet_data = np.zeros((num_total_jets, ), dtype=pseudojet_dtype)

cluster_idx = 0   # running index of the event a jet came from (stored in 'id')
jet_idx = 0       # next free row in jet_data

for jet_type in ['background', 'signal']:
    is_signal = jet_type == 'signal'
    # The loop variable is deliberately NOT called `cluster`: that name is the
    # clustering function imported from pyjet, and rebinding it here makes the
    # clustering cell fail when it is re-run afterwards.
    for event_jets in alljets[jet_type]:
        for jet in event_jets:
            jet_data[jet_idx] = (cluster_idx, jet.px, jet.py, jet.pz,
                                 jet.eta, jet.phi, jet.mass, is_signal)
            jet_idx += 1
        cluster_idx += 1

In [11]:
# Display the filled record array (one row per jet, fields as in pseudojet_dtype).
jet_data

array([(0., -1195.93017811,  -474.7832617 ,  2.40069632e+02,  0.18550795, -2.76367589, 106.91212896, False),
       (0.,  1185.05508299,   492.23967647,  8.34539598e+01,  0.06498901,  0.39368817,  63.16421456, False),
       (1.,   276.07174443,  1325.95915792,  1.25128542e+03,  0.82650514,  1.36552363, 614.26910803, False),
       (1.,  -283.40146707, -1294.96542476, -1.31242053e+03, -0.87431854, -1.78624819, 439.06415038, False),
       (1.,    18.25001714,   -12.30086545,  1.98862701e+00,  0.09023469, -0.59307569,   8.62167673, False),
       (2.,  -549.9642844 ,  1083.35437176, -2.40632513e+02, -0.19678612,  2.04054478, 645.86561897, False),
       (2.,   495.68446706,  -951.03724046,  1.54786053e+02,  0.14383133, -1.09032972, 113.76883957, False),
       (3., -1267.50282197,   212.71408746,  4.30093084e+02,  0.32869279,  2.97532067, 516.83524812, False),
       (3.,  1157.28029859,  -386.93086452,  3.65032626e+02,  0.29485445, -0.32266076, 174.796077  , False),
       (3.,   126.6

In [14]:
# Select only the 'mass' field: one value per jet, in the same row order as jet_data.
jet_data['mass']

array([106.91212896,  63.16421456, 614.26910803, 439.06415038,
         8.62167673, 645.86561897, 113.76883957, 516.83524812,
       174.796077  ,  21.51270628, 586.00085105, 105.72741029,
        25.49975138,  18.55749508, 270.64776241, 401.83034596,
        57.75458218,  47.42033355,  19.64438562,  13.93285338,
         7.82311305,   6.74278245, 129.68302345, 132.53339436,
        13.56716822,   7.46857667, 296.61553169, 313.8714713 ,
        21.90934153, 129.49935171, 155.36226176,  13.13182436,
        11.29910926,  99.16889752, 515.0544186 ])