In [1]:
import sys
import numpy as np
import pickle
import h5py
import bz2
from numpy.random import poisson
import math
import re
from rootpy.vector import LorentzVector

Welcome to JupyROOT 6.10/06


# Convert pileup ASCII files to pickled NumPy arrays

In [5]:
def dat2arrays(filenames_in, filename_out):
    """Convert ASCII pileup dumps into a bz2 stream of pickled NumPy arrays.

    Every input line that starts with "HADS: " counts as one event. Each
    "[a,b,c,d]" group found on such a line is read as (pt, eta, phi, e),
    passed through a LorentzVector, and stored as one [px, py, pz, e] row.
    The rows of an event are pickled (protocol 2) as a single float16 array,
    one pickle per event, appended back to back to the output file.

    Parameters
    ----------
    filenames_in : iterable of str
        Paths of the ASCII input files, processed in the given order.
    filename_out : str
        Path of the bz2-compressed output file (overwritten).
    """
    float_re = r"[+\-]?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?"
    four_vector_re = re.compile(r"\[(%s),(%s),(%s),(%s)\]" % ((float_re,) * 4))

    event_count = 0
    particle_count = 0

    with bz2.BZ2File(filename_out, "w") as output_file:
        for path in filenames_in:
            with open(path, "r") as ascii_file:
                for line in ascii_file:
                    # Only "HADS: " lines carry event content.
                    if not line.startswith("HADS: "):
                        continue

                    event_count += 1
                    rows = []
                    for pt, eta, phi, energy in four_vector_re.findall(line):
                        particle_count += 1

                        # Convert (pt, eta, phi, e) to Cartesian components.
                        # An older variant stored the four raw values as-is.
                        four_vector = LorentzVector()
                        four_vector.set_pt_eta_phi_e(float(pt), float(eta),
                                                     float(phi), float(energy))
                        rows.append([four_vector.px, four_vector.py,
                                     four_vector.pz, four_vector.e])

                    event_array = np.array(rows, dtype=np.float16)
                    pickle.dump(event_array, output_file, protocol=2)

                    # Progress report every 10000 events.
                    if event_count % 10000 == 0:
                        print(event_count, 'events with mean of', particle_count / event_count, 'particles done')

In [7]:
# Run the ASCII -> pickled-array conversion for pileup files 1 to 4.
folder = '/scratch/jb6504/data'
#folder = '../data'

filenames_in = [
    folder + '/pileup/pileup' + str(file_index) + '_ascii.dat'
    for file_index in range(1, 5)
]
filename_out = folder + '/pileup/pileup_new.dat.tar.bz2'

dat2arrays(filenames_in=filenames_in, filename_out=filename_out)

10000 events with mean of 188.3852 particles done
20000 events with mean of 187.86085 particles done
30000 events with mean of 187.37263333333334 particles done
40000 events with mean of 187.707475 particles done
50000 events with mean of 187.99068 particles done
60000 events with mean of 188.43116666666666 particles done
70000 events with mean of 188.40937142857143 particles done
80000 events with mean of 188.451175 particles done
90000 events with mean of 188.34774444444446 particles done
100000 events with mean of 188.08741 particles done
110000 events with mean of 188.0740181818182 particles done
120000 events with mean of 188.13656666666665 particles done
130000 events with mean of 188.25896153846153 particles done
140000 events with mean of 188.22232142857143 particles done
150000 events with mean of 188.30068666666668 particles done
160000 events with mean of 188.13825625 particles done
170000 events with mean of 188.04893529411765 particles done
180000 events with mean of 188.1

1390000 events with mean of 188.13382230215828 particles done
1400000 events with mean of 188.14370857142856 particles done
1410000 events with mean of 188.1478375886525 particles done
1420000 events with mean of 188.15533309859154 particles done
1430000 events with mean of 188.1383132867133 particles done
1440000 events with mean of 188.12727083333334 particles done
1450000 events with mean of 188.12042689655172 particles done
1460000 events with mean of 188.12135410958905 particles done
1470000 events with mean of 188.1106448979592 particles done
1480000 events with mean of 188.114275 particles done
1490000 events with mean of 188.1282932885906 particles done
1500000 events with mean of 188.11858266666667 particles done
1510000 events with mean of 188.1213715231788 particles done
1520000 events with mean of 188.12452565789474 particles done
1530000 events with mean of 188.11845490196077 particles done
1540000 events with mean of 188.10463116883116 particles done
1550000 events with m

# Jets, particle level

In [10]:
def add_pileup_jet_particle(hard_filename, output_filename,
                            pileup_file, n_expected, use_poisson=True):

    ''' Adds pileup particles to every event of an HDF5 particle-level jet file.

    Reads the 'events' dataset of `hard_filename`. For each event, the
    particles of `n_pileup` pileup events are read from `pileup_file` and
    appended to the event's own particles. The combined particle list is
    written to an 'events' dataset in `output_filename`, one variable-length
    entry of (E, px, py, pz) float16 records per event.

    Parameters
    ----------
    hard_filename : str
        Input HDF5 file. The first four entries of each particle are copied,
        in order, into the (E, px, py, pz) fields.
    output_filename : str
        Output HDF5 file (opened with mode 'w').
    pileup_file : binary file object
        Open stream of consecutive pickled pileup events. Entries 0..3 of each
        row are stored as (px, py, pz, E). The read position is shared with
        the caller: it keeps advancing across calls and is rewound with
        seek(0) when the end of the stream is reached.
    n_expected : int or float
        Poisson mean of the number of pileup events per hard event, or the
        exact number when `use_poisson` is False (it is passed to range(), so
        it has to be an integer in that case).
    use_poisson : bool
        Whether to draw the pileup multiplicity from a Poisson distribution.

    Raises
    ------
    EOFError
        If `pileup_file` contains no pickled event at all.
    '''

    def next_pileup_event():
        ''' Returns the next pickled pileup event, rewinding at end of file. '''
        # NOTE(security): pickle.load can execute arbitrary code; only feed
        # this function pileup files from a trusted source.
        try:
            return pickle.load(pileup_file, encoding='latin1')
        except EOFError:
            # At end of file, rewind
            pileup_file.seek(0)
            try:
                particles = pickle.load(pileup_file, encoding='latin1')
            except EOFError:
                raise EOFError('Pileup file contains no events.')
            print('Starting from beginning of pileup file.')
            return particles

    print('')
    print('Adding pileup to ' + hard_filename + ' with pileup mean', n_expected)

    total_hard_particles = 0
    total_pileup_particles = 0
    n_events = 0

    with h5py.File(hard_filename, 'r') as input_file:
        input_events = input_file['events']
        n_events_total = len(input_events)

        with h5py.File(output_filename, 'w') as output_file:
            dt_momentum = np.dtype([('E', np.float16), ('px', np.float16), ('py', np.float16), ('pz', np.float16)])
            dt_event = h5py.special_dtype(vlen=dt_momentum)
            output_events = output_file.create_dataset('events', (n_events_total,), dtype=dt_event)

            for event in input_events:

                # Get hard momenta
                n_momenta = len(event)
                momenta = np.zeros((n_momenta,), dtype=dt_momentum)
                for i, p in enumerate(event):
                    momenta[i] = (p[0], p[1], p[2], p[3])
                total_hard_particles += n_momenta

                # Calculate amount of pileup
                if use_poisson:
                    n_pileup = poisson(n_expected)
                else:
                    n_pileup = n_expected

                # Get pileup momenta
                pileup_momenta_raw = []
                for _ in range(n_pileup):
                    pileup_momenta_raw.extend(next_pileup_event())
                n_pileup_particles = len(pileup_momenta_raw)

                # Reorder each pileup row from (px, py, pz, E) to (E, px, py, pz)
                pileup_momenta = np.zeros((n_pileup_particles,), dtype=dt_momentum)
                for i, p in enumerate(pileup_momenta_raw):
                    pileup_momenta[i] = (p[3], p[0], p[1], p[2])
                total_pileup_particles += n_pileup_particles

                if n_pileup_particles > 0:
                    momenta = np.hstack((momenta, pileup_momenta))

                output_events[n_events] = momenta
                n_events += 1

                if n_events % 1000 == 0:
                    print(' ', n_events, 'events done, mean particles:',
                          total_hard_particles / n_events, '+', total_pileup_particles / n_events)

    print('Summary:')
    print('  Events:', n_events)
    if n_events == 0:
        # Nothing was read, so the per-event averages are undefined; skip them
        # instead of dividing by zero.
        return
    print('  Average hard particles:', total_hard_particles / n_events)
    print('  Average pileup particles:', total_pileup_particles / n_events)

In [11]:
# Go!
folder = '/scratch/jb6504/data'
#folder = '../data'
hard_filenames = [folder + '/w-vs-qcd/h5/w_100000.h5',
                  folder + '/w-vs-qcd/h5/qcd_100000.h5']
n_expected = [25,
              25]
output_filenames = [folder + '/w-vs-qcd/h5/w_100000_pileup25_new.h5',
                    folder + '/w-vs-qcd/h5/qcd_100000_pileup25_new.h5']
pileup_filename = folder + '/pileup/pileup_new.dat.tar.bz2'

# One pileup stream is shared by all samples, so the second sample continues
# where the first one stopped reading. The context manager closes the file
# when the loop ends or fails, instead of leaving the handle open in the kernel.
with bz2.BZ2File(pileup_filename, "r") as pileup_file:
    for hard_filename, n, output_filename in zip(hard_filenames, n_expected, output_filenames):
        add_pileup_jet_particle(hard_filename, output_filename, pileup_file, n, True)


Adding pileup to /scratch/jb6504/data/w-vs-qcd/h5/w_100000.h5 with pileup mean 25
  1000 events done, mean particles: 428.348 + 4719.152
  2000 events done, mean particles: 432.2455 + 4695.489
  3000 events done, mean particles: 431.94233333333335 + 4683.953333333333
  4000 events done, mean particles: 431.92525 + 4689.99525
  5000 events done, mean particles: 431.0316 + 4699.7112
  6000 events done, mean particles: 431.747 + 4695.606833333333
  7000 events done, mean particles: 430.9875714285714 + 4696.590571428572
  8000 events done, mean particles: 431.394875 + 4702.621375
  9000 events done, mean particles: 431.69244444444445 + 4700.183666666667
  10000 events done, mean particles: 432.233 + 4702.4462
  11000 events done, mean particles: 433.0108181818182 + 4699.714454545455
  12000 events done, mean particles: 433.652 + 4698.211166666667
  13000 events done, mean particles: 433.865 + 4699.861769230769
  14000 events done, mean particles: 433.34264285714283 + 4703.6675
  15000 eve

  13000 events done, mean particles: 533.7266153846153 + 4694.043384615385
  14000 events done, mean particles: 534.3152857142857 + 4697.253857142857
  15000 events done, mean particles: 533.9881333333333 + 4698.2386
  16000 events done, mean particles: 533.5546875 + 4693.6669375
  17000 events done, mean particles: 534.0166470588235 + 4694.629882352941
  18000 events done, mean particles: 533.965 + 4692.447944444444
  19000 events done, mean particles: 534.0495789473684 + 4694.102157894737
  20000 events done, mean particles: 533.89715 + 4693.69575
  21000 events done, mean particles: 533.4618571428572 + 4696.172095238096
  22000 events done, mean particles: 533.646909090909 + 4698.602136363636
  23000 events done, mean particles: 533.4540869565218 + 4698.185608695652
  24000 events done, mean particles: 533.2053333333333 + 4698.267708333334
  25000 events done, mean particles: 533.44204 + 4696.26232
  26000 events done, mean particles: 533.2870769230769 + 4695.003
  27000 events done

# Full event, particle level

In [13]:
def add_pileup_event_particle(hard_filename, output_filename,
                              pileup_file, n_expected, use_poisson=True):

    ''' Adds pileup particles to every event of a pickled particle-level event file.

    `hard_filename` is read as a sequence of consecutive pickles, one per
    event. For each event, the particles of `n_pileup` pileup events are read
    from `pileup_file` and stacked below the event's own particles with
    np.vstack. The result is pickled into the bz2-compressed file
    `output_filename`, again one pickle per event.

    Parameters
    ----------
    hard_filename : str
        Uncompressed input file holding consecutive pickled events.
    output_filename : str
        bz2-compressed output file (overwritten).
    pileup_file : binary file object
        Open stream of consecutive pickled pileup events. The read position is
        shared with the caller: it keeps advancing across calls and is rewound
        with seek(0) when the end of the stream is reached.
    n_expected : int or float
        Poisson mean of the number of pileup events per hard event, or the
        exact number when `use_poisson` is False (it is passed to range(), so
        it has to be an integer in that case).
    use_poisson : bool
        Whether to draw the pileup multiplicity from a Poisson distribution.

    Raises
    ------
    EOFError
        If `pileup_file` contains no pickled event at all.
    '''

    def next_pileup_event():
        ''' Returns the next pickled pileup event, rewinding at end of file. '''
        # NOTE(security): pickle.load can execute arbitrary code; only feed
        # this function files from a trusted source.
        try:
            return pickle.load(pileup_file, encoding='latin1')
        except EOFError:
            # At end of file, rewind
            pileup_file.seek(0)
            try:
                particles = pickle.load(pileup_file, encoding='latin1')
            except EOFError:
                raise EOFError('Pileup file contains no events.')
            print('Starting from beginning of pileup file.')
            return particles

    print('')
    print('Adding pileup to ' + hard_filename + ' with pileup mean', n_expected)

    total_hard_particles = 0
    total_pileup_particles = 0
    n_events = 0

    with open(hard_filename, 'rb') as input_file:
        with bz2.BZ2File(output_filename, "w") as output_file:

            while True:
                # The input has no event count; read until the pickles run out.
                try:
                    momenta = pickle.load(input_file, encoding='latin1')
                except EOFError:
                    break
                n_events += 1
                total_hard_particles += len(momenta)

                if use_poisson:
                    n_pileup = poisson(n_expected)
                else:
                    n_pileup = n_expected

                pileup_momenta = []
                for _ in range(n_pileup):
                    pileup_momenta.extend(next_pileup_event())
                total_pileup_particles += len(pileup_momenta)
                pileup_momenta = np.asarray(pileup_momenta)

                # vstack needs at least one pileup row to agree on the shape.
                if len(pileup_momenta) > 0:
                    momenta = np.vstack((momenta, pileup_momenta))
                pickle.dump(momenta, output_file)

                if n_events % 10000 == 0:
                    print(' ', n_events, 'events done, mean particles:',
                          total_hard_particles / n_events, '+', total_pileup_particles / n_events)

    print('Summary:')
    print('  Events:', n_events)
    if n_events == 0:
        # Nothing was read, so the per-event averages are undefined; skip them
        # instead of dividing by zero.
        return
    print('  Average hard particles:', total_hard_particles / n_events)
    print('  Average pileup particles:', total_pileup_particles / n_events)

In [14]:
# Go!
folder = '/scratch/jb6504/data'
#folder = '../data'
hard_filenames = [folder + '/events/wprime-particles.dat',
                  folder + '/events/dijet-particles.dat']
n_expected = [25,
              25]
output_filenames = [folder + '/events/wprime-particles-pileup25.dat.bz2',
                    folder + '/events/dijet-particles-pileup25.dat.bz2']
# NOTE(review): this reads pileup.dat.bz2, not the pileup_new.dat.tar.bz2 file
# used by the jet-level cell -- confirm that its rows have the component order
# the hard events use, since both are stacked without reordering.
pileup_filename = folder + '/pileup/pileup.dat.bz2'

# One pileup stream is shared by all samples, so the second sample continues
# where the first one stopped reading. The context manager closes the file
# when the loop ends or fails, instead of leaving the handle open in the kernel.
with bz2.BZ2File(pileup_filename, "r") as pileup_file:
    for hard_filename, n, output_filename in zip(hard_filenames, n_expected, output_filenames):
        add_pileup_event_particle(hard_filename, output_filename, pileup_file, n, True)


Adding pileup to /scratch/jb6504/data/events/dijet-particles.dat with pileup mean 25
  10000 events done, mean particles: 530.4376 + 4699.4621
  20000 events done, mean particles: 531.0632 + 4694.9929
  30000 events done, mean particles: 530.9473333333333 + 4699.0671
  40000 events done, mean particles: 531.29085 + 4692.46765
  50000 events done, mean particles: 531.02312 + 4696.99316
  60000 events done, mean particles: 530.807 + 4699.600416666667
  70000 events done, mean particles: 530.9862857142857 + 4699.089557142857
Starting from beginning of pileup file.
  80000 events done, mean particles: 530.9101375 + 4702.6208375
  90000 events done, mean particles: 530.8895 + 4705.0044333333335
  100000 events done, mean particles: 530.96907 + 4704.84843
  110000 events done, mean particles: 531.0724181818182 + 4704.951145454545
  120000 events done, mean particles: 530.7914166666667 + 4704.724333333334
  130000 events done, mean particles: 530.7209769230769 + 4705.773946153846
  140000 ev