In [1]:
import h5py, os
import numpy as np

In [2]:
f = h5py.File(os.getcwd()+'/data/lhe_data.h5', 'r+')
#f = h5py.File(os.getcwd()+'/data/lhe_data_shuffled.h5', 'r+')

In [3]:
f.keys()

<KeysViewHDF5 ['benchmarks', 'morphing', 'observables', 'parameters', 'sample_summary', 'samples']>

In [4]:
bm = f['benchmarks']
bm.keys()

<KeysViewHDF5 ['is_nuisance', 'is_reference', 'names', 'values']>

In [5]:
mp = f['morphing']
mp.keys()

<KeysViewHDF5 ['components', 'morphing_matrix']>

In [6]:
ob = f['observables']
ob.keys()

<KeysViewHDF5 ['definitions', 'names']>

In [7]:
pm = f['parameters']
pm.keys()

<KeysViewHDF5 ['lha_blocks', 'lha_ids', 'max_power', 'names', 'ranges', 'transforms']>

In [8]:
ss = f['sample_summary']
ss.keys()

<KeysViewHDF5 ['background_events', 'signal_events_per_benchmark']>

In [9]:
samples = f['samples']
samples.keys()

<KeysViewHDF5 ['observations', 'sampling_benchmarks', 'weights']>

In [10]:
np.array(f['samples/sampling_benchmarks'])

array([3, 4, 3, ..., 8, 9, 4])

In [11]:
f.close()

### Samples

In [11]:
observations = np.array(samples['observations'])
observations.shape

(110000, 3)

In [12]:
weights = np.array(samples['weights'])
weights

array([[2.1131966e-08, 2.0625963e-08, 2.0327084e-08, ..., 2.0704025e-08,
        2.0638013e-08, 2.0506882e-08],
       [9.3489014e-08, 1.0419120e-07, 1.1103390e-07, ..., 1.0246854e-07,
        1.0392355e-07, 1.0687026e-07],
       [1.0805914e-08, 2.3687832e-08, 3.3784027e-08, ..., 2.1358859e-08,
        2.3320070e-08, 2.7485005e-08],
       ...,
       [5.8120055e-08, 1.1497324e-07, 1.5831494e-07, ..., 1.0486168e-07,
        1.1338000e-07, 1.3135714e-07],
       [1.0464017e-07, 1.2612355e-07, 1.3991972e-07, ..., 1.2265724e-07,
        1.2558481e-07, 1.3152000e-07],
       [9.7356379e-08, 1.0285832e-07, 1.0621982e-07, ..., 1.0199417e-07,
        1.0272455e-07, 1.0418750e-07]])

In [13]:
sampling_benchmarks = np.array(samples['sampling_benchmarks'])
print('num of sm events: '+str(sum(sampling_benchmarks == 3)))
print('num of 50 events: '+str(sum(sampling_benchmarks == 4)))
print('num of neg_50 events: '+str(sum(sampling_benchmarks == 5)))
print('num of 200 events: '+str(sum(sampling_benchmarks == 6)))
print('num of neg_200 events: '+str(sum(sampling_benchmarks == 7)))
print('num of 500 events: '+str(sum(sampling_benchmarks == 8)))
print('num of neg_500 events: '+str(sum(sampling_benchmarks == 9)))

num of sm events: 50000
num of 50 events: 10000
num of neg_50 events: 10000
num of 200 events: 10000
num of neg_200 events: 10000
num of 500 events: 10000
num of neg_500 events: 10000


### Embedding toy data

In [15]:
toydataFile = h5py.File(os.getcwd()+'/toydata/gphi_toydata.h5', 'r')

#### data

In [16]:
del f['samples/observations']
f.create_dataset('samples/observations', data=np.array(toydataFile['Data']))

<HDF5 dataset "observations": shape (6000000, 9), type "<f4">

#### sampling_benchmarks

In [17]:
N = 500000
sampling_benchmarks = np.concatenate([np.concatenate([np.ones(N)*3, np.ones(N)*i]) for i in range(4, 10)])
del f['samples/sampling_benchmarks']
f.create_dataset('samples/sampling_benchmarks', data=sampling_benchmarks)

<HDF5 dataset "sampling_benchmarks": shape (6000000,), type "<f8">

#### weights

In [18]:
Weights = np.array(toydataFile['Weights'])

In [19]:
Weights

array([7.1692028e-08, 7.1692028e-08, 7.1692028e-08, ..., 4.0921014e-07,
       4.0921014e-07, 4.0921014e-07], dtype=float32)

In [20]:
weights = np.ones([N*12, 10])*(1e10)

In [25]:
#weights[N*0:N*1, 3] = Weights[N*0]
#weights[N*1:N*2, 4] = Weights[N*1]
#weights[N*:N*1, 3] = Weights[N*0]

In [21]:
for i in range(12):
    if i%2==0:
        print('sm data, pos=%d'%(3))
        weights[N*i:N*(i+1), 3] = Weights[N*i]
    else:
        print('bsm data, pos=%d'%(3+(i+1)/2))
        weights[N*i:N*(i+1), int(3+(i+1)/2)] = Weights[N*i]

sm data, pos=3
bsm data, pos=4
sm data, pos=3
bsm data, pos=5
sm data, pos=3
bsm data, pos=6
sm data, pos=3
bsm data, pos=7
sm data, pos=3
bsm data, pos=8
sm data, pos=3
bsm data, pos=9


In [22]:
del f['samples/weights']
f.create_dataset('samples/weights', data=weights)

<HDF5 dataset "weights": shape (6000000, 10), type "<f8">

#### observable numbers

In [23]:
f['observables'].keys()

<KeysViewHDF5 ['definitions', 'names']>

In [24]:
print(np.array(f['observables/definitions']))
print(np.array(f['observables/names']))

[b'j[0].pt' b'j[0].deltaphi(j[1]) * (-1. + 2.*float(j[0].eta > j[1].eta))'
 b'met.pt']
[b'pt_j1' b'delta_phi_jj' b'met']


In [25]:
del f['observables/definitions']
del f['observables/names']

observables = ['s', 'theta', 'thetaZ', 'thetaW', 'Sin(phiZ)', 'Sin(phiW)', 'Cos(phiZ)', 'Cos(phiW)', 'Pt']
f.create_dataset('observables/definitions', data=(np.array(observables, dtype='S')))
f.create_dataset('observables/names', data=(np.array(observables, dtype='S')))

<HDF5 dataset "names": shape (9,), type "|S9">

In [26]:
f.close()