# Make training, validation, and test samples

Johann Brehmer, Kyle Cranmer, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
% matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas

logging.basicConfig(format='%(asctime)s  %(message)s', datefmt='%H:%M')


In [None]:
# Machine-specific root locations: the MadGraph installation and the project checkout.
# Both end with a slash; later cells append relative paths directly to them.
mg_dir = '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_2/'
base_dir = '/Users/johannbrehmer/work/projects/madminer/diboson_mining/'

In [None]:
# Locations used by the W-gamma analysis, derived from base_dir and mg_dir.
# os.path.join replaces bare string concatenation so the results stay valid even if
# a parent directory is configured without a trailing slash; with the current values
# the resulting strings are unchanged.

# Directories below the project checkout. The ones written with a trailing slash keep
# it, because later cells append file and folder names to sample_dir with '+'.
sample_dir = os.path.join(base_dir, 'data/samples/wgamma/')
card_dir = os.path.join(base_dir, 'cards/wgamma/')
mg_process_dir = os.path.join(base_dir, 'data/mg_processes/wgamma/')
log_dir = os.path.join(base_dir, 'logs/wgamma/')
temp_dir = os.path.join(base_dir, 'data/temp')

# Directories below the card directory.
ufo_model_dir = os.path.join(card_dir, 'SMWgamma_UFO')
run_card_dir = os.path.join(card_dir, 'run_cards/')

# Delphes directory inside the MadGraph installation.
delphes_dir = os.path.join(mg_dir, 'Delphes')

## Load data (with tight cuts)

In [4]:
# Open the event file for the tight-cut selection; every '*_tight' sample below is drawn from it.
tight_events_file = sample_dir + 'samples_tight.h5'
sa = SampleAugmenter(tight_events_file, debug=False)

13:55  
13:55  ------------------------------------------------------------
13:55  |                                                          |
13:55  |  MadMiner v2018.11.12                                    |
13:55  |                                                          |
13:55  |           Johann Brehmer, Kyle Cranmer, and Felix Kling  |
13:55  |                                                          |
13:55  ------------------------------------------------------------
13:55  
13:55  Loading data from /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/samples_tight.h5
13:55  Found 2 parameters:
13:55     cWWW (LHA: dim6 1, maximal power in squared ME: (2,), range: (-0.02, 0.02))
13:55     cWWWtilde (LHA: dim6 2, maximal power in squared ME: (2,), range: (-0.02, 0.02))
13:55  Found 6 benchmarks:
13:55     sm: cWWW = 0.00e+00, cWWWtilde = 0.00e+00
13:55     morphing_basis_vector_1: cWWW = -6.07e-03, cWWWtilde = -1.84e-02
13:55     morphing_basis_vect

### SALLY training data

In [None]:
# SALLY training data (tight cuts): ten separate extractions at theta = (0, 0),
# the parameter point of the 'sm' benchmark. Each extraction is given its own
# file name train_<k> inside the train_local_tight folder.
sally_tight_folder = sample_dir + 'train_local_tight'

for k in range(10):
    # The three returned objects are not needed in this notebook
    # (presumably the samples are also written to folder/filename -- TODO confirm).
    _, _, _ = sa.extract_samples_train_local(
        n_samples=1000000,
        theta=constant_morphing_theta([0., 0.]),
        filename='train_' + str(k),
        folder=sally_tight_folder,
    )

### RASCAL training data

In [None]:
# RASCAL training data (tight cuts): ten separate extractions, each with 5000 random
# numerator and 5000 random denominator parameter points drawn from Gaussian priors
# (mean 0, width 0.02 per parameter) plus four sets of additional parameter points.
#
# The return value is deliberately not unpacked, matching the no-cuts RASCAL cell
# further down. Unpacking into exactly three names raises ValueError whenever the
# method returns a different number of objects, which would abort this loop after
# the first extraction (NOTE(review): confirm the return arity against the MadMiner
# SampleAugmenter documentation).
#
# NOTE(review): each additional theta set here has 500000 points, whereas the
# otherwise identical no-cuts cell uses 500 -- confirm which value is intended.
for i in range(10):
    sa.extract_samples_train_more_ratios(
        theta0=random_morphing_thetas(5000, [('gaussian', 0., 0.02), ('gaussian', 0., 0.02)]),
        theta1=random_morphing_thetas(5000, [('gaussian', 0., 0.02), ('gaussian', 0., 0.02)]),
        additional_thetas=[random_morphing_thetas(500000, [('gaussian', 0., 0.02), ('gaussian', 0., 0.02)])
                           for _ in range(4)],
        n_samples=1000000,
        folder=sample_dir + 'train_ratios_tight',
        filename='train_' + str(i)
    )

13:55  Extracting training sample for ratio-based methods. Numerator hypothesis: ('random', (5000, [('gaussian', 0.0, 0.02), ('gaussian', 0.0, 0.02)])), denominator hypothesis: ('random', (5000, [('gaussian', 0.0, 0.02), ('gaussian', 0.0, 0.02)]))


### SCANDAL training data

In [None]:
# SCANDAL training data (tight cuts): ten separate extractions, each over 10000
# random parameter points with a Gaussian prior (mean 0, width 0.02) per parameter.
scandal_tight_folder = sample_dir + 'train_scandal_tight'

for k in range(10):
    # A fresh prior specification is built for every call, as in a plain inline call.
    gaussian_priors = [('gaussian', 0., 0.02), ('gaussian', 0., 0.02)]
    _, _, _ = sa.extract_samples_train_global(
        n_samples=1000000,
        theta=random_morphing_thetas(10000, gaussian_priors),
        filename='train_' + str(k),
        folder=scandal_tight_folder,
    )

### Validation and test data

In [None]:
# Validation sample (tight cuts) at theta = (0, 0), the 'sm' benchmark point.
# Both returned objects are discarded.
validation_tight_result = sa.extract_samples_train_plain(
    n_samples=1000000,
    theta=constant_morphing_theta([0., 0.]),
    filename='validation',
    folder=sample_dir + 'validation_tight',
)
_, _ = validation_tight_result

In [None]:
# Test sample (tight cuts) at theta = (0, 0). The first returned object is kept as
# x_sm: the histogram cell further down reads one observable per column from it.
sm_test_result = sa.extract_samples_test(
    n_samples=1000000,
    theta=constant_morphing_theta([0., 0.]),
    filename='test',
    folder=sample_dir + 'test_tight',
)
x_sm, _ = sm_test_result

### xsec test

In [None]:
# Cross sections at the named benchmark points ...
benchmark_names = [
    'sm',
    'morphing_basis_vector_1',
    'morphing_basis_vector_2',
    'morphing_basis_vector_3',
    'morphing_basis_vector_4',
    'morphing_basis_vector_5',
]
benchmark_xsec_result = sa.extract_cross_sections(
    theta=multiple_benchmark_thetas(benchmark_names)
)
thetas_benchmarks, xsecs_benchmarks, xsec_errors_benchmarks = benchmark_xsec_result

# ... and at 1000 random parameter points (Gaussian prior, mean 0, width 0.02 per parameter).
morphing_xsec_result = sa.extract_cross_sections(
    theta=random_morphing_thetas(1000, [('gaussian', 0., 0.02), ('gaussian', 0., 0.02)])
)
thetas_morphing, xsecs_morphing, xsec_errors_morphing = morphing_xsec_result

In [None]:
# Scatter plot of the cross section over the two-parameter plane. Random points are
# small circles, benchmarks are large black-edged squares, and both share one colour scale.
cmin, cmax = 0., 0.15

# Colour mapping common to both scatter calls.
colour_scale = dict(cmap='viridis', vmin=cmin, vmax=cmax)

fig = plt.figure(figsize=(5, 4))

sc = plt.scatter(
    thetas_morphing[:, 0],
    thetas_morphing[:, 1],
    c=xsecs_morphing,
    s=40.,
    marker='o',
    **colour_scale
)

plt.scatter(
    thetas_benchmarks[:, 0],
    thetas_benchmarks[:, 1],
    c=xsecs_benchmarks,
    s=200.,
    marker='s',
    lw=2.,
    edgecolor='black',
    **colour_scale
)

# The colour bar is attached to the random-point scatter.
cb = plt.colorbar(sc)
cb.set_label('xsec [pb]')

plt.xlim(-0.05, .05)
plt.ylim(-0.05, .05)
plt.tight_layout()
plt.show()

### Observable distributions

In [None]:
# Two comparison samples, each with one parameter moved to 0.02 and the other left at 0.
# folder=None and filename=None are passed, so no output location is given for these samples.

# First parameter at 0.02.
bsm1_result = sa.extract_samples_test(
    n_samples=1000000,
    theta=constant_morphing_theta([0.02, 0.]),
    filename=None,
    folder=None,
)
x_bsm1, _ = bsm1_result

# Second parameter at 0.02.
bsm2_result = sa.extract_samples_test(
    n_samples=1000000,
    theta=constant_morphing_theta([0., 0.02]),
    filename=None,
    folder=None,
)
x_bsm2, _ = bsm2_result

In [None]:
# Grid of normalised histograms, one panel per observable, overlaying the SM test
# sample with the two samples that have one parameter shifted to 0.02.
bins = 25
n_cols = 3
n_observables = x_sm.shape[1]
n_rows = -(-n_observables // n_cols)  # ceiling division: enough rows for every panel
labels = sa.observables.keys()

# Samples in drawing order, each paired with its legend entry.
curves = [
    (x_sm, r'SM'),
    (x_bsm1, r'$f_{WWW} = 0.02$'),
    (x_bsm2, r'$f_{\tilde{W}WW} = 0.02$'),
]

plt.figure(figsize=(4. * n_cols, 4. * n_rows))

for i, label in enumerate(labels):
    # Axis range: the 5th to 95th percentile of the SM sample, widened by 10% on each side.
    xmin = np.percentile(x_sm[:, i], 5.)
    xmax = np.percentile(x_sm[:, i], 95.)
    xwidth = xmax - xmin
    x_range = (xmin - xwidth * 0.1, xmax + xwidth * 0.1)

    ax = plt.subplot(n_rows, n_cols, i + 1)

    for data, curve_label in curves:
        plt.hist(data[:, i], histtype='step', range=x_range, bins=bins, lw=1.5, label=curve_label, density=True)

    # One legend, in the first panel only.
    if i == 0:
        plt.legend()

    plt.xlabel(label)

plt.tight_layout()
plt.savefig('observables.pdf')

## Same without cuts

In [None]:
# Open the event file without the tight cuts; the remaining cells draw their samples from it.
all_events_file = sample_dir + 'samples.h5'
sa_all = SampleAugmenter(all_events_file, debug=False)

### SALLY training data

In [None]:
# SALLY training data (no cuts): ten separate extractions at theta = (0, 0),
# named train_<k> inside the train_local folder.
sally_folder = sample_dir + 'train_local'

for k in range(10):
    # The three returned objects are not used in this notebook.
    _, _, _ = sa_all.extract_samples_train_local(
        n_samples=1000000,
        theta=constant_morphing_theta([0., 0.]),
        filename='train_' + str(k),
        folder=sally_folder,
    )

### RASCAL training data

In [None]:
# RASCAL training data (no cuts): ten separate extractions with random numerator and
# denominator parameter points (5000 each) plus four sets of additional parameter points.
# The return value of each call is discarded.
#
# NOTE(review): each additional theta set here has 500 points, whereas the otherwise
# identical tight-cut cell uses 500000 -- confirm which value is intended.
rascal_folder = sample_dir + 'train_ratios'

for k in range(10):
    # Prior specifications are rebuilt for every call rather than shared between calls.
    numerator_thetas = random_morphing_thetas(5000, [('gaussian', 0., 0.02), ('gaussian', 0., 0.02)])
    denominator_thetas = random_morphing_thetas(5000, [('gaussian', 0., 0.02), ('gaussian', 0., 0.02)])
    extra_thetas = [random_morphing_thetas(500, [('gaussian', 0., 0.02), ('gaussian', 0., 0.02)])
                    for _ in range(4)]

    sa_all.extract_samples_train_more_ratios(
        theta0=numerator_thetas,
        theta1=denominator_thetas,
        additional_thetas=extra_thetas,
        n_samples=1000000,
        folder=rascal_folder,
        filename='train_' + str(k),
    )

### SCANDAL training data

In [None]:
# SCANDAL training data (no cuts): ten separate extractions, each over 10000 random
# parameter points with a Gaussian prior (mean 0, width 0.02) per parameter.
scandal_folder = sample_dir + 'train_scandal'

for k in range(10):
    # A fresh prior specification is built for every call, as in a plain inline call.
    gaussian_priors = [('gaussian', 0., 0.02), ('gaussian', 0., 0.02)]
    _, _, _ = sa_all.extract_samples_train_global(
        n_samples=1000000,
        theta=random_morphing_thetas(10000, gaussian_priors),
        filename='train_' + str(k),
        folder=scandal_folder,
    )

### Validation and test data

In [None]:
# Validation sample (no cuts) at theta = (0, 0); both returned objects are discarded.
validation_result = sa_all.extract_samples_train_plain(
    n_samples=1000000,
    theta=constant_morphing_theta([0., 0.]),
    filename='validation',
    folder=sample_dir + 'validation',
)
_, _ = validation_result

In [None]:
# Test sample (no cuts) at theta = (0, 0); both returned objects are discarded.
test_result = sa_all.extract_samples_test(
    n_samples=1000000,
    theta=constant_morphing_theta([0., 0.]),
    filename='test',
    folder=sample_dir + 'test',
)
_, _ = test_result