# Train RASCAL ensemble

Johann Brehmer, Kyle Cranmer, Marco Farina, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
% matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

# Root logger setup: timestamps rendered as HH:MM, logger name and level name
# padded/truncated to fixed widths (20 and 7 characters), and level DEBUG so
# that messages of every severity are emitted.
logging.basicConfig(format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s', datefmt='%H:%M', level=logging.DEBUG)


In [3]:
# Root directories that every other path in this notebook is derived from.
# Each can be overridden with an environment variable (MADMINER_BASE_DIR,
# MADMINER_MG_DIR) so the notebook runs on other machines without editing this
# cell; the defaults are the original hard-coded locations.
# os.path.join(path, '') guarantees a trailing separator, which the plain
# string concatenations that build the derived paths rely on.
base_dir = os.path.join(
    os.environ.get('MADMINER_BASE_DIR', '/Users/johannbrehmer/work/projects/madminer/diboson_mining/'),
    ''
)
mg_dir = os.path.join(
    os.environ.get('MADMINER_MG_DIR', '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_2/'),
    ''
)

In [4]:
# Paths derived from the two root directories. Each one is the root followed
# by a fixed relative suffix, so the roots must end with a path separator.

# Under base_dir. sample_dir (training arrays) and model_dir (saved ensembles)
# are the two that train_ensemble reads from / writes to.
sample_dir = '{}data/samples/wgamma/'.format(base_dir)
card_dir = '{}cards/wgamma/'.format(base_dir)
ufo_model_dir = '{}SMWgamma_UFO'.format(card_dir)
run_card_dir = '{}run_cards/'.format(card_dir)
mg_process_dir = '{}data/mg_processes/wgamma/'.format(base_dir)
log_dir = '{}logs/wgamma/'.format(base_dir)
temp_dir = '{}data/temp'.format(base_dir)
model_dir = '{}data/models/wgamma/'.format(base_dir)

# Under mg_dir.
delphes_dir = '{}Delphes'.format(mg_dir)

## Settings

In [5]:
# Number of estimators in each ensemble. It is used as the default value of
# train_ensemble's `n_estimators` argument and to size the per-estimator
# `features` list in the last training cell.
# Note: the default is bound when train_ensemble is defined, so re-run the
# cell defining train_ensemble after changing this value.
n_estimators = 1

## Training function

In [6]:
def train_ensemble(filename, use_tight_cuts=True, n_estimators=n_estimators, **kwargs):
    """Train an ensemble with the 'rascal' method and save it under model_dir.

    Parameters
    ----------
    filename : str
        Suffix appended to ``model_dir + 'rascal_ensemble_'`` to form the
        location passed to ``ensemble.save``.
    use_tight_cuts : bool, optional
        If True, training arrays are read from ``train_ratio_tight/`` inside
        ``sample_dir``; otherwise from ``train_ratio/``.
    n_estimators : int, optional
        Number of estimators in the ensemble. Estimator ``i`` is given the
        files whose names end in ``_train_<i>.npy``. The default is the value
        of the module-level ``n_estimators`` at definition time.
    **kwargs
        Passed through unchanged to ``EnsembleForge.train_all``.
    """
    suffix = '_tight' if use_tight_cuts else ''

    def files_for(kind):
        # One path per estimator index:
        # <sample_dir>train_ratio<suffix>/<kind>_train_<index>.npy
        return [
            sample_dir + 'train_ratio{}/{}_train_{}.npy'.format(suffix, kind, index)
            for index in range(n_estimators)
        ]

    forge = EnsembleForge(n_estimators, debug=False)

    # Note the keyword is t_xz0_filename while the files are named t_xz_train_*.
    forge.train_all(
        method='rascal',
        x_filename=files_for('x'),
        theta0_filename=files_for('theta0'),
        t_xz0_filename=files_for('t_xz'),
        r_xz_filename=files_for('r_xz'),
        y_filename=files_for('y'),
        **kwargs
    )

    forge.save(model_dir + 'rascal_ensemble_' + filename)

## All observables

In [None]:
# Train on the samples without tight cuts (read from train_ratio/) and save
# the result as rascal_ensemble_all. n_epochs is forwarded to train_all via
# train_ensemble's **kwargs.
train_ensemble(
    'all',
    use_tight_cuts=False,
    n_epochs=10
)

11:56  Training 1 estimators in ensemble
11:56  Training estimator 1 / 1 in ensemble
11:56  Starting training
11:56    Method:                 rascal
11:56    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_ratio/x_train_0.npy
11:56                   theta0 at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_ratio/theta0_train_0.npy
11:56                   y at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_ratio/y_train_0.npy
11:56                   r_xz at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_ratio/r_xz_train_0.npy
11:56                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_ratio/t_xz_train_0.npy
11:56    Features:               all
11:56    Method:                 rascal
11:56    Hidden layers:          (100, 100, 100, 100)
11:56    Activati

In [None]:
# Same training as above but on the tight-cut samples (read from
# train_ratio_tight/); saved as rascal_ensemble_all_tight.
train_ensemble(
    'all_tight',
    use_tight_cuts=True,
    n_epochs=10
)

08:37  Training 1 estimators in ensemble
08:37  Training estimator 1 / 1 in ensemble
08:37  Starting training
08:37    Method:                 rascal
08:37    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_ratio_tight/x_train_0.npy
08:37                   theta0 at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_ratio_tight/theta0_train_0.npy
08:37                   y at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_ratio_tight/y_train_0.npy
08:37                   r_xz at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_ratio_tight/r_xz_train_0.npy
08:37                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_ratio_tight/t_xz_train_0.npy
08:37    Features:               all
08:37    Method:                 rascal
08:37    Hidden layers:          (100, 10

## Just resurrection phi

In [None]:
# Train on the tight-cut samples with each estimator restricted to input
# feature index 26 -- presumably the resurrection phi observable named in the
# section heading; TODO confirm the index against the observable ordering.
# The features list is sized by the module-level n_estimators, which must match
# the n_estimators default that train_ensemble captured when it was defined.
# NOTE(review): unlike the runs above, no n_epochs is passed here, so whatever
# default train_all uses applies; and the saved name ('resurrection') carries
# no '_tight' marker although tight cuts are used -- confirm both are intended.
train_ensemble(
    'resurrection',
    use_tight_cuts=True,
    features=[[26] for _ in range(n_estimators)],
)