# Train SALLY ensemble

Johann Brehmer, Kyle Cranmer, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
% matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

# Configure the root logger once for the notebook: each record is printed as
# "<HH:MM>  <message>".
logging.basicConfig(
    format='%(asctime)s  %(message)s',
    datefmt='%H:%M',
)

In [None]:
# Machine-specific install locations. Both live under the same parent folder.
# The trailing slashes are significant: other cells build sub-paths from these
# values by plain string concatenation.
_madminer_root = '/Users/johannbrehmer/work/projects/madminer/'
base_dir = _madminer_root + 'diboson_mining/'
mg_dir = _madminer_root + 'MG5_aMC_v2_6_2/'

In [None]:
# Locations derived from the project root (base_dir) for the "wgamma" process.
sample_dir = '{}data/samples/wgamma/'.format(base_dir)
mg_process_dir = '{}data/mg_processes/wgamma/'.format(base_dir)
model_dir = '{}data/models/wgamma/'.format(base_dir)
log_dir = '{}logs/wgamma/'.format(base_dir)
temp_dir = '{}data/temp'.format(base_dir)

# Card folders: the UFO model and the run cards both sit below card_dir.
card_dir = '{}cards/wgamma/'.format(base_dir)
ufo_model_dir = '{}SMWgamma_UFO'.format(card_dir)
run_card_dir = '{}run_cards/'.format(card_dir)

# Delphes is located inside the MadGraph installation folder (mg_dir).
delphes_dir = '{}Delphes'.format(mg_dir)

## Settings

In [None]:
# Ensemble size: passed to EnsembleForge and used as the number of
# per-estimator training files (indices 0 .. n_estimators - 1).
n_estimators = 10

# Network and optimisation settings handed to the training calls.
n_hidden = (100,) * 2
n_epochs, batch_size = 20, 128

# Learning-rate settings (presumably the start and end values of a decay
# schedule -- confirm against the training API).
initial_lr, final_lr = 1e-3, 1e-4

## All observables, no cut

In [None]:
# Ensemble trained with method 'sally' on the 'train_local' samples
# (all observables, no cut).
ensemble_all = EnsembleForge(n_estimators)

# The two filename arguments are lists with one file per estimator index;
# every other argument is a single value shared by all estimators.
# initial_lr / final_lr from the Settings cell are forwarded explicitly so that
# editing them there actually changes the training (previously they were
# defined but never passed on).
ensemble_all.train_all(
    method='sally',
    x_filename=[sample_dir + 'train_local/x_train_{}.npy'.format(i) for i in range(n_estimators)],
    t_xz0_filename=[sample_dir + 'train_local/t_xz_train_{}.npy'.format(i) for i in range(n_estimators)],
    n_epochs=n_epochs,
    batch_size=batch_size,
    initial_lr=initial_lr,
    final_lr=final_lr,
    validation_split=None,
    n_hidden=n_hidden
)

# Run calculate_expectation on the (uncut) validation sample.
ensemble_all.calculate_expectation(
    x_filename=sample_dir + 'validation/x_validation.npy'
)

# Persist the trained ensemble below model_dir.
ensemble_all.save(model_dir + 'sally_ensemble_all')

## All observables after cut

In [None]:
# Ensemble trained with method 'sally' on the 'train_local_tight' samples
# (all observables, after the cut).
ensemble_all_tight = EnsembleForge(n_estimators)

# The two filename arguments are lists with one file per estimator index;
# every other argument is a single value shared by all estimators.
# initial_lr / final_lr from the Settings cell are forwarded explicitly so that
# editing them there actually changes the training (previously they were
# defined but never passed on).
ensemble_all_tight.train_all(
    method='sally',
    x_filename=[sample_dir + 'train_local_tight/x_train_{}.npy'.format(i) for i in range(n_estimators)],
    t_xz0_filename=[sample_dir + 'train_local_tight/t_xz_train_{}.npy'.format(i) for i in range(n_estimators)],
    n_epochs=n_epochs,
    batch_size=batch_size,
    initial_lr=initial_lr,
    final_lr=final_lr,
    validation_split=None,
    n_hidden=n_hidden
)

# Run calculate_expectation on the 'validation_tight' sample.
ensemble_all_tight.calculate_expectation(
    x_filename=sample_dir + 'validation_tight/x_validation.npy'
)

# Persist the trained ensemble below model_dir.
ensemble_all_tight.save(model_dir + 'sally_ensemble_all_tight')

## Resurrection phi after cuts

In [None]:
# Ensemble trained with method 'sally' on the 'train_local_tight' samples,
# restricted to a single input feature.
ensemble_res = EnsembleForge(n_estimators)

# features gives every estimator the same one-element index list [29]
# (presumably the column of the "resurrection phi" observable named in the
# section heading -- confirm against the observable ordering of the samples).
# The filename arguments are lists with one file per estimator index; the
# remaining arguments are single values shared by all estimators.
# initial_lr / final_lr from the Settings cell are forwarded explicitly so that
# editing them there actually changes the training (previously they were
# defined but never passed on).
ensemble_res.train_all(
    features=[[29] for _ in range(n_estimators)],
    method='sally',
    x_filename=[sample_dir + 'train_local_tight/x_train_{}.npy'.format(i) for i in range(n_estimators)],
    t_xz0_filename=[sample_dir + 'train_local_tight/t_xz_train_{}.npy'.format(i) for i in range(n_estimators)],
    n_epochs=n_epochs,
    batch_size=batch_size,
    initial_lr=initial_lr,
    final_lr=final_lr,
    validation_split=None,
    n_hidden=n_hidden
)

# Run calculate_expectation on the 'validation_tight' sample.
ensemble_res.calculate_expectation(
    x_filename=sample_dir + 'validation_tight/x_validation.npy'
)

# Persist the trained ensemble below model_dir.
ensemble_res.save(model_dir + 'sally_ensemble_resurrection_tight')