# Train SALLY ensemble

Johann Brehmer, Kyle Cranmer, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
% matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

# Log records are shown as "<HH:MM>  <message>"
logging.basicConfig(
    format='%(asctime)s  %(message)s',
    datefmt='%H:%M',
)

In [2]:
# Machine-specific root folders. Both end with '/' because every other path in this
# notebook is built from them by plain string concatenation.
base_dir, mg_dir = (
    '/Users/johannbrehmer/work/projects/madminer/diboson_mining/',
    '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_2/',
)

In [3]:
# Data, model and log locations below base_dir (string concatenation: base_dir must end with '/')
sample_dir = base_dir + 'data/samples/wgamma/'
model_dir = base_dir + 'data/models/wgamma/'
mg_process_dir = base_dir + 'data/mg_processes/wgamma/'
temp_dir = base_dir + 'data/temp'
log_dir = base_dir + 'logs/wgamma/'

# Card folders: the UFO model and the run cards live inside card_dir
card_dir = base_dir + 'cards/wgamma/'
ufo_model_dir = card_dir + 'SMWgamma_UFO'
run_card_dir = card_dir + 'run_cards/'

# Located below mg_dir
delphes_dir = mg_dir + 'Delphes'

## Settings

In [4]:
# Intended number of estimators per ensemble
n_estimators = 10

## Training function

In [5]:
def train_ensemble(filename, use_tight_cuts=True, n_estimators=10, **kwargs):
    """Train an ensemble of SALLY estimators, calculate its expectation and save it.

    Parameters
    ----------
    filename : str
        Suffix of the output name; the ensemble is saved to
        ``model_dir + 'sally_ensemble_' + filename``.
    use_tight_cuts : bool, optional
        If True, samples are read from the ``train_local_tight`` and ``validation_tight``
        folders below ``sample_dir``; otherwise from ``train_local`` and ``validation``.
    n_estimators : int, optional
        Number of estimators in the ensemble. Estimator ``i`` is trained on
        ``x_train_<i>.npy`` and ``t_xz_train_<i>.npy``.
    **kwargs
        Passed on unchanged to ``EnsembleForge.train_all``.

    Raises
    ------
    IOError
        If any required training or validation sample file does not exist. This is checked
        before training starts, so a missing file is reported immediately instead of after
        a (potentially hours-long) training run.
    """
    cut_label = '_tight' if use_tight_cuts else ''

    # Build all input paths up front so they can be verified before any training happens
    x_filenames = [
        sample_dir + 'train_local{}/x_train_{}.npy'.format(cut_label, i)
        for i in range(n_estimators)
    ]
    t_xz_filenames = [
        sample_dir + 'train_local{}/t_xz_train_{}.npy'.format(cut_label, i)
        for i in range(n_estimators)
    ]
    x_validation_filename = sample_dir + 'validation{}/x_validation.npy'.format(cut_label)

    # Fail fast: the validation file in particular is only needed after all estimators are
    # trained, and the ensemble is only saved after that step.
    missing = [
        path
        for path in x_filenames + t_xz_filenames + [x_validation_filename]
        if not os.path.isfile(path)
    ]
    if missing:
        raise IOError('Missing sample files: {}'.format(', '.join(missing)))

    ensemble = EnsembleForge(n_estimators, debug=False)

    ensemble.train_all(
        method='sally',
        x_filename=x_filenames,
        t_xz0_filename=t_xz_filenames,
        **kwargs
    )

    ensemble.calculate_expectation(
        x_filename=x_validation_filename
    )

    ensemble.save(model_dir + 'sally_ensemble_' + filename)

## All observables

In [None]:
# Pass the notebook-level setting explicitly; otherwise the function's own default is used
train_ensemble('all', use_tight_cuts=False, n_estimators=n_estimators)

In [7]:
# Pass the notebook-level setting explicitly; otherwise the function's own default is used
train_ensemble('all_tight', use_tight_cuts=True, n_estimators=n_estimators)

19:15  Training 10 estimators in ensemble
19:15  Training estimator 1 / 10 in ensemble
19:15  Starting training
19:15    Method:                 sally
19:15    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
19:15                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
19:15    Features:               all
19:15    Method:                 sally
19:15    Hidden layers:          (100, 100)
19:15    Activation function:    tanh
19:15    Batch size:             128
19:15    Trainer:                amsgrad
19:15    Epochs:                 50
19:15    Learning rate:          0.001 initially, decaying to 0.0001
19:15    Validation split:       0.25
19:15    Early stopping:         True
19:15    Scale inputs:           True
19:15  Loading training data
19:15  Found 1000000 samples with 2 parameters and 26 observables
19:

21:04    Epoch 40: train loss 5622.86 ([5622.85920959]), validation loss 4224.38 ([4224.37583181]) (*)
21:07    Epoch 45: train loss 5605.31 ([5605.3090844]), validation loss 4217.34 ([4217.33883237])
21:10    Epoch 50: train loss 5591.35 ([5591.34837712]), validation loss 4210.73 ([4210.73216875]) (*)
21:10  Early stopping did not improve performance
21:10  Finished training
21:10  Training estimator 5 / 10 in ensemble
21:10  Starting training
21:10    Method:                 sally
21:10    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_4.npy
21:10                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_4.npy
21:10    Features:               all
21:10    Method:                 sally
21:10    Hidden layers:          (100, 100)
21:10    Activation function:    tanh
21:10    Batch size:             128
21:10    Trainer:    

22:45    Epoch 25: train loss 4508.96 ([4508.96085884]), validation loss 4228.42 ([4228.42056288]) (*)
22:48    Epoch 30: train loss 4467.84 ([4467.83964272]), validation loss 4202.91 ([4202.9077541]) (*)
22:50    Epoch 35: train loss 4434.13 ([4434.12668301]), validation loss 4185.31 ([4185.30728255]) (*)
22:52    Epoch 40: train loss 4407.15 ([4407.14500307]), validation loss 4167.85 ([4167.84575266]) (*)
22:54    Epoch 45: train loss 4387.30 ([4387.29530211]), validation loss 4157.52 ([4157.52218282]) (*)
22:56    Epoch 50: train loss 4373.32 ([4373.31881845]), validation loss 4148.20 ([4148.19841218]) (*)
22:56  Early stopping did not improve performance
22:56  Finished training
22:56  Training estimator 9 / 10 in ensemble
22:56  Starting training
22:56    Method:                 sally
22:56    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_8.npy
22:56                   t_xz (theta0) at  /Users/johannbreh

In [None]:
# Same samples as the tight-cut run, but trained with SGD + Nesterov momentum and a
# larger learning-rate schedule. The extra keyword arguments are forwarded to train_all().
train_ensemble(
    'all_tight_sgd',
    use_tight_cuts=True,
    n_estimators=n_estimators,  # use the notebook-level setting instead of the function default
    trainer='sgd',
    nesterov_momentum=0.9,
    initial_lr=0.1,
    final_lr=0.01
)

In [None]:
# NOTE(review): apart from the output name, this call passes exactly the same options as the
# 'all_tight' run above -- confirm whether a restriction of the input features (or another
# training option) was meant to be passed here.
train_ensemble('resurrection_tight', use_tight_cuts=True, n_estimators=n_estimators)