# Train SALLY ensemble

Johann Brehmer, Kyle Cranmer, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
% matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

# Prefix every log message with the wall-clock time (hours:minutes only).
logging.basicConfig(
    datefmt='%H:%M',
    format='%(asctime)s  %(message)s',
)

In [None]:
# Root directories of the analysis project and of the MadGraph installation.
# Both can be overridden through environment variables so that the notebook
# also runs on other machines; the defaults are the original local paths.
# os.path.join(path, '') guarantees the trailing separator that the derived
# paths built from these two variables rely on.
base_dir = os.path.join(
    os.environ.get(
        'DIBOSON_MINING_DIR',
        '/Users/johannbrehmer/work/projects/madminer/diboson_mining/'
    ),
    ''
)
mg_dir = os.path.join(
    os.environ.get(
        'MG5_DIR',
        '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_2/'
    ),
    ''
)

In [3]:
# Locations below the project root (base_dir is expected to end with a separator).
sample_dir = '{}data/samples/wgamma/'.format(base_dir)
card_dir = '{}cards/wgamma/'.format(base_dir)
# Locations below the card directory.
ufo_model_dir = '{}SMWgamma_UFO'.format(card_dir)
run_card_dir = '{}run_cards/'.format(card_dir)
# Further locations below the project root.
mg_process_dir = '{}data/mg_processes/wgamma/'.format(base_dir)
log_dir = '{}logs/wgamma/'.format(base_dir)
temp_dir = '{}data/temp'.format(base_dir)
# Location below the MadGraph directory.
delphes_dir = '{}Delphes'.format(mg_dir)
# Directory that train_ensemble() writes the trained ensembles to.
model_dir = '{}data/models/wgamma/'.format(base_dir)

## Settings

In [4]:
# Ensemble size setting: number of estimators per ensemble. One training
# sample file per estimator index (0 .. n_estimators - 1) is read during training.
n_estimators = 10

## Training function

In [5]:
def train_ensemble(filename, use_tight_cuts=True, n_estimators=10, **kwargs):
    """Train a SALLY ensemble, compute its expectation and save it.

    Parameters
    ----------
    filename : str
        Suffix of the output name; the ensemble is saved under
        ``model_dir + 'sally_ensemble_' + filename``.
    use_tight_cuts : bool
        If True, the ``'_tight'`` variants of the training and validation
        sample folders are read; otherwise the folders without that suffix.
    n_estimators : int
        Number of estimators in the ensemble. Estimator ``i`` is trained on
        ``x_train_<i>.npy`` and ``t_xz_train_<i>.npy``.
    **kwargs
        Forwarded unchanged to ``EnsembleForge.train_all`` (the notebook uses
        this for e.g. ``features`` and ``grad_x_regularization``).
    """
    if use_tight_cuts:
        suffix = '_tight'
    else:
        suffix = ''

    forge = EnsembleForge(n_estimators, debug=False)

    # One pair of training files (observables x, joint score t_xz) per estimator.
    x_paths = []
    t_xz_paths = []
    for index in range(n_estimators):
        x_paths.append(sample_dir + 'train_local{}/x_train_{}.npy'.format(suffix, index))
        t_xz_paths.append(sample_dir + 'train_local{}/t_xz_train_{}.npy'.format(suffix, index))

    forge.train_all(
        method='sally',
        x_filename=x_paths,
        t_xz0_filename=t_xz_paths,
        **kwargs
    )

    # The expectation is evaluated on the separate validation sample.
    validation_path = sample_dir + 'validation{}/x_validation.npy'.format(suffix)
    forge.calculate_expectation(x_filename=validation_path)

    output_path = model_dir + 'sally_ensemble_' + filename
    forge.save(output_path)

## All observables

In [None]:
# All observables, samples without the tight-cut suffix. The ensemble size is
# taken from the Settings cell instead of silently using the function default.
train_ensemble('all', use_tight_cuts=False, n_estimators=n_estimators)

In [None]:
# All observables, tight-cut samples. The ensemble size is taken from the
# Settings cell instead of silently using the function default.
train_ensemble('all_tight', use_tight_cuts=True, n_estimators=n_estimators)

## Minimal observable basis

In [6]:
# Minimal observable basis (seven observable indices), samples without the
# tight-cut suffix. Both the ensemble size and the length of the per-estimator
# feature list come from the Settings cell, so the two cannot drift apart.
train_ensemble(
    'minimal',
    use_tight_cuts=False,
    n_estimators=n_estimators,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

10:17  
10:17  ------------------------------------------------------------
10:17  |                                                          |
10:17  |  MadMiner v2018.11.06                                    |
10:17  |                                                          |
10:17  |           Johann Brehmer, Kyle Cranmer, and Felix Kling  |
10:17  |                                                          |
10:17  ------------------------------------------------------------
10:17  
10:17  Training 10 estimators in ensemble
10:17  Training estimator 1 / 10 in ensemble
10:17  Starting training
10:17    Method:                 sally
10:17    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
10:17                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
10:17    Features:               [0, 5, 6, 7, 9, 10, 11]
10:17    Method:

12:10    Method:                 sally
12:10    Hidden layers:          (100, 100)
12:10    Activation function:    tanh
12:10    Batch size:             128
12:10    Trainer:                amsgrad
12:10    Epochs:                 50
12:10    Learning rate:          0.001 initially, decaying to 0.0001
12:10    Validation split:       0.25
12:10    Early stopping:         True
12:10    Scale inputs:           True
12:10    Regularization:         None
12:10  Loading training data
12:10  Found 1000000 samples with 2 parameters and 27 observables
12:10  Rescaling inputs
12:10  Only using 7 of 27 observables
12:10  Creating model for method sally
12:10  Training model
12:13    Epoch 5: train loss 27.9968 (mse_score: 27.9968)
12:13              val. loss  56.4557 (mse_score: 56.4557)
12:16    Epoch 10: train loss 27.9399 (mse_score: 27.9399)
12:16              val. loss  56.4850 (mse_score: 56.4850)
12:20    Epoch 15: train loss 27.8691 (mse_score: 27.8691)
12:20              val. loss  56

13:59              val. loss  15.2806 (mse_score: 15.2806)
14:02    Epoch 20: train loss 27.0021 (mse_score: 27.0021)
14:02              val. loss  15.3122 (mse_score: 15.3122)
14:06    Epoch 25: train loss 26.7801 (mse_score: 26.7801)
14:06              val. loss  15.2731 (mse_score: 15.2731)
14:10    Epoch 30: train loss 26.6030 (mse_score: 26.6030)
14:10              val. loss  15.3450 (mse_score: 15.3450)
14:14    Epoch 35: train loss 26.4305 (mse_score: 26.4305)
14:14              val. loss  15.3525 (mse_score: 15.3525)
14:17    Epoch 40: train loss 26.2876 (mse_score: 26.2876)
14:17              val. loss  15.3528 (mse_score: 15.3528)
14:21    Epoch 45: train loss 26.1920 (mse_score: 26.1920)
14:21              val. loss  15.3670 (mse_score: 15.3670)
14:24    Epoch 50: train loss 26.0938 (mse_score: 26.0938)
14:24              val. loss  15.3836 (mse_score: 15.3836)
14:24  Early stopping after epoch 11, with loss 15.23 compared to final loss 15.38
14:24  Finished training
14:24  

16:06  Finished training
16:06  Calculating expectation for 10 estimators in ensemble
16:06  Starting evaluation for estimator 1 / 10 in ensemble
16:06  Starting evaluation for estimator 2 / 10 in ensemble
16:06  Starting evaluation for estimator 3 / 10 in ensemble
16:06  Starting evaluation for estimator 4 / 10 in ensemble
16:07  Starting evaluation for estimator 5 / 10 in ensemble
16:07  Starting evaluation for estimator 6 / 10 in ensemble
16:07  Starting evaluation for estimator 7 / 10 in ensemble
16:07  Starting evaluation for estimator 8 / 10 in ensemble
16:08  Starting evaluation for estimator 9 / 10 in ensemble
16:08  Starting evaluation for estimator 10 / 10 in ensemble


In [7]:
# Minimal observable basis (seven observable indices), tight-cut samples.
# Both the ensemble size and the length of the per-estimator feature list come
# from the Settings cell, so the two cannot drift apart.
train_ensemble(
    'minimal_tight',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

17:45  Training 10 estimators in ensemble
17:45  Training estimator 1 / 10 in ensemble
17:45  Starting training
17:45    Method:                 sally
17:45    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
17:45                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
17:45    Features:               [0, 5, 6, 7, 9, 10, 11]
17:45    Method:                 sally
17:45    Hidden layers:          (100, 100)
17:45    Activation function:    tanh
17:45    Batch size:             128
17:45    Trainer:                amsgrad
17:45    Epochs:                 50
17:45    Learning rate:          0.001 initially, decaying to 0.0001
17:45    Validation split:       0.25
17:45    Early stopping:         True
17:45    Scale inputs:           True
17:45    Regularization:         None
17:45  Loading training data
17:45  Foun

19:16    Epochs:                 50
19:16    Learning rate:          0.001 initially, decaying to 0.0001
19:16    Validation split:       0.25
19:16    Early stopping:         True
19:16    Scale inputs:           True
19:16    Regularization:         None
19:16  Loading training data
19:16  Found 1000000 samples with 2 parameters and 27 observables
19:16  Rescaling inputs
19:16  Only using 7 of 27 observables
19:16  Creating model for method sally
19:16  Training model
19:18    Epoch 5: train loss 6256.6170 (mse_score: 6256.6170)
19:18              val. loss  5746.6299 (mse_score: 5746.6299) (*)
19:20    Epoch 10: train loss 6213.3620 (mse_score: 6213.3620)
19:20              val. loss  5713.3855 (mse_score: 5713.3855) (*)
19:22    Epoch 15: train loss 6171.7914 (mse_score: 6171.7914)
19:22              val. loss  5684.1580 (mse_score: 5684.1580) (*)
19:24    Epoch 20: train loss 6137.1892 (mse_score: 6137.1892)
19:24              val. loss  5680.8732 (mse_score: 5680.8732)
19:26    E

20:34    Epoch 15: train loss 5985.0754 (mse_score: 5985.0754)
20:34              val. loss  4580.2710 (mse_score: 4580.2710) (*)
20:36    Epoch 20: train loss 5955.6626 (mse_score: 5955.6626)
20:36              val. loss  4558.9926 (mse_score: 4558.9926) (*)
20:39    Epoch 25: train loss 5931.3096 (mse_score: 5931.3096)
20:39              val. loss  4542.4598 (mse_score: 4542.4598) (*)
20:42    Epoch 30: train loss 5912.1207 (mse_score: 5912.1207)
20:42              val. loss  4529.3317 (mse_score: 4529.3317) (*)
20:45    Epoch 35: train loss 5896.3959 (mse_score: 5896.3959)
20:45              val. loss  4519.0245 (mse_score: 4519.0245) (*)
20:48    Epoch 40: train loss 5884.4619 (mse_score: 5884.4619)
20:48              val. loss  4510.9803 (mse_score: 4510.9803) (*)
20:50    Epoch 45: train loss 5874.3930 (mse_score: 5874.3930)
20:50              val. loss  4503.8867 (mse_score: 4503.8867) (*)
20:52    Epoch 50: train loss 5866.3236 (mse_score: 5866.3236)
20:52              val. los

22:28    Epoch 40: train loss 5756.0633 (mse_score: 5756.0633)
22:28              val. loss  5544.0166 (mse_score: 5544.0166) (*)
22:31    Epoch 45: train loss 5742.0018 (mse_score: 5742.0018)
22:31              val. loss  5533.3237 (mse_score: 5533.3237) (*)
22:35    Epoch 50: train loss 5730.9445 (mse_score: 5730.9445)
22:35              val. loss  5525.4781 (mse_score: 5525.4781) (*)
22:35  Early stopping did not improve performance
22:35  Finished training
22:35  Calculating expectation for 10 estimators in ensemble
22:35  Starting evaluation for estimator 1 / 10 in ensemble
22:35  Starting evaluation for estimator 2 / 10 in ensemble
22:36  Starting evaluation for estimator 3 / 10 in ensemble
22:36  Starting evaluation for estimator 4 / 10 in ensemble
22:36  Starting evaluation for estimator 5 / 10 in ensemble
22:37  Starting evaluation for estimator 6 / 10 in ensemble
22:37  Starting evaluation for estimator 7 / 10 in ensemble
22:38  Starting evaluation for estimator 8 / 10 in ens

In [None]:
# Minimal observable basis with grad_x_regularization=0.1, samples without the
# tight-cut suffix. Both the ensemble size and the length of the per-estimator
# feature list come from the Settings cell, so the two cannot drift apart.
train_ensemble(
    'minimal_reg',
    use_tight_cuts=False,
    n_estimators=n_estimators,
    grad_x_regularization=0.1,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

In [None]:
# Minimal observable basis with grad_x_regularization=0.1, tight-cut samples.
# Both the ensemble size and the length of the per-estimator feature list come
# from the Settings cell, so the two cannot drift apart.
train_ensemble(
    'minimal_tight_reg',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    grad_x_regularization=0.1,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

## Just resurrection phi

In [None]:
# Single observable (index 26) as the only input, tight-cut samples.
# Both the ensemble size and the length of the per-estimator feature list come
# from the Settings cell, so the two cannot drift apart.
train_ensemble(
    'resurrection',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    features=[[26] for _ in range(n_estimators)]
)