# Train SALLY ensemble

Johann Brehmer, Kyle Cranmer, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
% matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

# Log records are rendered as "<HH:MM>  <message>".
logging.basicConfig(datefmt='%H:%M', format='%(asctime)s  %(message)s')

In [2]:
# Root folders of the project checkout and of the MadGraph installation.
# Both can be overridden through environment variables so the notebook runs on
# other machines; the defaults are the original author's local paths.
# os.path.join(..., '') guarantees the trailing separator that the plain string
# concatenations in the following cells rely on.
base_dir = os.path.join(
    os.environ.get('MADMINER_BASE_DIR', '/Users/johannbrehmer/work/projects/madminer/diboson_mining/'),
    ''
)
mg_dir = os.path.join(
    os.environ.get('MADMINER_MG_DIR', '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_2/'),
    ''
)

In [3]:
# Locations derived from base_dir / mg_dir by plain string concatenation.
# Nothing is created or checked here; these are only path strings.

# Cards (card_dir must be set before the two paths built from it)
card_dir = base_dir + 'cards/wgamma/'
run_card_dir = card_dir + 'run_cards/'
ufo_model_dir = card_dir + 'SMWgamma_UFO'

# Data products
mg_process_dir = base_dir + 'data/mg_processes/wgamma/'
sample_dir = base_dir + 'data/samples/wgamma/'
model_dir = base_dir + 'data/models/wgamma/'
temp_dir = base_dir + 'data/temp'

# Logs
log_dir = base_dir + 'logs/wgamma/'

# Folder inside the MadGraph directory
delphes_dir = mg_dir + 'Delphes'

## Settings

In [4]:
# Number of estimators per ensemble (same value as the n_estimators default of train_ensemble below)
n_estimators = 10

## Training function

In [5]:
def train_ensemble(filename, use_tight_cuts=True, n_estimators=10, **kwargs):
    """
    Train an ensemble with method 'sally', calculate its expectation on the
    validation sample, and save it.

    Parameters
    ----------
    filename : str
        Suffix of the saved ensemble, which is written to
        `model_dir + 'sally_ensemble_' + filename`.
    use_tight_cuts : bool, optional
        If True, training and validation data are read from the folders
        carrying the `_tight` suffix. Default: True.
    n_estimators : int, optional
        Number of estimators in the ensemble. Estimator i is trained on
        `x_train_i.npy` and `t_xz_train_i.npy`. Default: 10.
    **kwargs
        Passed on unchanged to `EnsembleForge.train_all()`.

    Returns
    -------
    None
    """
    if use_tight_cuts:
        suffix = '_tight'
    else:
        suffix = ''

    forge = EnsembleForge(n_estimators, debug=False)

    # One (x, t_xz) training file pair per estimator
    train_dir = sample_dir + 'train_local{}/'.format(suffix)
    x_files = []
    for index in range(n_estimators):
        x_files.append(train_dir + 'x_train_{}.npy'.format(index))
    t_xz_files = []
    for index in range(n_estimators):
        t_xz_files.append(train_dir + 't_xz_train_{}.npy'.format(index))

    forge.train_all(method='sally', x_filename=x_files, t_xz0_filename=t_xz_files, **kwargs)

    validation_file = sample_dir + 'validation{}/x_validation.npy'.format(suffix)
    forge.calculate_expectation(x_filename=validation_file)

    target = model_dir + 'sally_ensemble_' + filename
    forge.save(target)

## All observables

In [6]:
# All observables, without the tight cuts. The ensemble size is taken from the
# n_estimators setting instead of silently relying on the function default.
train_ensemble('all', use_tight_cuts=False, n_estimators=n_estimators)

13:56  
13:56  ------------------------------------------------------------
13:56  |                                                          |
13:56  |  MadMiner v2018.11.12                                    |
13:56  |                                                          |
13:56  |           Johann Brehmer, Kyle Cranmer, and Felix Kling  |
13:56  |                                                          |
13:56  ------------------------------------------------------------
13:56  
13:56  Training 10 estimators in ensemble
13:56  Training estimator 1 / 10 in ensemble
13:56  Starting training
13:56    Method:                 sally
13:56    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
13:56                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
13:56    Features:               all
13:56    Method:                 sal

18:18  Rescaling inputs
18:18  Creating model for method sally
18:18  Training model
18:24    Epoch 5: train loss 3.8643 (mse_score: 3.8643)
18:32    Epoch 10: train loss 3.7167 (mse_score: 3.7167)
18:42    Epoch 15: train loss 3.6556 (mse_score: 3.6556)
18:46    Epoch 20: train loss 3.5386 (mse_score: 3.5386)
18:51    Epoch 25: train loss 3.4401 (mse_score: 3.4401)
18:56    Epoch 30: train loss 3.3023 (mse_score: 3.3023)
19:01    Epoch 35: train loss 3.2202 (mse_score: 3.2202)
19:06    Epoch 40: train loss 3.1628 (mse_score: 3.1628)
19:10    Epoch 45: train loss 3.1367 (mse_score: 3.1367)
19:15    Epoch 50: train loss 3.1219 (mse_score: 3.1219)
19:15  Finished training
19:15  Training estimator 6 / 10 in ensemble
19:15  Starting training
19:15    Method:                 sally
19:15    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_5.npy
19:15                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/m

23:20    Regularization:         None
23:20  Loading training data
23:20  Found 1000000 samples with 2 parameters and 27 observables
23:20  Rescaling inputs
23:20  Creating model for method sally
23:20  Training model
23:26    Epoch 5: train loss 7.2573 (mse_score: 7.2573)
23:32    Epoch 10: train loss 7.1772 (mse_score: 7.1772)
23:38    Epoch 15: train loss 7.1024 (mse_score: 7.1024)
23:44    Epoch 20: train loss 7.0042 (mse_score: 7.0042)
23:50    Epoch 25: train loss 6.9463 (mse_score: 6.9463)
23:56    Epoch 30: train loss 6.8851 (mse_score: 6.8851)
00:01    Epoch 35: train loss 6.8331 (mse_score: 6.8331)
00:05    Epoch 40: train loss 6.8053 (mse_score: 6.8053)
00:09    Epoch 45: train loss 6.7862 (mse_score: 6.7862)
00:14    Epoch 50: train loss 6.7776 (mse_score: 6.7776)
00:14  Finished training
00:14  Calculating expectation for 10 estimators in ensemble
00:14  Starting evaluation for estimator 1 / 10 in ensemble
00:14  Starting evaluation for estimator 2 / 10 in ensemble
00:14  

In [7]:
# All observables, with the tight cuts. The ensemble size is taken from the
# n_estimators setting instead of silently relying on the function default.
train_ensemble('all_tight', use_tight_cuts=True, n_estimators=n_estimators)

00:17  Training 10 estimators in ensemble
00:17  Training estimator 1 / 10 in ensemble
00:17  Starting training
00:17    Method:                 sally
00:17    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
00:17                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
00:17    Features:               all
00:17    Method:                 sally
00:17    Hidden layers:          (100, 100, 100, 100)
00:17    Activation function:    tanh
00:17    Batch size:             128
00:17    Trainer:                amsgrad
00:17    Epochs:                 50
00:17    Learning rate:          0.01 initially, decaying to 0.0001
00:17    Validation split:       None
00:17    Early stopping:         True
00:17    Scale inputs:           True
00:17    Regularization:         None
00:17  Loading training data
00:17  Found 1000000 s

03:42    Epoch 20: train loss 352.5294 (mse_score: 352.5294)
03:47    Epoch 25: train loss 348.0134 (mse_score: 348.0134)
03:51    Epoch 30: train loss 344.3583 (mse_score: 344.3583)
03:56    Epoch 35: train loss 342.1161 (mse_score: 342.1161)
04:01    Epoch 40: train loss 341.0843 (mse_score: 341.0843)
04:05    Epoch 45: train loss 340.6171 (mse_score: 340.6171)
04:10    Epoch 50: train loss 340.4170 (mse_score: 340.4170)
04:10  Finished training
04:10  Training estimator 6 / 10 in ensemble
04:10  Starting training
04:10    Method:                 sally
04:10    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_5.npy
04:10                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_5.npy
04:10    Features:               all
04:10    Method:                 sally
04:10    Hidden layers:          (100, 100, 100, 100)
04:10    Acti

07:17    Regularization:         None
07:17  Loading training data
07:17  Found 1000000 samples with 2 parameters and 27 observables
07:17  Rescaling inputs
07:17  Creating model for method sally
07:17  Training model
07:20    Epoch 5: train loss 452.5377 (mse_score: 452.5377)
07:23    Epoch 10: train loss 444.4663 (mse_score: 444.4663)
07:26    Epoch 15: train loss 436.2341 (mse_score: 436.2341)
07:29    Epoch 20: train loss 429.0399 (mse_score: 429.0399)
07:31    Epoch 25: train loss 421.1423 (mse_score: 421.1423)
07:34    Epoch 30: train loss 415.0749 (mse_score: 415.0749)
07:37    Epoch 35: train loss 410.7962 (mse_score: 410.7962)
07:40    Epoch 40: train loss 408.3174 (mse_score: 408.3174)
07:42    Epoch 45: train loss 407.0590 (mse_score: 407.0590)
07:46    Epoch 50: train loss 406.3461 (mse_score: 406.3461)
07:46  Finished training
07:46  Calculating expectation for 10 estimators in ensemble
07:46  Starting evaluation for estimator 1 / 10 in ensemble
07:46  Starting evaluation 

## Minimal observable basis

In [None]:
# Minimal observable basis, without the tight cuts.
# One feature list is supplied per estimator, so its length follows the
# n_estimators setting rather than a hard-coded 10.
train_ensemble(
    'minimal',
    use_tight_cuts=False,
    n_estimators=n_estimators,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

In [None]:
# Minimal observable basis, with the tight cuts.
# One feature list is supplied per estimator, so its length follows the
# n_estimators setting rather than a hard-coded 10.
train_ensemble(
    'minimal_tight',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

## Just resurrection phi

In [None]:
# Single-observable training (feature index 26 only), with the tight cuts.
# One feature list is supplied per estimator, so its length follows the
# n_estimators setting rather than a hard-coded 10.
train_ensemble(
    'resurrection',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    features=[[26] for _ in range(n_estimators)]
)

08:15  Training 10 estimators in ensemble
08:15  Training estimator 1 / 10 in ensemble
08:15  Starting training
08:15    Method:                 sally
08:15    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
08:15                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
08:15    Features:               [26]
08:15    Method:                 sally
08:15    Hidden layers:          (100, 100, 100, 100)
08:15    Activation function:    tanh
08:15    Batch size:             128
08:15    Trainer:                amsgrad
08:15    Epochs:                 50
08:15    Learning rate:          0.01 initially, decaying to 0.0001
08:15    Validation split:       None
08:15    Early stopping:         True
08:15    Scale inputs:           True
08:15    Regularization:         None
08:15  Loading training data
08:15  Found 1000000 

11:22    Epoch 5: train loss 428.2557 (mse_score: 428.2557)
11:25    Epoch 10: train loss 428.0868 (mse_score: 428.0868)
11:28    Epoch 15: train loss 427.9306 (mse_score: 427.9306)
11:32    Epoch 20: train loss 427.8273 (mse_score: 427.8273)
11:35    Epoch 25: train loss 427.7919 (mse_score: 427.7919)
11:39    Epoch 30: train loss 427.8116 (mse_score: 427.8116)
11:42    Epoch 35: train loss 427.7349 (mse_score: 427.7349)
11:46    Epoch 40: train loss 427.7345 (mse_score: 427.7345)
11:49    Epoch 45: train loss 427.7359 (mse_score: 427.7359)
11:53    Epoch 50: train loss 427.7105 (mse_score: 427.7105)
11:53  Finished training
11:53  Training estimator 6 / 10 in ensemble
11:53  Starting training
11:53    Method:                 sally
11:53    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_5.npy
11:53                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wga

15:57    Activation function:    tanh
15:57    Batch size:             128
15:57    Trainer:                amsgrad
15:57    Epochs:                 50
15:57    Learning rate:          0.01 initially, decaying to 0.0001
15:57    Validation split:       None
15:57    Early stopping:         True
15:57    Scale inputs:           True
15:57    Regularization:         None
15:57  Loading training data
15:57  Found 1000000 samples with 2 parameters and 27 observables
15:57  Rescaling inputs
15:57  Only using 1 of 27 observables
15:57  Creating model for method sally
15:57  Training model
16:00    Epoch 5: train loss 507.3529 (mse_score: 507.3529)
16:03    Epoch 10: train loss 507.0352 (mse_score: 507.0352)
16:07    Epoch 15: train loss 506.7964 (mse_score: 506.7964)
16:11    Epoch 20: train loss 506.6972 (mse_score: 506.6972)
