# Train SALLY ensemble

Johann Brehmer, Kyle Cranmer, Marco Farina, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

# Configure the root logger's output format: an HH:MM timestamp followed by
# two spaces and the log message.
logging.basicConfig(format='%(asctime)s  %(message)s', datefmt='%H:%M')

In [2]:
# Root directories of the analysis repository and of the MadGraph installation.
# Each can be overridden through an environment variable so the notebook is
# not tied to a single machine; when the variable is unset or empty, the
# original hard-coded location is used.
# os.path.join(..., '') appends a trailing separator if it is missing, which
# the plain string concatenations in the following cells rely on.
base_dir = os.path.join(
    os.environ.get('DIBOSON_MINING_BASE_DIR')
    or '/Users/johannbrehmer/work/projects/madminer/diboson_mining/',
    ''
)
mg_dir = os.path.join(
    os.environ.get('MG5_DIR')
    or '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_2/',
    ''
)

In [3]:
# Locations read and written by the training function in this notebook:
# training / validation samples and the trained ensembles.
sample_dir = '{}data/samples/wgamma/'.format(base_dir)
model_dir = '{}data/models/wgamma/'.format(base_dir)

# Card locations (the UFO model and run cards live below card_dir).
card_dir = '{}cards/wgamma/'.format(base_dir)
ufo_model_dir = '{}SMWgamma_UFO'.format(card_dir)
run_card_dir = '{}run_cards/'.format(card_dir)

# Remaining project folders and the Delphes folder inside the MadGraph
# directory; none of these are referenced by the cells visible here.
mg_process_dir = '{}data/mg_processes/wgamma/'.format(base_dir)
log_dir = '{}logs/wgamma/'.format(base_dir)
temp_dir = '{}data/temp'.format(base_dir)
delphes_dir = '{}Delphes'.format(mg_dir)

## Settings

In [4]:
# Number of estimators per ensemble. It is the default for train_ensemble's
# n_estimators argument, which also determines how many per-estimator training
# files (x_train_{i}.npy / t_xz_train_{i}.npy) are read.
n_estimators = 10

## Training function

In [5]:
def train_ensemble(filename, use_tight_cuts=True, n_estimators=n_estimators, **kwargs):
    """Train an ensemble with method 'sally' and save it below model_dir.

    Parameters
    ----------
    filename : str
        Suffix of the saved ensemble; the output location is
        model_dir + 'sally_ensemble_' + filename.
    use_tight_cuts : bool, optional
        If True (default), samples are read from the 'train_local_tight' and
        'validation_tight' folders below sample_dir; otherwise from
        'train_local' and 'validation'.
    n_estimators : int, optional
        Number of estimators in the ensemble. Estimator i is trained on
        x_train_{i}.npy and t_xz_train_{i}.npy. The default is the value of
        the notebook-level n_estimators when this function was defined.
    **kwargs
        Forwarded unchanged to EnsembleForge.train_all (for example
        ``features``).
    """
    if use_tight_cuts:
        suffix = '_tight'
    else:
        suffix = ''

    forge = EnsembleForge(n_estimators, debug=False)

    # One pair of training files (observables and scores) per estimator.
    x_files = []
    t_xz_files = []
    for index in range(n_estimators):
        x_files.append(sample_dir + 'train_local{}/x_train_{}.npy'.format(suffix, index))
        t_xz_files.append(sample_dir + 'train_local{}/t_xz_train_{}.npy'.format(suffix, index))

    forge.train_all(
        method='sally',
        x_filename=x_files,
        t_xz0_filename=t_xz_files,
        **kwargs
    )

    # The expectation is calculated on the validation sample with the same cuts.
    validation_file = sample_dir + 'validation{}/x_validation.npy'.format(suffix)
    forge.calculate_expectation(x_filename=validation_file)

    output_path = model_dir + 'sally_ensemble_' + filename
    forge.save(output_path)

## All observables

In [6]:
# All observables, trained on the samples without the tight cuts.
train_ensemble('all', use_tight_cuts=False)

19:26  
19:26  ------------------------------------------------------------
19:26  |                                                          |
19:26  |  MadMiner v0.1.0                                         |
19:26  |                                                          |
19:26  |           Johann Brehmer, Kyle Cranmer, and Felix Kling  |
19:26  |                                                          |
19:26  ------------------------------------------------------------
19:26  
19:26  Training 10 estimators in ensemble
19:26  Training estimator 1 / 10 in ensemble
19:26  Starting training
19:26    Method:                 sally
19:26    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
19:26                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
19:26    Features:               all
19:26    Method:                 sal

20:27    Regularization:         None
20:27  Loading training data
20:27  Found 1000000 samples with 2 parameters and 27 observables
20:27  Rescaling inputs
20:27  Creating model for method sally
20:27  Training model
20:29    Epoch 5: train loss 3.3958 (mse_score: 3.3958)
20:31    Epoch 10: train loss 3.2031 (mse_score: 3.2031)
20:32    Epoch 15: train loss 3.0584 (mse_score: 3.0584)
20:34    Epoch 20: train loss 2.9610 (mse_score: 2.9610)
20:35    Epoch 25: train loss 2.8745 (mse_score: 2.8745)
20:37    Epoch 30: train loss 2.8114 (mse_score: 2.8114)
20:38    Epoch 35: train loss 2.7557 (mse_score: 2.7557)
20:40    Epoch 40: train loss 2.7131 (mse_score: 2.7131)
20:41    Epoch 45: train loss 2.6793 (mse_score: 2.6793)
20:43    Epoch 50: train loss 2.6543 (mse_score: 2.6543)
20:43  Finished training
20:43  Training estimator 6 / 10 in ensemble
20:43  Starting training
20:43    Method:                 sally
20:43    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboso

21:49    Trainer:                amsgrad
21:49    Epochs:                 50
21:49    Learning rate:          0.001 initially, decaying to 0.0001
21:49    Validation split:       None
21:49    Early stopping:         True
21:49    Scale inputs:           True
21:49    Shuffle labels          False
21:49    Regularization:         None
21:49  Loading training data
21:49  Found 1000000 samples with 2 parameters and 27 observables
21:49  Rescaling inputs
21:49  Creating model for method sally
21:49  Training model
21:51    Epoch 5: train loss 6.7792 (mse_score: 6.7792)
21:53    Epoch 10: train loss 6.5311 (mse_score: 6.5311)
21:54    Epoch 15: train loss 6.3307 (mse_score: 6.3307)
21:56    Epoch 20: train loss 6.1841 (mse_score: 6.1841)
21:58    Epoch 25: train loss 6.0529 (mse_score: 6.0529)
21:59    Epoch 30: train loss 5.9517 (mse_score: 5.9517)
22:01    Epoch 35: train loss 5.8785 (mse_score: 5.8785)
22:02    Epoch 40: train loss 5.8195 (mse_score: 5.8195)
22:03    Epoch 45: train los

In [7]:
# All observables, trained on the tight-cut samples.
train_ensemble('all_tight', use_tight_cuts=True)

22:06  Training 10 estimators in ensemble
22:06  Training estimator 1 / 10 in ensemble
22:06  Starting training
22:06    Method:                 sally
22:06    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
22:06                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
22:06    Features:               all
22:06    Method:                 sally
22:06    Hidden layers:          (100, 100)
22:06    Activation function:    tanh
22:06    Batch size:             128
22:06    Trainer:                amsgrad
22:06    Epochs:                 50
22:06    Learning rate:          0.001 initially, decaying to 0.0001
22:06    Validation split:       None
22:06    Early stopping:         True
22:06    Scale inputs:           True
22:06    Shuffle labels          False
22:06    Regularization:         None
22:06  Loading traini

23:11    Epoch 10: train loss 338.6960 (mse_score: 338.6960)
23:12    Epoch 15: train loss 332.8965 (mse_score: 332.8965)
23:14    Epoch 20: train loss 328.5067 (mse_score: 328.5067)
23:15    Epoch 25: train loss 325.4370 (mse_score: 325.4370)
23:17    Epoch 30: train loss 323.1936 (mse_score: 323.1936)
23:18    Epoch 35: train loss 322.2772 (mse_score: 322.2772)
23:20    Epoch 40: train loss 320.0165 (mse_score: 320.0165)
23:21    Epoch 45: train loss 318.8839 (mse_score: 318.8839)
23:23    Epoch 50: train loss 318.1191 (mse_score: 318.1191)
23:23  Finished training
23:23  Training estimator 6 / 10 in ensemble
23:23  Starting training
23:23    Method:                 sally
23:23    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_5.npy
23:23                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_5.npy
23:23    Features:   

00:24    Epochs:                 50
00:24    Learning rate:          0.001 initially, decaying to 0.0001
00:24    Validation split:       None
00:24    Early stopping:         True
00:24    Scale inputs:           True
00:24    Shuffle labels          False
00:24    Regularization:         None
00:24  Loading training data
00:24  Found 1000000 samples with 2 parameters and 27 observables
00:24  Rescaling inputs
00:24  Creating model for method sally
00:24  Training model
00:26    Epoch 5: train loss 421.4048 (mse_score: 421.4048)
00:27    Epoch 10: train loss 412.4377 (mse_score: 412.4377)
00:29    Epoch 15: train loss 406.4234 (mse_score: 406.4234)
00:30    Epoch 20: train loss 402.2608 (mse_score: 402.2608)
00:32    Epoch 25: train loss 398.9653 (mse_score: 398.9653)
00:33    Epoch 30: train loss 396.4053 (mse_score: 396.4053)
00:35    Epoch 35: train loss 393.9905 (mse_score: 393.9905)
00:36    Epoch 40: train loss 392.2333 (mse_score: 392.2333)
00:38    Epoch 45: train loss 390.969

## Minimal observable basis

In [8]:
# Indices of the observables kept in the minimal basis: 0-1, 4-11 and 16-26
# (21 of the 27 observables in the training samples).
min_obs = [0, 1]
min_obs.extend(range(4, 12))
min_obs.extend(range(16, 27))

In [9]:
# Minimal observable basis on the samples without the tight cuts; the same
# index list is passed once per estimator.
train_ensemble('minimal', use_tight_cuts=False, features=[min_obs] * n_estimators)

00:41  Training 10 estimators in ensemble
00:41  Training estimator 1 / 10 in ensemble
00:41  Starting training
00:41    Method:                 sally
00:41    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
00:41                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
00:41    Features:               [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
00:41    Method:                 sally
00:41    Hidden layers:          (100, 100)
00:41    Activation function:    tanh
00:41    Batch size:             128
00:41    Trainer:                amsgrad
00:41    Epochs:                 50
00:41    Learning rate:          0.001 initially, decaying to 0.0001
00:41    Validation split:       None
00:41    Early stopping:         True
00:41    Scale inputs:           True
00:41    Shuffle labels          Fals

01:41    Shuffle labels          False
01:41    Regularization:         None
01:41  Loading training data
01:41  Found 1000000 samples with 2 parameters and 27 observables
01:41  Rescaling inputs
01:41  Only using 21 of 27 observables
01:41  Creating model for method sally
01:41  Training model
01:43    Epoch 5: train loss 3.3806 (mse_score: 3.3806)
01:44    Epoch 10: train loss 3.2256 (mse_score: 3.2256)
01:46    Epoch 15: train loss 3.0871 (mse_score: 3.0871)
01:47    Epoch 20: train loss 2.9972 (mse_score: 2.9972)
01:49    Epoch 25: train loss 2.9390 (mse_score: 2.9390)
01:50    Epoch 30: train loss 2.8811 (mse_score: 2.8811)
01:52    Epoch 35: train loss 2.8422 (mse_score: 2.8422)
01:53    Epoch 40: train loss 2.8104 (mse_score: 2.8104)
01:55    Epoch 45: train loss 2.7860 (mse_score: 2.7860)
01:56    Epoch 50: train loss 2.7628 (mse_score: 2.7628)
01:56  Finished training
01:56  Training estimator 6 / 10 in ensemble
01:56  Starting training
01:56    Method:                 sally
0

02:57    Method:                 sally
02:57    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_9.npy
02:57                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_9.npy
02:57    Features:               [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
02:57    Method:                 sally
02:57    Hidden layers:          (100, 100)
02:57    Activation function:    tanh
02:57    Batch size:             128
02:57    Trainer:                amsgrad
02:57    Epochs:                 50
02:57    Learning rate:          0.001 initially, decaying to 0.0001
02:57    Validation split:       None
02:57    Early stopping:         True
02:57    Scale inputs:           True
02:57    Shuffle labels          False
02:57    Regularization:         None
02:57  Loading training data
02:57  Found 1000000 samples with 2 paramet

In [10]:
# Minimal observable basis on the tight-cut samples. The features list is
# built with one entry per estimator, so its length follows n_estimators
# instead of a hard-coded 10 and stays in sync with the Settings cell.
train_ensemble(
    'minimal_tight',
    use_tight_cuts=True,
    features=[min_obs for _ in range(n_estimators)],
)

03:13  Training 10 estimators in ensemble
03:13  Training estimator 1 / 10 in ensemble
03:13  Starting training
03:13    Method:                 sally
03:13    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
03:13                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
03:13    Features:               [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
03:13    Method:                 sally
03:13    Hidden layers:          (100, 100)
03:13    Activation function:    tanh
03:13    Batch size:             128
03:13    Trainer:                amsgrad
03:13    Epochs:                 50
03:13    Learning rate:          0.001 initially, decaying to 0.0001
03:13    Validation split:       None
03:13    Early stopping:         True
03:13    Scale inputs:           True
03:13    Shuffle labels  

04:14    Epochs:                 50
04:14    Learning rate:          0.001 initially, decaying to 0.0001
04:14    Validation split:       None
04:14    Early stopping:         True
04:14    Scale inputs:           True
04:14    Shuffle labels          False
04:14    Regularization:         None
04:14  Loading training data
04:14  Found 1000000 samples with 2 parameters and 27 observables
04:14  Rescaling inputs
04:14  Only using 21 of 27 observables
04:14  Creating model for method sally
04:14  Training model
04:16    Epoch 5: train loss 350.6380 (mse_score: 350.6380)
04:17    Epoch 10: train loss 343.6671 (mse_score: 343.6671)
04:19    Epoch 15: train loss 338.3468 (mse_score: 338.3468)
04:20    Epoch 20: train loss 334.7338 (mse_score: 334.7338)
04:22    Epoch 25: train loss 332.1687 (mse_score: 332.1687)
04:23    Epoch 30: train loss 330.1940 (mse_score: 330.1940)
04:25    Epoch 35: train loss 328.6740 (mse_score: 328.6740)
04:26    Epoch 40: train loss 327.4954 (mse_score: 327.4954

05:23    Epoch 25: train loss 355.3250 (mse_score: 355.3250)
05:24    Epoch 30: train loss 353.3730 (mse_score: 353.3730)
05:26    Epoch 35: train loss 351.8657 (mse_score: 351.8657)
05:27    Epoch 40: train loss 350.7304 (mse_score: 350.7304)
05:29    Epoch 45: train loss 349.8270 (mse_score: 349.8270)
05:30    Epoch 50: train loss 349.0436 (mse_score: 349.0436)
05:30  Finished training
05:30  Training estimator 10 / 10 in ensemble
05:30  Starting training
05:30    Method:                 sally
05:30    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_9.npy
05:30                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_9.npy
05:30    Features:               [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
05:30    Method:                 sally
05:30    Hidden layers:          (100, 100)
05:30    A

## Just resurrection phi

In [11]:
# Only observable index 26 as input, on the tight-cut samples. The features
# list is built with one entry per estimator, so its length follows
# n_estimators instead of a hard-coded 10.
train_ensemble(
    'resurrection',
    use_tight_cuts=True,
    features=[[26] for _ in range(n_estimators)]
)

05:47  Training 10 estimators in ensemble
05:47  Training estimator 1 / 10 in ensemble
05:47  Starting training
05:47    Method:                 sally
05:47    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
05:47                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
05:47    Features:               [26]
05:47    Method:                 sally
05:47    Hidden layers:          (100, 100)
05:47    Activation function:    tanh
05:47    Batch size:             128
05:47    Trainer:                amsgrad
05:47    Epochs:                 50
05:47    Learning rate:          0.001 initially, decaying to 0.0001
05:47    Validation split:       None
05:47    Early stopping:         True
05:47    Scale inputs:           True
05:47    Shuffle labels          False
05:47    Regularization:         None
05:47  Loading train

06:45  Rescaling inputs
06:45  Only using 1 of 27 observables
06:45  Creating model for method sally
06:45  Training model
06:47    Epoch 5: train loss 419.4220 (mse_score: 419.4220)
06:49    Epoch 10: train loss 419.3840 (mse_score: 419.3840)
06:50    Epoch 15: train loss 419.3414 (mse_score: 419.3414)
06:51    Epoch 20: train loss 419.3266 (mse_score: 419.3266)
06:53    Epoch 25: train loss 419.2919 (mse_score: 419.2919)
06:54    Epoch 30: train loss 419.3090 (mse_score: 419.3090)
06:56    Epoch 35: train loss 419.2714 (mse_score: 419.2714)
06:57    Epoch 40: train loss 419.2486 (mse_score: 419.2486)
06:59    Epoch 45: train loss 419.2454 (mse_score: 419.2454)
07:00    Epoch 50: train loss 419.2493 (mse_score: 419.2493)
07:00  Finished training
07:00  Training estimator 6 / 10 in ensemble
07:00  Starting training
07:00    Method:                 sally
07:00    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_

07:59    Features:               [26]
07:59    Method:                 sally
07:59    Hidden layers:          (100, 100)
07:59    Activation function:    tanh
07:59    Batch size:             128
07:59    Trainer:                amsgrad
07:59    Epochs:                 50
07:59    Learning rate:          0.001 initially, decaying to 0.0001
07:59    Validation split:       None
07:59    Early stopping:         True
07:59    Scale inputs:           True
07:59    Shuffle labels          False
07:59    Regularization:         None
07:59  Loading training data
07:59  Found 1000000 samples with 2 parameters and 27 observables
07:59  Rescaling inputs
07:59  Only using 1 of 27 observables
07:59  Creating model for method sally
07:59  Training model
08:01    Epoch 5: train loss 498.4867 (mse_score: 498.4867)
08:03    Epoch 10: train loss 498.4280 (mse_score: 498.4280)
08:06    Epoch 15: train loss 498.3980 (mse_score: 498.3980)
08:09    Epoch 20: train loss 498.3540 (mse_score: 498.3540)
08:12 