# Train SALLY ensemble

Johann Brehmer, Kyle Cranmer, Marco Farina, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [4]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

# Root logger format: an HH:MM timestamp followed by the message text.
logging.basicConfig(format='%(asctime)s  %(message)s', datefmt='%H:%M')

In [5]:
# Root directories. The defaults are the original author's local paths; set
# DIBOSON_MINING_BASE_DIR / MG5_AMC_DIR in the environment to run elsewhere
# without editing the notebook.
# Every other path in this notebook is built by plain string concatenation onto
# these two values, so os.path.join(..., '') is used to guarantee a trailing
# separator even if the override is given without one.
base_dir = os.path.join(
    os.environ.get('DIBOSON_MINING_BASE_DIR', '/Users/johannbrehmer/work/projects/madminer/diboson_mining/'),
    ''
)
mg_dir = os.path.join(
    os.environ.get('MG5_AMC_DIR', '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_2/'),
    ''
)

In [6]:
# All paths are plain string concatenations onto base_dir / mg_dir, so the
# trailing slashes in the literals below are significant.

# Card-related paths (run_card_dir and ufo_model_dir live under card_dir)
card_dir = base_dir + 'cards/wgamma/'
run_card_dir = card_dir + 'run_cards/'
ufo_model_dir = card_dir + 'SMWgamma_UFO'

# Paths under base_dir + 'data/'
mg_process_dir = base_dir + 'data/mg_processes/wgamma/'
sample_dir = base_dir + 'data/samples/wgamma/'
model_dir = base_dir + 'data/models/wgamma/'
temp_dir = base_dir + 'data/temp'

# Logs
log_dir = base_dir + 'logs/wgamma/'

# Delphes directory inside the MadGraph directory
delphes_dir = mg_dir + 'Delphes'

## Settings

In [12]:
# Ensemble size: the default number of estimators for train_ensemble(), also
# used to size per-estimator keyword lists such as `features`.
n_estimators = 5

## Training function

In [13]:
def train_ensemble(filename, use_tight_cuts=True, n_estimators=n_estimators, **kwargs):
    """Train a SALLY ensemble, calculate its expectation and save it.

    Parameters
    ----------
    filename : str
        Suffix for the saved ensemble; the ensemble is written to
        ``model_dir + 'sally_ensemble_' + filename``.
    use_tight_cuts : bool
        If True, training and validation samples are read from the
        ``train_local_tight`` / ``validation_tight`` folders under
        ``sample_dir``; otherwise from ``train_local`` / ``validation``.
    n_estimators : int
        Number of estimators. Estimator ``i`` is trained on
        ``x_train_<i>.npy`` and ``t_xz_train_<i>.npy``.
    **kwargs
        Forwarded unchanged to ``EnsembleForge.train_all``.
    """
    if use_tight_cuts:
        suffix = '_tight'
    else:
        suffix = ''

    forge = EnsembleForge(n_estimators, debug=False)

    # One (x, t_xz) training file pair per estimator.
    x_train_files = []
    t_xz_train_files = []
    for index in range(n_estimators):
        x_train_files.append(sample_dir + 'train_local{}/x_train_{}.npy'.format(suffix, index))
        t_xz_train_files.append(sample_dir + 'train_local{}/t_xz_train_{}.npy'.format(suffix, index))

    forge.train_all(
        method='sally',
        x_filename=x_train_files,
        t_xz0_filename=t_xz_train_files,
        **kwargs
    )

    # The expectation is calculated on the validation sample with the same cuts.
    validation_file = sample_dir + 'validation{}/x_validation.npy'.format(suffix)
    forge.calculate_expectation(x_filename=validation_file)

    forge.save(model_dir + 'sally_ensemble_' + filename)

## All observables

In [14]:
# All observables (no `features` restriction), samples without tight cuts.
train_ensemble('all', use_tight_cuts=False, initial_lr=0.001, n_hidden=(100, 100))

15:14  Training 5 estimators in ensemble
15:14  Training estimator 1 / 5 in ensemble
15:14  Starting training
15:14    Method:                 sally
15:14    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
15:14                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
15:14    Features:               all
15:14    Method:                 sally
15:14    Hidden layers:          (100, 100)
15:14    Activation function:    tanh
15:14    Batch size:             128
15:14    Trainer:                amsgrad
15:14    Epochs:                 50
15:14    Learning rate:          0.001 initially, decaying to 0.0001
15:14    Validation split:       None
15:14    Early stopping:         True
15:14    Scale inputs:           True
15:14    Shuffle labels          False
15:14    Regularization:         None
15:14  Loading training data
15:14 

16:57    Epoch 30: train loss 2.7942 (mse_score: 2.7942)
17:03    Epoch 35: train loss 2.7452 (mse_score: 2.7452)
17:04    Epoch 40: train loss 2.7072 (mse_score: 2.7072)
17:06    Epoch 45: train loss 2.6765 (mse_score: 2.6765)
17:08    Epoch 50: train loss 2.6536 (mse_score: 2.6536)
17:08  Finished training
17:08  Calculating expectation for 5 estimators in ensemble
17:08  Starting evaluation for estimator 1 / 5 in ensemble
17:08  Starting evaluation for estimator 2 / 5 in ensemble
17:08  Starting evaluation for estimator 3 / 5 in ensemble
17:08  Starting evaluation for estimator 4 / 5 in ensemble
17:08  Starting evaluation for estimator 5 / 5 in ensemble


In [15]:
# All observables (no `features` restriction), tight-cut samples.
train_ensemble('all_tight', use_tight_cuts=True, initial_lr=0.001, n_hidden=(100, 100))

17:08  Training 5 estimators in ensemble
17:08  Training estimator 1 / 5 in ensemble
17:08  Starting training
17:08    Method:                 sally
17:08    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
17:08                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
17:08    Features:               all
17:08    Method:                 sally
17:08    Hidden layers:          (100, 100)
17:08    Activation function:    tanh
17:08    Batch size:             128
17:08    Trainer:                amsgrad
17:08    Epochs:                 50
17:08    Learning rate:          0.001 initially, decaying to 0.0001
17:08    Validation split:       None
17:08    Early stopping:         True
17:08    Scale inputs:           True
17:08    Shuffle labels          False
17:08    Regularization:         None
17:08  Loading training

KeyboardInterrupt: 

## Minimal observable basis

In [6]:
# Observable column indices 0-1, 4-11 and 16-26, i.e. indices 0..26 with
# columns 2, 3 and 12-15 left out.
min_obs = sorted(set(range(27)) - {2, 3, 12, 13, 14, 15})

In [7]:
# Minimal observable basis, samples without tight cuts.
# `features` carries one entry (the same min_obs list) per estimator.
train_ensemble(
    'minimal',
    use_tight_cuts=False,
    features=[min_obs] * n_estimators,
    initial_lr=0.001,
    n_hidden=(100, 100),
)

22:03  
22:03  ------------------------------------------------------------
22:03  |                                                          |
22:03  |  MadMiner v2018.11.13                                    |
22:03  |                                                          |
22:03  |           Johann Brehmer, Kyle Cranmer, and Felix Kling  |
22:03  |                                                          |
22:03  ------------------------------------------------------------
22:03  
22:03  Training 10 estimators in ensemble
22:03  Training estimator 1 / 10 in ensemble
22:03  Starting training
22:03    Method:                 sally
22:03    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
22:03                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
22:03    Features:               [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17,

23:40    Features:               [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
23:40    Method:                 sally
23:40    Hidden layers:          (100, 100, 100, 100)
23:40    Activation function:    tanh
23:40    Batch size:             128
23:40    Trainer:                amsgrad
23:40    Epochs:                 50
23:40    Learning rate:          0.01 initially, decaying to 0.0001
23:40    Validation split:       None
23:40    Early stopping:         True
23:40    Scale inputs:           True
23:40    Shuffle labels          False
23:40    Regularization:         None
23:40  Loading training data
23:40  Found 1000000 samples with 2 parameters and 27 observables
23:40  Rescaling inputs
23:40  Only using 21 of 27 observables
23:40  Creating model for method sally
23:40  Training model
23:42    Epoch 5: train loss 3.8729 (mse_score: 3.8729)
23:45    Epoch 10: train loss 3.7855 (mse_score: 3.7855)
23:47    Epoch 15: train loss 3.7326 (mse_score: 3.732

01:26    Epoch 15: train loss 3.9553 (mse_score: 3.9553)
01:29    Epoch 20: train loss 3.8798 (mse_score: 3.8798)
01:31    Epoch 25: train loss 3.7901 (mse_score: 3.7901)
01:34    Epoch 30: train loss 3.6920 (mse_score: 3.6920)
01:36    Epoch 35: train loss 3.6289 (mse_score: 3.6289)
01:39    Epoch 40: train loss 3.5943 (mse_score: 3.5943)
01:41    Epoch 45: train loss 3.5787 (mse_score: 3.5787)
01:44    Epoch 50: train loss 3.5697 (mse_score: 3.5697)
01:44  Finished training
01:44  Training estimator 10 / 10 in ensemble
01:44  Starting training
01:44    Method:                 sally
01:44    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_9.npy
01:44                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_9.npy
01:44    Features:               [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
01:44    Method

In [8]:
# Minimal observable basis, tight-cut samples.
# `features` carries one entry per estimator. It is sized from n_estimators
# (not a hard-coded 10) so it always matches the number of training files
# that train_ensemble() builds from the same value.
train_ensemble(
    'minimal_tight',
    use_tight_cuts=True,
    features=[min_obs for _ in range(n_estimators)],
    initial_lr=0.001,
    n_hidden=(100,100,)
)

02:11  Training 10 estimators in ensemble
02:11  Training estimator 1 / 10 in ensemble
02:11  Starting training
02:11    Method:                 sally
02:11    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
02:11                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
02:11    Features:               [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
02:11    Method:                 sally
02:11    Hidden layers:          (100, 100, 100, 100)
02:11    Activation function:    tanh
02:11    Batch size:             128
02:11    Trainer:                amsgrad
02:11    Epochs:                 50
02:11    Learning rate:          0.01 initially, decaying to 0.0001
02:11    Validation split:       None
02:11    Early stopping:         True
02:11    Scale inputs:           True
02:11    Shuffle

03:54    Trainer:                amsgrad
03:54    Epochs:                 50
03:54    Learning rate:          0.01 initially, decaying to 0.0001
03:54    Validation split:       None
03:54    Early stopping:         True
03:54    Scale inputs:           True
03:54    Shuffle labels          False
03:54    Regularization:         None
03:54  Loading training data
03:54  Found 1000000 samples with 2 parameters and 27 observables
03:54  Rescaling inputs
03:54  Only using 21 of 27 observables
03:54  Creating model for method sally
03:54  Training model
03:57    Epoch 5: train loss 374.5579 (mse_score: 374.5579)
03:59    Epoch 10: train loss 368.1496 (mse_score: 368.1496)
04:02    Epoch 15: train loss 362.3705 (mse_score: 362.3705)
04:05    Epoch 20: train loss 356.7275 (mse_score: 356.7275)
04:07    Epoch 25: train loss 351.1099 (mse_score: 351.1099)
04:10    Epoch 30: train loss 345.7920 (mse_score: 345.7920)
04:12    Epoch 35: train loss 341.7555 (mse_score: 341.7555)
04:15    Epoch 40: 

05:46    Epoch 20: train loss 374.2649 (mse_score: 374.2649)
05:49    Epoch 25: train loss 369.4312 (mse_score: 369.4312)
05:51    Epoch 30: train loss 365.5496 (mse_score: 365.5496)
05:54    Epoch 35: train loss 362.2990 (mse_score: 362.2990)
05:56    Epoch 40: train loss 360.8422 (mse_score: 360.8422)
05:59    Epoch 45: train loss 360.4164 (mse_score: 360.4164)
06:01    Epoch 50: train loss 359.6699 (mse_score: 359.6699)
06:01  Finished training
06:01  Training estimator 10 / 10 in ensemble
06:01  Starting training
06:01    Method:                 sally
06:01    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_9.npy
06:01                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_9.npy
06:01    Features:               [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
06:01    Method:                

## Just resurrection phi

In [9]:
# Train on observable column 26 only, using the tight-cut samples.
# `features` carries one entry per estimator. It is sized from n_estimators
# (not a hard-coded 10) so it always matches the number of training files
# that train_ensemble() builds from the same value.
train_ensemble(
    'resurrection',
    use_tight_cuts=True,
    features=[[26] for _ in range(n_estimators)]
)

08:15  Training 10 estimators in ensemble
08:15  Training estimator 1 / 10 in ensemble
08:15  Starting training
08:15    Method:                 sally
08:15    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
08:15                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
08:15    Features:               [26]
08:15    Method:                 sally
08:15    Hidden layers:          (100, 100, 100, 100)
08:15    Activation function:    tanh
08:15    Batch size:             128
08:15    Trainer:                amsgrad
08:15    Epochs:                 50
08:15    Learning rate:          0.01 initially, decaying to 0.0001
08:15    Validation split:       None
08:15    Early stopping:         True
08:15    Scale inputs:           True
08:15    Regularization:         None
08:15  Loading training data
08:15  Found 1000000 

11:22    Epoch 5: train loss 428.2557 (mse_score: 428.2557)
11:25    Epoch 10: train loss 428.0868 (mse_score: 428.0868)
11:28    Epoch 15: train loss 427.9306 (mse_score: 427.9306)
11:32    Epoch 20: train loss 427.8273 (mse_score: 427.8273)
11:35    Epoch 25: train loss 427.7919 (mse_score: 427.7919)
11:39    Epoch 30: train loss 427.8116 (mse_score: 427.8116)
11:42    Epoch 35: train loss 427.7349 (mse_score: 427.7349)
11:46    Epoch 40: train loss 427.7345 (mse_score: 427.7345)
11:49    Epoch 45: train loss 427.7359 (mse_score: 427.7359)
11:53    Epoch 50: train loss 427.7105 (mse_score: 427.7105)
11:53  Finished training
11:53  Training estimator 6 / 10 in ensemble
11:53  Starting training
11:53    Method:                 sally
11:53    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_5.npy
11:53                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wga

15:57    Activation function:    tanh
15:57    Batch size:             128
15:57    Trainer:                amsgrad
15:57    Epochs:                 50
15:57    Learning rate:          0.01 initially, decaying to 0.0001
15:57    Validation split:       None
15:57    Early stopping:         True
15:57    Scale inputs:           True
15:57    Regularization:         None
15:57  Loading training data
15:57  Found 1000000 samples with 2 parameters and 27 observables
15:57  Rescaling inputs
15:57  Only using 1 of 27 observables
15:57  Creating model for method sally
15:57  Training model
16:00    Epoch 5: train loss 507.3529 (mse_score: 507.3529)
16:03    Epoch 10: train loss 507.0352 (mse_score: 507.0352)
16:07    Epoch 15: train loss 506.7964 (mse_score: 506.7964)
16:11    Epoch 20: train loss 506.6972 (mse_score: 506.6972)
16:15    Epoch 25: train loss 506.6570 (mse_score: 506.6570)
16:18    Epoch 30: train loss 506.6066 (mse_score: 506.6066)
16:22    Epoch 35: train loss 506.5862 (mse_s