# Train SALLY ensemble

Johann Brehmer, Kyle Cranmer, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
% matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

# Prefix every log message with an hours:minutes timestamp.
logging.basicConfig(
    datefmt='%H:%M',
    format='%(asctime)s  %(message)s',
)

In [2]:
# Machine-specific locations: adjust madminer_root when running elsewhere.
madminer_root = '/Users/johannbrehmer/work/projects/madminer/'
base_dir = madminer_root + 'diboson_mining/'  # this analysis project
mg_dir = madminer_root + 'MG5_aMC_v2_6_2/'  # MadGraph installation

In [3]:
# Data folders below base_dir (note: temp_dir carries no trailing slash).
sample_dir = '{}data/samples/wgamma/'.format(base_dir)
mg_process_dir = '{}data/mg_processes/wgamma/'.format(base_dir)
model_dir = '{}data/models/wgamma/'.format(base_dir)
temp_dir = '{}data/temp'.format(base_dir)

# Cards and the UFO model shipped with them.
card_dir = '{}cards/wgamma/'.format(base_dir)
ufo_model_dir = '{}SMWgamma_UFO'.format(card_dir)
run_card_dir = '{}run_cards/'.format(card_dir)

# Logs.
log_dir = '{}logs/wgamma/'.format(base_dir)

# Delphes folder inside the MadGraph directory.
delphes_dir = '{}Delphes'.format(mg_dir)

## Settings

In [4]:
# Ensemble size for this notebook; same value as the default of
# train_ensemble(..., n_estimators=10) defined below.
n_estimators = 10

## Training function

In [5]:
def train_ensemble(filename, use_tight_cuts=True, n_estimators=10, **kwargs):
    """
    Train a SALLY ensemble, compute its expectation on the validation sample,
    and save it.

    Parameters
    ----------
    filename : str
        Suffix appended to ``model_dir + 'sally_ensemble_'`` to form the
        location the ensemble is saved to.
    use_tight_cuts : bool, optional
        If True, samples are read from the ``train_local_tight`` and
        ``validation_tight`` folders under ``sample_dir``; otherwise from
        ``train_local`` and ``validation``. Default: True.
    n_estimators : int, optional
        Size of the ensemble. One ``x_train_<i>.npy`` / ``t_xz_train_<i>.npy``
        pair is passed to training for each i in ``range(n_estimators)``.
        Default: 10.
    **kwargs
        Forwarded unchanged to ``EnsembleForge.train_all``.

    Returns
    -------
    None
    """
    forge = EnsembleForge(n_estimators, debug=False)

    # Folder suffix selecting the tight-cut variant of the samples.
    suffix = '_tight' if use_tight_cuts else ''
    train_folder = sample_dir + 'train_local' + suffix + '/'
    validation_file = sample_dir + 'validation' + suffix + '/x_validation.npy'

    # One (observables, joint score) file pair per estimator.
    x_files = []
    t_xz_files = []
    for index in range(n_estimators):
        x_files.append(train_folder + 'x_train_{}.npy'.format(index))
        t_xz_files.append(train_folder + 't_xz_train_{}.npy'.format(index))

    forge.train_all(
        method='sally',
        x_filename=x_files,
        t_xz0_filename=t_xz_files,
        **kwargs
    )

    forge.calculate_expectation(x_filename=validation_file)

    forge.save(model_dir + 'sally_ensemble_' + filename)

## All observables

In [None]:
# Pass the ensemble size from the Settings cell explicitly so that changing
# it there actually takes effect.
train_ensemble('all', use_tight_cuts=False, n_estimators=n_estimators)

In [None]:
# Pass the ensemble size from the Settings cell explicitly so that changing
# it there actually takes effect.
train_ensemble('all_tight', use_tight_cuts=True, n_estimators=n_estimators)

## Minimal observable basis

In [None]:
# One feature list per estimator; its length follows the Settings value of
# n_estimators instead of a hard-coded 10, so both stay in sync.
train_ensemble(
    'minimal',
    use_tight_cuts=False,
    n_estimators=n_estimators,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

In [None]:
# One feature list per estimator; its length follows the Settings value of
# n_estimators instead of a hard-coded 10, so both stay in sync.
train_ensemble(
    'minimal_tight',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

In [None]:
# One feature list per estimator; its length follows the Settings value of
# n_estimators instead of a hard-coded 10, so both stay in sync.
train_ensemble(
    'minimal_reg',
    use_tight_cuts=False,
    n_estimators=n_estimators,
    grad_x_regularization=0.1,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

In [None]:
# One feature list per estimator; its length follows the Settings value of
# n_estimators instead of a hard-coded 10, so both stay in sync.
train_ensemble(
    'minimal_tight_reg',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    grad_x_regularization=0.1,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

## Just resurrection phi

In [6]:
# Single observable (index 26) for every estimator; the list length follows
# the Settings value of n_estimators instead of a hard-coded 10.
train_ensemble(
    'resurrection',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    features=[[26] for _ in range(n_estimators)]
)

10:16  
10:16  ------------------------------------------------------------
10:16  |                                                          |
10:16  |  MadMiner v2018.11.06                                    |
10:16  |                                                          |
10:16  |           Johann Brehmer, Kyle Cranmer, and Felix Kling  |
10:16  |                                                          |
10:16  ------------------------------------------------------------
10:16  
10:16  Training 10 estimators in ensemble
10:16  Training estimator 1 / 10 in ensemble
10:16  Starting training
10:16    Method:                 sally
10:16    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/x_train_0.npy
10:16                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_0.npy
10:16    Features:               [26]
10:16    Method:       

12:14                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local_tight/t_xz_train_3.npy
12:14    Features:               [26]
12:14    Method:                 sally
12:14    Hidden layers:          (100, 100)
12:14    Activation function:    tanh
12:14    Batch size:             128
12:14    Trainer:                amsgrad
12:14    Epochs:                 50
12:14    Learning rate:          0.001 initially, decaying to 0.0001
12:14    Validation split:       0.25
12:14    Early stopping:         True
12:14    Scale inputs:           True
12:14    Regularization:         None
12:14  Loading training data
12:14  Found 1000000 samples with 2 parameters and 27 observables
12:14  Rescaling inputs
12:14  Only using 1 of 27 observables
12:14  Creating model for method sally
12:14  Training model
12:18    Epoch 5: train loss 6405.0802 (mse_score: 6405.0802)
12:18              val. loss  5341.6985 (mse_score: 5341.6985) (*)
12:

13:57  Rescaling inputs
13:57  Only using 1 of 27 observables
13:57  Creating model for method sally
13:57  Training model
14:01    Epoch 5: train loss 5899.8593 (mse_score: 5899.8593)
14:01              val. loss  5117.2436 (mse_score: 5117.2436)
14:04    Epoch 10: train loss 5899.0472 (mse_score: 5899.0472)
14:04              val. loss  5117.3679 (mse_score: 5117.3679)
14:09    Epoch 15: train loss 5898.5102 (mse_score: 5898.5102)
14:09              val. loss  5115.5048 (mse_score: 5115.5048) (*)
14:13    Epoch 20: train loss 5898.2822 (mse_score: 5898.2822)
14:13              val. loss  5116.2670 (mse_score: 5116.2670)
14:16    Epoch 25: train loss 5898.0643 (mse_score: 5898.0643)
14:16              val. loss  5115.4544 (mse_score: 5115.4544)
14:20    Epoch 30: train loss 5900.0306 (mse_score: 5900.0306)
14:20              val. loss  5115.3264 (mse_score: 5115.3264) (*)
14:24    Epoch 35: train loss 5897.8785 (mse_score: 5897.8785)
14:24              val. loss  5115.2483 (mse_score:

16:06    Epoch 30: train loss 6168.5940 (mse_score: 6168.5940)
16:06              val. loss  5347.7905 (mse_score: 5347.7905) (*)
16:08    Epoch 35: train loss 6168.2177 (mse_score: 6168.2177)
16:08              val. loss  5348.2809 (mse_score: 5348.2809)
16:11    Epoch 40: train loss 6167.9663 (mse_score: 6167.9663)
16:11              val. loss  5348.1841 (mse_score: 5348.1841)
16:13    Epoch 45: train loss 6167.9075 (mse_score: 6167.9075)
16:13              val. loss  5348.5417 (mse_score: 5348.5417)
16:15    Epoch 50: train loss 6167.8521 (mse_score: 6167.8521)
16:15              val. loss  5347.5072 (mse_score: 5347.5072) (*)
16:15  Early stopping did not improve performance
16:15  Finished training
16:15  Calculating expectation for 10 estimators in ensemble
16:15  Starting evaluation for estimator 1 / 10 in ensemble
16:15  Starting evaluation for estimator 2 / 10 in ensemble
16:15  Starting evaluation for estimator 3 / 10 in ensemble
16:15  Starting evaluation for estimator 4 / 1