# Train SALLY ensemble

Johann Brehmer, Kyle Cranmer, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
% matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

# Prefix every log record with an hour:minute timestamp.
logging.basicConfig(
    datefmt='%H:%M',
    format='%(asctime)s  %(message)s',
)

In [2]:
# Root of the diboson_mining project and of the MadGraph installation.
# The defaults are the original author's local paths; set the environment
# variables DIBOSON_MINING_BASE_DIR / MADGRAPH_DIR to run the notebook on
# another machine without editing this cell.
# os.path.join(path, '') guarantees the trailing separator that the string
# concatenations in the following cell rely on.
base_dir = os.path.join(
    os.environ.get('DIBOSON_MINING_BASE_DIR', '/Users/johannbrehmer/work/projects/madminer/diboson_mining/'),
    ''
)
mg_dir = os.path.join(
    os.environ.get('MADGRAPH_DIR', '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_2/'),
    ''
)

In [3]:
# Data folders below the project root (base_dir)
sample_dir = base_dir + 'data/samples/wgamma/'
model_dir = base_dir + 'data/models/wgamma/'
mg_process_dir = base_dir + 'data/mg_processes/wgamma/'
temp_dir = base_dir + 'data/temp'  # no trailing slash
log_dir = base_dir + 'logs/wgamma/'

# Card folder and the paths derived from it
card_dir = base_dir + 'cards/wgamma/'
run_card_dir = card_dir + 'run_cards/'
ufo_model_dir = card_dir + 'SMWgamma_UFO'  # no trailing slash

# Delphes folder below mg_dir
delphes_dir = mg_dir + 'Delphes'  # no trailing slash

## Settings

In [4]:
# Intended ensemble size.
# NOTE(review): train_ensemble() declares its own n_estimators default (also 10),
# so this setting only takes effect where it is passed explicitly -- confirm
# that every call forwards it.
n_estimators = 10

## Training function

In [5]:
def train_ensemble(filename, use_tight_cuts=True, n_estimators=10, **kwargs):
    """
    Train an EnsembleForge with method 'sally', run calculate_expectation() on the
    validation sample, and save the ensemble.

    Parameters
    ----------
    filename : str
        Suffix of the output name: the ensemble is saved to
        model_dir + 'sally_ensemble_' + filename.
    use_tight_cuts : bool, optional
        If truthy, the '_tight' sample folders are read ('train_local_tight/',
        'validation_tight/'); otherwise 'train_local/' and 'validation/'.
    n_estimators : int, optional
        Number passed to the EnsembleForge constructor. One x_train_{i}.npy /
        t_xz_train_{i}.npy pair is handed to train_all() for each
        i in range(n_estimators).
    **kwargs
        Forwarded unchanged to EnsembleForge.train_all().

    Returns
    -------
    None
    """
    suffix = ''
    if use_tight_cuts:
        suffix = '_tight'

    ensemble = EnsembleForge(n_estimators, debug=True)

    # One training file pair per index, all taken from the same sample folder
    x_files = []
    t_xz_files = []
    for index in range(n_estimators):
        x_files.append(sample_dir + 'train_local{}/x_train_{}.npy'.format(suffix, index))
        t_xz_files.append(sample_dir + 'train_local{}/t_xz_train_{}.npy'.format(suffix, index))

    ensemble.train_all(method='sally', x_filename=x_files, t_xz0_filename=t_xz_files, **kwargs)

    validation_file = sample_dir + 'validation{}/x_validation.npy'.format(suffix)
    ensemble.calculate_expectation(x_filename=validation_file)

    ensemble.save(model_dir + 'sally_ensemble_' + filename)

## All observables

In [6]:
# All observables, no tight cuts, no gradient regularization.
# n_estimators is passed explicitly so the value from the Settings cell is
# actually used instead of the function's own default.
train_ensemble(
    'all',
    use_tight_cuts=False,
    n_estimators=n_estimators,
    grad_x_regularization=0.0
)

15:33  
15:33  ------------------------------------------------------------
15:33  |                                                          |
15:33  |  MadMiner v2018.11.02                                    |
15:33  |                                                          |
15:33  |           Johann Brehmer, Kyle Cranmer, and Felix Kling  |
15:33  |                                                          |
15:33  ------------------------------------------------------------
15:33  
15:33  Training 10 estimators in ensemble
15:33  Training estimator 1 / 10 in ensemble
15:33  Starting training
15:33    Method:                 sally
15:33    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
15:33                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
15:33    Features:               all
15:33    Method:                 sal

KeyboardInterrupt: 

In [None]:
# All observables, tight cuts, no gradient regularization.
# n_estimators is passed explicitly so the value from the Settings cell is
# actually used instead of the function's own default.
train_ensemble(
    'all_tight',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    grad_x_regularization=0.0
)

In [None]:
# All observables, no tight cuts, gradient regularization 0.1.
# n_estimators is passed explicitly so the value from the Settings cell is
# actually used instead of the function's own default.
train_ensemble(
    'all_reg',
    use_tight_cuts=False,
    n_estimators=n_estimators,
    grad_x_regularization=0.1
)

In [None]:
# All observables, tight cuts, gradient regularization 0.1.
# n_estimators is passed explicitly so the value from the Settings cell is
# actually used instead of the function's own default.
train_ensemble(
    'all_tight_reg',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    grad_x_regularization=0.1
)

## Minimal observable basis

In [None]:
# Restricted feature set, no tight cuts, no gradient regularization.
# The per-estimator feature lists and the ensemble size are both tied to the
# n_estimators setting so they cannot drift apart from a hard-coded 10.
train_ensemble(
    'minimal',
    use_tight_cuts=False,
    n_estimators=n_estimators,
    grad_x_regularization=0.0,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

15:35  Training 10 estimators in ensemble
15:35  Training estimator 1 / 10 in ensemble
15:35  Starting training
15:35    Method:                 sally
15:35    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
15:35                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
15:35    Features:               [0, 5, 6, 7, 9, 10, 11]
15:35    Method:                 sally
15:35    Hidden layers:          (100, 100)
15:35    Activation function:    tanh
15:35    Batch size:             128
15:35    Trainer:                amsgrad
15:35    Epochs:                 50
15:35    Learning rate:          0.001 initially, decaying to 0.0001
15:35    Validation split:       0.25
15:35    Early stopping:         True
15:35    Scale inputs:           True
15:35    Regularization:         0.0 * |grad_x f(x)|^2
15:35  Loading training data
15:35 

15:46    Batch size:             128
15:46    Trainer:                amsgrad
15:46    Epochs:                 50
15:46    Learning rate:          0.001 initially, decaying to 0.0001
15:46    Validation split:       0.25
15:46    Early stopping:         True
15:46    Scale inputs:           True
15:46    Regularization:         0.0 * |grad_x f(x)|^2
15:46  Loading training data
15:46  Found 1000000 samples with 2 parameters and 26 observables
15:46  Rescaling inputs
15:46  Observable ranges:
15:46    x_1: mean 1.2072121080564103e-16, std 1.0000000000000426, range -1.2696488812250217 ... 47.764449103988355
15:46    x_2: mean 3.385025593161117e-17, std 1.0000000000000195, range -1.724847206027408 ... 1.7370336522745242
15:46    x_3: mean -1.0077410905751095e-14, std 0.9999999999999807, range -0.6623534385666117 ... 20.19121695021946
15:46    x_4: mean 1.0373923942097463e-18, std 1.0000000000000493, range -5.237725345301356 ... 5.777473918519132
15:46    x_5: mean -1.963940121640917e-17, 

16:04    Epoch 32: train loss 77.2839 (mse_score: 77.2839, l2_grad_x: 2.1981)
            val. loss  24.6806 (mse_score: 24.6806, l2_grad_x: 0.0000) (*)
16:05    Epoch 33: train loss 77.2587 (mse_score: 77.2587, l2_grad_x: 2.2737)
            val. loss  24.6693 (mse_score: 24.6693, l2_grad_x: 0.0000) (*)
16:05    Epoch 34: train loss 77.2279 (mse_score: 77.2279, l2_grad_x: 2.3399)
            val. loss  24.6704 (mse_score: 24.6704, l2_grad_x: 0.0000)
16:06    Epoch 35: train loss 77.1943 (mse_score: 77.1943, l2_grad_x: 2.3411)
            val. loss  24.6687 (mse_score: 24.6687, l2_grad_x: 0.0000) (*)
16:07    Epoch 36: train loss 77.1764 (mse_score: 77.1764, l2_grad_x: 2.4068)
            val. loss  24.6706 (mse_score: 24.6706, l2_grad_x: 0.0000)
16:07    Epoch 37: train loss 77.1481 (mse_score: 77.1481, l2_grad_x: 2.4525)
            val. loss  24.6546 (mse_score: 24.6546, l2_grad_x: 0.0000) (*)
16:08    Epoch 38: train loss 77.1198 (mse_score: 77.1198, l2_grad_x: 2.4683)
            

16:20    Epoch 9: train loss 24.8872 (mse_score: 24.8872, l2_grad_x: 1.0081)
            val. loss  15.5160 (mse_score: 15.5160, l2_grad_x: 0.0000)
16:20    Epoch 10: train loss 24.8689 (mse_score: 24.8689, l2_grad_x: 1.0575)
            val. loss  15.5059 (mse_score: 15.5059, l2_grad_x: 0.0000)
16:21    Epoch 11: train loss 24.8585 (mse_score: 24.8585, l2_grad_x: 1.2029)
            val. loss  15.5216 (mse_score: 15.5216, l2_grad_x: 0.0000)
16:22    Epoch 12: train loss 24.8385 (mse_score: 24.8385, l2_grad_x: 1.2526)
            val. loss  15.5416 (mse_score: 15.5416, l2_grad_x: 0.0000)
16:22    Epoch 13: train loss 24.8163 (mse_score: 24.8163, l2_grad_x: 1.4874)
            val. loss  15.5479 (mse_score: 15.5479, l2_grad_x: 0.0000)
16:23    Epoch 14: train loss 24.7992 (mse_score: 24.7992, l2_grad_x: 1.6468)
            val. loss  15.5636 (mse_score: 15.5636, l2_grad_x: 0.0000)
16:23    Epoch 15: train loss 24.7775 (mse_score: 24.7775, l2_grad_x: 1.6966)
            val. loss  15.560

In [None]:
# Restricted feature set, tight cuts, no gradient regularization.
# The per-estimator feature lists and the ensemble size are both tied to the
# n_estimators setting so they cannot drift apart from a hard-coded 10.
train_ensemble(
    'minimal_tight',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    grad_x_regularization=0.0,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

In [None]:
# Restricted feature set, no tight cuts, gradient regularization 0.1.
# The per-estimator feature lists and the ensemble size are both tied to the
# n_estimators setting so they cannot drift apart from a hard-coded 10.
train_ensemble(
    'minimal_reg',
    use_tight_cuts=False,
    n_estimators=n_estimators,
    grad_x_regularization=0.1,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)

In [None]:
# Restricted feature set, tight cuts, gradient regularization 0.1.
# The per-estimator feature lists and the ensemble size are both tied to the
# n_estimators setting so they cannot drift apart from a hard-coded 10.
train_ensemble(
    'minimal_tight_reg',
    use_tight_cuts=True,
    n_estimators=n_estimators,
    grad_x_regularization=0.1,
    features=[[0,5,6,7,9,10,11] for _ in range(n_estimators)]
)