# Train SALLY ensemble

Johann Brehmer, Kyle Cranmer, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
% matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

logging.basicConfig(format='%(asctime)s  %(message)s', datefmt='%H:%M')

In [2]:
# Root locations of the project checkout and of the MadGraph installation.
# The defaults are the original author's local paths; set the MADMINER_BASE_DIR /
# MADMINER_MG_DIR environment variables to run the notebook on another machine.
# os.path.join(path, '') guarantees a trailing separator, which matters because the
# derived directories are built from these values by plain string concatenation.
base_dir = os.path.join(
    os.environ.get('MADMINER_BASE_DIR', '/Users/johannbrehmer/work/projects/madminer/diboson_mining/'),
    ''
)
mg_dir = os.path.join(
    os.environ.get('MADMINER_MG_DIR', '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_2/'),
    ''
)

In [3]:
# All directories below are derived from base_dir / mg_dir by string concatenation,
# so both roots are expected to end with a path separator.

# Card directory and the paths nested inside it
card_dir = base_dir + 'cards/wgamma/'
run_card_dir = card_dir + 'run_cards/'
ufo_model_dir = card_dir + 'SMWgamma_UFO'

# Location inside the MadGraph installation
delphes_dir = mg_dir + 'Delphes'

# Data products, logs and scratch space under the project root
mg_process_dir = base_dir + 'data/mg_processes/wgamma/'
sample_dir = base_dir + 'data/samples/wgamma/'
model_dir = base_dir + 'data/models/wgamma/'
temp_dir = base_dir + 'data/temp'
log_dir = base_dir + 'logs/wgamma/'

## Ensemble

In [4]:
# Number of members in the ensemble.
n_estimators = 10

# Build one fresh MLForge per member, then wrap them in a single EnsembleForge.
forges = [MLForge(debug=False) for _ in range(n_estimators)]
ensemble = EnsembleForge(forges)

18:14  
18:14  ------------------------------------------------------------
18:14  |                                                          |
18:14  |  MadMiner v2018.10.22                                    |
18:14  |                                                          |
18:14  |           Johann Brehmer, Kyle Cranmer, and Felix Kling  |
18:14  |                                                          |
18:14  ------------------------------------------------------------
18:14  


## Train SALLY on all observables

In [5]:
# Hidden-layer architectures, one entry per ensemble member (the training log shows
# the first estimator using the first entry).
hidden_layers = [
    (50,), (100,), (50,50), (50,20), (100,100),
    (100, 20), (50,50,50), (50,20,10), (100,100,100), (100, 50, 20),
]
# Fail early if the ensemble size and the architecture list drift apart.
assert len(hidden_layers) == n_estimators, 'need exactly one hidden-layer setup per estimator'

# Each estimator i is trained on its own sample files x_train_i / t_xz_train_i;
# the file lists are sized by n_estimators instead of a hard-coded 10.
ensemble.train_all(
    method='sally',
    x_filename=[sample_dir + 'train_local/x_train_{}.npy'.format(i) for i in range(n_estimators)],
    t_xz0_filename=[sample_dir + 'train_local/t_xz_train_{}.npy'.format(i) for i in range(n_estimators)],
    n_epochs=10,
    batch_size=256,
    validation_split=None,
    n_hidden=hidden_layers
)

# Persist the trained ensemble.
ensemble.save(model_dir + 'sally_ensemble_all')

18:14  Training 10 estimators in ensemble
18:14  Training estimator 1 / 10 in ensemble
18:14  Starting training
18:14    Method:                 sally
18:14    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
18:14                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
18:14    Features:               all
18:14    Method:                 sally
18:14    Hidden layers:          (50,)
18:14    Activation function:    tanh
18:14    Batch size:             256
18:14    Epochs:                 10
18:14    Learning rate:          0.002 initially, decaying to 0.0001
18:14    Validation split:       None
18:14    Early stopping:         True
18:14  Loading training data
18:14  Found 1000000 samples with 2 parameters and 27 observables
18:14  Creating model for method sally
18:14  Training model
18:14    Epoch 1: train loss 29.17 ([2

18:28  Training model
18:28    Epoch 1: train loss 77.17 ([77.1709123])
18:28    Epoch 2: train loss 77.16 ([77.16493485])
18:29    Epoch 3: train loss 77.16 ([77.15595141])
18:29    Epoch 4: train loss 77.14 ([77.14477613])
18:29    Epoch 5: train loss 77.14 ([77.14116496])
18:29    Epoch 6: train loss 77.14 ([77.13569414])
18:30    Epoch 7: train loss 77.12 ([77.12474735])
18:30    Epoch 8: train loss 77.12 ([77.11678682])
18:30    Epoch 9: train loss 77.11 ([77.11386909])
18:31    Epoch 10: train loss 77.12 ([77.11762383])
18:31  Finished training
18:31  Training estimator 7 / 10 in ensemble
18:31  Starting training
18:31    Method:                 sally
18:31    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_6.npy
18:31                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_6.npy
18:31    Features:               all
18:31    Meth

KeyboardInterrupt: 

In [9]:
# Evaluate the ensemble on the validation sample. The call is the last expression
# of the cell, so its return value is shown as the cell output.
ensemble.calculate_expectation(x_filename=sample_dir + 'validation/x_validation.npy')

17:40  Calculating expectation for 10 estimators in ensemble
17:40  Starting evaluation for estimator 1 / 10 in ensemble
17:40  Loading evaluation data
17:40  Starting score evaluation
17:40  Starting evaluation for estimator 2 / 10 in ensemble
17:40  Loading evaluation data
17:40  Starting score evaluation
17:40  Starting evaluation for estimator 3 / 10 in ensemble
17:40  Loading evaluation data
17:40  Starting score evaluation
17:40  Starting evaluation for estimator 4 / 10 in ensemble
17:40  Loading evaluation data
17:40  Starting score evaluation
17:41  Starting evaluation for estimator 5 / 10 in ensemble
17:41  Loading evaluation data
17:41  Starting score evaluation
17:41  Starting evaluation for estimator 6 / 10 in ensemble
17:41  Loading evaluation data
17:41  Starting score evaluation
17:41  Starting evaluation for estimator 7 / 10 in ensemble
17:41  Loading evaluation data
17:41  Starting score evaluation
17:41  Starting evaluation for estimator 8 / 10 in ensemble
17:41  Load

array([[ 0.00634092,  0.06508537],
       [ 0.02007575,  0.02556378],
       [ 0.03975902, -0.01588391],
       [-0.02189546,  0.02988753],
       [-0.01648937, -0.00555408],
       [-0.00935855,  0.00249242],
       [ 0.04448106, -0.01657623],
       [ 0.0275156 , -0.01794838],
       [-0.05467386,  0.020074  ],
       [-0.00354157, -0.00803116]], dtype=float32)

In [11]:
# Save the ensemble to model_dir + 'sally_ensemble_all'.
# NOTE(review): the recorded run of this cell ended in
# "TypeError: Object of type 'ndarray' is not JSON serializable" (preceded by a
# logging format error). Presumably this is triggered by state stored by the earlier
# calculate_expectation() call -- confirm against the installed MadMiner version
# before relying on this save.
ensemble.save(model_dir + 'sally_ensemble_all')

--- Logging error ---
Traceback (most recent call last):
  File "/Users/johannbrehmer/anaconda3/envs/higgs_inference/lib/python3.6/logging/__init__.py", line 992, in emit
    msg = self.format(record)
  File "/Users/johannbrehmer/anaconda3/envs/higgs_inference/lib/python3.6/logging/__init__.py", line 838, in format
    return fmt.format(record)
  File "/Users/johannbrehmer/anaconda3/envs/higgs_inference/lib/python3.6/logging/__init__.py", line 575, in format
    record.message = record.getMessage()
  File "/Users/johannbrehmer/anaconda3/envs/higgs_inference/lib/python3.6/logging/__init__.py", line 338, in getMessage
    msg = msg % self.args
ValueError: unsupported format character '/' (0x2f) at index 26
Call stack:
  File "/Users/johannbrehmer/anaconda3/envs/higgs_inference/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/johannbrehmer/anaconda3/envs/higgs_inference/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_glob

TypeError: Object of type 'ndarray' is not JSON serializable

## 1d toy study (delta phi)

In [None]:
# Hidden-layer architectures, one entry per ensemble member.
hidden_layers = [
    (50,), (100,), (50,50), (50,20), (100,100),
    (100, 20), (50,50,50), (50,20,10), (100,100,100), (100, 50, 20),
]
# Fail early if the ensemble size and the architecture list drift apart.
assert len(hidden_layers) == n_estimators, 'need exactly one hidden-layer setup per estimator'

# Same training setup as for all observables, but restricted to the single input
# feature with index 25 (per the section title presumably delta phi -- TODO confirm).
# The file lists are sized by n_estimators instead of a hard-coded 10.
ensemble.train_all(
    method='sally',
    x_filename=[sample_dir + 'train_local/x_train_{}.npy'.format(i) for i in range(n_estimators)],
    t_xz0_filename=[sample_dir + 'train_local/t_xz_train_{}.npy'.format(i) for i in range(n_estimators)],
    features=[25],
    n_epochs=10,
    batch_size=256,
    validation_split=None,
    n_hidden=hidden_layers
)

# Persist the single-feature ensemble under its own name.
ensemble.save(model_dir + 'sally_ensemble_deltaphi')