# Train SALLY ensemble

Johann Brehmer, Kyle Cranmer, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
% matplotlib inline
import logging
import os

from madminer.sampling import SampleAugmenter
from madminer.sampling import multiple_benchmark_thetas
from madminer.sampling import constant_morphing_theta, multiple_morphing_thetas, random_morphing_thetas
from madminer.ml import MLForge, EnsembleForge

logging.basicConfig(format='%(asctime)s  %(message)s', datefmt='%H:%M')

In [2]:
base_dir = '/Users/johannbrehmer/work/projects/madminer/diboson_mining/'
mg_dir = '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_2/'

In [3]:
sample_dir = base_dir + 'data/samples/wgamma/'
card_dir = base_dir + 'cards/wgamma/'
ufo_model_dir = card_dir + 'SMWgamma_UFO'
run_card_dir = card_dir + 'run_cards/'
mg_process_dir = base_dir + 'data/mg_processes/wgamma/'
log_dir = base_dir + 'logs/wgamma/'
temp_dir = base_dir + 'data/temp'
delphes_dir = mg_dir + 'Delphes'
model_dir = base_dir + 'data/models/wgamma/'

## Settings

In [4]:
n_estimators = 20
n_hidden = (100,100)
n_epochs = 20
batch_size = 128
initial_lr = 0.001
final_lr = 0.0001

## Train SALLY on all observables

In [5]:
ensemble_all = EnsembleForge(n_estimators)

ensemble_all.train_all(
    method='sally',
    x_filename=[sample_dir + 'train_local/x_train_{}.npy'.format(i) for i in range(n_estimators)],
    t_xz0_filename=[sample_dir + 'train_local/t_xz_train_{}.npy'.format(i) for i in range(n_estimators)],
    n_epochs=n_epochs,
    batch_size=batch_size,
    validation_split=None,
    n_hidden=n_hidden
)

22:56  
22:56  ------------------------------------------------------------
22:56  |                                                          |
22:56  |  MadMiner v2018.10.30                                    |
22:56  |                                                          |
22:56  |           Johann Brehmer, Kyle Cranmer, and Felix Kling  |
22:56  |                                                          |
22:56  ------------------------------------------------------------
22:56  
22:56  Training 20 estimators in ensemble
22:56  Training estimator 1 / 20 in ensemble
22:56  Starting training
22:56    Method:                 sally
22:56    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
22:56                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
22:56    Features:               all
22:56    Method:                 sal

23:30    Features:               all
23:30    Method:                 sally
23:30    Hidden layers:          (100, 100)
23:30    Activation function:    tanh
23:30    Batch size:             128
23:30    Epochs:                 20
23:30    Learning rate:          0.002 initially, decaying to 0.0001
23:30    Validation split:       None
23:30    Early stopping:         True
23:30  Loading training data
23:30  Found 1000000 samples with 2 parameters and 30 observables
23:30  Creating model for method sally
23:30  Training model
23:31    Epoch 2: train loss 24.33 ([24.32849032])
23:32    Epoch 4: train loss 24.32 ([24.32031129])
23:32    Epoch 6: train loss 24.31 ([24.31024602])
23:33    Epoch 8: train loss 24.30 ([24.30384182])
23:34    Epoch 10: train loss 24.30 ([24.30124268])
23:34    Epoch 12: train loss 24.29 ([24.29117569])
23:35    Epoch 14: train loss 24.29 ([24.28974107])
23:36    Epoch 16: train loss 24.28 ([24.28351236])
23:36    Epoch 18: train loss 24.28 ([24.27662582])
23:3

00:10    Epoch 16: train loss 28.45 ([28.45021547])
00:10    Epoch 18: train loss 28.45 ([28.44517417])
00:11    Epoch 20: train loss 28.44 ([28.44359385])
00:11  Finished training
00:11  Training estimator 12 / 20 in ensemble
00:11  Starting training
00:11    Method:                 sally
00:11    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_11.npy
00:11                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_11.npy
00:11    Features:               all
00:11    Method:                 sally
00:11    Hidden layers:          (100, 100)
00:11    Activation function:    tanh
00:11    Batch size:             128
00:11    Epochs:                 20
00:11    Learning rate:          0.002 initially, decaying to 0.0001
00:11    Validation split:       None
00:11    Early stopping:         True
00:11  Loading training data
00:11  Found 10000

00:46    Validation split:       None
00:46    Early stopping:         True
00:46  Loading training data
00:46  Found 1000000 samples with 2 parameters and 30 observables
00:46  Creating model for method sally
00:46  Training model
00:46    Epoch 2: train loss 20.86 ([20.86496105])
00:47    Epoch 4: train loss 20.86 ([20.86055829])
00:48    Epoch 6: train loss 20.84 ([20.84274821])
00:48    Epoch 8: train loss 20.84 ([20.83727186])
00:49    Epoch 10: train loss 20.84 ([20.83520169])
00:50    Epoch 12: train loss 20.83 ([20.83014117])
00:50    Epoch 14: train loss 20.83 ([20.8262549])
00:51    Epoch 16: train loss 20.82 ([20.82119595])
00:52    Epoch 18: train loss 20.82 ([20.81750904])
00:52    Epoch 20: train loss 20.81 ([20.81498551])
00:52  Finished training
00:52  Training estimator 18 / 20 in ensemble
00:52  Starting training
00:52    Method:                 sally
00:52    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_loca

In [6]:
ensemble_all.calculate_expectation(
    x_filename=sample_dir + 'validation/x_validation.npy'
)

01:13  Calculating expectation for 20 estimators in ensemble
01:13  Starting evaluation for estimator 1 / 20 in ensemble
01:13  Starting evaluation for estimator 2 / 20 in ensemble
01:13  Starting evaluation for estimator 3 / 20 in ensemble
01:13  Starting evaluation for estimator 4 / 20 in ensemble
01:13  Starting evaluation for estimator 5 / 20 in ensemble
01:14  Starting evaluation for estimator 6 / 20 in ensemble
01:14  Starting evaluation for estimator 7 / 20 in ensemble
01:14  Starting evaluation for estimator 8 / 20 in ensemble
01:14  Starting evaluation for estimator 9 / 20 in ensemble
01:14  Starting evaluation for estimator 10 / 20 in ensemble
01:14  Starting evaluation for estimator 11 / 20 in ensemble
01:15  Starting evaluation for estimator 12 / 20 in ensemble
01:15  Starting evaluation for estimator 13 / 20 in ensemble
01:15  Starting evaluation for estimator 14 / 20 in ensemble
01:15  Starting evaluation for estimator 15 / 20 in ensemble
01:15  Starting evaluation for es

array([[ 0.03204757, -0.04126279],
       [-0.02273991,  0.05664141],
       [ 0.05441407,  0.01417246],
       [-0.03075575, -0.04044286],
       [-0.02910792, -0.07734152],
       [ 0.00972841, -0.06626226],
       [ 0.0228152 ,  0.1098204 ],
       [-0.05857595,  0.01893184],
       [ 0.02957168, -0.08383568],
       [-0.3199839 ,  0.15417713],
       [-0.04143749, -0.0601419 ],
       [ 0.01575857,  0.0789884 ],
       [ 0.05620619, -0.05044384],
       [ 0.04178018,  0.01603008],
       [-0.03680419,  0.0245736 ],
       [-0.02385111,  0.02015075],
       [ 0.02806177, -0.01884802],
       [ 0.06489246, -0.02968051],
       [-0.01572647, -0.00190887],
       [ 0.05007704, -0.02756988]], dtype=float32)

In [7]:
ensemble_all.save(model_dir + 'sally_ensemble_all')

## 1d toy study (delta phi)

In [8]:
ensemble_deltaphi = EnsembleForge(n_estimators)

ensemble_deltaphi.train_all(
    features=[ [20] for _ in range(n_estimators)],
    method='sally',
    x_filename=[sample_dir + 'train_local/x_train_{}.npy'.format(i) for i in range(n_estimators)],
    t_xz0_filename=[sample_dir + 'train_local/t_xz_train_{}.npy'.format(i) for i in range(n_estimators)],
    n_epochs=n_epochs,
    batch_size=batch_size,
    validation_split=None,
    n_hidden=n_hidden
)

01:16  Training 20 estimators in ensemble
01:16  Training estimator 1 / 20 in ensemble
01:16  Starting training
01:16    Method:                 sally
01:16    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
01:16                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
01:16    Features:               [20]
01:16    Method:                 sally
01:16    Hidden layers:          (100, 100)
01:16    Activation function:    tanh
01:16    Batch size:             128
01:16    Epochs:                 20
01:16    Learning rate:          0.002 initially, decaying to 0.0001
01:16    Validation split:       None
01:16    Early stopping:         True
01:16  Loading training data
01:16  Found 1000000 samples with 2 parameters and 30 observables
01:16  Only using 1 of 30 observables
01:16  Creating model for method sally
01:16  Training 

01:46    Validation split:       None
01:46    Early stopping:         True
01:46  Loading training data
01:46  Found 1000000 samples with 2 parameters and 30 observables
01:46  Only using 1 of 30 observables
01:46  Creating model for method sally
01:46  Training model
01:46    Epoch 2: train loss 24.31 ([24.30815676])
01:47    Epoch 4: train loss 24.31 ([24.30557742])
01:48    Epoch 6: train loss 24.30 ([24.30285415])
01:48    Epoch 8: train loss 24.30 ([24.30458712])
01:49    Epoch 10: train loss 24.30 ([24.30048575])
01:49    Epoch 12: train loss 24.30 ([24.2997615])
01:50    Epoch 14: train loss 24.30 ([24.29957524])
01:50    Epoch 16: train loss 24.30 ([24.29908328])
01:51    Epoch 18: train loss 24.30 ([24.29886703])
01:52    Epoch 20: train loss 24.30 ([24.29891977])
01:52  Finished training
01:52  Training estimator 7 / 20 in ensemble
01:52  Starting training
01:52    Method:                 sally
01:52    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_

02:21    Epoch 18: train loss 28.46 ([28.45653325])
02:21    Epoch 20: train loss 28.46 ([28.45730849])
02:21  Finished training
02:21  Training estimator 12 / 20 in ensemble
02:21  Starting training
02:21    Method:                 sally
02:21    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_11.npy
02:21                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_11.npy
02:21    Features:               [20]
02:21    Method:                 sally
02:21    Hidden layers:          (100, 100)
02:21    Activation function:    tanh
02:21    Batch size:             128
02:21    Epochs:                 20
02:21    Learning rate:          0.002 initially, decaying to 0.0001
02:21    Validation split:       None
02:21    Early stopping:         True
02:21  Loading training data
02:21  Found 1000000 samples with 2 parameters and 30 observables
02:

02:51    Batch size:             128
02:51    Epochs:                 20
02:51    Learning rate:          0.002 initially, decaying to 0.0001
02:51    Validation split:       None
02:51    Early stopping:         True
02:51  Loading training data
02:51  Found 1000000 samples with 2 parameters and 30 observables
02:51  Only using 1 of 30 observables
02:51  Creating model for method sally
02:51  Training model
02:52    Epoch 2: train loss 20.85 ([20.84531817])
02:52    Epoch 4: train loss 20.84 ([20.83725951])
02:53    Epoch 6: train loss 20.84 ([20.83585127])
02:54    Epoch 8: train loss 20.84 ([20.8355927])
02:54    Epoch 10: train loss 20.83 ([20.83496546])
02:55    Epoch 12: train loss 20.84 ([20.83512471])
02:55    Epoch 14: train loss 20.83 ([20.83469329])
02:56    Epoch 16: train loss 20.83 ([20.83485474])
02:57    Epoch 18: train loss 20.84 ([20.83506246])
02:57    Epoch 20: train loss 20.83 ([20.83369829])
02:57  Finished training
02:57  Training estimator 18 / 20 in ensemble
02

In [9]:
ensemble_deltaphi.calculate_expectation(
    x_filename=sample_dir + 'validation/x_validation.npy'
)

03:15  Calculating expectation for 20 estimators in ensemble
03:15  Starting evaluation for estimator 1 / 20 in ensemble
03:15  Starting evaluation for estimator 2 / 20 in ensemble
03:15  Starting evaluation for estimator 3 / 20 in ensemble
03:15  Starting evaluation for estimator 4 / 20 in ensemble
03:16  Starting evaluation for estimator 5 / 20 in ensemble
03:16  Starting evaluation for estimator 6 / 20 in ensemble
03:16  Starting evaluation for estimator 7 / 20 in ensemble
03:16  Starting evaluation for estimator 8 / 20 in ensemble
03:16  Starting evaluation for estimator 9 / 20 in ensemble
03:16  Starting evaluation for estimator 10 / 20 in ensemble
03:16  Starting evaluation for estimator 11 / 20 in ensemble
03:16  Starting evaluation for estimator 12 / 20 in ensemble
03:17  Starting evaluation for estimator 13 / 20 in ensemble
03:17  Starting evaluation for estimator 14 / 20 in ensemble
03:17  Starting evaluation for estimator 15 / 20 in ensemble
03:17  Starting evaluation for es

array([[ 0.01428281,  0.00531502],
       [-0.00108248, -0.0074827 ],
       [ 0.00490564,  0.00044522],
       [ 0.00950521,  0.00056793],
       [ 0.00695332,  0.00272985],
       [-0.00010192, -0.00506927],
       [-0.00208527, -0.00592995],
       [ 0.02129456,  0.01054829],
       [ 0.00924479, -0.00488937],
       [-0.00105973,  0.00674628],
       [-0.00418518, -0.00493223],
       [-0.00204606, -0.00598308],
       [ 0.00781755,  0.00542245],
       [ 0.00062396,  0.00213693],
       [ 0.00560929,  0.00486967],
       [ 0.0071533 ,  0.0064686 ],
       [ 0.00738978, -0.00364019],
       [-0.00066078,  0.00456094],
       [-0.0004693 ,  0.00517488],
       [-0.00488432, -0.00201409]], dtype=float32)

In [10]:
ensemble_deltaphi.save(model_dir + 'sally_ensemble_deltaphi')

## 1d toy study (MET)

In [11]:
ensemble_met = EnsembleForge(n_estimators)

ensemble_met.train_all(
    features=[ [0] for _ in range(n_estimators)],
    method='sally',
    x_filename=[sample_dir + 'train_local/x_train_{}.npy'.format(i) for i in range(n_estimators)],
    t_xz0_filename=[sample_dir + 'train_local/t_xz_train_{}.npy'.format(i) for i in range(n_estimators)],
    n_epochs=n_epochs,
    batch_size=batch_size,
    validation_split=None,
    n_hidden=n_hidden
)

03:18  Training 20 estimators in ensemble
03:18  Training estimator 1 / 20 in ensemble
03:18  Starting training
03:18    Method:                 sally
03:18    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
03:18                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
03:18    Features:               [0]
03:18    Method:                 sally
03:18    Hidden layers:          (100, 100)
03:18    Activation function:    tanh
03:18    Batch size:             128
03:18    Epochs:                 20
03:18    Learning rate:          0.002 initially, decaying to 0.0001
03:18    Validation split:       None
03:18    Early stopping:         True
03:18  Loading training data
03:18  Found 1000000 samples with 2 parameters and 30 observables
03:18  Only using 1 of 30 observables
03:18  Creating model for method sally
03:18  Training m

03:51    Validation split:       None
03:51    Early stopping:         True
03:51  Loading training data
03:51  Found 1000000 samples with 2 parameters and 30 observables
03:51  Only using 1 of 30 observables
03:51  Creating model for method sally
03:51  Training model
03:52    Epoch 2: train loss 24.34 ([24.33624374])
03:53    Epoch 4: train loss 24.33 ([24.32618657])
03:53    Epoch 6: train loss 24.32 ([24.31784503])
03:54    Epoch 8: train loss 24.31 ([24.3141966])
03:55    Epoch 10: train loss 24.31 ([24.30976874])
03:55    Epoch 12: train loss 24.31 ([24.30660416])
03:56    Epoch 14: train loss 24.31 ([24.30582231])
03:57    Epoch 16: train loss 24.30 ([24.3038493])
03:57    Epoch 18: train loss 24.30 ([24.30312586])
03:58    Epoch 20: train loss 24.30 ([24.30403295])
03:58  Finished training
03:58  Training estimator 7 / 20 in ensemble
03:58  Starting training
03:58    Method:                 sally
03:58    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_m

04:30    Epoch 18: train loss 28.46 ([28.46122071])
04:30    Epoch 20: train loss 28.52 ([28.52363099])
04:30  Finished training
04:30  Training estimator 12 / 20 in ensemble
04:30  Starting training
04:30    Method:                 sally
04:30    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_11.npy
04:30                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_11.npy
04:30    Features:               [0]
04:30    Method:                 sally
04:30    Hidden layers:          (100, 100)
04:30    Activation function:    tanh
04:30    Batch size:             128
04:30    Epochs:                 20
04:30    Learning rate:          0.002 initially, decaying to 0.0001
04:30    Validation split:       None
04:30    Early stopping:         True
04:30  Loading training data
04:30  Found 1000000 samples with 2 parameters and 30 observables
04:3

05:04    Batch size:             128
05:04    Epochs:                 20
05:04    Learning rate:          0.002 initially, decaying to 0.0001
05:04    Validation split:       None
05:04    Early stopping:         True
05:04  Loading training data
05:04  Found 1000000 samples with 2 parameters and 30 observables
05:04  Only using 1 of 30 observables
05:04  Creating model for method sally
05:04  Training model
05:05    Epoch 2: train loss 20.86 ([20.86238446])
05:05    Epoch 4: train loss 20.86 ([20.86030294])
05:06    Epoch 6: train loss 20.85 ([20.85164231])
05:07    Epoch 8: train loss 20.85 ([20.84855587])
05:07    Epoch 10: train loss 20.84 ([20.84386759])
05:08    Epoch 12: train loss 20.84 ([20.84115645])
05:08    Epoch 14: train loss 20.84 ([20.84137902])
05:09    Epoch 16: train loss 20.84 ([20.83879972])
05:10    Epoch 18: train loss 20.84 ([20.8376898])
05:11    Epoch 20: train loss 20.84 ([20.83776957])
05:11  Finished training
05:11  Training estimator 18 / 20 in ensemble
05

In [12]:
ensemble_met.calculate_expectation(
    x_filename=sample_dir + 'validation/x_validation.npy'
)

05:31  Calculating expectation for 20 estimators in ensemble
05:31  Starting evaluation for estimator 1 / 20 in ensemble
05:31  Starting evaluation for estimator 2 / 20 in ensemble
05:31  Starting evaluation for estimator 3 / 20 in ensemble
05:31  Starting evaluation for estimator 4 / 20 in ensemble
05:31  Starting evaluation for estimator 5 / 20 in ensemble
05:31  Starting evaluation for estimator 6 / 20 in ensemble
05:31  Starting evaluation for estimator 7 / 20 in ensemble
05:31  Starting evaluation for estimator 8 / 20 in ensemble
05:32  Starting evaluation for estimator 9 / 20 in ensemble
05:32  Starting evaluation for estimator 10 / 20 in ensemble
05:32  Starting evaluation for estimator 11 / 20 in ensemble
05:32  Starting evaluation for estimator 12 / 20 in ensemble
05:32  Starting evaluation for estimator 13 / 20 in ensemble
05:32  Starting evaluation for estimator 14 / 20 in ensemble
05:32  Starting evaluation for estimator 15 / 20 in ensemble
05:33  Starting evaluation for es

array([[ 0.02272519, -0.01953053],
       [ 0.02308884,  0.00164204],
       [ 0.00784216,  0.00719034],
       [-0.01566505,  0.00717336],
       [-0.06711584, -0.04679087],
       [ 0.06617928,  0.00036818],
       [-0.03279284, -0.00281756],
       [-0.03282881,  0.00791341],
       [ 0.00587244, -0.00919737],
       [ 0.03765859,  0.01425881],
       [-0.0481303 , -0.00811759],
       [-0.00631865, -0.02273407],
       [-0.03515056, -0.00830877],
       [ 0.00461069,  0.04432376],
       [-0.02545383, -0.04821105],
       [ 0.0126791 , -0.0854085 ],
       [-0.01543825, -0.0395059 ],
       [-0.05105131, -0.00641482],
       [ 0.044085  ,  0.07499459],
       [-0.09711312, -0.0586315 ]], dtype=float32)

In [13]:
ensemble_met.save(model_dir + 'sally_ensemble_met')

## 1d toy study (dummy observable: phi(v))

In [14]:
ensemble_dummy = EnsembleForge(n_estimators)

ensemble_dummy.train_all(
    features=[ [1] for _ in range(n_estimators)],
    method='sally',
    x_filename=[sample_dir + 'train_local/x_train_{}.npy'.format(i) for i in range(n_estimators)],
    t_xz0_filename=[sample_dir + 'train_local/t_xz_train_{}.npy'.format(i) for i in range(n_estimators)],
    n_epochs=n_epochs,
    batch_size=batch_size,
    validation_split=None,
    n_hidden=n_hidden
)

05:33  Training 20 estimators in ensemble
05:33  Training estimator 1 / 20 in ensemble
05:33  Starting training
05:33    Method:                 sally
05:33    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
05:33                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
05:33    Features:               [1]
05:33    Method:                 sally
05:33    Hidden layers:          (100, 100)
05:33    Activation function:    tanh
05:33    Batch size:             128
05:33    Epochs:                 20
05:33    Learning rate:          0.002 initially, decaying to 0.0001
05:33    Validation split:       None
05:33    Early stopping:         True
05:33  Loading training data
05:33  Found 1000000 samples with 2 parameters and 30 observables
05:33  Only using 1 of 30 observables
05:33  Creating model for method sally
05:33  Training m

06:03    Validation split:       None
06:03    Early stopping:         True
06:03  Loading training data
06:03  Found 1000000 samples with 2 parameters and 30 observables
06:03  Only using 1 of 30 observables
06:03  Creating model for method sally
06:03  Training model
06:04    Epoch 2: train loss 24.30 ([24.30225461])
06:04    Epoch 4: train loss 24.30 ([24.30140223])
06:05    Epoch 6: train loss 24.30 ([24.30086916])
06:06    Epoch 8: train loss 24.30 ([24.30054559])
06:06    Epoch 10: train loss 24.30 ([24.30057391])
06:07    Epoch 12: train loss 24.30 ([24.30096213])
06:07    Epoch 14: train loss 24.30 ([24.30062188])
06:08    Epoch 16: train loss 24.30 ([24.30042757])
06:08    Epoch 18: train loss 24.30 ([24.30051092])
06:09    Epoch 20: train loss 24.30 ([24.30032086])
06:09  Finished training
06:09  Training estimator 7 / 20 in ensemble
06:09  Starting training
06:09    Method:                 sally
06:09    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson

06:38    Epoch 18: train loss 28.46 ([28.458448])
06:39    Epoch 20: train loss 28.46 ([28.45820355])
06:39  Finished training
06:39  Training estimator 12 / 20 in ensemble
06:39  Starting training
06:39    Method:                 sally
06:39    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_11.npy
06:39                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_11.npy
06:39    Features:               [1]
06:39    Method:                 sally
06:39    Hidden layers:          (100, 100)
06:39    Activation function:    tanh
06:39    Batch size:             128
06:39    Epochs:                 20
06:39    Learning rate:          0.002 initially, decaying to 0.0001
06:39    Validation split:       None
06:39    Early stopping:         True
06:39  Loading training data
06:39  Found 1000000 samples with 2 parameters and 30 observables
06:39 

07:09    Batch size:             128
07:09    Epochs:                 20
07:09    Learning rate:          0.002 initially, decaying to 0.0001
07:09    Validation split:       None
07:09    Early stopping:         True
07:09  Loading training data
07:09  Found 1000000 samples with 2 parameters and 30 observables
07:09  Only using 1 of 30 observables
07:09  Creating model for method sally
07:09  Training model
07:09    Epoch 2: train loss 20.84 ([20.83650621])
07:10    Epoch 4: train loss 20.84 ([20.83588832])
07:11    Epoch 6: train loss 20.84 ([20.83547923])
07:11    Epoch 8: train loss 20.84 ([20.8353616])
07:12    Epoch 10: train loss 20.84 ([20.835339])
07:12    Epoch 12: train loss 20.84 ([20.835413])
07:13    Epoch 14: train loss 20.84 ([20.83536101])
07:13    Epoch 16: train loss 20.84 ([20.83514525])
07:14    Epoch 18: train loss 20.84 ([20.83520989])
07:15    Epoch 20: train loss 20.84 ([20.83528076])
07:15  Finished training
07:15  Training estimator 18 / 20 in ensemble
07:15 

In [15]:
ensemble_dummy.calculate_expectation(
    x_filename=sample_dir + 'validation/x_validation.npy'
)

07:32  Calculating expectation for 20 estimators in ensemble
07:32  Starting evaluation for estimator 1 / 20 in ensemble
07:33  Starting evaluation for estimator 2 / 20 in ensemble
07:33  Starting evaluation for estimator 3 / 20 in ensemble
07:33  Starting evaluation for estimator 4 / 20 in ensemble
07:33  Starting evaluation for estimator 5 / 20 in ensemble
07:33  Starting evaluation for estimator 6 / 20 in ensemble
07:33  Starting evaluation for estimator 7 / 20 in ensemble
07:33  Starting evaluation for estimator 8 / 20 in ensemble
07:34  Starting evaluation for estimator 9 / 20 in ensemble
07:34  Starting evaluation for estimator 10 / 20 in ensemble
07:34  Starting evaluation for estimator 11 / 20 in ensemble
07:34  Starting evaluation for estimator 12 / 20 in ensemble
07:34  Starting evaluation for estimator 13 / 20 in ensemble
07:34  Starting evaluation for estimator 14 / 20 in ensemble
07:34  Starting evaluation for estimator 15 / 20 in ensemble
07:34  Starting evaluation for es

array([[ 0.00336673, -0.00288502],
       [ 0.0068463 , -0.00944023],
       [ 0.00100317, -0.0060065 ],
       [ 0.00568094, -0.00070863],
       [ 0.006575  , -0.00283663],
       [ 0.00195738,  0.00871882],
       [ 0.0051679 , -0.00715192],
       [ 0.01136171,  0.00861109],
       [ 0.00392051, -0.010827  ],
       [-0.00224715,  0.00736448],
       [-0.00815651, -0.00631816],
       [-0.00376409, -0.00368507],
       [ 0.00249377,  0.00758593],
       [ 0.005772  , -0.00649926],
       [-0.00109865,  0.00498505],
       [-0.00260419,  0.00814076],
       [ 0.00449551, -0.00786066],
       [ 0.00568148, -0.00503886],
       [ 0.00205642,  0.00773895],
       [ 0.0042204 , -0.00023774]], dtype=float32)

In [16]:
ensemble_dummy.save(model_dir + 'sally_ensemble_dummy')

## 1d toy study (resurrection phi)

In [17]:
ensemble_res = EnsembleForge(n_estimators)

ensemble_res.train_all(
    features=[ [29] for _ in range(n_estimators)],
    method='sally',
    x_filename=[sample_dir + 'train_local/x_train_{}.npy'.format(i) for i in range(n_estimators)],
    t_xz0_filename=[sample_dir + 'train_local/t_xz_train_{}.npy'.format(i) for i in range(n_estimators)],
    n_epochs=n_epochs,
    batch_size=batch_size,
    validation_split=None,
    n_hidden=n_hidden
)

07:35  Training 20 estimators in ensemble
07:35  Training estimator 1 / 20 in ensemble
07:35  Starting training
07:35    Method:                 sally
07:35    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_0.npy
07:35                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_0.npy
07:35    Features:               [29]
07:35    Method:                 sally
07:35    Hidden layers:          (100, 100)
07:35    Activation function:    tanh
07:35    Batch size:             128
07:35    Epochs:                 20
07:35    Learning rate:          0.002 initially, decaying to 0.0001
07:35    Validation split:       None
07:35    Early stopping:         True
07:35  Loading training data
07:35  Found 1000000 samples with 2 parameters and 30 observables
07:35  Only using 1 of 30 observables
07:35  Creating model for method sally
07:35  Training 

08:05    Validation split:       None
08:05    Early stopping:         True
08:05  Loading training data
08:05  Found 1000000 samples with 2 parameters and 30 observables
08:05  Only using 1 of 30 observables
08:05  Creating model for method sally
08:05  Training model
08:06    Epoch 2: train loss 24.30 ([24.30287919])
08:06    Epoch 4: train loss 24.30 ([24.30093839])
08:07    Epoch 6: train loss 24.30 ([24.3011309])
08:08    Epoch 8: train loss 24.30 ([24.30098106])
08:08    Epoch 10: train loss 24.30 ([24.30100083])
08:09    Epoch 12: train loss 24.30 ([24.30084711])
08:09    Epoch 14: train loss 24.30 ([24.30056967])
08:10    Epoch 16: train loss 24.30 ([24.30098907])
08:11    Epoch 18: train loss 24.30 ([24.30038112])
08:11    Epoch 20: train loss 24.30 ([24.30041937])
08:11  Finished training
08:11  Training estimator 7 / 20 in ensemble
08:11  Starting training
08:11    Method:                 sally
08:11    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_

08:54    Epoch 18: train loss 28.46 ([28.45819304])
08:54    Epoch 20: train loss 28.46 ([28.4581199])
08:54  Finished training
08:54  Training estimator 12 / 20 in ensemble
08:54  Starting training
08:54    Method:                 sally
08:54    Training data: x at /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/x_train_11.npy
08:54                   t_xz (theta0) at  /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma/train_local/t_xz_train_11.npy
08:54    Features:               [29]
08:54    Method:                 sally
08:54    Hidden layers:          (100, 100)
08:54    Activation function:    tanh
08:54    Batch size:             128
08:54    Epochs:                 20
08:54    Learning rate:          0.002 initially, decaying to 0.0001
08:54    Validation split:       None
08:54    Early stopping:         True
08:54  Loading training data
08:54  Found 1000000 samples with 2 parameters and 30 observables
08:5

09:27    Batch size:             128
09:27    Epochs:                 20
09:27    Learning rate:          0.002 initially, decaying to 0.0001
09:27    Validation split:       None
09:27    Early stopping:         True
09:27  Loading training data
09:27  Found 1000000 samples with 2 parameters and 30 observables
09:27  Only using 1 of 30 observables
09:27  Creating model for method sally
09:27  Training model
09:28    Epoch 2: train loss 20.84 ([20.83663648])
09:28    Epoch 4: train loss 20.84 ([20.83580615])
09:29    Epoch 6: train loss 20.84 ([20.83554541])
09:29    Epoch 8: train loss 20.84 ([20.83553139])
09:30    Epoch 10: train loss 20.84 ([20.83533335])
09:31    Epoch 12: train loss 20.84 ([20.83519458])
09:31    Epoch 14: train loss 20.84 ([20.83546944])
09:32    Epoch 16: train loss 20.84 ([20.83511996])
09:32    Epoch 18: train loss 20.84 ([20.83537666])
09:33    Epoch 20: train loss 20.84 ([20.83511081])
09:33  Finished training
09:33  Training estimator 18 / 20 in ensemble
0

In [18]:
ensemble_res.calculate_expectation(
    x_filename=sample_dir + 'validation/x_validation.npy'
)

09:51  Calculating expectation for 20 estimators in ensemble
09:51  Starting evaluation for estimator 1 / 20 in ensemble
09:51  Starting evaluation for estimator 2 / 20 in ensemble
09:51  Starting evaluation for estimator 3 / 20 in ensemble
09:52  Starting evaluation for estimator 4 / 20 in ensemble
09:52  Starting evaluation for estimator 5 / 20 in ensemble
09:52  Starting evaluation for estimator 6 / 20 in ensemble
09:52  Starting evaluation for estimator 7 / 20 in ensemble
09:52  Starting evaluation for estimator 8 / 20 in ensemble
09:52  Starting evaluation for estimator 9 / 20 in ensemble
09:52  Starting evaluation for estimator 10 / 20 in ensemble
09:53  Starting evaluation for estimator 11 / 20 in ensemble
09:53  Starting evaluation for estimator 12 / 20 in ensemble
09:53  Starting evaluation for estimator 13 / 20 in ensemble
09:53  Starting evaluation for estimator 14 / 20 in ensemble
09:53  Starting evaluation for estimator 15 / 20 in ensemble
09:53  Starting evaluation for es

array([[ 0.0031571 , -0.00529903],
       [-0.00704187, -0.01263378],
       [ 0.00147036, -0.00874161],
       [ 0.00024858,  0.00263365],
       [ 0.00405722, -0.00343769],
       [-0.00119947,  0.00644899],
       [ 0.00158439, -0.00339344],
       [ 0.00563954,  0.00829799],
       [-0.00332506, -0.00051809],
       [-0.00134955,  0.00435185],
       [-0.00233581, -0.01089808],
       [-0.00815972, -0.00128444],
       [ 0.00607474,  0.00218514],
       [ 0.00638331, -0.00159927],
       [ 0.00032959,  0.00698967],
       [-0.00381187,  0.00743328],
       [ 0.01006584, -0.00946279],
       [ 0.0073764 ,  0.00974631],
       [ 0.00366369,  0.00336566],
       [-0.00764222,  0.0042087 ]], dtype=float32)

In [19]:
ensemble_res.save(model_dir + 'sally_ensemble_resurrection')