# Evaluate Fisher information

Johann Brehmer, Kyle Cranmer, Marco Farina, Felix Kling, Duccio Pappadopulo, Josh Ruderman 2018

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import logging
import os

import madminer.core
from madminer.fisherinformation import FisherInformation, profile_information, project_information
from madminer.plotting import plot_fisher_information_contours_2d


In [3]:
# Configure root logging: timestamp, truncated logger name, level, message.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%H:%M',
    format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s',
)

# Raise every already-registered logger whose name does not contain
# "madminer" to WARNING; madminer loggers are left untouched.
for key in logging.Logger.manager.loggerDict:
    if "madminer" in key:
        continue
    logging.getLogger(key).setLevel(logging.WARNING)

In [4]:
# Machine-specific absolute paths; every other directory in this notebook
# is built from these two by string concatenation, so both end with '/'.
mg_dir = '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_4/'  # MadGraph installation
base_dir = '/Users/johannbrehmer/work/projects/madminer/diboson_mining/'  # project root

In [5]:
# Cards and model definition
card_dir = base_dir + 'cards/wgamma/'
run_card_dir = card_dir + 'run_cards/'
ufo_model_dir = card_dir + 'SMWgamma_UFO'

# Data directories below base_dir
sample_dir = base_dir + 'data/samples/wgamma_sys/'
mg_process_dir = base_dir + 'data/mg_processes/wgamma_sys/'
model_dir = base_dir + 'data/models/wgamma_sys/'
result_dir = base_dir + 'data/results/wgamma_sys/'
temp_dir = base_dir + 'data/temp'

# Logs and external tools
log_dir = base_dir + 'logs/wgamma/'
delphes_dir = mg_dir + 'Delphes'

## Settings

In [6]:
# Passed as the batch_size keyword to the 'ml' Fisher information calls below.
batch_size = 100000

## Main functions

In [7]:
# One FisherInformation object per sample file in sample_dir. calculate_info()
# picks among these three module-level objects via its tight_cuts /
# antitight_cuts flags.
fisher_all = FisherInformation(sample_dir + 'samples.h5')  # no "_tight"/"_antitight" suffix
fisher_tight = FisherInformation(sample_dir + 'samples_tight.h5')
fisher_antitight = FisherInformation(sample_dir + 'samples_antitight.h5')

15:55 madminer.fisherinfor INFO    Loading data from /Users/johannbrehmer/work/projects/madminer/diboson_mining/data/samples/wgamma_sys/samples.h5
15:55 madminer.fisherinfor INFO    Found 2 parameters
15:55 madminer.fisherinfor INFO    Found 32 nuisance parameters
15:55 madminer.fisherinfor INFO    Found 45 benchmarks, of which 6 physical
15:55 madminer.fisherinfor INFO    Found 33 observables: et_miss, phi_miss, e_visible, eta_visible, e_l1, pt_l1, eta_l1, phi_l1, e_a1, pt_a1, eta_a1, phi_a1, e_j1, pt_j1, eta_j1, phi_j1, pdgid_l1, m_la, m_lmet, m_amet, pt_la, pt_lmet, pt_amet, deltaphi_la, deltaphi_lmet, deltaphi_amet, deltaeta_la, m_almet, pt_almet, mt, phi_minus, phi_plus, phi
15:55 madminer.fisherinfor INFO    Found 1341798 events
15:55 madminer.fisherinfor INFO    Found morphing setup with 6 components
15:55 madminer.fisherinfor INFO    Found nuisance morphing setup
15:55 madminer.fisherinfor INFO    Loading data from /Users/johannbrehmer/work/projects/madminer/diboson_mining/data

In [8]:
def calculate_info(filename, calculation_mode, tight_cuts=False, antitight_cuts=False, **kwargs):
    """
    Calculate a Fisher information matrix and save it, raw and profiled, to ``result_dir``.

    Four ``.npy`` files are written to ``result_dir``:

    * ``information_<filename><ending>`` and ``covariance_<filename><ending>``:
      the two values returned by the selected ``FisherInformation`` method.
    * ``profiled_information_<filename><ending>`` and
      ``profiled_covariance_<filename><ending>``: the result of
      ``profile_information`` applied to the sum of that information and the
      nuisance constraint term.

    ``<ending>`` is ``_tight.npy``, ``_antitight.npy`` or ``.npy`` depending on
    the cut flags.

    Parameters
    ----------
    filename : str
        Label inserted into the output file names.
    calculation_mode : {"truth", "rate", "histo", "histo2d", "ml"}
        Selects which ``FisherInformation`` method is called.
    tight_cuts : bool, optional
        If true, use ``fisher_tight``. Takes precedence over ``antitight_cuts``.
    antitight_cuts : bool, optional
        If true (and ``tight_cuts`` is false), use ``fisher_antitight``.
        If both flags are false, ``fisher_all`` is used.
    **kwargs
        Forwarded unchanged to the selected ``FisherInformation`` method.

    Raises
    ------
    ValueError
        If ``calculation_mode`` is not one of the supported modes. This is
        checked before any calculation is started.
    """
    # Mode name -> name of the FisherInformation method that implements it.
    method_names = {
        "truth": "calculate_fisher_information_full_truth",
        "rate": "calculate_fisher_information_rate",
        "histo": "calculate_fisher_information_hist1d",
        "histo2d": "calculate_fisher_information_hist2d",
        "ml": "calculate_fisher_information_full_detector",
    }
    # Fail early with a clear message; otherwise an unknown mode would only
    # surface later as an unbound local variable when saving the results.
    if calculation_mode not in method_names:
        raise ValueError(
            "Unknown calculation_mode {!r}; expected one of {}".format(
                calculation_mode, sorted(method_names)
            )
        )

    # Pick the event sample and the matching output file ending.
    if tight_cuts:
        fisher_, ending_ = fisher_tight, '_tight.npy'
    elif antitight_cuts:
        fisher_, ending_ = fisher_antitight, '_antitight.npy'
    else:
        fisher_, ending_ = fisher_all, '.npy'

    # Every mode is evaluated at the same fixed parameter point and luminosity.
    calculate = getattr(fisher_, method_names[calculation_mode])
    fisher_info, cov = calculate(
        theta=[0., 0.],
        luminosity=3000000.,
        **kwargs
    )

    np.save(result_dir + 'information_{}{}'.format(filename, ending_), fisher_info)
    np.save(result_dir + 'covariance_{}{}'.format(filename, ending_), cov)

    # Add the nuisance constraint term, then profile. [0, 1] is presumably the
    # list of components to keep (the two physics parameters) - confirm
    # against the madminer profile_information documentation.
    fisher_info_nuisance = fisher_.calculate_fisher_information_nuisance_constraints()
    profiled_info, profiled_cov = profile_information(
        fisher_info + fisher_info_nuisance, [0, 1], covariance=cov
    )

    np.save(result_dir + 'profiled_information_{}{}'.format(filename, ending_), profiled_info)
    np.save(result_dir + 'profiled_covariance_{}{}'.format(filename, ending_), profiled_cov)


## Truth-level info

In [9]:
# Calls for the other two cut selections are currently disabled:
#calculate_info('parton', 'truth', False)
#calculate_info('parton', 'truth', True)
calculate_info('parton', 'truth', tight_cuts=False, antitight_cuts=True)

  a.append(np.log(benchmark_weights[:, i_pos] / benchmark_weights[:, self.i_benchmark_ref]))
  size=error_propagation_n_ensemble,


## Rate-only info

In [9]:
# The call without cuts is currently disabled:
#calculate_info('rate', 'rate', False)
calculate_info('rate', 'rate', tight_cuts=True)

## Full Fisher info (ML)

In [None]:
# Same 'ml' calculation for each of the three samples, each with its own model file.
calculate_info(
    'full',
    'ml',
    tight_cuts=False,
    model_file=model_dir + "sally_ensemble_all",
    batch_size=batch_size,
)
calculate_info(
    'full',
    'ml',
    tight_cuts=True,
    model_file=model_dir + "sally_ensemble_all_tight",
    batch_size=batch_size,
)
calculate_info(
    'full',
    'ml',
    tight_cuts=False,
    antitight_cuts=True,
    model_file=model_dir + "sally_ensemble_all_antitight",
    batch_size=batch_size,
)


In [None]:
#calculate_info('full_infomode', 'ml', False, model_file=model_dir + "sally_ensemble_all", mode="information", batch_size=batch_size)
#calculate_info('full_infomode', 'ml', True, model_file=model_dir + "sally_ensemble_all_tight", mode="information", batch_size=batch_size)


In [43]:
calculate_info(
    'minimal',
    'ml',
    tight_cuts=False,
    model_file=model_dir + "sally_ensemble_minimal",
    batch_size=batch_size,
)
# The tight-cut variant is currently disabled:
#calculate_info('minimal', 'ml', True, model_file=model_dir + "sally_ensemble_minimal_tight", batch_size=batch_size)


16:31 madminer.ml          INFO    Found ensemble with 10 estimators and expectations None
16:32 madminer.fisherinfor INFO    Evaluating rate Fisher information
16:34 madminer.fisherinfor INFO    Evaluating kinematic Fisher information on batch 1 / 7
16:35 madminer.fisherinfor INFO    Evaluating kinematic Fisher information on batch 2 / 7
16:35 madminer.fisherinfor INFO    Evaluating kinematic Fisher information on batch 3 / 7
16:36 madminer.fisherinfor INFO    Evaluating kinematic Fisher information on batch 4 / 7
16:36 madminer.fisherinfor INFO    Evaluating kinematic Fisher information on batch 5 / 7
16:37 madminer.fisherinfor INFO    Evaluating kinematic Fisher information on batch 6 / 7
16:37 madminer.fisherinfor INFO    Evaluating kinematic Fisher information on batch 7 / 7


In [None]:
calculate_info(
    'phi',
    'ml',
    tight_cuts=True,
    model_file=model_dir + "sally_ensemble_phi_tight",
    batch_size=batch_size,
)


In [None]:
calculate_info(
    'pta_phi',
    'ml',
    tight_cuts=True,
    model_file=model_dir + "sally_ensemble_pta_phi_tight",
    batch_size=batch_size,
)


## Histogram info

In [16]:
# One 'histo' calculation per observable, all on the tight-cut sample.
# filenames[i] is the output label used for observables[i].
bins = 25
observables = ['phi', 'et_miss', 'pt_l1', 'pt_a1', 'deltaphi_lmet', 'deltaphi_la']
filenames = ['phi', 'met', 'ptl', 'pta', 'deltaphi_lv', 'deltaphi_la']

for filename, obs in zip(filenames, observables):
    calculate_info(
        'histo_' + filename,
        'histo',
        tight_cuts=True,
        observable=obs,
        nbins=bins,
        histrange=None,
    )
    

In [17]:
# 'histo2d' calculation in (pt_a1, phi) on the tight-cut sample.
calculate_info(
    'histo_pta_phi', 'histo2d', tight_cuts=True,
    observable1='pt_a1', nbins1=5, histrange1=[300.,800.],
    observable2='phi', nbins2=10, histrange2=None,
)


  inv_sigma = sanitize_array(1.0 / sigma)  # Shape (n_events,)
  a.append(0.5 * np.log(benchmark_weights[:, i_pos] / benchmark_weights[:, i_neg]))
  a.append(np.log(benchmark_weights[:, i_pos] / benchmark_weights[:, self.i_benchmark_ref]))
