# Systematics Notebook
This notebook computes systematic uncertainties from the results of the phi fitting.  Before running it, the output of `src/fit/run-fitter.py` must be available.  

In [1]:
import glob 
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import pickle
import os
import sys
import time 

from datetime import datetime

%matplotlib inline

# Global plot styling: serif font at size 18, with text rendered via LaTeX.
plt.rc('font', family='serif', size=18)
plt.rc('text', usetex=True)

### Load Configurations
There are several files with different results for phi-distributions.  

In [2]:
# Discover every CSV in the fit database directory and report how many exist.
database_files = glob.glob('database/fit/*.csv')
n_database_files = len(database_files)
print('Found %d files in the database.' % n_database_files)

# List each file next to its last-modified time (UTC), which makes it easy
# to spot results left over from an older fitter run.
for f in database_files:
    modified_utc = datetime.utcfromtimestamp(os.path.getmtime(f))
    print(f, modified_utc.strftime('%Y-%m-%d %H:%M:%S'))

Found 27 files in the database.
('database/fit/variation_dist_ecv_1.csv', '2018-11-28 15:53:04')
('database/fit/variation_alpha_1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_dcr3_1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_dcr1_1.csv', '2018-11-28 15:53:04')
('database/fit/variation_alpha_-1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dvz_1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_dcr3_-1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_dcr1_-1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_ec_edep_1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_ecu_1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_ecsf_1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_ecu_-1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_vz_1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_ecw_1.csv', '2018-11-28 15:53:04')
('database/fit/variation_dist_cc_-1.csv', '2018-11-28 15:

In [3]:
def load_database_files(file_list):
    """Read each CSV in ``file_list`` into a DataFrame.

    Parameters
    ----------
    file_list : iterable of str
        Paths of the CSV files to read.

    Returns
    -------
    dict
        Maps every path in ``file_list`` to the DataFrame returned by
        ``pd.read_csv`` for that path.
    """
    return {csv_path: pd.read_csv(csv_path) for csv_path in file_list}

In [4]:
# Read every discovered CSV into a DataFrame, keyed by its file path.
dataframe_store = load_database_files(database_files)

Finally, load the fit results for the nominal phi distributions.

In [5]:
# Nominal fit results; the systematic columns are later attached to a copy of this frame.
nominal = pd.read_csv('database/fit/sys.csv')

### Systematic Uncertainties 

In [6]:
def database_filename_parser(file_name):
    """Split a variation file name into its variation index and parameter name.

    File names are expected to look like
    ``<dir>/variation_<parameter_name>_<index>.csv``, where the parameter
    name may itself contain underscores and the index is an integer
    (possibly negative), e.g. ``variation_dist_ecv_-1.csv``.

    Parameters
    ----------
    file_name : str
        Path or bare name of a variation CSV file.

    Returns
    -------
    (int, str)
        ``(index, parameter_name)``.

    Raises
    ------
    ValueError
        If the text after the last underscore is not an integer.
    """
    # Drop the directory and the extension.  ``str.strip('.csv')`` must not
    # be used here: it removes any of the characters '.', 'c', 's', 'v' from
    # BOTH ends, which corrupts names such as ``vz`` or ``cc``.
    stem, _extension = os.path.splitext(os.path.basename(file_name))

    # Keep only what follows the 'variation_' marker (no-op if it is absent).
    stem = stem.split('variation_')[-1]

    # Everything before the last underscore is the parameter name; the last
    # token is the signed variation index.
    parameter_name, _separator, index_token = stem.rpartition('_')

    return int(index_token), parameter_name

In [7]:
def build_parameter_variation_dict(path_to_db = 'database/fit/'):
    """Load every variation CSV and group the frames by parameter.

    Files matching ``path_to_db + 'variation*.csv'`` are read with
    ``pd.read_csv``; the parameter name and variation index are taken from
    each file name by ``database_filename_parser``.

    Parameters
    ----------
    path_to_db : str
        Prefix prepended to the ``variation*.csv`` glob pattern.  It is
        concatenated as-is, so a directory needs its trailing slash.

    Returns
    -------
    dict
        ``{parameter_name: {variation_index: DataFrame}}``.
    """
    variations_by_parameter = {}

    for csv_path in glob.glob(path_to_db + 'variation*.csv'):
        variation_index, parameter_name = database_filename_parser(csv_path)

        # Create the inner dict the first time a parameter is seen.
        per_index = variations_by_parameter.setdefault(parameter_name, {})
        per_index[variation_index] = pd.read_csv(csv_path)

    return variations_by_parameter

In [8]:
def load_systematic_sources_list(file_name):
    """Load a pickled dictionary and return it with keys and values swapped.

    Parameters
    ----------
    file_name : str
        Path of the pickle file holding a dictionary.

    Returns
    -------
    dict
        ``{value: key}`` for every ``key: value`` pair in the pickled
        dictionary.  If several keys share a value, the pair iterated last
        wins.

    Notes
    -----
    ``pickle.load`` can execute arbitrary code; only use this on files
    produced by this project, never on untrusted input.
    """
    # Context manager so the handle is closed even if unpickling fails.
    with open(file_name, 'rb') as pickle_file:
        systematic_sources = pickle.load(pickle_file)

    # ``.items()`` works on both Python 2 and 3 (``.iteritems()`` is
    # Python-2-only).
    reverse_dict = {}
    for key, value in systematic_sources.items():
        reverse_dict[value] = key

    return reverse_dict

In [9]:
# Group the variation CSVs as {parameter name: {variation index: DataFrame}}.
parameter_variation = build_parameter_variation_dict()

In [10]:
# Summarise which variation indices were found for each parameter.
# `.items()` works on both Python 2 and 3 (`.iteritems()` exists only on
# Python 2); `list(...)` keeps the printed indices a plain list on both.
for key, value in parameter_variation.items():
    print(key, list(value.keys()))

('dist_dcr3', [1, -1])
('dist_dcr1', [1, -1])
('dist_vz', [1, -1])
('dist_ecsf', [1, -1])
('dist_cc', [0, -1])
('dist_ec_edep', [1, -1])
('dvz', [1, -1])
('p_mes', [1, -1])
('alpha', [1, -1])
('missing_mass', [1, -1])
('dist_ecu', [1, -1])
('dist_ecw', [1, -1])
('dist_ecv', [1, -1])


In [11]:
# Reverse of the pickled dictionary; it is indexed below by varied-parameter name
# to obtain the label used as the prefix of that parameter's systematic columns.
systematic_sources = load_systematic_sources_list('systematic_sources.pkl')

In [12]:
def add_systematics(nominal_fit, parameter_variation, systematic_sources):
    """Attach per-source and total systematic columns to the nominal fit.

    For every varied parameter, the frames with the smallest and the largest
    variation index are joined on ``('axis', 'axis_bin')`` and the absolute
    difference of each fit parameter (``par_0``, ``par_1``, ``par_2``) is
    stored as ``<label>_par<N>``, where ``<label>`` is
    ``systematic_sources[parameter]``.  These columns are merged onto a copy
    of ``nominal_fit`` and summed in quadrature into ``sys_total_<N>``.

    Parameters
    ----------
    nominal_fit : pandas.DataFrame
        Nominal fit results; must contain ``axis`` and ``axis_bin``.
        It is not modified.
    parameter_variation : dict
        ``{parameter_name: {variation_index: DataFrame}}``; each frame must
        contain ``axis``, ``axis_bin``, ``par_0``, ``par_1`` and ``par_2``.
    systematic_sources : dict
        Maps each parameter name to the string label used as column prefix.

    Returns
    -------
    pandas.DataFrame
        Copy of ``nominal_fit`` with the added ``<label>_par<N>`` and
        ``sys_total_<N>`` columns.  The merges are inner joins, so bins
        missing from any variation are dropped from the result.

    Raises
    ------
    KeyError
        If a parameter has no entry in ``systematic_sources`` or a required
        column is missing.
    """
    merge_keys = ['axis', 'axis_bin']
    par_indices = (0, 1, 2)

    nominal_fit_sys = nominal_fit.copy(deep=True)
    for par in par_indices:
        nominal_fit_sys['sys_total_%d' % par] = np.zeros(len(nominal_fit_sys))

    for key, variations in parameter_variation.items():
        label = systematic_sources[key]

        # Use the extreme variation indices explicitly.  Indexing
        # ``keys()[0]`` / ``keys()[-1]`` depends on arbitrary dict order and
        # is not possible on Python 3, where ``keys()`` is a view.
        min_index = min(variations)
        max_index = max(variations)

        # Shared non-key columns receive the ``_x`` (min) / ``_y`` (max)
        # suffixes from the merge.
        merged_data = pd.merge(variations[min_index],
                               variations[max_index],
                               on=merge_keys)

        shift_cols = []
        for par in par_indices:
            shift_col = label + '_par%d' % par
            merged_data[shift_col] = np.abs(
                merged_data['par_%d_y' % par] - merged_data['par_%d_x' % par])
            shift_cols.append(shift_col)

        nominal_fit_sys = pd.merge(nominal_fit_sys,
                                   merged_data[merge_keys + shift_cols],
                                   on=merge_keys)

        # Accumulate after the merge so the rows of the running totals and
        # of the new shift columns are aligned.
        for par, shift_col in zip(par_indices, shift_cols):
            nominal_fit_sys['sys_total_%d' % par] += nominal_fit_sys[shift_col] ** 2

    for par in par_indices:
        total_col = 'sys_total_%d' % par
        nominal_fit_sys[total_col] = np.sqrt(nominal_fit_sys[total_col])

    return nominal_fit_sys

In [13]:
# Attach the per-source and total systematic columns to a copy of the nominal fit.
nominal_fit_sys = add_systematics(nominal, parameter_variation, systematic_sources)

In [14]:
# Save the nominal fit with its systematic columns; index=False leaves the row index out of the CSV.
nominal_fit_sys.to_csv('results/fit/sys.csv', index=False)

In [15]:
# Preview the first 24 rows of the result.
nominal_fit_sys.head(24)

Unnamed: 0,axis,axis_bin,axis_max,axis_min,bootstraps_0,bootstraps_1,bootstraps_2,err_0,err_1,err_2,...,sys_10_par2,sys_11_par0,sys_11_par1,sys_11_par2,sys_5_par0,sys_5_par1,sys_5_par2,sys_13_par0,sys_13_par1,sys_13_par2
0,missing_mass,0,1.482751,1.200002,[0.02481311 0.01897994 0.01708755 0.01640064 0...,[-0.51024175 -0.6830489 0.7246748 -0.708794...,[-0.26208287 -0.972964 -1. -0.126053...,0.004768,0.449468,0.428083,...,0.103906,0.000343,0.153865,0.030437,0.001569,0.04235,0.034707,0.000849,0.002051,0.062576
1,missing_mass,1,1.608926,1.482751,[0.04074286 0.04362369 0.03855268 0.04849915 0...,[-0.61438423 -0.1794666 -0.5824035 -0.371154...,[-1.0000000e+00 -3.2721090e-01 -1.0000000e+00 ...,0.011557,0.47631,0.488689,...,0.00528,0.000467,0.175606,0.157146,0.001869,0.101841,0.097333,0.002108,0.17462,0.191519
2,missing_mass,2,1.749693,1.608926,[0.03387824 0.02299557 0.02044531 0.0371739 0...,[-0.61058134 0.45628008 -0.18214026 -0.675426...,[-0.72744775 0.39307526 0.57278466 -0.678892...,0.006507,0.389291,0.585786,...,0.10796,0.001569,0.031263,0.030326,0.000718,0.070256,0.032262,0.000288,0.039785,0.024814
3,missing_mass,3,1.884849,1.749693,[0.03981149 0.03897938 0.0353425 0.04264331 0...,[-0.6139615 -0.30249062 -0.43932122 -0.256774...,[-1. -0.47696275 -0.28559002 -0.326918...,0.007936,0.417574,0.34578,...,0.028041,7.5e-05,0.053023,0.101701,0.00076,0.081398,0.063392,0.001249,0.036254,0.092855
4,missing_mass,4,2.039865,1.884849,[0.01741274 0.02707191 0.01101774 0.01132567 0...,[-0.90861666 -0.5492127 -0.9240855 -0.991123...,[-0.57310283 -0.21063036 0.6233333 0.618366...,0.005851,0.53635,0.604296,...,0.006204,0.002008,0.051947,0.056352,0.001238,0.109013,0.055903,0.001555,0.075269,0.077452
5,missing_mass,5,2.49567,2.039865,[ 0.02191559 0.02271443 0.00234284 0.022330...,[-0.47846806 -0.66428846 0.5324118 -0.399486...,[ 0.15747876 0.20941709 -1. 0.086967...,0.005845,0.49682,0.559349,...,0.083181,0.000321,0.011896,0.036043,0.000107,0.036228,0.05832,0.000797,0.186367,0.035736
6,pt,0,0.233604,0.000488,[0.01173213 0.01546262 0.03092979 0.0198946 0...,[ 0.78083885 -0.62021977 -0.55134845 -0.527783...,[-1. 0.3312461 -1. 0.311296...,0.007144,0.421502,0.571288,...,0.0118,0.003464,0.018812,0.073774,0.003844,0.043676,0.132537,0.002499,0.120756,0.018946
7,pt,1,0.321051,0.233604,[0.01755237 0.03336266 0.04396328 0.03940822 0...,[ 7.64268756e-01 -5.31926692e-01 -5.17241597e-...,[-1. 0.0512178 -0.34558386 -0.480095...,0.008698,0.418881,0.546379,...,0.058918,0.004541,0.160076,0.171065,0.002234,0.049429,0.149154,0.000306,0.096087,0.053503
8,pt,2,0.395307,0.321051,[0.02386473 0.02500148 0.03469681 0.02309731 0...,[ 0.71468854 -0.7553025 -0.85073805 -0.743504...,[-1.00000000e+00 2.56747395e-01 -5.41684508e-...,0.005589,0.517594,0.478913,...,0.038372,0.000744,0.099484,0.044058,3e-05,0.024656,0.003399,0.000193,0.093804,0.055878
9,pt,3,0.472615,0.395307,[0.01151191 0.01249874 0.01983923 0.02951408 0...,[ 0.80957633 0.7354267 -0.61234355 -0.306941...,[-1. -1. -0.9037174 -0.248378...,0.006809,0.464766,0.544248,...,0.030599,0.001584,0.181477,0.113964,0.002715,0.199392,0.038788,0.002926,0.037255,0.120699
