# Systematics Notebook
Computes systematic uncertainties from the results of the phi fitting.  Before running this notebook, generate the fit output with `src/fit/run-fitter.py`.  

In [1]:
import glob 
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import pickle
import os
import sys
import time 

from datetime import datetime

%matplotlib inline

# Global matplotlib style for every figure in this notebook: serif font,
# font size 18, and text rendering with usetex enabled.
# NOTE(review): usetex=True presumably needs a working LaTeX installation on
# the machine running the notebook -- confirm before sharing.
plt.rc('font', family='serif')
plt.rc('font', size=18)
plt.rc('text', usetex = True)

### Load Configurations
The fit database contains several CSV files, each holding the phi-distribution fit results for one variation of an analysis parameter.  

In [2]:
# Collect every CSV in the fit database and report how many were found.
database_files = glob.glob('database/fit/*.csv')
n_found = len(database_files)
print('Found %d files in the database.' % n_found)

# List each file next to its last-modified time (interpreted as UTC).
timestamp_format = '%Y-%m-%d %H:%M:%S'
for csv_path in database_files:
    modified = datetime.utcfromtimestamp(os.path.getmtime(csv_path))
    print(csv_path, modified.strftime(timestamp_format))

Found 27 files in the database.
('database/fit/variation_dist_ecv_1.csv', '2018-11-27 14:38:00')
('database/fit/variation_alpha_1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_dcr3_1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_dcr1_1.csv', '2018-11-27 14:38:00')
('database/fit/variation_alpha_-1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dvz_1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_dcr3_-1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_dcr1_-1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_ec_edep_1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_ecu_1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_ecsf_1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_ecu_-1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_vz_1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_ecw_1.csv', '2018-11-27 14:38:00')
('database/fit/variation_dist_cc_-1.csv', '2018-11-27 14:

In [3]:
def load_database_files(file_list):
    """Read every CSV path in ``file_list`` into a pandas DataFrame.

    Parameters
    ----------
    file_list : iterable of str
        Paths handed to ``pd.read_csv`` one by one.

    Returns
    -------
    dict
        Maps each path, exactly as given, to the DataFrame read from it.
    """
    return {csv_path: pd.read_csv(csv_path) for csv_path in file_list}

In [4]:
# Read every CSV found by the glob above into a dict of DataFrames keyed by file path.
dataframe_store = load_database_files(database_files)

Finally, load the nominal phi-distribution fit results.

In [5]:
# Nominal fit results; add_systematics later merges the per-source shifts
# onto a copy of this frame.
nominal = pd.read_csv('database/fit/sys.csv')

### Systematic Uncertainties 

In [6]:
def database_filename_parser(file_name):
    """Split a variation file name into its index and parameter name.

    The text after the last ``variation_`` in ``file_name`` is expected to
    have the form ``<parameter>_<index>.csv``; the parameter itself may
    contain underscores (e.g. ``dist_ec_edep``).

    Parameters
    ----------
    file_name : str
        File name or path, e.g. ``'database/fit/variation_dist_cc_-1.csv'``.

    Returns
    -------
    tuple
        ``(index, parameter_name)`` with ``index`` as an ``int``.

    Raises
    ------
    ValueError
        If the token after the final underscore is not an integer.
    """
    stem = file_name.split('variation_')[-1]

    # Remove the extension as a suffix.  str.strip('.csv') would instead strip
    # any of the characters '.', 'c', 's', 'v' from BOTH ends, corrupting
    # parameter names such as 'vz' (-> 'z') or 'sector' (-> 'ector').
    extension = '.csv'
    if stem.endswith(extension):
        stem = stem[:-len(extension)]

    # Everything before the last underscore is the parameter name; the
    # trailing token is the (possibly negative) variation index.
    parameter_name, _, index_token = stem.rpartition('_')

    return int(index_token), parameter_name

In [7]:
def build_parameter_variation_dict(path_to_db = 'database/fit/'):
    """Load all ``variation*.csv`` files into a nested dictionary.

    Parameters
    ----------
    path_to_db : str
        Prefix that is concatenated directly with ``'variation*.csv'`` to
        form the glob pattern, so it must end with a path separator.

    Returns
    -------
    dict
        ``{parameter_name: {index: DataFrame}}`` where the parameter name and
        index come from ``database_filename_parser`` applied to each path.
    """
    variations_by_parameter = {}

    for csv_path in glob.glob(path_to_db + 'variation*.csv'):
        variation_index, name = database_filename_parser(csv_path)
        # Create the inner dict the first time a parameter is seen.
        variations_by_parameter.setdefault(name, {})[variation_index] = pd.read_csv(csv_path)

    return variations_by_parameter

In [8]:
def load_systematic_sources_list(file_name):
    """Load a pickled dictionary and return it with keys and values swapped.

    Parameters
    ----------
    file_name : str
        Path to a pickle file containing a dict whose values are hashable.

    Returns
    -------
    dict
        ``{value: key}`` for every ``key: value`` pair in the pickled dict.
        If several keys share a value, the last one iterated wins.
    """
    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only point this at files produced by this project.
    # The context manager closes the file handle, which the bare open() leaked.
    with open(file_name, 'rb') as pickle_file:
        systematic_sources = pickle.load(pickle_file)

    # .items() works on Python 2 and 3; .iteritems() exists on Python 2 only.
    return {value: key for key, value in systematic_sources.items()}

In [9]:
# Nested dict: parameter name -> variation index -> DataFrame read from the
# matching variation_*.csv file under the default database path.
parameter_variation = build_parameter_variation_dict()

In [10]:
# Show which variation indices were found for each parameter.
# .items() / list(...) behave the same on Python 2 and 3, whereas
# .iteritems() is Python 2 only.
for key, value in parameter_variation.items():
    print(key, list(value.keys()))

('dist_dcr3', [1, -1])
('dist_dcr1', [1, -1])
('dist_vz', [1, -1])
('dist_ecsf', [1, -1])
('dist_cc', [0, -1])
('dist_ec_edep', [1, -1])
('dvz', [1, -1])
('p_mes', [1, -1])
('alpha', [1, -1])
('missing_mass', [1, -1])
('dist_ecu', [1, -1])
('dist_ecw', [1, -1])
('dist_ecv', [1, -1])


In [11]:
# Inverted view of the pickled dict; add_systematics looks entries up by
# parameter name and uses the result as a column-name prefix.
systematic_sources = load_systematic_sources_list('systematic_sources.pkl')

In [12]:
def add_systematics(nominal_fit, parameter_variation, systematic_sources):
    """Attach per-source systematic shifts and their quadrature sums to a fit table.

    For every parameter in ``parameter_variation`` the frames at the smallest
    and largest variation index are merged on ``('axis', 'axis_bin')`` and the
    absolute difference of ``par_0``, ``par_1`` and ``par_2`` between them is
    stored in ``<label>_par0``, ``<label>_par1`` and ``<label>_par2``, where
    ``label = systematic_sources[parameter]``.  These columns are merged onto
    a copy of ``nominal_fit`` and combined in quadrature into
    ``sys_total_0``, ``sys_total_1`` and ``sys_total_2``.

    Parameters
    ----------
    nominal_fit : pandas.DataFrame
        Must contain ``axis`` and ``axis_bin`` columns.  It is not modified.
    parameter_variation : dict
        ``{parameter: {index: DataFrame}}``; each frame needs ``axis``,
        ``axis_bin``, ``par_0``, ``par_1`` and ``par_2`` columns.
    systematic_sources : dict
        Maps each parameter name to the label used as column prefix.

    Returns
    -------
    pandas.DataFrame
        Copy of ``nominal_fit`` with the per-source and total columns added.

    Raises
    ------
    KeyError
        If a parameter has no entry in ``systematic_sources``.
    """
    join_keys = ['axis', 'axis_bin']
    parameter_numbers = (0, 1, 2)

    nominal_fit_sys = nominal_fit.copy(deep=True)
    for number in parameter_numbers:
        nominal_fit_sys['sys_total_%d' % number] = np.zeros(len(nominal_fit_sys))

    for key, variations in parameter_variation.items():
        label = systematic_sources[key]

        # Use the extreme indices explicitly: dict.keys() cannot be indexed on
        # Python 3 and its order says nothing about which variation is which.
        min_index = min(variations)
        max_index = max(variations)

        # NOTE: pd.merge defaults to an inner join, so bins missing from
        # either variation (or from the nominal fit below) are dropped.
        merged_data = pd.merge(variations[min_index], variations[max_index],
                               on=join_keys)

        shift_columns = []
        for number in parameter_numbers:
            column = '%s_par%d' % (label, number)
            # The merge suffixes the two inputs' columns with _x (min) and _y (max).
            merged_data[column] = np.abs(merged_data['par_%d_y' % number]
                                         - merged_data['par_%d_x' % number])
            shift_columns.append(column)

        nominal_fit_sys = pd.merge(nominal_fit_sys,
                                   merged_data[join_keys + shift_columns],
                                   on=join_keys)

        # Accumulate squared shifts; the square root is taken once at the end.
        for number, column in zip(parameter_numbers, shift_columns):
            total = 'sys_total_%d' % number
            nominal_fit_sys[total] = nominal_fit_sys[total] + nominal_fit_sys[column]**2

    for number in parameter_numbers:
        total = 'sys_total_%d' % number
        nominal_fit_sys[total] = np.sqrt(nominal_fit_sys[total])

    return nominal_fit_sys

In [13]:
# Append the per-source shifts and their quadrature sums (sys_total_*) to a
# copy of the nominal fit table.
nominal_fit_sys = add_systematics(nominal, parameter_variation, systematic_sources)

In [14]:
# Write the augmented table to disk; index=False leaves the row index out of the CSV.
nominal_fit_sys.to_csv('results/fit/sys.csv', index=False)

In [15]:
# Preview the first 24 rows of the augmented table.
nominal_fit_sys.head(24)

Unnamed: 0,axis,axis_bin,axis_max,axis_min,bootstraps_0,bootstraps_1,bootstraps_2,err_0,err_1,err_2,...,sys_10_par2,sys_11_par0,sys_11_par1,sys_11_par2,sys_5_par0,sys_5_par1,sys_5_par2,sys_13_par0,sys_13_par1,sys_13_par2
0,missing_mass,0,1.404561,1.200002,[1.36773046e-02 8.64623021e-03 2.13189200e-02 ...,[-1. -0.75936526 -0.27842376 0.629036...,[ 0.07404664 0.10561353 -0.4668497 -1. ...,0.006007,0.673716,0.558392,...,0.193003,0.001202,0.10014,0.013597,0.00053,0.138542,0.056633,0.000255,0.090209,0.020242
1,missing_mass,1,1.510155,1.404561,[0.01143278 0.02401592 0.02463553 0.03268127 0...,[ 8.6970854e-01 -5.5710793e-01 -4.8972581e-02 ...,[-1. -1. -0.8163317 -0.302826...,0.009712,0.482727,0.629958,...,0.056042,0.001051,0.276157,0.181155,0.001709,0.061287,0.027601,0.001058,0.15528,0.032909
2,missing_mass,2,1.580246,1.510155,[0.04085728 0.05093626 0.0359352 0.04672783 0...,[-0.5771516 -0.16389354 -0.610977 -0.147316...,[-1. -0.5489499 0.17530787 -0.101354...,0.010981,0.428489,0.492282,...,0.047476,0.001252,0.036833,0.19004,8.9e-05,0.086156,0.089743,0.003111,0.009609,0.069501
3,missing_mass,3,1.665859,1.580246,[0.03132711 0.03027624 0.03455269 0.02902774 0...,[-0.4702143 0.63237464 -0.7444955 -0.458655...,[ 2.87706912e-01 -1.00000000e+00 2.67022550e-...,0.00654,0.424039,0.531871,...,0.057062,0.00268,0.04017,0.079175,0.001984,0.045299,0.142938,0.002069,0.02408,0.054302
4,missing_mass,4,1.749693,1.665859,[0.00318344 0.00696477 0.02266925 0.01367283 0...,[ 5.89546785e-02 3.25880200e-02 -6.88760996e-...,[ 1. 0.9283434 -0.51030785 0.643036...,0.007248,0.454715,0.665178,...,0.099021,4.4e-05,0.122204,0.021207,0.001075,0.006882,0.073423,0.002258,0.060015,0.058574
5,missing_mass,5,1.830956,1.749693,[0.04378885 0.01554632 0.03038656 0.02615038 0...,[-2.56931305e-01 7.99382448e-01 7.21859813e-...,[-4.57138687e-01 -1.00000000e+00 -1.00000000e+...,0.009596,0.453942,0.490861,...,0.04553,0.000337,0.009007,0.111682,0.00171,0.109875,0.045305,0.004987,0.061417,0.143035
6,missing_mass,6,1.913228,1.830956,[0.02660809 0.03525928 0.03025514 0.03702202 0...,[-0.6399952 -0.09584235 0.3508847 -0.158392...,[-1. -0.55016017 -0.11248519 -0.627348...,0.008075,0.467703,0.355105,...,0.031,0.001215,0.055115,0.059649,0.00098,0.049433,0.003959,0.001124,0.044867,0.057434
7,missing_mass,7,2.00597,1.913228,[ 9.76926275e-03 1.54768350e-02 4.50782571e-...,[-1. -0.9380006 -1. -1. ...,[-0.41587743 0.01577182 -0.34382105 -0.996999...,0.00502,0.426617,0.612345,...,4.5e-05,0.00096,0.096028,0.054819,0.000657,0.118578,0.017936,0.001488,0.006595,0.124188
8,missing_mass,8,2.116995,2.00597,[ 2.01514009e-02 3.24099474e-02 2.56686583e-...,[-0.08298495 -0.16798016 -0.28513342 0.713298...,[ 0.41122064 -0.13555577 0.2303995 -1. ...,0.007393,0.49604,0.614608,...,0.023614,0.000747,0.092531,0.066437,0.001252,0.00759,0.075179,0.001304,0.090442,0.140026
9,missing_mass,9,2.49567,2.116995,[0.01548617 0.00803233 0.00810465 0.0239551 0...,[-1.0000000e+00 -9.7047406e-01 9.4132525e-01 ...,[-9.68645275e-01 7.83408955e-02 -1.00000000e+...,0.005301,0.59802,0.60874,...,0.03218,5e-06,0.020423,0.085044,0.000791,0.002954,0.062193,0.001982,0.201911,0.105819
