# Systematics Notebook
This notebook computes systematic uncertainties from the results of the phi fitting.  Before running it, the output of `src/fit/run-fitter.py` must be available.  

In [1]:
import glob 
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import pickle
import os
import sys
import time 

from datetime import datetime

%matplotlib inline

# Global matplotlib styling: serif font at size 18, with usetex enabled for text.
plt.rc('font', family='serif', size=18)
plt.rc('text', usetex=True)

### Load Configurations
There are several files with different results for phi-distributions.  

In [2]:
# Collect every CSV in the fit database and report each file's modification time (UTC).
database_files = glob.glob('database/fit/*.csv')
print('Found %d files in the database.' % len(database_files))

for f in database_files:
    modified = datetime.utcfromtimestamp(os.path.getmtime(f))
    print(f, modified.strftime('%Y-%m-%d %H:%M:%S'))

Found 27 files in the database.
('database/fit/variation_dist_ecv_1.csv', '2018-12-05 20:23:27')
('database/fit/variation_alpha_1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_dcr3_1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_dcr1_1.csv', '2018-12-05 20:23:27')
('database/fit/variation_alpha_-1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dvz_1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_dcr3_-1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_dcr1_-1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_ec_edep_1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_ecu_1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_ecsf_1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_ecu_-1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_vz_1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_ecw_1.csv', '2018-12-05 20:23:27')
('database/fit/variation_dist_cc_-1.csv', '2018-12-05 20:

In [3]:
def load_database_files(file_list):
    """Read every CSV path in ``file_list`` into a DataFrame.

    Returns a dict that maps each path, exactly as given, to the DataFrame
    that ``pd.read_csv`` produces for it.
    """
    return {csv_path: pd.read_csv(csv_path) for csv_path in file_list}

In [4]:
# Read every database CSV found above into memory, keyed by its file path.
dataframe_store = load_database_files(database_files)

Finally, load the nominal phi distributions.

In [5]:
# Nominal fit results.
# NOTE(review): the frame displayed at the end of this notebook carries an 'Unnamed: 0'
# column, which suggests this CSV was saved together with its index -- consider
# index_col=0 here; confirm against the writer.
nominal = pd.read_csv('database/fit/sys.csv')

In [6]:
def fix_bootstrap_entry(x):
    """Convert one stringified list of bootstrap values into a float array.

    Parameters
    ----------
    x : str, numpy.ndarray, list or tuple
        A bracketed list of numbers as text, e.g. ``'[0.1 0.2]'`` or
        ``'[0.1, 0.2]'``; entries may be separated by whitespace (including
        newlines) and/or commas.  Values that are already array-like are
        returned as a float array, so re-running the parsing cell on a frame
        that was parsed before no longer fails.

    Returns
    -------
    numpy.ndarray
        One float per entry.

    Raises
    ------
    ValueError
        If an entry cannot be converted with ``float``.
    """
    # Already parsed: pass the values through instead of calling .strip on them.
    if isinstance(x, (np.ndarray, list, tuple)):
        return np.asarray(x, dtype=float)

    # Drop the enclosing brackets; treat commas like whitespace.
    body = x.strip('[').strip(']').replace(',', ' ')
    return np.array([float(entry) for entry in body.split()])

def exclude_outlier_samples(x, op = np.mean, lower = -0.5, upper = 0.5):
    """Apply ``op`` to the samples of ``x`` that lie strictly inside a window.

    Parameters
    ----------
    x : numpy.ndarray
        Sample values.
    op : callable
        Reduction applied to the retained samples (default ``np.mean``).
    lower, upper : float
        Exclusive bounds of the accepted window.  The defaults reproduce the
        previously hard-coded window ``-0.5 < x < 0.5``.

    Returns
    -------
    Whatever ``op`` returns for the retained samples.  If no sample falls
    inside the window, ``op`` receives an empty array.
    """
    in_window = (x > lower) & (x < upper)
    return op(x[np.where(in_window)[0]])

In [7]:
# Parse the stringified bootstrap columns of the nominal frame and of every
# database frame into numeric arrays.  The frames are modified in place.
bootstrap_columns = ['bootstraps_0', 'bootstraps_1', 'bootstraps_2']

for frame in [nominal] + list(dataframe_store.values()):
    for column in bootstrap_columns:
        frame[column] = frame[column].apply(fix_bootstrap_entry)

        # Disabled alternative: recompute the estimate and its error from the
        # bootstrap samples after excluding outliers.
        #suffix = column.split('_')[-1]
        #frame['par_' + suffix] = frame[column].apply(lambda x: exclude_outlier_samples(x, np.mean))
        #frame['err_' + suffix] = frame[column].apply(lambda x: exclude_outlier_samples(x, np.std))

### Systematic Uncertainties 

In [8]:
def database_filename_parser(file_name):
    """Split a variation file name into its variation index and parameter name.

    The expected shape is ``<anything>variation_<parameter>_<index>.csv``,
    where ``<parameter>`` may itself contain underscores and ``<index>`` is an
    integer (possibly negative).

    Parameters
    ----------
    file_name : str
        Path or bare name of a variation file.

    Returns
    -------
    (int, str)
        ``(index, parameter_name)``.

    Raises
    ------
    ValueError
        If the text after the last underscore is not an integer.
    """
    stem = file_name.split('variation_')[-1]

    # Remove the extension as a suffix.  str.strip('.csv') would instead remove
    # any of the characters '.', 'c', 's', 'v' from BOTH ends, corrupting
    # parameter names that start with one of them (e.g. 'vz_1' -> 'z_1').
    if stem.endswith('.csv'):
        stem = stem[:-len('.csv')]

    # Everything before the last underscore is the parameter name.
    parameter_name, _, index_token = stem.rpartition('_')

    return int(index_token), parameter_name

In [9]:
def build_parameter_variation_dict(path_to_db = 'database/fit/'):
    """Load all variation CSVs under ``path_to_db``, grouped by parameter.

    Files matching ``path_to_db + 'variation*.csv'`` are read with
    ``pd.read_csv``.  The result is a nested dict of the form
    ``{parameter_name: {variation_index: DataFrame}}``, where the name and
    index come from ``database_filename_parser``.
    """
    variations_by_parameter = {}

    for csv_path in glob.glob(path_to_db + 'variation*.csv'):
        index, parameter = database_filename_parser(csv_path)
        # Create the inner dict the first time a parameter is encountered.
        variations_by_parameter.setdefault(parameter, {})[index] = pd.read_csv(csv_path)

    return variations_by_parameter

In [10]:
def load_systematic_sources_list(file_name):
    """Load a pickled dict and return it with keys and values swapped.

    Parameters
    ----------
    file_name : str
        Path to a pickle file containing a dict whose values are hashable.

    Returns
    -------
    dict
        ``{value: key}`` for every ``key: value`` pair in the pickled dict.
        If several keys share a value, only one of them survives.
    """
    # NOTE: pickle.load can execute arbitrary code -- only use on trusted files.
    # The context manager closes the file handle, which was previously leaked.
    with open(file_name, 'rb') as pickle_file:
        systematic_sources = pickle.load(pickle_file)

    # .items() works on both Python 2 and 3 (.iteritems() is Python 2 only).
    return {value: key for key, value in systematic_sources.items()}

In [11]:
# Group the variation CSVs by parameter name and variation index.  These frames are
# read from disk by build_parameter_variation_dict itself, separately from dataframe_store.
parameter_variation = build_parameter_variation_dict()

In [12]:
# List each varied parameter with the variation indices found for it.
# .items() works on both Python 2 and 3 (.iteritems() is Python 2 only), and sorting
# the indices makes the printed order deterministic.
for key, value in parameter_variation.items():
    print(key, sorted(value.keys()))

('dist_dcr3', [1, -1])
('dist_dcr1', [1, -1])
('dist_vz', [1, -1])
('dist_ecsf', [1, -1])
('dist_cc', [0, -1])
('dist_ec_edep', [1, -1])
('dvz', [1, -1])
('p_mes', [1, -1])
('alpha', [1, -1])
('missing_mass', [1, -1])
('dist_ecu', [1, -1])
('dist_ecw', [1, -1])
('dist_ecv', [1, -1])


In [13]:
# Lookup used by add_systematics: keyed by the values stored in the pickle,
# returning the corresponding pickle keys.
systematic_sources = load_systematic_sources_list('systematic_sources.pkl')

In [14]:
def add_systematics(nominal_fit, parameter_variation, systematic_sources):
    """Attach per-source systematic shifts and their quadrature sum to the nominal fit.

    For every parameter in ``parameter_variation`` the frames with the smallest
    and the largest variation index are compared bin by bin; the absolute
    difference of ``par_0``, ``par_1`` and ``par_2`` is stored in the columns
    ``<label>_par0``, ``<label>_par1`` and ``<label>_par2``, where
    ``<label> = systematic_sources[parameter]``.  The columns ``sys_total_0``,
    ``sys_total_1`` and ``sys_total_2`` hold the square root of the sum of the
    squared shifts over all parameters.

    Parameters
    ----------
    nominal_fit : pandas.DataFrame
        Nominal results; must contain the columns ``axis`` and ``axis_bin``.
        It is not modified (a deep copy is extended instead).
    parameter_variation : dict
        ``{parameter: {variation_index: DataFrame}}``; every frame must contain
        ``axis``, ``axis_bin``, ``par_0``, ``par_1`` and ``par_2``.
    systematic_sources : dict
        Maps each parameter name to the label used as column prefix.

    Returns
    -------
    pandas.DataFrame
        Copy of ``nominal_fit`` with the shift and total columns added.  The
        merges use the pandas default (inner) join on ``axis``/``axis_bin``,
        so a bin that is missing from any variation is absent from the result.

    Raises
    ------
    KeyError
        If a parameter has no entry in ``systematic_sources``.
    """
    join_keys = ['axis', 'axis_bin']
    parameter_ids = [0, 1, 2]

    nominal_fit_sys = nominal_fit.copy(deep=True)
    for par in parameter_ids:
        nominal_fit_sys['sys_total_%d' % par] = np.zeros(len(nominal_fit_sys))

    for key in parameter_variation.keys():
        variations = parameter_variation[key]

        # Use the true extremes.  Indexing keys()[0] / keys()[-1] relied on the
        # arbitrary dict order and does not work on Python 3 dict views.
        min_index = min(variations.keys())
        max_index = max(variations.keys())

        # Overlapping columns get the suffixes _x (min_index) and _y (max_index).
        merged_data = pd.merge(variations[min_index],
                               variations[max_index],
                               on = join_keys)

        label = systematic_sources[key]
        shift_columns = []
        for par in parameter_ids:
            shift_column = '%s_par%d' % (label, par)
            merged_data[shift_column] = np.abs(merged_data['par_%d_y' % par]
                                               - merged_data['par_%d_x' % par])
            shift_columns.append(shift_column)

        nominal_fit_sys = pd.merge(nominal_fit_sys,
                                   merged_data[join_keys + shift_columns],
                                   on = join_keys)

        # Accumulate squared shifts; the square root is taken once at the end.
        for par, shift_column in zip(parameter_ids, shift_columns):
            total_column = 'sys_total_%d' % par
            nominal_fit_sys[total_column] = (nominal_fit_sys[total_column]
                                             + nominal_fit_sys[shift_column]**2)

    for par in parameter_ids:
        total_column = 'sys_total_%d' % par
        nominal_fit_sys[total_column] = np.sqrt(nominal_fit_sys[total_column])

    return nominal_fit_sys

In [15]:
# Attach the per-source shifts and the quadrature-summed totals to a copy of the nominal results.
nominal_fit_sys = add_systematics(nominal, parameter_variation, systematic_sources)

In [16]:
# Save the nominal results with systematics, without the DataFrame index.
# NOTE(review): the bootstraps_* columns hold numpy arrays at this point, so they are
# presumably written as their string representation -- confirm downstream readers expect that.
nominal_fit_sys.to_csv('results/fit/sys.csv', index=False)

In [17]:
# Show the first 24 rows of the result.
nominal_fit_sys.head(24)

Unnamed: 0,axis,axis_bin,axis_max,axis_min,bootstraps_0,bootstraps_1,bootstraps_2,err_0,err_1,err_2,...,sys_10_par2,sys_11_par0,sys_11_par1,sys_11_par2,sys_5_par0,sys_5_par1,sys_5_par2,sys_13_par0,sys_13_par1,sys_13_par2
0,missing_mass,0,1.510155,1.200002,"[0.02225417, 0.01868085, 0.02430717, 0.0157584...","[-0.20593838, -0.6251733, -0.14924845, 0.70103...","[-0.00546303, 0.29879084, -0.07955831, -1.0, 0...",0.005068,0.417107,0.415341,...,0.095727,0.000345,0.122739,0.084732,0.000261,0.08184,0.031163,0.00053,0.031641,0.02736
1,missing_mass,1,1.665859,1.510155,"[0.03776505, 0.0436645, 0.02406418, 0.03907096...","[-0.71551317, -0.22400042, 0.7214608, -0.56691...","[0.1075915, 0.10739436, -1.0, 0.12252712, -1.0...",0.007821,0.465271,0.530635,...,0.071746,0.001366,0.051893,0.087805,0.001436,0.054398,0.046528,0.00126,0.034409,0.068812
2,missing_mass,2,1.830956,1.665859,"[0.03101197, 0.02944994, 0.02324346, 0.0330808...","[-0.108128265, -0.535562098, -0.57210809, -0.1...","[-0.03885911, -0.9521305, 0.31993136, -0.05299...",0.005587,0.409944,0.586307,...,0.016564,0.001277,0.022554,0.09825,0.000959,0.025283,0.115873,0.001376,0.015058,0.051217
3,missing_mass,3,2.00597,1.830956,"[0.02259868, 0.02790317, 0.01437908, 0.0240659...","[-0.62269145, -0.65569043, -0.69016474, -0.456...","[-0.1976166, -1.0, 0.5478301, -0.09141493, -1....",0.006617,0.519003,0.48651,...,0.084617,0.002939,0.182898,0.186247,0.001364,0.107271,0.04837,0.001085,0.017738,0.036881
4,missing_mass,4,2.49567,2.00597,"[0.01692972, 0.01338771, 0.022899, 0.00839217,...","[-0.32300177, -0.70230949, -0.5787133, -0.7516...","[0.15275206, 0.52411085, 0.20496316, 0.3734867...",0.005037,0.49641,0.519936,...,0.008491,0.000579,0.056927,0.16426,0.000245,0.008949,0.013617,0.000686,0.094528,0.037351
5,pt,0,0.253536,0.000488,"[0.02049274, 0.01349443, 0.01273007, 0.0251871...","[-0.08304062, 0.5930067, 0.652695, -0.45434463...","[0.4583286, -1.0, -1.0, 0.27753884, -1.0, -0.1...",0.006203,0.400321,0.590879,...,0.032034,0.003449,0.012966,0.183229,0.002426,0.046197,0.055553,0.002854,0.117627,0.176846
6,pt,1,0.351464,0.253536,"[0.03841079, 0.01530423, 0.03091508, 0.0128123...","[-0.0318800285, 0.83938396, -0.0204907525, 0.6...","[-0.22442913, -1.0, -0.18762596, -1.0, -1.0, -...",0.008492,0.45261,0.423315,...,0.061582,0.003358,0.068132,0.144505,0.0026,0.053202,0.177643,0.001775,0.065469,0.103015
7,pt,2,0.440606,0.351464,"[0.020991605, 0.020683922, 0.020233154, 0.0202...","[-0.93390507, 0.6750558, -0.97763944, -1.0, -0...","[0.2604469, -1.0, 0.37291485, 0.23829395, -0.8...",0.00713,0.54789,0.532182,...,0.052552,0.001622,0.043511,0.025775,0.000541,0.042819,0.063508,0.001184,0.075742,0.050686
8,pt,3,0.548205,0.440606,"[0.0266843103, 0.0230616927, 0.0101447189, 0.0...","[-0.563071, -0.87180334, 0.7756018, 0.6145658,...","[0.0672993436, 0.259729475, -1.0, -1.0, -0.012...",0.006525,0.414079,0.474389,...,0.013005,0.001095,0.127005,0.085811,0.000827,0.133602,0.105235,0.000662,0.058841,0.061198
9,pt,4,1.140706,0.548205,"[0.01578828, 0.01915646, 0.00964967, 0.0051770...","[-0.06819157, -0.16193026, -0.79267955, -0.649...","[0.32670283, 0.01935413, 0.6003635, 0.7862173,...",0.005285,0.444429,0.729768,...,0.236025,0.000777,0.005997,0.078743,0.000973,0.052449,0.007464,0.001081,0.270106,0.30134
