# Systematics Notebook
This notebook works from the results of the phi fitting.  Before running it, the output of `src/fit/run-fitter.py` must be available.  

In [1]:
import glob 
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import pickle
import os
import sys
import time 

from datetime import datetime

%matplotlib inline

# Global matplotlib text style for every figure in this notebook:
# serif font family, font size 18, and the text.usetex option switched on.
# NOTE(review): usetex hands text rendering to LaTeX, so a LaTeX
# installation is presumably required on the machine running this -- confirm.
plt.rc('font', family='serif')
plt.rc('font', size=18)
plt.rc('text', usetex = True)

### Load Configurations
There are several files with different results for phi-distributions.  

In [2]:
# Collect every CSV file sitting in the fit database directory.
database_files = glob.glob('database/fit/*.csv')
n_database_files = len(database_files)
print('Found %d files in the database.' % n_database_files)

# List each file next to its last-modification time (UTC), which makes it
# easy to spot stale fit results.
for f in database_files:
    modified_utc = datetime.utcfromtimestamp(os.path.getmtime(f))
    print(f, modified_utc.strftime('%Y-%m-%d %H:%M:%S'))

Found 27 files in the database.
('database/fit/variation_dist_ecv_1.csv', '2018-12-05 13:23:09')
('database/fit/variation_alpha_1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_dcr3_1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_dcr1_1.csv', '2018-12-05 13:23:09')
('database/fit/variation_alpha_-1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dvz_1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_dcr3_-1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_dcr1_-1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_ec_edep_1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_ecu_1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_ecsf_1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_ecu_-1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_vz_1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_ecw_1.csv', '2018-12-05 13:23:09')
('database/fit/variation_dist_cc_-1.csv', '2018-12-05 13:

In [3]:
def load_database_files(file_list):
    """Read every CSV in ``file_list`` into a pandas DataFrame.

    Parameters
    ----------
    file_list : iterable of str
        Paths of the CSV files to read.

    Returns
    -------
    dict
        Maps each path (exactly as given) to the DataFrame read from it.
    """
    return {csv_path: pd.read_csv(csv_path) for csv_path in file_list}

In [4]:
# Read every CSV found in the database directory; the resulting dict is keyed
# by the file path exactly as returned by glob.
dataframe_store = load_database_files(database_files)

Finally, load the nominal phi distributions.

In [5]:
# Nominal fit results, one row per (axis, axis_bin).
# NOTE(review): the table displayed at the end of this notebook carries an
# 'Unnamed: 0' column, which suggests this CSV was written with its index --
# confirm whether index_col=0 is wanted here.
nominal = pd.read_csv('database/fit/sys.csv')

In [6]:
def fix_bootstrap_entry(x):
    """Convert one serialized bootstrap cell into a 1-D float array.

    After ``pd.read_csv`` the bootstrap columns hold text such as
    ``"[0.01 0.02\\n 0.03]"``. This strips the surrounding brackets and parses
    the whitespace-separated numbers.

    Parameters
    ----------
    x : str or array-like
        The serialized array, or an already-parsed sequence of numbers.

    Returns
    -------
    numpy.ndarray
        The parsed values as floats.

    Notes
    -----
    Values that are not strings are returned as a float array instead of
    raising, so re-running the parsing cell on an already-converted column is
    harmless. Comma separators and surrounding whitespace are also tolerated.
    """
    # Anything without ``strip`` is not text, i.e. it has been parsed before.
    if not hasattr(x, 'strip'):
        return np.atleast_1d(np.asarray(x, dtype=float))

    body = x.strip().strip('[').strip(']')
    return np.array([float(entry) for entry in body.replace(',', ' ').split()])

def exclude_outlier_samples(x, op = np.mean, lower = -0.5, upper = 0.5):
    """Reduce the samples of ``x`` that lie strictly inside (lower, upper).

    Parameters
    ----------
    x : numpy.ndarray
        Sample values; the calls in this notebook pass one bootstrap array.
    op : callable, optional
        Reduction applied to the retained samples (default ``np.mean``).
    lower, upper : float, optional
        Exclusive bounds of the accepted window. The defaults reproduce the
        previously hard-coded (-0.5, 0.5) window.

    Returns
    -------
    Whatever ``op`` returns for the retained samples.
    """
    inside = np.where((x > lower) & (x < upper))[0]
    return op(x[inside])

In [7]:
# The bootstrap replicas come back from the CSV files as text; turn each cell
# into a numpy array, for the nominal fit and for every loaded database file.
bootstrap_columns = ['bootstraps_0', 'bootstraps_1', 'bootstraps_2']

for bootstrap_column in bootstrap_columns:
    nominal[bootstrap_column] = nominal[bootstrap_column].apply(fix_bootstrap_entry)

for index in dataframe_store.keys():
    for bootstrap_column in bootstrap_columns:
        dataframe_store[index][bootstrap_column] = (
            dataframe_store[index][bootstrap_column].apply(fix_bootstrap_entry))

# Disabled alternative (kept as a note): overwrite par_N / err_N with
# exclude_outlier_samples(bootstraps_N, np.mean) and
# exclude_outlier_samples(bootstraps_N, np.std) for N = 0, 1, 2, on both the
# nominal frame and every frame in dataframe_store.

### Systematic Uncertainties 

In [8]:
def database_filename_parser(file_name):
    """Split a variation file name into its variation index and parameter name.

    The expected form is ``<anything>variation_<parameter>_<index>.csv``; for
    example ``database/fit/variation_dist_cc_-1.csv`` gives ``(-1, 'dist_cc')``.

    Parameters
    ----------
    file_name : str
        Path or bare name of a variation CSV file.

    Returns
    -------
    tuple
        ``(index, parameter_name)`` where ``index`` is an int and
        ``parameter_name`` is everything between ``variation_`` and the final
        underscore.

    Raises
    ------
    ValueError
        If the last underscore-separated token is not an integer.
    """
    stem = file_name.split('variation_')[-1]

    # Remove the extension as a suffix. str.strip('.csv') strips any of the
    # characters '.', 'c', 's', 'v' from BOTH ends, which mangled parameter
    # names such as 'vz' or 'cc'.
    extension = '.csv'
    if stem.endswith(extension):
        stem = stem[:-len(extension)]

    tokens = stem.split('_')
    parameter_name = '_'.join(tokens[:-1])
    index = int(tokens[-1])

    return index, parameter_name

In [9]:
def build_parameter_variation_dict(path_to_db = 'database/fit/'):
    """Load all ``variation*.csv`` files, grouped by parameter and index.

    Parameters
    ----------
    path_to_db : str, optional
        Prefix placed directly in front of the ``variation*.csv`` glob
        pattern, so a directory must include its trailing separator.

    Returns
    -------
    dict
        ``{parameter_name: {variation_index: DataFrame}}``, with the name and
        index obtained from ``database_filename_parser``.
    """
    parameters = {}

    for csv_path in glob.glob(path_to_db + 'variation*.csv'):
        index, parameter = database_filename_parser(csv_path)
        # Create the per-parameter dict the first time a parameter shows up.
        parameters.setdefault(parameter, {})[index] = pd.read_csv(csv_path)

    return parameters

In [10]:
def load_systematic_sources_list(file_name):
    """Load a pickled mapping and return it inverted (value -> key).

    Parameters
    ----------
    file_name : str
        Path of the pickle file holding a dict.

    Returns
    -------
    dict
        Each value of the stored dict mapped back to its key. If several keys
        share a value, the one iterated last wins.
    """
    # NOTE(review): pickle.load can execute arbitrary code -- only point this
    # at files produced by this project.
    # The context manager closes the file handle once loading is done.
    with open(file_name, 'rb') as source_file:
        systematic_sources = pickle.load(source_file)

    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    return {value: key for key, value in systematic_sources.items()}

In [11]:
# Read the variation_*.csv files from the default 'database/fit/' location,
# grouped as {parameter_name: {variation_index: DataFrame}}.
# NOTE: these frames are read fresh from disk and are separate objects from
# those in dataframe_store, so the bootstrap parsing done earlier on
# dataframe_store does not apply to them.
parameter_variation = build_parameter_variation_dict()

In [12]:
# Show which variation indices were found for each parameter.
# items() works on both Python 2 and 3 (iteritems() is Python 2 only), and
# list() keeps the printed indices a plain list on Python 3 as well.
for key, value in parameter_variation.items():
    print(key, list(value.keys()))

('dist_dcr3', [1, -1])
('dist_dcr1', [1, -1])
('dist_vz', [1, -1])
('dist_ecsf', [1, -1])
('dist_cc', [0, -1])
('dist_ec_edep', [1, -1])
('dvz', [1, -1])
('p_mes', [1, -1])
('alpha', [1, -1])
('missing_mass', [1, -1])
('dist_ecu', [1, -1])
('dist_ecw', [1, -1])
('dist_ecv', [1, -1])


In [13]:
# Inverted view of the pickled dict (stored value -> stored key); it is
# indexed by parameter name in add_systematics to build the column labels.
systematic_sources = load_systematic_sources_list('systematic_sources.pkl')

In [14]:
def add_systematics(nominal_fit, parameter_variation, systematic_sources):
    """Attach per-source and total systematic uncertainties to the nominal fit.

    For every parameter in ``parameter_variation`` the frames at the smallest
    and largest variation index are matched on ``('axis', 'axis_bin')`` and the
    absolute difference of ``par_0``, ``par_1`` and ``par_2`` is stored in the
    columns ``<label>_par0``, ``<label>_par1`` and ``<label>_par2``, where
    ``<label>`` is ``systematic_sources[parameter]``. The per-source shifts
    are added in quadrature into ``sys_total_0``, ``sys_total_1`` and
    ``sys_total_2``.

    Parameters
    ----------
    nominal_fit : pandas.DataFrame
        Nominal results with at least ``axis`` and ``axis_bin`` columns. It is
        deep-copied, never modified.
    parameter_variation : dict
        ``{parameter_name: {variation_index: DataFrame}}``; each frame needs
        ``axis``, ``axis_bin``, ``par_0``, ``par_1`` and ``par_2``.
    systematic_sources : dict
        Maps each parameter name to the string label used for its columns.

    Returns
    -------
    pandas.DataFrame
        Copy of ``nominal_fit`` with the per-source and total columns added.
        The merges use the pandas default (inner join), so rows with no match
        in a variation are dropped.

    Raises
    ------
    KeyError
        If a parameter has no entry in ``systematic_sources``.
    """
    merge_keys = ['axis', 'axis_bin']
    n_parameters = 3

    nominal_fit_sys = nominal_fit.copy(deep=True)
    for par in range(n_parameters):
        nominal_fit_sys['sys_total_%d' % par] = np.zeros(len(nominal_fit_sys))

    for key in parameter_variation.keys():
        variations = parameter_variation[key]
        label = systematic_sources[key]

        # Pick the extreme variations explicitly. Indexing keys() by position
        # fails on Python 3 and depends on dict ordering on Python 2.
        min_index = min(variations)
        max_index = max(variations)

        # Non-key columns of the two frames come out with _x / _y suffixes.
        merged_data = pd.merge(variations[min_index],
                               variations[max_index],
                               on = merge_keys)

        shift_columns = []
        for par in range(n_parameters):
            shift_column = label + '_par%d' % par
            merged_data[shift_column] = np.abs(
                merged_data['par_%d_y' % par] - merged_data['par_%d_x' % par])
            shift_columns.append(shift_column)

        nominal_fit_sys = pd.merge(nominal_fit_sys,
                                   merged_data[merge_keys + shift_columns],
                                   on = merge_keys)

        # Accumulate the squared shifts; the square root is taken at the end.
        for par, shift_column in enumerate(shift_columns):
            nominal_fit_sys['sys_total_%d' % par] += nominal_fit_sys[shift_column]**2

    for par in range(n_parameters):
        total_column = 'sys_total_%d' % par
        nominal_fit_sys[total_column] = np.sqrt(nominal_fit_sys[total_column])

    return nominal_fit_sys

In [15]:
# Nominal results extended with one <label>_parN column per systematic source
# and the quadrature totals sys_total_0 / sys_total_1 / sys_total_2.
nominal_fit_sys = add_systematics(nominal, parameter_variation, systematic_sources)

In [16]:
# Persist the combined table without the DataFrame index.
# NOTE(review): presumably the 'results/fit/' directory has to exist before
# this cell runs -- confirm.
nominal_fit_sys.to_csv('results/fit/sys.csv', index=False)

In [17]:
# Preview the first 24 rows of the combined table as a sanity check.
nominal_fit_sys.head(24)

Unnamed: 0,axis,axis_bin,axis_max,axis_min,bootstraps_0,bootstraps_1,bootstraps_2,err_0,err_1,err_2,...,sys_10_par2,sys_11_par0,sys_11_par1,sys_11_par2,sys_5_par0,sys_5_par1,sys_5_par2,sys_13_par0,sys_13_par1,sys_13_par2
0,missing_mass,0,1.510155,1.200002,"[0.01366159, 0.02182501, 0.0178643, 0.01249743...","[0.76271826, -0.53539646, -0.06132226, 0.84701...","[-1.0, -0.11566047, 0.20258816, -1.0, -0.90585...",0.005055,0.443858,0.437474,...,0.100483,7e-05,0.105506,0.097887,0.00048,0.100099,0.138522,0.000713,0.025094,0.015964
1,missing_mass,1,1.665859,1.510155,"[0.02167565, 0.04026756, 0.04665247, 0.0456585...","[0.64418274, -0.6159453, -0.6914012, -0.415001...","[-1.0, -1.0, -0.89570236, -0.18715747, -1.0, 0...",0.007256,0.396816,0.515056,...,0.033045,0.002054,0.007838,0.078959,0.001293,0.088209,0.071463,0.002359,0.048729,0.017967
2,missing_mass,2,1.830956,1.665859,"[0.03157147, 0.017839, 0.02547376, 0.01854351,...","[-0.5531475, 0.6743476, -0.3595048, -0.1384682...","[-0.87804514, -1.0, 0.20927255, 0.53117096, 0....",0.005346,0.384284,0.564753,...,0.005418,0.001533,0.019769,0.043248,0.001847,0.034999,0.143023,0.001729,0.03342,0.073266
3,missing_mass,3,2.00597,1.830956,"[0.0323621, 0.02634896, 0.01364422, 0.0182718,...","[-0.30367035, -0.55917746, -0.91862875, 0.6994...","[-0.3507193, -0.2713851, 0.467974, -1.0, -0.45...",0.00683,0.482634,0.493671,...,0.016925,0.003963,0.026196,0.098234,0.000132,0.090365,0.084488,0.00229,0.054497,0.126334
4,missing_mass,4,2.49567,2.00597,"[0.01924505, 0.02356177, 0.0178251, 0.0193935,...","[-0.78730536, -0.78019166, -0.27961016, 0.0153...","[0.15794757, -0.578073, 0.14397484, 0.1649916,...",0.005205,0.476897,0.506317,...,0.019682,9.5e-05,0.011904,0.088592,0.00061,0.099567,0.101562,0.00119,0.078302,0.035244
5,pt,0,0.253536,0.000488,"[0.01931378, 0.02640953, 0.02495099, 0.0357335...","[-0.3804943, -0.40077272, -0.57681316, -0.2234...","[0.490044475, 0.376699299, -1.0, -0.11137937, ...",0.006141,0.378622,0.578815,...,0.024485,0.004242,0.082065,0.041628,0.000361,0.078265,0.010767,0.002138,0.036953,0.01459
6,pt,1,0.351464,0.253536,"[0.02703444, 0.03846807, 0.02040036, 0.0366247...","[-0.506893218, -0.161080718, 0.824983597, -0.6...","[-0.14958926, -0.53585571, -1.0, -0.85978138, ...",0.008237,0.430718,0.425561,...,0.012009,0.004011,0.13496,0.179308,0.002336,0.028761,0.14269,0.000601,0.055702,0.052133
7,pt,2,0.440606,0.351464,"[0.01957952, 0.01708727, 0.01811688, 0.0182658...","[-1.0, -1.0, -0.8754169, 0.64802206, -1.0, -0....","[0.28890485, 0.31669578, 0.4435338, -1.0, 0.35...",0.006609,0.421831,0.497415,...,0.06175,0.000843,0.060841,0.168102,0.000737,0.079395,0.028349,0.000309,0.022317,0.010665
8,pt,3,0.548205,0.440606,"[0.02320478, 0.01592269, 0.01298043, 0.0241069...","[-0.4966263, 0.5907548, 0.587601, -0.5818203, ...","[0.19498245, -1.0, -1.0, 0.07624662, 0.1904049...",0.004943,0.472831,0.468437,...,0.016957,0.001286,0.014123,0.015217,0.00136,0.005671,0.027111,0.001204,0.042079,0.048605
9,pt,4,1.140706,0.548205,"[0.00694563, 0.0130919, 0.00818419, 0.0143098,...","[-0.195818886, 0.646081626, 0.00999769382, 0.5...","[0.785032, -1.0, 0.6620307, 0.10184543, -0.703...",0.005366,0.440756,0.719449,...,0.204308,0.000225,0.062103,0.17826,0.000302,0.112968,0.141616,0.000772,0.28428,0.119015
