# Systematics Notebook
This notebook computes systematic uncertainties from the results of the phi fitting.  Before running it, the output of `src/fit/run-fitter.py` must be available.  

In [3]:
import glob 
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import pickle
import sys
import time 

%matplotlib inline

# One rc call sets both font properties for every figure in this notebook.
plt.rc('font', family='serif', size=18)

### Load Configurations
There are several files with different results for phi-distributions.  

In [4]:
# Collect every fit-result CSV.  glob makes no promise about ordering, so the
# paths are sorted to keep the listing reproducible between runs and machines.
database_files = sorted(glob.glob('database/fit/*.csv'))
print('Found %d files in the database.' % len(database_files))

Found 25 files in the database.


In [5]:
def load_database_files(file_list):
    """Read each CSV path in ``file_list`` into a pandas DataFrame.

    Parameters
    ----------
    file_list : iterable of str
        Paths handed to ``pd.read_csv`` one at a time.

    Returns
    -------
    dict
        Maps every path to the DataFrame read from it.  A path that occurs
        more than once keeps the result of its last read.
    """
    return {csv_path: pd.read_csv(csv_path) for csv_path in file_list}

In [6]:
# Read every discovered CSV into memory, keyed by its file path.
dataframe_store = load_database_files(database_files)

Finally, load the nominal fit results for the phi distributions.

In [7]:
# Nominal fit results; the systematic columns are added to these rows later on.
nominal = pd.read_csv('database/fit/sys.csv')

### Systematic Uncertainties 

In [8]:
def database_filename_parser(file_name):
    """Extract the variation index and parameter name from a database file name.

    The text after the last ``variation_`` in ``file_name`` is read as
    ``<parameter_name>_<index>.csv``; the parameter name may itself contain
    underscores, only the final ``_``-separated token is the index.

    Parameters
    ----------
    file_name : str
        Path or bare name such as ``database/fit/variation_dist_vz_-1.csv``.

    Returns
    -------
    tuple
        ``(index, parameter_name)`` with ``index`` converted to ``int``.

    Raises
    ------
    ValueError
        If the final token is not an integer.
    """
    suffix = '.csv'
    stem = file_name.split('variation_')[-1]

    # Remove the extension as a suffix.  str.strip('.csv') would instead strip
    # any of the characters '.', 'c', 's', 'v' from BOTH ends and corrupt
    # parameter names such as 'vz' or 'cc'.
    if stem.endswith(suffix):
        stem = stem[:-len(suffix)]

    # Split on the last underscore only: everything before it is the name.
    parameter_name, _, index_token = stem.rpartition('_')

    return int(index_token), parameter_name

In [9]:
def build_parameter_variation_dict(path_to_db = 'database/fit/'):
    """Group the ``variation*.csv`` fit results by parameter and variation index.

    Parameters
    ----------
    path_to_db : str
        Prefix prepended verbatim to the ``variation*.csv`` glob pattern, so
        a directory needs its trailing separator.

    Returns
    -------
    dict
        ``{parameter_name: {index: DataFrame}}`` where the name and index come
        from ``database_filename_parser`` applied to each matching path.
    """
    variations_by_parameter = {}

    for csv_path in glob.glob(path_to_db + 'variation*.csv'):
        variation_index, parameter_name = database_filename_parser(csv_path)

        # setdefault creates the inner dict the first time a parameter is seen.
        per_index = variations_by_parameter.setdefault(parameter_name, {})
        per_index[variation_index] = pd.read_csv(csv_path)

    return variations_by_parameter

In [10]:
def load_systematic_sources_list(file_name):
    """Load a pickled dictionary and return it with keys and values swapped.

    Parameters
    ----------
    file_name : str
        Path to a pickle file containing a dict whose values are hashable.

    Returns
    -------
    dict
        ``{value: key}`` for every ``key: value`` pair in the pickled dict.
        If several keys share a value, only one of them survives.
    """
    # NOTE(review): pickle.load can execute arbitrary code, so only point this
    # at files produced by a trusted step of the analysis.
    # The context manager closes the handle, which the bare open() never did.
    with open(file_name, 'rb') as pickle_file:
        systematic_sources = pickle.load(pickle_file)

    # items() exists on both Python 2 and Python 3, unlike iteritems().
    return {value: key for key, value in systematic_sources.items()}

In [11]:
# Nested dict of variation fits: parameter name -> variation index -> DataFrame.
parameter_variation = build_parameter_variation_dict()

In [12]:
# List the variation indices found for each parameter.  items() and an
# explicit list() keep this cell working on Python 3 as well as Python 2.
for key, value in parameter_variation.items():
    print(key, list(value.keys()))

('dist_dcr3', [1, -1])
('dist_dcr1', [1, -1])
('dist_vz', [1, -1])
('dist_cc', [0, -1])
('dist_ec_edep', [1, -1])
('dist_ecsf', [1, -1])
('p_mes', [0, -1])
('alpha', [1, -1])
('missing_mass', [1, -1])
('dist_ecu', [1, -1])
('dist_ecw', [1, -1])
('dist_ecv', [1, -1])


In [13]:
# Inverted form of the pickled mapping; add_systematics looks entries up by parameter name.
systematic_sources = load_systematic_sources_list('systematic_sources.pkl')

In [14]:
def add_systematics(nominal_fit, parameter_variation, systematic_sources):
    """Attach per-source and total systematic uncertainties to the nominal fit.

    For every parameter in ``parameter_variation`` the fits stored at the
    smallest and largest variation index are matched on ``axis`` and
    ``axis_bin``.  The absolute difference of ``par_0``, ``par_1`` and
    ``par_2`` between the two is written to ``<label>_par0``,
    ``<label>_par1`` and ``<label>_par2``, where ``<label>`` is
    ``systematic_sources[parameter]``.  The columns ``sys_total_0``,
    ``sys_total_1`` and ``sys_total_2`` hold the quadrature sum of those
    differences over all parameters.

    Parameters
    ----------
    nominal_fit : pandas.DataFrame
        Nominal fit results with at least ``axis`` and ``axis_bin`` columns.
        It is deep-copied, not modified.
    parameter_variation : dict
        ``{parameter: {index: DataFrame}}``; each DataFrame needs ``axis``,
        ``axis_bin``, ``par_0``, ``par_1`` and ``par_2`` columns.
    systematic_sources : dict
        Maps each parameter name to the string used as column prefix.

    Returns
    -------
    pandas.DataFrame
        Copy of ``nominal_fit`` with the per-source and total columns added.
        The merges are inner joins, so a bin that is missing from any
        variation is dropped from the result.

    Raises
    ------
    KeyError
        If a parameter has no entry in ``systematic_sources``.
    """
    join_keys = ['axis', 'axis_bin']
    fit_parameters = (0, 1, 2)

    nominal_fit_sys = nominal_fit.copy(deep=True)
    for par in fit_parameters:
        nominal_fit_sys['sys_total_%d' % par] = np.zeros(len(nominal_fit_sys))

    for key in parameter_variation.keys():
        variations = parameter_variation[key]
        label = systematic_sources[key]

        # Take the true extremes.  Indexing keys() positionally breaks on
        # Python 3 and depended on dict ordering on Python 2.
        min_index = min(variations)
        max_index = max(variations)

        # The shared par_* columns get the suffix _x (min) and _y (max).
        merged_data = pd.merge(variations[min_index],
                               variations[max_index],
                               on=join_keys)

        source_cols = []
        for par in fit_parameters:
            source_col = label + '_par%d' % par
            merged_data[source_col] = np.abs(merged_data['par_%d_y' % par]
                                             - merged_data['par_%d_x' % par])
            source_cols.append(source_col)

        nominal_fit_sys = pd.merge(nominal_fit_sys,
                                   merged_data[join_keys + source_cols],
                                   on=join_keys)

        # Accumulate squares here; the square root is taken once at the end.
        for par, source_col in zip(fit_parameters, source_cols):
            nominal_fit_sys['sys_total_%d' % par] += nominal_fit_sys[source_col]**2

    for par in fit_parameters:
        total_col = 'sys_total_%d' % par
        nominal_fit_sys[total_col] = np.sqrt(nominal_fit_sys[total_col])

    return nominal_fit_sys

In [15]:
# Nominal fit plus one set of columns per systematic source and the sys_total_* columns.
nominal_fit_sys = add_systematics(nominal, parameter_variation, systematic_sources)

In [16]:
# Save the augmented table; index=False keeps the row index out of the file.
nominal_fit_sys.to_csv('results/fit/sys.csv', index=False)

In [17]:
# Preview the augmented table (at most the first 24 rows).
nominal_fit_sys.head(24)

Unnamed: 0,axis,axis_bin,axis_max,axis_min,err_0,err_1,err_2,par_0,par_1,par_2,...,sys_9_par2,sys_10_par0,sys_10_par1,sys_10_par2,sys_5_par0,sys_5_par1,sys_5_par2,sys_12_par0,sys_12_par1,sys_12_par2
0,pt,0,0.202108,0.000284,0.005192,0.431005,0.505917,0.022703,0.035135,-0.305324,...,0.02097,0.001066,0.023424,0.028128,0.000956,0.056018,0.037605,0.001519,0.047335,0.025574
1,pt,1,0.274544,0.202108,0.006773,0.411621,0.516845,0.02635,-0.45814,-0.025374,...,0.01438,0.003295,0.019527,0.193278,0.001348,0.049193,0.031217,0.000926,0.000289,0.018729
2,pt,2,0.328453,0.274544,0.007233,0.398686,0.426705,0.029421,0.124094,-0.404097,...,0.079441,4.1e-05,0.003665,0.031996,2.8e-05,0.110571,0.024419,0.000715,0.010407,0.043842
3,pt,3,0.376169,0.328453,0.004955,0.391704,0.4333,0.034144,-0.101711,-0.217505,...,0.054347,0.000823,0.029208,0.015899,0.000635,0.094364,0.016297,0.002553,0.067756,0.016653
4,pt,4,0.422123,0.376169,0.008203,0.393624,0.487244,0.033847,0.041822,-0.33952,...,0.07526,0.002379,0.097317,0.007849,0.000967,0.063191,0.009643,0.00131,0.004947,0.059945
5,pt,5,0.469541,0.422123,0.004816,0.402747,0.448046,0.032313,-0.295824,-0.189819,...,0.038299,0.000583,0.125574,0.043258,0.000462,0.031886,0.086002,0.000874,0.004168,0.084696
6,pt,6,0.52194,0.469541,0.007372,0.458481,0.45975,0.033803,0.23264,-0.322405,...,0.11629,0.001793,0.024666,0.00154,0.000721,0.024062,0.10765,0.000565,0.087564,0.008329
7,pt,7,0.585257,0.52194,0.007066,0.389036,0.420212,0.034461,-0.161264,-0.342975,...,0.120403,0.002917,0.050213,0.040496,0.000481,0.026368,0.027694,0.002617,0.006996,0.031045
8,pt,8,0.674587,0.585257,0.006881,0.437974,0.536124,0.030766,-0.224303,-0.281907,...,0.052712,0.005665,0.050647,0.197729,0.002875,0.008774,0.118567,0.00076,0.063589,0.027354
9,pt,9,1.193084,0.674587,0.004335,0.435679,0.574297,0.019302,-0.421409,-0.096595,...,0.054196,0.000925,0.059188,0.002949,0.000314,0.141209,0.07106,0.001399,0.022375,0.146348
