# Systematics Notebook
Systematic uncertainties are computed from the results of the phi fitting.

In [16]:
import glob 
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import pickle
import sys
import time 

%matplotlib inline

# Global matplotlib font settings: serif family at 18 pt.
plt.rc('font', family='serif', size=18)

### Load Configurations
There are several files with different results for phi-distributions.  

In [17]:
# Collect every CSV under database/fit/. This pattern also matches the
# sys.csv and variation*.csv files that later cells read individually.
database_files = glob.glob('database/fit/*.csv')
print('Found %d files in the database.' % len(database_files))

Found 23 files in the database.


In [18]:
def load_database_files(file_list):
    """Read each CSV in ``file_list`` into a DataFrame.

    Parameters
    ----------
    file_list : iterable of str
        Paths of the CSV files to read.

    Returns
    -------
    dict
        Maps each path (exactly as given) to the DataFrame read from it.
    """
    return {csv_path: pd.read_csv(csv_path) for csv_path in file_list}

In [19]:
# Read every discovered CSV into memory, keyed by its file path.
dataframe_store = load_database_files(database_files)

Finally, load the nominal phi-distribution fit results.

In [20]:
# Nominal fit results; add_systematics() later works on a copy of this frame.
nominal = pd.read_csv('database/fit/sys.csv')

### Systematic Uncertainties 

In [21]:
def database_filename_parser(file_name):
    """Extract the variation index and parameter name from a file name.

    The expected layout is ``...variation_<parameter>_<index>.csv``, where
    ``<parameter>`` may itself contain underscores and ``<index>`` is an
    integer (possibly negative), e.g. ``variation_dist_vz_-1.csv``.

    Parameters
    ----------
    file_name : str
        Path or bare name of a variation file.

    Returns
    -------
    tuple
        ``(index, parameter_name)`` with ``index`` as an ``int``.

    Raises
    ------
    ValueError
        If the text after the last underscore is not an integer.
    """
    # Keep only what follows the last 'variation_' marker (drops directories).
    file_name = file_name.split('variation_')[-1]

    # Remove the extension as an exact suffix. str.strip('.csv') would strip
    # any of the characters '.', 'c', 's', 'v' from BOTH ends and mangle
    # parameter names such as 'vz' or 'sector'.
    if file_name.endswith('.csv'):
        file_name = file_name[:-len('.csv')]

    # Everything before the last underscore is the parameter name.
    parameter_name, _, index_token = file_name.rpartition('_')

    return int(index_token), parameter_name

In [22]:
def build_parameter_variation_dict(path_to_db = 'database/fit/'):
    """Load every ``variation*.csv`` file into a nested dictionary.

    Parameters
    ----------
    path_to_db : str
        Prefix joined directly to the pattern ``'variation*.csv'`` by string
        concatenation, so a directory must include its trailing separator.

    Returns
    -------
    dict
        ``{parameter_name: {variation_index: DataFrame}}``, with the name and
        index taken from each file name by ``database_filename_parser``.
    """
    variation_frames = {}

    for csv_path in glob.glob(path_to_db + 'variation*.csv'):
        variation_index, parameter_name = database_filename_parser(csv_path)

        # Create the inner dict on first sight of a parameter, then file the frame.
        variation_frames.setdefault(parameter_name, {})[variation_index] = pd.read_csv(csv_path)

    return variation_frames

In [23]:
def load_systematic_sources_list(file_name):
    """Load a pickled dictionary and return it with keys and values swapped.

    Parameters
    ----------
    file_name : str
        Path to a pickle file containing a dictionary whose values are
        hashable.

    Returns
    -------
    dict
        ``{value: key}`` for every ``key: value`` pair in the pickled
        dictionary. If several keys share a value, the last one iterated wins.
    """
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only use this on files from a trusted source.
    # The context manager ensures the file handle is closed after loading.
    with open(file_name, 'rb') as pickle_file:
        systematic_sources = pickle.load(pickle_file)

    # .items() works on both Python 2 and Python 3 (iteritems() is Python 2 only).
    return {value: key for key, value in systematic_sources.items()}

In [24]:
# Uses the default path 'database/fit/'; the result is
# {parameter name: {variation index: DataFrame}}.
parameter_variation = build_parameter_variation_dict()

In [25]:
# Show which variation indices were loaded for each parameter.
# .items() and list(...) run on both Python 2 and Python 3; the original
# iteritems() exists only on Python 2.
for key, value in parameter_variation.items():
    print(key, list(value.keys()))

('dist_dcr3', [1, -1])
('dist_dcr1', [1, -1])
('dist_vz', [1, -1])
('dist_ecw', [1, -1])
('dist_ec_edep', [1, -1])
('dist_ecsf', [1, -1])
('p_mes', [0, -1])
('alpha', [1, -1])
('dist_ecu', [1, -1])
('dist_cc', [0, -1])
('dist_ecv', [1, -1])


In [26]:
# The loader inverts the pickled dictionary; add_systematics() looks up each
# parameter name here to get the prefix for that source's output columns.
systematic_sources = load_systematic_sources_list('systematic_sources.pkl')

In [27]:
def add_systematics(nominal_fit, parameter_variation, systematic_sources):
    """Attach per-source and total systematic uncertainties to a fit table.

    For every parameter in ``parameter_variation`` the fit results at the
    lowest and highest variation index are compared bin by bin. The absolute
    difference of each of ``par_0``, ``par_1`` and ``par_2`` is stored in
    ``<source>_par0`` .. ``<source>_par2``, where ``<source>`` is
    ``systematic_sources[parameter]``. The columns ``sys_total_0`` ..
    ``sys_total_2`` hold the quadrature sum of those differences over all
    sources.

    Parameters
    ----------
    nominal_fit : pandas.DataFrame
        Must contain the columns ``axis`` and ``axis_bin``. It is deep-copied
        and not modified.
    parameter_variation : dict
        ``{parameter_name: {variation_index: DataFrame}}``; each DataFrame
        must contain ``axis``, ``axis_bin``, ``par_0``, ``par_1``, ``par_2``.
    systematic_sources : dict
        ``{parameter_name: column_prefix}`` with string prefixes.

    Returns
    -------
    pandas.DataFrame
        Copy of ``nominal_fit`` with the added columns. All merges use
        pandas' default inner join on ``['axis', 'axis_bin']``, so a bin that
        is absent from any variation table is absent from the result.

    Raises
    ------
    KeyError
        If a parameter has no entry in ``systematic_sources``.
    """
    n_parameters = 3  # par_0, par_1, par_2
    join_cols = ['axis', 'axis_bin']
    total_cols = ['sys_total_%d' % i for i in range(n_parameters)]

    nominal_fit_sys = nominal_fit.copy(deep=True)
    for total_col in total_cols:
        nominal_fit_sys[total_col] = np.zeros(len(nominal_fit_sys))

    for key in parameter_variation.keys():
        variations = parameter_variation[key]

        # Pick the real extremes. Indexing keys()[0] / keys()[-1] relied on
        # arbitrary dict ordering and does not work on Python 3.
        min_index = min(variations.keys())
        max_index = max(variations.keys())

        # Overlapping columns get the suffixes _x (min_index) and _y (max_index).
        merged_data = pd.merge(variations[min_index],
                               variations[max_index],
                               on=join_cols)

        source = systematic_sources[key]
        source_cols = []
        for i in range(n_parameters):
            source_col = source + '_par%d' % i
            merged_data[source_col] = np.abs(merged_data['par_%d_y' % i]
                                             - merged_data['par_%d_x' % i])
            source_cols.append(source_col)

        nominal_fit_sys = pd.merge(nominal_fit_sys,
                                   merged_data[join_cols + source_cols],
                                   on=join_cols)

        # Accumulate squares; the square root is taken once after the loop.
        for total_col, source_col in zip(total_cols, source_cols):
            nominal_fit_sys[total_col] = (nominal_fit_sys[total_col]
                                          + nominal_fit_sys[source_col] ** 2)

    for total_col in total_cols:
        nominal_fit_sys[total_col] = np.sqrt(nominal_fit_sys[total_col])

    return nominal_fit_sys

In [28]:
# Build the nominal table with per-source and total systematic columns added.
nominal_fit_sys = add_systematics(nominal, parameter_variation, systematic_sources)

In [29]:
# Save the combined table; index=False leaves the row index out of the CSV.
nominal_fit_sys.to_csv('results/fit/sys.csv', index=False)

In [30]:
# Preview the first rows (up to 24) of the combined table.
nominal_fit_sys.head(24)

Unnamed: 0,axis,axis_bin,axis_max,axis_min,err_0,err_1,err_2,par_0,par_1,par_2,...,sys_4_par2,sys_3_par0,sys_3_par1,sys_3_par2,sys_5_par0,sys_5_par1,sys_5_par2,sys_11_par0,sys_11_par1,sys_11_par2
0,pt,0,0.202108,0.000284,0.005276,0.396362,0.487153,0.023408,0.034646,-0.257422,...,0.03192,0.000289,0.10572,0.098312,0.00047,0.075193,0.048049,0.000736,0.040891,0.006359
1,pt,1,0.274544,0.202108,0.00646,0.429919,0.511412,0.025577,-0.405092,0.016048,...,0.118941,0.003464,0.094344,0.127323,0.000896,0.016575,0.077313,0.000935,0.000633,0.065156
2,pt,2,0.328453,0.274544,0.007214,0.390009,0.424867,0.029393,0.153543,-0.379188,...,0.085404,0.000757,0.015184,0.089965,0.000395,0.050939,0.066657,0.000459,0.023407,0.020062
3,pt,3,0.376169,0.328453,0.004396,0.395246,0.437662,0.033788,-0.110783,-0.23143,...,0.003496,0.001507,0.020716,0.014896,0.000113,0.03301,0.011819,0.000571,0.024859,0.008354
4,pt,4,0.422123,0.376169,0.007989,0.373763,0.467116,0.034543,0.052581,-0.323595,...,0.10787,0.001562,0.06264,0.098177,0.001061,0.034369,0.10188,0.000638,0.003667,0.038124
5,pt,5,0.469541,0.422123,0.005029,0.398504,0.441394,0.032499,-0.330032,-0.190095,...,0.027386,0.001111,0.136373,0.002543,0.000269,0.067547,0.022726,0.000825,0.039288,0.099719
6,pt,6,0.52194,0.469541,0.007928,0.474836,0.472046,0.034158,0.211,-0.377392,...,0.025365,0.002511,0.023537,0.152025,0.000251,0.023379,0.004088,0.000399,0.057492,0.016369
7,pt,7,0.585257,0.52194,0.006869,0.364297,0.42417,0.034739,-0.199503,-0.311186,...,0.015319,0.002083,0.111395,0.016325,0.000261,0.014948,0.054083,0.001303,0.086908,0.039876
8,pt,8,0.674587,0.585257,0.007083,0.458296,0.541616,0.029773,-0.185709,-0.263196,...,0.014443,0.004861,0.096668,0.177238,0.001936,0.058927,0.020294,0.001045,0.091227,0.03152
9,pt,9,1.193084,0.674587,0.004666,0.420607,0.580574,0.019273,-0.446098,-0.091768,...,0.128991,0.000559,0.025359,0.008232,1.6e-05,0.029186,0.045602,0.000368,0.006979,0.006997
