# Systematics Notebook
This notebook computes systematic uncertainties from the results of the phi-distribution fits.

In [1]:
import glob 
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import pickle
import sys
import time 

%matplotlib inline

plt.rc('font', family='serif')
plt.rc('font', size=18)

### Load Configurations
The database directory contains several CSV files, each holding a different set of fit results for the phi distributions.

In [2]:
# Collect the path of every CSV file under database/fit/ and report how many
# were found.
database_files = glob.glob('database/fit/*.csv')
print('Found %d files in the database.' % len(database_files))

Found 24 files in the database.


In [3]:
def load_database_files(file_list):
    """Read every CSV in ``file_list`` into a pandas DataFrame.

    Parameters
    ----------
    file_list : iterable of str
        Paths of the CSV files to read.

    Returns
    -------
    dict
        Maps each path from ``file_list`` to the DataFrame loaded from it.
    """
    return {csv_path: pd.read_csv(csv_path) for csv_path in file_list}

In [4]:
# Load every database CSV into memory, keyed by its file path.
dataframe_store = load_database_files(database_files)

Finally, load the nominal fit results for the phi distributions.

In [5]:
# Nominal fit results; the systematic columns are attached to a copy of this
# frame further down.
nominal = pd.read_csv('database/fit/sys.csv')

### Systematic Uncertainties 

In [6]:
def database_filename_parser(file_name):
    """Extract the variation index and parameter name from a database path.

    The text after the last ``'variation_'`` is expected to look like
    ``<parameter_name>_<index>.csv``, e.g. ``variation_dist_vz_-1.csv``
    yields ``(-1, 'dist_vz')``.

    Parameters
    ----------
    file_name : str
        Path or file name of a variation CSV.

    Returns
    -------
    tuple
        ``(index, parameter_name)`` where ``index`` is an int and
        ``parameter_name`` is a str.

    Raises
    ------
    ValueError
        If the trailing token is not an integer.
    """
    stem = file_name.split('variation_')[-1]

    # Remove the extension as a suffix.  str.strip('.csv') would instead strip
    # any of the characters '.', 'c', 's', 'v' from BOTH ends, mangling
    # parameter names such as 'vz' or 'cc'.
    extension = '.csv'
    if stem.endswith(extension):
        stem = stem[:-len(extension)]

    # Everything before the last underscore is the parameter name; the final
    # token is the (possibly negative) variation index.
    parameter_name, _, index_token = stem.rpartition('_')

    return int(index_token), parameter_name

In [7]:
def build_parameter_variation_dict(path_to_db = 'database/fit/'):
    """Load every ``variation*.csv`` under ``path_to_db`` into a nested dict.

    Parameters
    ----------
    path_to_db : str
        Prefix prepended to the ``'variation*.csv'`` glob pattern.

    Returns
    -------
    dict
        ``{parameter_name: {index: DataFrame}}``, where the parameter name and
        index are taken from each file name by ``database_filename_parser``.
    """
    variations_by_parameter = {}

    for csv_path in glob.glob(path_to_db + 'variation*.csv'):
        variation_index, parameter_name = database_filename_parser(csv_path)

        # Create the inner dict the first time a parameter is encountered.
        per_index = variations_by_parameter.setdefault(parameter_name, {})
        per_index[variation_index] = pd.read_csv(csv_path)

    return variations_by_parameter

In [8]:
def load_systematic_sources_list(file_name):
    """Load a pickled dict and return it with keys and values swapped.

    Parameters
    ----------
    file_name : str
        Path to a pickle file containing a dict whose values are hashable.

    Returns
    -------
    dict
        The inverse mapping ``{value: key}``.  If several keys share a value,
        only one of them survives.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use this on
    # files from a trusted source.
    # The context manager closes the file handle even if unpickling fails.
    with open(file_name, 'rb') as pickle_file:
        systematic_sources = pickle.load(pickle_file)

    # .items() is available on both Python 2 and 3, unlike .iteritems().
    return {value: key for key, value in systematic_sources.items()}

In [9]:
# Load all variation CSVs from the default database path, grouped by
# parameter name and variation index.
parameter_variation = build_parameter_variation_dict()

In [10]:
# List the variation indices that were loaded for each parameter.
# .items() and list(...) behave the same on Python 2 and also run on Python 3,
# where .iteritems() does not exist.
for key, value in parameter_variation.items():
    print(key, list(value.keys()))

('dist_dcr3', [1, -1])
('dist_dcr1', [1, -1])
('dist_vz', [1, -1])
('dist_ecw', [1, -1])
('dist_ec_edep', [1, -1])
('dist_ecsf', [1, -1])
('p_mes', [0, -1])
('alpha', [1, -1])
('dist_ecu', [1, -1])
('dist_cc', [0, -1])
('dist_ecv', [1, -1])


In [11]:
# Inverted mapping loaded from the pickle; it is indexed by parameter name
# when the systematic columns are built.
systematic_sources = load_systematic_sources_list('systematic_sources.pkl')

In [12]:
def add_systematics(nominal_fit, parameter_variation, systematic_sources):
    """Attach per-source and total systematic shifts to the nominal fit.

    For each parameter, the frames at the smallest and largest variation index
    are joined on ``('axis', 'axis_bin')`` and the absolute difference of their
    ``par_0`` columns is stored as that source's shift.  Only ``par_0`` is
    propagated.  ``sys_total`` is the quadrature sum of all shifts.

    Parameters
    ----------
    nominal_fit : pandas.DataFrame
        Nominal fit results; must contain ``axis`` and ``axis_bin`` columns.
        It is not modified.
    parameter_variation : dict
        ``{parameter_name: {index: DataFrame}}``; each DataFrame must contain
        ``axis``, ``axis_bin`` and ``par_0`` columns.
    systematic_sources : dict
        Maps each parameter name to the name of the output column that
        receives its shift.

    Returns
    -------
    pandas.DataFrame
        A copy of ``nominal_fit`` with one column per systematic source plus
        ``sys_total``.  The joins use pandas' default inner merge, so rows
        whose ``(axis, axis_bin)`` is missing from a variation are dropped.
    """
    join_keys = ['axis', 'axis_bin']

    nominal_fit_sys = nominal_fit.copy(deep = True)
    nominal_fit_sys['sys_total'] = np.zeros(len(nominal_fit_sys))

    for key, variations in parameter_variation.items():
        source_column = systematic_sources[key]

        # Choose the extreme variations explicitly.  Indexing dict.keys()
        # depends on arbitrary dict ordering and does not work on Python 3.
        min_index = min(variations)
        max_index = max(variations)

        # Both frames carry par_0, so the merge yields par_0_x / par_0_y.
        merged_data = pd.merge(variations[min_index],
                               variations[max_index],
                               on = join_keys)
        merged_data[source_column] = np.abs(merged_data.par_0_y - merged_data.par_0_x)

        nominal_fit_sys = pd.merge(nominal_fit_sys,
                                   merged_data[join_keys + [source_column]],
                                   on = join_keys)

        # Accumulate squared shifts; the square root is taken after the loop.
        nominal_fit_sys['sys_total'] = (nominal_fit_sys['sys_total']
                                        + nominal_fit_sys[source_column]**2)

    nominal_fit_sys['sys_total'] = np.sqrt(nominal_fit_sys['sys_total'])

    return nominal_fit_sys

In [13]:
# Attach the per-source and total systematic columns to the nominal fit.
nominal_fit_sys = add_systematics(nominal, parameter_variation, systematic_sources)

In [14]:
# Write the result to disk; index=False keeps the DataFrame row index out of
# the CSV.
nominal_fit_sys.to_csv('results/fit/sys.csv', index=False)

In [16]:
# Preview up to the first 24 rows of the final table.
nominal_fit_sys.head(24)

Unnamed: 0,axis,axis_bin,axis_max,axis_min,err_0,err_1,err_2,par_0,par_1,par_2,...,sys_2,sys_9,sys_10,sys_6,sys_8,sys_7,sys_4,sys_3,sys_5,sys_11
0,pt,0,0.202108,0.000284,0.003848,0.201847,0.201488,0.024732,0.018225,-0.009308,...,0.000517,0.001543,0.001082,8.6e-05,0.000566,2.4e-05,0.001806,0.001102,8.8e-05,0.000179
1,pt,1,0.274544,0.202108,0.004326,0.161944,0.150773,0.026399,-0.499265,0.269604,...,0.000471,0.00321,0.000348,5.6e-05,0.000488,0.000697,5.5e-05,0.003593,0.001202,0.000359
2,pt,2,0.328453,0.274544,0.004196,0.185239,0.182566,0.03066,0.1354,-0.147474,...,0.001868,0.003764,0.000409,0.001838,0.001017,0.00064,0.000946,0.000413,0.00063,5.2e-05
3,pt,3,0.376169,0.328453,0.004066,0.168019,0.144424,0.034853,-0.131565,0.006722,...,8e-06,0.000463,0.000832,0.000534,9.2e-05,0.001316,0.00189,0.001698,0.000838,0.000725
4,pt,4,0.422123,0.376169,0.004451,0.154862,0.171526,0.036418,0.045705,-0.059875,...,0.000882,0.001446,0.000893,0.001112,0.00027,0.002883,0.002022,0.002113,0.000173,0.000868
5,pt,5,0.469541,0.422123,0.004105,0.149596,0.139497,0.033018,-0.369652,0.061894,...,0.001959,0.000428,0.000187,0.000529,0.000361,0.000378,0.001919,0.001405,0.000312,0.000621
6,pt,6,0.52194,0.469541,0.004933,0.192376,0.157255,0.036059,0.243873,-0.089681,...,0.000321,0.002852,0.000777,0.00282,0.002022,0.001762,0.002199,0.002695,0.00068,0.001958
7,pt,7,0.585257,0.52194,0.003973,0.156984,0.152007,0.035456,-0.211701,-0.081571,...,0.00032,0.000556,0.000152,0.001024,0.000636,0.005031,0.002032,0.001696,0.000854,0.001592
8,pt,8,0.674587,0.585257,0.004647,0.156082,0.188118,0.032423,-0.306766,0.056407,...,0.000632,0.001026,0.001251,0.001009,0.000417,0.000811,0.002075,0.005322,0.0007,0.000434
9,pt,9,1.193084,0.674587,0.003853,0.197304,0.190875,0.02043,-0.456216,0.190335,...,0.000132,0.002127,0.000221,0.000503,0.000127,0.001668,0.000798,0.000189,7.5e-05,0.000633
