In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from scipy.stats import norm
from statsmodels.base.model import GenericLikelihoodModel
import seaborn as sns
# Global seaborn styling; these calls change library-wide defaults, so they
# affect every figure created later in the notebook.
sns.set_palette("muted")
sns.set_color_codes()
sns.set_style("ticks")
# Tick marks on both axes point into the plotting area.
sns.set_style({"xtick.direction": "in","ytick.direction": "in"})
# Enable the axes grid and draw it in a very light grey ("0.95" greyscale string).
sns.set_style({"axes.grid": "True", "grid.color": "0.95"})

In [None]:
# Default canvas for every figure: 6x6 (matplotlib figsize units) at 100 dpi.
plt.rcParams.update({
    "figure.figsize": [6, 6],
    "figure.dpi": 100,
})

In [None]:
blue = (114/256, 147/256, 203/256)
orange = (225/256, 151/256,  76/256)
green = (132/256, 186/256,  91/256)
red = (211/256,  94/256,  96/256)
grey = (128/256, 133/256, 133/256)
violet = (144/256, 103/256, 167/256)
brown = (171/256, 104/256,  87/256)
yellow = (204/256, 194/256,  16/256)

In [None]:
# Three font-size tiers shared by all text elements of the figures.
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 14, 16, 18

plt.rcParams.update({
    'font.size': SMALL_SIZE,         # default text size
    'axes.titlesize': SMALL_SIZE,    # axes title
    'axes.labelsize': MEDIUM_SIZE,   # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,   # x tick labels
    'ytick.labelsize': SMALL_SIZE,   # y tick labels
    'legend.fontsize': SMALL_SIZE,   # legend entries
    'figure.titlesize': BIGGER_SIZE, # figure-level title
})

In [None]:
import root_pandas as rpd
import glob

# Branches requested from every ROOT file; only these columns are loaded.
variables = [
    'phit',
    'thetab',
    'thetat',
    'vrusable',
    'vtusable',
    'vrchi2',
    'vtchi2',
    'vrndf',
    'vtndf',
    'vrntrk',
    'vtntrk',
    'vrerr6',
    'vterr6',
    'vtistagl',
    'evmcflag',
    'csbdtg',
    'shcosthb',
    'benergy',
    'mbc',
    'de',
    'dt'
]


def _read_ntuples(pattern, columns):
    """Read all ROOT files matching ``pattern`` into one DataFrame.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the input files.
    columns : list of str
        Branch names to load.

    Raises
    ------
    FileNotFoundError
        If the pattern matches no file, so that a wrong path is reported
        directly instead of surfacing as an obscure error inside the reader.
    """
    # glob.glob returns matches in arbitrary order; sort for a reproducible row order.
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError("No ROOT files match pattern: {}".format(pattern))
    return rpd.read_root(paths, columns=columns)


df_mcsig = _read_ntuples("../data/Kpi/realistic_mc/DSRho-*.root", variables)
df_mcbkg = _read_ntuples("../data/Kpi/realistic_mc/DSRhoSkim*.root", variables)

### Apply cuts to datasets

In [None]:
def apply_cuts(df):
    """Return the rows of ``df`` that pass every selection requirement.

    A row is kept only if all entries of ``requirements`` below are true for it.
    The chi2/ndf and sqrt(err6) requirements are applied separately to the
    ``vr*`` and ``vt*`` columns, with a looser sqrt(err6) limit (0.05 instead
    of 0.02) when the corresponding ``*ntrk`` equals 1.
    """
    vr_sigma = np.sqrt(df.vrerr6)
    vt_sigma = np.sqrt(df.vterr6)

    requirements = [
        df.vrusable == 1,
        df.vtusable == 1,
        # chi2/ndf limit, waived when ntrk == 1
        ((df.vrchi2 / df.vrndf) < 50) | (df.vrntrk == 1),
        ((df.vtchi2 / df.vtndf) < 50) | (df.vtntrk == 1),
        # sqrt(err6) limit depends on whether ntrk is 1 or larger
        ((vr_sigma < 0.02) & (df.vrntrk > 1)) | ((vr_sigma < 0.05) & (df.vrntrk == 1)),
        ((vt_sigma < 0.02) & (df.vtntrk > 1)) | ((vt_sigma < 0.05) & (df.vtntrk == 1)),
        df.csbdtg > -0.6,
        (df.de > -0.14) & (df.de < 0.068),
        (df.dt > -10) & (df.dt < 10),
        df.thetab > 0.5,
    ]

    # AND all requirements together, left to right.
    passes = requirements[0]
    for requirement in requirements[1:]:
        passes = passes & requirement
    return df[passes]

def apply_evmcflag_cut(df, cr):
    """Select rows of ``df`` by their ``evmcflag`` value.

    If ``cr`` is truthy, keep the rows with ``evmcflag == 1``;
    otherwise keep the rows with ``evmcflag != 1``.
    """
    selector = (df.evmcflag == 1) if cr else (df.evmcflag != 1)
    return df[selector]

# Apply the selection to both MC samples; the selected frames replace the
# loaded ones under the same names.
df_mcsig, df_mcbkg = apply_cuts(df_mcsig), apply_cuts(df_mcbkg)
# Split the selected signal MC by evmcflag: == 1 -> df_mccr, != 1 -> df_mcscf.
df_mccr, df_mcscf = apply_evmcflag_cut(df_mcsig, 1), apply_evmcflag_cut(df_mcsig, 0)

In [None]:
def _ratio_with_errors(numerator, denominator):
    """Return the per-bin ratio numerator/denominator and its propagated error.

    The error is ratio * sqrt(1/numerator + 1/denominator). Bins where the
    ratio (denominator == 0) or the error (either count == 0) is undefined
    are set to NaN, which avoids divide-by-zero warnings and infinite values.
    """
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)

    ratios = np.full(numerator.shape, np.nan)
    errors = np.full(numerator.shape, np.nan)

    ratio_defined = denominator > 0
    error_defined = ratio_defined & (numerator > 0)

    ratios[ratio_defined] = numerator[ratio_defined] / denominator[ratio_defined]
    errors[error_defined] = ratios[error_defined] * np.sqrt(
        1 / numerator[error_defined] + 1 / denominator[error_defined]
    )
    return ratios, errors


def plot_with_ratio(datasets, labels, xlabel, colors):
    """Overlay two histograms and plot their bin-by-bin ratio underneath.

    The upper panel shows step histograms (50 bins) of ``datasets[0]`` and
    ``datasets[1]``; the lower panel shows ``datasets[0] / datasets[1]`` per
    bin with error bars. The figure is saved as a PDF in the working directory.

    Parameters
    ----------
    datasets : sequence of two array-likes
        Values to histogram. If ``datasets[0]`` has a ``name`` attribute
        (e.g. a pandas Series) it is used in the output file name; otherwise
        ``xlabel`` is used instead.
    labels : sequence of two str
        Legend labels; also used (spaces -> underscores, dots removed) in the
        output file name.
    xlabel : str
        Label of the shared x axis.
    colors : sequence of two colours
        Line colours of the two histograms.
    """
    fig, (ax1, ax2) = plt.subplots(nrows=2,
                                   sharex=True,
                                   gridspec_kw={'height_ratios': [3, 1]})

    fig.subplots_adjust(hspace=0.1)

    ns, bins, _ = ax1.hist([datasets[0], datasets[1]],
                           histtype="step",
                           bins=50,
                           linewidth=1,
                           color=colors,
                           label=labels)

    ax1.legend()
    # Label the bottom panel explicitly rather than relying on pyplot's current axes.
    ax2.set_xlabel(xlabel)

    ratios, errors = _ratio_with_errors(ns[0], ns[1])

    # Place each ratio point at the centre of its bin (bins holds the edges).
    bin_centers = 0.5 * (bins[:-1] + bins[1:])

    ax2.errorbar(x=bin_centers,
                 y=ratios,
                 yerr=errors,
                 fmt='o',
                 color=grey)

    # Sets maximal number of ticks
    ax2.yaxis.set_major_locator(plt.MaxNLocator(5))

    ax1.set_ylabel('Data')
    ax2.set_ylabel('Ratio')

    # Inputs without a usable .name (e.g. numpy arrays) fall back to the axis label.
    file_stem = getattr(datasets[0], 'name', None)
    if file_stem is None:
        file_stem = xlabel

    fig.savefig('{xlabel}_{label1}_{label2}.pdf'.format(xlabel=file_stem,
                                                        label1=labels[0].replace(" ", "_").replace(".",""),
                                                        label2=labels[1].replace(" ", "_").replace(".","")),
                format="pdf", bbox_inches = 'tight')

In [None]:
# vrchi2: MC CR vs. MC SCF
xlabel = 'vrchi2'
datasets = (df_mccr[xlabel], df_mcscf[xlabel])
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vrchi2: MC CR vs. MC BKG
xlabel = 'vrchi2'
datasets = (df_mccr[xlabel], df_mcbkg[xlabel])
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vtchi2: MC CR vs. MC SCF
xlabel = 'vtchi2'
datasets = (df_mccr[xlabel], df_mcscf[xlabel])
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vtchi2: MC CR vs. MC BKG
xlabel = 'vtchi2'
datasets = (df_mccr[xlabel], df_mcbkg[xlabel])
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vrndf: MC CR vs. MC SCF
xlabel = 'vrndf'
datasets = (df_mccr[xlabel], df_mcscf[xlabel])
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vrndf: MC CR vs. MC BKG
xlabel = 'vrndf'
datasets = (df_mccr[xlabel], df_mcbkg[xlabel])
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vtndf: MC CR vs. MC SCF
xlabel = 'vtndf'
datasets = (df_mccr[xlabel], df_mcscf[xlabel])
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vtndf: MC CR vs. MC BKG
xlabel = 'vtndf'
datasets = (df_mccr[xlabel], df_mcbkg[xlabel])
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vrntrk: MC CR vs. MC SCF
xlabel = 'vrntrk'
datasets = (df_mccr[xlabel], df_mcscf[xlabel])
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vrntrk: MC CR vs. MC BKG
xlabel = 'vrntrk'
datasets = (df_mccr[xlabel], df_mcbkg[xlabel])
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vtntrk: MC CR vs. MC SCF
xlabel = 'vtntrk'
datasets = (df_mccr[xlabel], df_mcscf[xlabel])
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vtntrk: MC CR vs. MC BKG
xlabel = 'vtntrk'
datasets = (df_mccr[xlabel], df_mcbkg[xlabel])
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# sqrt(vrerr6), shown under the axis label 'vrzerr': MC CR vs. MC SCF
xlabel = 'vrzerr'
datasets = (np.sqrt(df_mccr['vrerr6']), np.sqrt(df_mcscf['vrerr6']))
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# sqrt(vrerr6), shown under the axis label 'vrzerr': MC CR vs. MC BKG
xlabel = 'vrzerr'
datasets = (np.sqrt(df_mccr['vrerr6']), np.sqrt(df_mcbkg['vrerr6']))
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# sqrt(vterr6), shown under the axis label 'vtzerr': MC CR vs. MC SCF
xlabel = 'vtzerr'
datasets = (np.sqrt(df_mccr['vterr6']), np.sqrt(df_mcscf['vterr6']))
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# sqrt(vterr6), shown under the axis label 'vtzerr': MC CR vs. MC BKG
xlabel = 'vtzerr'
datasets = (np.sqrt(df_mccr['vterr6']), np.sqrt(df_mcbkg['vterr6']))
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vtistagl: MC CR vs. MC SCF
xlabel = 'vtistagl'
datasets = (df_mccr[xlabel], df_mcscf[xlabel])
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# vtistagl: MC CR vs. MC BKG
xlabel = 'vtistagl'
datasets = (df_mccr[xlabel], df_mcbkg[xlabel])
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# shcosthb: MC CR vs. MC SCF
xlabel = 'shcosthb'
datasets = (df_mccr[xlabel], df_mcscf[xlabel])
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# shcosthb: MC CR vs. MC BKG
xlabel = 'shcosthb'
datasets = (df_mccr[xlabel], df_mcbkg[xlabel])
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# benergy: MC CR vs. MC SCF
xlabel = 'benergy'
datasets = (df_mccr[xlabel], df_mcscf[xlabel])
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# benergy: MC CR vs. MC BKG
xlabel = 'benergy'
datasets = (df_mccr[xlabel], df_mcbkg[xlabel])
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# mbc: MC CR vs. MC SCF
xlabel = 'mbc'
datasets = (df_mccr[xlabel], df_mcscf[xlabel])
labels, colors = ['MC CR', 'MC SCF'], [green, violet]
plot_with_ratio(datasets, labels, xlabel, colors)

In [None]:
# mbc: MC CR vs. MC BKG
xlabel = 'mbc'
datasets = (df_mccr[xlabel], df_mcbkg[xlabel])
labels, colors = ['MC CR', 'MC BKG'], [green, red]
plot_with_ratio(datasets, labels, xlabel, colors)