In [None]:
# Standard includes
%matplotlib inline
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import uproot
# from scipy.optimize import curve_fit
# from scipy.stats import norm
# from statsmodels.base.model import GenericLikelihoodModel

In [None]:
# Style setup
# Global plot styling: seaborn 'muted' palette, 'ticks' style with inward-pointing
# ticks and a grid drawn in colour '0.95'. The later set_style calls are applied
# on top of the 'ticks' style, so the order of the calls matters.
import seaborn as sns
sns.set_palette('muted')
sns.set_color_codes()
sns.set_style('ticks')
sns.set_style({'xtick.direction': 'in','ytick.direction': 'in'})
# NOTE(review): 'axes.grid' is given the string 'True', not the bool True —
# confirm matplotlib's rc validation accepts it as intended.
sns.set_style({'axes.grid': 'True', 'grid.color': '0.95'})

# Default figure: 6x6 (matplotlib figsize units) at 100 dpi.
plt.rcParams['figure.figsize'] = [6,6]
plt.rcParams['figure.dpi'] = 100
def darken_color(color, p):
    """Scale the first three components of ``color`` by the factor ``p``.

    Returns a 3-tuple; components beyond the third (if any) are ignored.
    """
    first, second, third = color[0], color[1], color[2]
    return (first * p, second * p, third * p)

import matplotlib as mpl
# Register two-letter colour codes ('bd', 'gd', ...) that map to versions of the
# 'muted' palette colours darkened by a factor 0.8; (.1, .1, .1) is appended as
# an extra entry after the palette colours.
# NOTE(review): zip() stops at the seven codes, so the pairing assumes the
# palette yields exactly six colours in b, g, r, m, y, c order — verify against
# the installed seaborn version.
colors = sns.color_palette('muted') + [(.1, .1, .1)]
for code, color in zip(['bd','gd','rd','md','yd','cd','kd'], colors):
    rgb = mpl.colors.colorConverter.to_rgb(darken_color(color,0.8))
    # Write the code into both the converter's colour table and its cache.
    # NOTE(review): these are matplotlib converter attributes whose layout may
    # differ between versions — confirm the codes resolve when plotting.
    mpl.colors.colorConverter.colors[code] = rgb
    mpl.colors.colorConverter.cache[code] = rgb

# Named RGB colour tuples with components scaled into the 0-1 range.
# NOTE(review): the channel values are divided by 256; 8-bit channels are
# usually normalised by 255 — confirm 256 is intended.
blue = (114/256, 147/256, 203/256)
orange = (225/256, 151/256,  76/256)
green = (132/256, 186/256,  91/256)
red = (211/256,  94/256,  96/256)
grey = (128/256, 133/256, 133/256)
violet = (144/256, 103/256, 167/256)
brown = (171/256, 104/256,  87/256)
yellow = (204/256, 194/256,  16/256)

SMALL_SIZE = 14
MEDIUM_SIZE = 16
BIGGER_SIZE = 18

# Font sizes for all text elements, applied in a single rcParams update.
plt.rcParams.update({
    'font.size': SMALL_SIZE,          # default text size
    'axes.titlesize': SMALL_SIZE,     # axes title
    'axes.labelsize': MEDIUM_SIZE,    # x and y axis labels
    'xtick.labelsize': SMALL_SIZE,    # x tick labels
    'ytick.labelsize': SMALL_SIZE,    # y tick labels
    'legend.fontsize': SMALL_SIZE,    # legend entries
    'figure.titlesize': BIGGER_SIZE,  # figure title
})

In [None]:
def apply_cuts(df):
    """Return the rows of ``df`` that pass every common selection cut.

    The individual requirements are built as named boolean masks and combined
    with a logical AND; the input dataframe itself is not modified.
    """
    # Both vertices must be flagged usable.
    usable = (df.vrusable == 1) & (df.vtusable == 1)

    # chi2/ndf below 50, waived for vertices built from a single track.
    vr_single = df.vrntrk == 1
    vt_single = df.vtntrk == 1
    vr_fit_ok = ((df.vrchi2 / df.vrndf) < 50) | vr_single
    vt_fit_ok = ((df.vtchi2 / df.vtndf) < 50) | vt_single

    # sqrt(err6) threshold is 0.02 for multi-track and 0.05 for single-track vertices.
    vr_sigma = np.sqrt(df.vrerr6)
    vt_sigma = np.sqrt(df.vterr6)
    vr_err_ok = ((vr_sigma < 0.02) & (df.vrntrk > 1)) | ((vr_sigma < 0.05) & vr_single)
    vt_err_ok = ((vt_sigma < 0.02) & (df.vtntrk > 1)) | ((vt_sigma < 0.05) & vt_single)

    # Open-interval windows on the remaining per-candidate variables.
    csbdtg_ok = df.csbdtg > -0.6
    de_ok = (df.de > -0.14) & (df.de < 0.068)
    dt_ok = (df.dt > -10) & (df.dt < 10)
    thetab_ok = (df.thetab > 0.65) & (df.thetab < 2.95)
    nocand_ok = df.nocand <= 3

    passed = (
        usable
        & vr_fit_ok & vt_fit_ok
        & vr_err_ok & vt_err_ok
        & csbdtg_ok & de_ok & dt_ok & thetab_ok
        & nocand_ok
    )
    return df[passed]

def get_dataframe(paths, branches=None):
    """Read ROOT files, apply common cuts, and return as dataframe.

    Parameters
    ----------
    paths : iterable of str
        ROOT files to read; the "h2000" object of each file is loaded.
    branches : str or list of str, optional
        Column(s) to keep after the cuts have been applied. A single name may
        be given as a plain string. If omitted (or empty) all columns are kept.

    Returns
    -------
    pandas.DataFrame
        Rows of all files that pass ``apply_cuts``, concatenated in the order
        of ``paths`` with the per-file index preserved.

    Raises
    ------
    AssertionError
        If ``paths`` is empty.
    """
    assert paths, "No paths supplied"

    # Normalise a single branch name once instead of on every iteration.
    if branches and isinstance(branches, str):
        branches = [branches]

    # Collect the per-file frames and concatenate once at the end:
    # DataFrame.append inside a loop copies the accumulated data every time
    # and was removed in pandas 2.0.
    frames = []
    for path in paths:
        # NOTE(review): `.pandas.df()` looks like the uproot3 interface —
        # confirm against the installed uproot version.
        tree_df = uproot.open(path)["h2000"].pandas.df()
        # Cuts run on the full frame because they need columns that
        # `branches` may not include.
        selected = apply_cuts(tree_df)
        frames.append(selected[branches] if branches else selected)

    if not frames:
        # Truthy but empty iterable (e.g. a generator): same result as before.
        return pd.DataFrame()
    return pd.concat(frames)

In [None]:
# glob returns matches in arbitrary order; sort them so the row order of the
# resulting dataframes is reproducible from run to run.
df_sig = get_dataframe(sorted(glob.glob("../../data/Kpi/realistic_mc/*basf2*.root")))
df_bkg = get_dataframe(sorted(glob.glob("../../data/Kpi/realistic_mc/*evtgen*.root")))

In [None]:
# Add the column r = 1 - 2 * tagwtag to both samples (the frames are modified in place).
# NOTE(review): presumably tagwtag is a wrong-tag probability, so r runs from
# 0 to 1 — confirm against the ntuple definition.
df_sig["r"] = 1 - 2 * df_sig["tagwtag"]
df_bkg["r"] = 1 - 2 * df_bkg["tagwtag"]

In [None]:
def get_r_bin(row):
    """Map the ``'r'`` entry of ``row`` to its bin number.

    Bins 0-6 cover [0, 0.1], (0.1, 0.25], (0.25, 0.5], (0.5, 0.625],
    (0.625, 0.75], (0.75, 0.875] and (0.875, 1]. Anything else, including
    NaN, is assigned to the overflow bin 7.
    """
    r = row['r']

    # Outside [0, 1] (or NaN, for which every comparison is False): overflow bin.
    if not (0.000 <= r <= 1.000):
        return 7

    # Upper edges are inclusive, so the first edge not below r gives the bin.
    upper_edges = (0.100, 0.250, 0.500, 0.625, 0.750, 0.875, 1.000)
    for bin_number, upper_edge in enumerate(upper_edges):
        if r <= upper_edge:
            return bin_number
    return 7

In [None]:
# Row-wise bin assignment; get_r_bin already takes a row, so no lambda is needed.
df_sig['rbin'] = df_sig.apply(get_r_bin, axis=1)
df_bkg['rbin'] = df_bkg.apply(get_r_bin, axis=1)

In [None]:
# Normalised thetab histograms of df_sig, one panel per rbin, with shared axes.
df_sig.hist(column='thetab', by='rbin', sharex=True, sharey=True, density=True)

In [None]:
# thetab distribution of df_sig, one panel per rbin, three panels per row.
bins = np.linspace(start=0.65, stop=2.95, num=10)
g = sns.FacetGrid(data=df_sig, col="rbin", col_wrap=3, margin_titles=True)
g.map(plt.hist, "thetab", density=True, bins=bins)

In [None]:
# thetab distribution of df_bkg, one panel per rbin, three panels per row.
bins = np.linspace(start=0.65, stop=2.95, num=10)
g = sns.FacetGrid(data=df_bkg, col="rbin", col_wrap=3, margin_titles=True)
g.map(plt.hist, "thetab", density=True, bins=bins)

In [None]:
# Unnormalised thetab histograms of df_sig, one panel per rbin.
df_sig.hist(column="thetab", by="rbin")

In [None]:
# glob returns matches in arbitrary order; sort them so the row order of
# df_data is reproducible from run to run.
df_data = get_dataframe(sorted(glob.glob("../../data/Kpi/*.root")))
# Same derived columns as for the MC samples: r = 1 - 2 * tagwtag and its bin.
df_data["r"] = 1 - 2 * df_data["tagwtag"]
df_data['rbin'] = df_data.apply(get_r_bin, axis=1)

In [None]:
# thetab distribution of df_data, one panel per rbin, three panels per row.
bins = np.linspace(start=0.65, stop=2.95, num=10)
g = sns.FacetGrid(data=df_data, col="rbin", col_wrap=3, margin_titles=True)
g.map(plt.hist, "thetab", density=True, bins=bins)

In [None]:
# Split df_sig on evmcflag: rows with evmcflag == 1 go to df_cr, all others to df_scf.
df_cr = df_sig[df_sig['evmcflag'] == 1]
df_scf = df_sig[df_sig['evmcflag'] != 1]

In [None]:
# Number of rows per rbin (0..6) in each sample.
cr_counts = [len(df_cr[df_cr['rbin'] == rbin]) for rbin in range(7)]
scf_counts = [len(df_scf[df_scf['rbin'] == rbin]) for rbin in range(7)]
bkg_counts = [len(df_bkg[df_bkg['rbin'] == rbin]) for rbin in range(7)]

In [None]:
# Per-rbin ratios of the SCF and BKG counts to the CR count.
print("rbin | SCF/CR | BKG/CR")
print("-----|--------|-------")
for rbin in range(7):
    n_cr = cr_counts[rbin]
    # An empty CR bin would raise ZeroDivisionError and abort the table;
    # report NaN for that row instead.
    scf_ratio = scf_counts[rbin] / n_cr if n_cr else float("nan")
    bkg_ratio = bkg_counts[rbin] / n_cr if n_cr else float("nan")
    print(f"{rbin:4} | {scf_ratio:6.3f} | {bkg_ratio:6.3f}")

In [None]:
# dt distribution of df_scf, one panel per rbin, three panels per row.
bins = np.linspace(start=-10, stop=10, num=20)
g = sns.FacetGrid(data=df_scf, col="rbin", col_wrap=3, margin_titles=True)
g.map(plt.hist, "dt", density=True, bins=bins)

In [None]:
# dt distribution of df_bkg, one panel per rbin, three panels per row.
bins = np.linspace(start=-10, stop=10, num=20)
g = sns.FacetGrid(data=df_bkg, col="rbin", col_wrap=3, margin_titles=True)
g.map(plt.hist, "dt", density=True, bins=bins)

In [None]:
def create_overlayed_histos(dfs, legend, var, density=False, xlabel=None, path=None,
                            bins=5, legend_title="rbin"):
    """Overlay step histograms of one column for several dataframes.

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Datasets to overlay; each must contain the column ``var``.
    legend : sequence of str
        Legend labels, one per dataframe, in the same order as ``dfs``.
    var : str
        Name of the column to histogram.
    density : bool, default False
        Forwarded to the histogram call; normalises each histogram.
    xlabel : str, optional
        If given, it is used as the x-axis label and the axes title is cleared.
    path : str, optional
        If given, the figure is saved as ``path + ".png"`` and ``path + ".pdf"``.
    bins : int or sequence, default 5
        Bin specification passed to ``np.histogram`` to derive the common edges.
    legend_title : str, default "rbin"
        Title shown above the legend entries.
    """
    fig, ax = plt.subplots()

    dfs = list(dfs)
    if dfs:
        # Derive the common bin edges from ALL datasets. Taking them from the
        # first one only would silently drop entries of the other datasets
        # that fall outside the first dataset's range.
        pooled = pd.concat([df[var] for df in dfs]).dropna()
        _, bin_edges = np.histogram(pooled, bins=bins)
        for df in dfs:
            df.hist(
                var, ax=ax, bins=bin_edges, histtype="step", linewidth=1.5, density=density
            )

    if xlabel:
        ax.set_xlabel(xlabel)
        # Clear the title only when an explicit x label replaces it.
        ax.set_title("")

    ax.legend(legend, loc="upper left", fontsize=10, title_fontsize=10, title=legend_title)
    fig.tight_layout()

    if path:
        fig.savefig(path + ".png")
        fig.savefig(path + ".pdf")

In [None]:
# Normalised dt distributions of df_scf, one curve per rbin.
create_overlayed_histos(
    dfs=[df_scf.query(f"rbin=={i}") for i in range(7)],
    legend=[f"{i}" for i in range(7)],
    var="dt",
    density=True,
    # Raw string: "\D" is not a valid escape sequence in a normal string literal.
    xlabel=r"SCF $\Delta t$ [ps]",
    path="dt_scf_rbins",
)

In [None]:
# Normalised dt distributions of df_bkg, one curve per rbin.
create_overlayed_histos(
    dfs=[df_bkg.query(f"rbin=={i}") for i in range(7)],
    legend=[f"{i}" for i in range(7)],
    var="dt",
    density=True,
    # Raw string: "\D" is not a valid escape sequence in a normal string literal.
    xlabel=r"BKG $\Delta t$ [ps]",
    path="dt_bkg_rbins",
)

In [None]:
# (rows, columns) of the SCF and BKG samples, one per line.
print(df_scf.shape, df_bkg.shape, sep="\n")

In [None]:
# Summary statistics of tagwtag and rbin in df_cr.
df_cr.loc[:, ['tagwtag', 'rbin']].describe()

In [None]:
# Seven values each, indexed by rbin 0..6.
# NOTE(review): presumably the per-bin wrong-tag fractions (w) and their
# differences (dw) for SVD2 MC — confirm the source of these numbers.
w_svd2_mc = [0.5, 0.412222, 0.307838, 0.212765, 0.149933, 0.0913264, 0.0218754]
dw_svd2_mc = [0., 0.00408778, 0.010326,  -0.00479522, 0.00151989, 0.0143633,  0.00189979]

In [None]:
# Averages of w and dw over rbin, weighted by the fraction of df_cr rows in each bin.
n_cr_total = df_cr.shape[0]
w = 0
dw = 0
for i in range(7):
    n_cr_bin = len(df_cr[df_cr['rbin'] == i])
    w += w_svd2_mc[i] * n_cr_bin / n_cr_total
    dw += dw_svd2_mc[i] * n_cr_bin / n_cr_total
print(w)
print(dw)

In [None]:
# Normalised de distributions of df_cr, one curve per rbin.
create_overlayed_histos(
    dfs=[df_cr.query(f"rbin=={i}") for i in range(7)],
    legend=[f"{i}" for i in range(7)],
    var="de",
    density=True,
    # Raw string: "\D" is not a valid escape sequence in a normal string literal.
    xlabel=r"CR $\Delta E$ [GeV]",
    path="de_cr_rbins",
)

In [None]:
# Normalised de distributions of df_scf, one curve per rbin.
create_overlayed_histos(
    dfs=[df_scf.query(f"rbin=={i}") for i in range(7)],
    legend=[f"{i}" for i in range(7)],
    var="de",
    density=True,
    # Raw string: "\D" is not a valid escape sequence in a normal string literal.
    xlabel=r"SCF $\Delta E$ [GeV]",
    path="de_scf_rbins",
)

In [None]:
# Normalised de distributions of df_bkg, one curve per rbin.
create_overlayed_histos(
    dfs=[df_bkg.query(f"rbin=={i}") for i in range(7)],
    legend=[f"{i}" for i in range(7)],
    var="de",
    density=True,
    # Raw string: "\D" is not a valid escape sequence in a normal string literal.
    xlabel=r"BKG $\Delta E$ [GeV]",
    path="de_bkg_rbins",
)

In [None]:
# rbin occupancy of the three samples; edges at half-integers (-0.5 ... 6.5)
# so that each integer rbin 0..6 gets its own bin.
plt.hist(
    x=[df_cr['rbin'], df_scf['rbin'], df_bkg['rbin']],
    histtype='step',
    bins=np.arange(-0.5, 7.0),
)
plt.legend(['CR', 'SCF', 'BKG'])

In [None]:
# Number of rows per rbin (0..6) in each sample, plus their element-wise sum.
cr_counts = [len(df_cr[df_cr['rbin'] == rbin]) for rbin in range(7)]
scf_counts = [len(df_scf[df_scf['rbin'] == rbin]) for rbin in range(7)]
bkg_counts = [len(df_bkg[df_bkg['rbin'] == rbin]) for rbin in range(7)]
all_counts = list(np.add(np.add(cr_counts, scf_counts), bkg_counts))

In [None]:
# Fraction of each component (CR, SCF, BKG) per rbin and over all rows.
# The last column is BKG divided by the total, so it is labelled "BKG/all".
print("rbin | CR/all | SCF/all | BKG/all")
print("-----|--------|---------|--------")
for rbin in range(7):
    n_all = all_counts[rbin]
    print(
        f"{rbin:4} | {cr_counts[rbin] / n_all:6.3f} | {scf_counts[rbin] / n_all:7.3f} | {bkg_counts[rbin] / n_all:7.3f}"
    )
# The overall total is computed once instead of once per column.
n_total = df_cr.shape[0] + df_scf.shape[0] + df_bkg.shape[0]
print(f" all | {df_cr.shape[0] / n_total:6.3f} | "
      f"{df_scf.shape[0] / n_total:7.3f} | "
      f"{df_bkg.shape[0] / n_total:7.3f}")