In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from scipy.stats import norm
from statsmodels.base.model import GenericLikelihoodModel
import seaborn as sns
# Global seaborn look: "muted" palette, seaborn colour shorthands, and the
# "ticks" base style.
sns.set_palette("muted")
sns.set_color_codes()
sns.set_style("ticks")
# Overrides applied on top of the "ticks" style: inward-pointing ticks and a
# light grid.
sns.set_style({
    "xtick.direction": "in",
    "ytick.direction": "in",
    "axes.grid": "True",
    "grid.color": "0.95",
})

# Default figure geometry for every plot in the notebook.
plt.rcParams.update({
    "figure.figsize": [6, 4],
    "figure.dpi": 100,
})

In [None]:
# Three font-size tiers shared by all matplotlib text elements.
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 14, 16, 18

# Default text size.
plt.rc('font', size=SMALL_SIZE)
# Axes title and x/y axis labels.
plt.rc('axes', titlesize=SMALL_SIZE, labelsize=MEDIUM_SIZE)
# Tick labels on both axes (rc accepts a tuple of group names).
plt.rc(('xtick', 'ytick'), labelsize=SMALL_SIZE)
# Legend entries.
plt.rc('legend', fontsize=SMALL_SIZE)
# Figure-level title.
plt.rc('figure', titlesize=BIGGER_SIZE)

In [None]:
import uproot
import glob

In [None]:
def get_dataframe(paths, branches=None):
    """Read the ``h2000`` tree of each ROOT file, apply the cuts and concatenate.

    Parameters
    ----------
    paths : sequence of str
        ROOT files to read. Must be non-empty.
    branches : str or list of str, optional
        Column(s) to keep after the cuts have been applied. A single name is
        wrapped in a list, so the result is always a DataFrame. ``None``
        (the default) or any other falsy value keeps every column.

    Returns
    -------
    pandas.DataFrame
        Rows that pass ``apply_cuts`` from all files, stacked in the order of
        ``paths``. Index labels of the individual files are kept as they are.

    Raises
    ------
    AssertionError
        If ``paths`` is empty.
    """
    assert paths, "No paths supplied"
    if isinstance(branches, str):
        branches = [branches]

    frames = []
    for path in paths:
        # Cuts run on the full frame: they read columns that need not be
        # among the requested branches.
        selected = apply_cuts(uproot.open(path)['h2000'].pandas.df())
        frames.append(selected[branches] if branches else selected)

    if not frames:
        # Only reachable for a truthy-but-empty iterable (e.g. a generator).
        return pd.DataFrame()
    # One concat at the end: DataFrame.append was removed in pandas 2.0 and
    # re-copied the accumulated frame on every iteration.
    return pd.concat(frames)

In [None]:
def apply_cuts(df):
    """Return the rows of ``df`` that satisfy every selection requirement.

    The frame must provide the columns ``vrusable``, ``vtusable``, ``vrchi2``,
    ``vrndf``, ``vrntrk``, ``vtchi2``, ``vtndf``, ``vtntrk``, ``vrerr6``,
    ``vterr6``, ``csbdtg``, ``de``, ``dt`` and ``thetab``. All columns of the
    surviving rows are returned unchanged.
    """
    # Both usability flags must be set.
    usable = (df.vrusable == 1) & (df.vtusable == 1)

    # chi2/ndf below 50, unless the corresponding track count is exactly one.
    vr_fit_ok = ((df.vrchi2 / df.vrndf) < 50) | (df.vrntrk == 1)
    vt_fit_ok = ((df.vtchi2 / df.vtndf) < 50) | (df.vtntrk == 1)

    # sqrt(err6) below 0.02 for more than one track, below 0.05 for one track.
    vr_sigma = np.sqrt(df.vrerr6)
    vt_sigma = np.sqrt(df.vterr6)
    vr_sigma_ok = ((vr_sigma < 0.02) & (df.vrntrk > 1)) | ((vr_sigma < 0.05) & (df.vrntrk == 1))
    vt_sigma_ok = ((vt_sigma < 0.02) & (df.vtntrk > 1)) | ((vt_sigma < 0.05) & (df.vtntrk == 1))

    # Simple windows (all bounds are exclusive).
    csbdtg_ok = df.csbdtg > -0.6
    de_ok = (df.de > -0.14) & (df.de < 0.068)
    dt_ok = (df.dt > -10) & (df.dt < 10)
    thetab_ok = df.thetab > 0.5

    keep = (
        usable
        & vr_fit_ok & vt_fit_ok
        & vr_sigma_ok & vt_sigma_ok
        & csbdtg_ok & de_ok & dt_ok & thetab_ok
    )
    return df[keep]

In [None]:
# Per-channel 'nocand' frames for each sample type, keyed by channel name.
df_signal = {}
df_generic = {}
df_mc = {}
df_data = {}


def _load_nocand(pattern):
    """Load the 'nocand' column from every file matching the glob ``pattern``.

    Paths are sorted because glob returns matches in arbitrary order; sorting
    keeps the row order of the result reproducible between runs.
    """
    return get_dataframe(sorted(glob.glob(pattern)), ["nocand"])


# The directory layout is identical for every channel, so the four samples are
# loaded in a loop instead of one hand-written call per channel and sample.
for channel in ['Kpi', 'Kpipi0', 'K3pi']:
    channel_dir = '../data/' + channel
    df_signal[channel] = _load_nocand(
        channel_dir + '/realistic_mc/DSRho-mdst_' + channel + '_basf2_00_svd*.root')
    df_generic[channel] = _load_nocand(channel_dir + '/mc_wo_signal/*0.root')
    df_mc[channel] = _load_nocand(channel_dir + '/realistic_mc/stream0/*.root')
    df_data[channel] = _load_nocand(channel_dir + '/*.root')

In [None]:
# One figure per channel: candidate multiplicity in generic MC vs. signal MC.
for channel in ['Kpi', 'Kpipi0', 'K3pi']:
    # (sample dict, legend label, hatch pattern, edge colour) per overlaid histogram.
    for sample, legend_label, hatch_pattern, edge in (
        (df_generic, 'generic MC', '//', 'b'),
        (df_signal, 'signal MC', '\\\\', 'g'),
    ):
        # Five unit-width bins over [1, 6); align='left' centres each bar on
        # its left bin edge, i.e. on the integers 1..5.
        plt.hist(
            sample[channel]['nocand'],
            bins=5,
            range=(1, 6),
            align='left',
            log=True,
            histtype='step',
            fill=False,
            hatch=hatch_pattern,
            edgecolor=edge,
            linewidth=2,
            label=legend_label,
        )
    plt.xlabel("Number of candidates")
    plt.locator_params(axis='x', nbins=6)
    plt.ylim(bottom=1)
    plt.legend()
    # plt.savefig(channel + "_nocand_genvssig.pdf")
    plt.show()

In [None]:
# One figure per channel: candidate multiplicity in MC vs. data, saved as
# "<channel>_nocand_datavsmc.pdf" in the working directory.
for channel in ['Kpi', 'Kpipi0', 'K3pi']:
    # (sample dict, legend label, hatch pattern, edge colour) per overlaid histogram.
    overlays = (
        (df_mc, 'MC', '//', 'b'),
        (df_data, 'data', '\\\\', 'g'),
    )
    for sample, legend_label, hatch_pattern, edge in overlays:
        # Five unit-width bins over [1, 6); align='left' centres each bar on
        # its left bin edge, i.e. on the integers 1..5.
        plt.hist(
            sample[channel]['nocand'],
            bins=5,
            range=(1, 6),
            align='left',
            log=True,
            histtype='step',
            fill=False,
            hatch=hatch_pattern,
            edgecolor=edge,
            linewidth=2,
            label=legend_label,
        )
    plt.legend()
    plt.xlabel("Number of candidates")
    plt.locator_params(axis='x', nbins=6)
    plt.savefig(channel + "_nocand_datavsmc.pdf")
    plt.show()

In [None]:
# For each channel and each candidate multiplicity i = 1..5, compute
#     (fraction of data rows with nocand == i) / (fraction of MC rows with nocand == i).
# ratios[channel] is a list of five floats ordered by i.
ratios = {}
for channel in ['Kpi', 'Kpipi0', 'K3pi']:
    data_nocand = df_data[channel]['nocand']
    mc_nocand = df_mc[channel]['nocand']

    # Totals do not depend on i, so compute them once per channel. Working on
    # the 'nocand' column yields scalars; DataFrame.count() returns a
    # per-column Series, and float() on a Series is deprecated in pandas 2.1.
    num_tot_data = np.float64(data_nocand.count())
    num_tot_mc = np.float64(mc_nocand.count())

    channel_ratios = []
    # An empty multiplicity bin or an empty sample gives inf/nan rather than
    # an exception; the floating-point warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        for i in range(1, 6):
            data_fraction = np.float64((data_nocand == i).sum()) / num_tot_data
            mc_fraction = np.float64((mc_nocand == i).sum()) / num_tot_mc
            channel_ratios.append(float(data_fraction / mc_fraction))
    ratios[channel] = channel_ratios

In [None]:
# Grouped bar chart of the data/MC ratios: one group per candidate
# multiplicity (1..5), one bar per channel inside each group.
x = np.arange(1, 6)  # group centres, one per multiplicity
width = 0.25  # width of a single bar

fig, ax = plt.subplots()
# Shift the three channels by -width, 0, +width so the bars sit side by side.
rects1, rects2, rects3 = [
    ax.bar(x + shift, ratios[name], width, label=name)
    for shift, name in zip((-width, 0, width), ('Kpi', 'Kpipi0', 'K3pi'))
]

ax.set(xlabel='Number of Candidates', ylabel='Data/MC Ratio')
ax.legend()

fig.tight_layout()

fig.savefig("nocand_ratios.pdf")
plt.show()