In [None]:
import uproot
import matplotlib.pyplot as plt
import numpy as np
import os
import time
import multiprocessing
import math
import csv
import pandas as pd
import scipy
import tensorflow as tf
import zfit

In [None]:
def partition_helper(slice_entries, file_entries, file_curr, entry_curr):
    """Find where a slice of ``slice_entries`` entries ends.

    Starting at entry ``entry_curr`` of file ``file_curr``, walk forward
    through ``file_entries`` (the entry count of each file) until the
    requested number of entries has been consumed.

    Returns:
        ``[file_index, entry_stop]``: the file in which the slice ends and
        the exclusive stop entry within that file. If the files run out
        first, the slice is clamped to the end of the last file.
    """
    remaining = slice_entries
    file_idx = file_curr
    offset = entry_curr
    last_idx = len(file_entries) - 1

    while True:
        # The slice fits into what is left of the current file.
        if remaining <= file_entries[file_idx] - offset:
            return [file_idx, remaining + offset]
        # Nothing after the last file: clamp to its end.
        if file_idx == last_idx:
            return [file_idx, file_entries[-1]]
        # Consume the rest of this file and continue at the start of the next.
        remaining = remaining - file_entries[file_idx] + offset
        file_idx += 1
        offset = 0

def partition(files, n_processes):
    """Split the entries of ``files`` into ``n_processes`` consecutive slices.

    Each element of ``files`` must expose a ``num_entries`` attribute. The
    slice size is the total entry count divided by ``n_processes``, rounded
    up, so trailing slices may be shorter or empty.

    Returns:
        A list with one ``[file_start, entry_start, file_stop, entry_stop]``
        list per process; each slice begins where the previous one ended.
    """
    entries_per_file = [f.num_entries for f in files]
    chunk = math.ceil(sum(entries_per_file) / n_processes)

    boundaries = []
    cursor = [0, 0]  # [file index, entry index] where the next slice starts
    for _ in range(n_processes):
        stop = partition_helper(chunk, entries_per_file, cursor[0], cursor[1])
        boundaries.append(cursor + stop)
        cursor = stop
    return boundaries

def read_slice(files, slices, index, expressions, cut, data):
    """Read one slice and append it to ``data`` as a single DataFrame.

    ``slices[index]`` is ``[file_start, entry_start, file_stop, entry_stop]``.
    Every file from ``file_start`` to ``file_stop`` (inclusive) is read via
    its ``arrays`` method with ``library="pd"``; ``entry_start`` is applied
    only to the first file and ``entry_stop`` only to the last, all files in
    between are read in full (``None`` bounds).
    """
    first_file, first_entry, last_file, last_entry = slices[index][:4]
    frames = [
        files[file_idx].arrays(
            expressions=expressions,
            cut=cut,
            entry_start=first_entry if file_idx == first_file else None,
            entry_stop=last_entry if file_idx == last_file else None,
            library="pd",
        )
        for file_idx in range(first_file, last_file + 1)
    ]
    data.append(pd.concat(frames))

def to_pandas(path, tree, n_files, n_processes, expressions, cut=None):
    """Read branches from several ROOT files into one DataFrame in parallel.

    Args:
        path: Directory containing the input files.
        tree: Name of the tree to open inside every file.
        n_files: Number of files to use, taken from the sorted directory
            listing.
        n_processes: Number of worker processes (and of entry slices).
        expressions: Forwarded to ``read_slice`` (branch expression(s)).
        cut: Optional selection forwarded to ``read_slice``.

    Returns:
        The concatenation of all slices, in slice order.

    Raises:
        RuntimeError: If any worker process exits with a non-zero exit code,
            since its slice would otherwise be silently missing.
    """
    filenames = sorted(os.listdir(path))[:n_files]
    # os.path.join works whether or not `path` ends with a separator.
    files = [uproot.open(path=os.path.join(path, filename) + ":" + tree) for filename in filenames]
    slices = partition(files, n_processes)

    # The context manager shuts the manager's server process down on exit.
    with multiprocessing.Manager() as manager:
        # One result list per worker, so the output order follows the slice
        # order instead of the order in which workers happen to finish.
        results = [manager.list() for _ in range(n_processes)]
        processes = []
        for i in range(n_processes):
            p = multiprocessing.Process(target=read_slice, args=[files, slices, i, expressions, cut, results[i]])
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

        failed = [i for i, p in enumerate(processes) if p.exitcode != 0]
        if failed:
            raise RuntimeError(f"read_slice failed for slice(s) {failed}")

        # Copy the frames out of the proxies before the manager goes away.
        frames = [frame for result in results for frame in list(result)]

    return pd.concat(frames)

In [None]:
# Directory whose sorted entries are opened as ROOT files by to_pandas.
path = "../data/128_files/"

In [None]:
# Load candidate_vMass from the first 128 files using 32 worker processes,
# keeping only candidates that pass the selection below.
# Timing uses perf_counter: time.time() follows the wall clock and can jump
# if the system time is adjusted, which would distort the measured duration.
start = time.perf_counter()
data = to_pandas(path, "rootuple/CandidateTree", 128, 32,
                "candidate_vMass",
                "(candidate_charge == 0)\
                & (candidate_cosAlpha > 0.99)\
                & (candidate_lxy / candidate_lxyErr > 3.0)\
                & (candidate_vProb > 0.05)\
                & (ditrack_mass > 1.014) & (ditrack_mass < 1.024)\
                & (candidate_vMass > 5.33) & (candidate_vMass < 5.4)")
# Elapsed seconds, shown as the cell output.
time.perf_counter() - start

In [None]:
# Histogram of the selected candidate masses.
fig, ax = plt.subplots(figsize=(15, 5))
ax.hist(data["candidate_vMass"], bins=200)
plt.show()

In [None]:
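# TODO(review): unfinished helper stub, presumably meant to create a zfit
# parameter or reuse an existing one by name. Complete it or delete this cell.
# def parameter(name, value, lower=None, upper=None):
#     try: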
        

In [None]:
# Parameters of the Gaussian component (mu, sigma) and the fraction passed to
# the sum PDF. Positional arguments: name, initial value, lower limit, upper limit.
# NOTE(review): the limits and values given here are overwritten by the later
# cell that assigns .lower/.upper and calls set_value on each parameter.
mu = zfit.Parameter("mu", 5.36, 5.2, 5.4)
sigma = zfit.Parameter("sigma", 0.1, 0.01, 0.2)
frac = zfit.Parameter("frac_gauss", 0.5, 0, 1)

In [None]:
# Coefficients handed to the Chebyshev PDF.
# Positional arguments: name, initial value, lower limit, upper limit.
# NOTE(review): a later cell resets these (a1 gets lower limit -1, value -0.2).
a0 = zfit.Parameter("a0", 0.5, 0.0, 1.0)
a1 = zfit.Parameter("a1", 0.2, 0.0, 1.0)

In [None]:
# Reset every fit parameter to its limits and starting value so the fit can be
# re-run from a known state. Columns: parameter, lower limit, upper limit, value.
parameter_settings = [
    (mu, 5.33, 5.40, 5.366),
    (sigma, 0.001, 0.2, 0.1),
    (a0, 0.0, 1.0, 0.1),
    (a1, -1, 1.0, -0.2),
    (frac, 0.0, 1.0, 0.5),
]

for parameter, lower, upper, value in parameter_settings:
    # Limits first, then the value.
    parameter.lower = lower
    parameter.upper = upper
    parameter.set_value(value)

In [None]:
# Observable space: candidate mass restricted to the fit window.
mass = zfit.Space(obs="candidate_vMass", limits=(5.33, 5.4))

# The two components of the fit model, both defined over `mass`.
gauss = zfit.pdf.Gauss(
    mu=mu,
    sigma=sigma,
    obs=mass,
)
chebyshev = zfit.pdf.Chebyshev(
    coeffs=[a0, a1],
    obs=mass,
)

In [None]:
# Total model: sum of the Gaussian and the Chebyshev PDFs, mixed by `frac`.
model = zfit.pdf.SumPDF(
    pdfs=[gauss, chebyshev],
    fracs=frac,
)

In [None]:
# Unbinned negative log-likelihood of the model on the loaded DataFrame.
fit_data = zfit.Data.from_pandas(data)
nll = zfit.loss.UnbinnedNLL(model=model, data=fit_data)

# Minimise the loss with Minuit and show the fit result.
minimizer = zfit.minimize.Minuit()
minimum = minimizer.minimize(loss=nll)

print(minimum)

In [None]:
# Overlay the fitted model on the candidate mass histogram.
mass_range = (5.33, 5.4)
n_bins = 200

plt.figure(figsize=(15, 5))
# Bin over the fit window so the bin width matches the one used to scale the
# curve below (by default hist would bin between the data's min and max).
plt.hist(data["candidate_vMass"], bins=n_bins, range=mass_range)

# Use `mass_range`, not the builtin `range`, and a separate name for the
# sampling points so the zfit.Space called `mass` is not overwritten.
mass_points = np.linspace(*mass_range, num=1000)
pdf_fit = zfit.run(model.pdf(mass_points))

# Expected entries per bin = number of candidates * bin width * pdf value.
bin_width = (mass_range[1] - mass_range[0]) / n_bins
scale = len(data["candidate_vMass"]) * bin_width * pdf_fit

plt.plot(mass_points, scale)
plt.show()