In [None]:
# Scratch expression (evaluates to 2); no later cell depends on it.
1+1

In [None]:
%load_ext autoreload
%autoreload 2

import os
import uproot
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cbook as cbook
from matplotlib.legend_handler import HandlerLine2D, HandlerTuple
import numpy as np
import pandas as pd
from decimal import Decimal
from scipy.stats import norm
from scipy.optimize import curve_fit
from scipy import stats
import datetime as dt
import scipy.optimize
import landau
from scipy.interpolate import CubicSpline, interp2d
from scipy.integrate import quad
from scipy.special import erf
from numpy import linalg

import importlib
from multiprocessing import Pool

from tqdm.auto import tqdm

# local imports
from lib.constants import *

In [None]:
# --- Notebook configuration: every switch below is read as a global by later cells ---

# When True, the plotting cells call plt.savefig and the MPV table is written to HDF.
dosave = True
# Used as a plain string prefix (savedir + "dqdx_..."), so "blehhh" becomes the start of
# each file name inside ./plots_9_13_23.
# NOTE(review): no cell creates this directory -- confirm it exists before saving.
savedir = "./plots_9_13_23/blehhh"

# Selects the MC branches of the fiducial cut, calibration and angle-correction cells.
isMC = True
# NOTE: this value is overwritten by `datadir = filedir` further down in this cell.
datadir = "/icarus/data/users/gputnam/DMCP2023G/calib-data/"

# DATA
# filedir = "/icarus/data/users/gputnam/DMCP2023G/data/"
# f = filedir + "numiRun1_unblind_reprodC_chi2u40_chi2p80_protonhit.df"

# MC
# filedir = "/icarus/data/users/gputnam/DMCP2023G/mc/"
# # f = filedir + "numimc_miniprod_protonhit.df"
# # f = filedir + "numimc_fitwvfkstunegain_protonhit.df"
# f = filedir + "mcnuphase2_protonhit.df"
filedir = "/icarus/data/users/gputnam/DMCP2023G/EMB/"
# Input file read with pd.read_hdf in the next cell.
f = filedir + "mcnuphase2F_fakeEMB_highgain2_protonhit.df"
# Output location of the MPV table (same directory as the input).
datadir = filedir
# MC only: when True, dqdx_nom is built from truth_h_nelec / pitch and CALS["MC"] is 1/78.1.
TRUE_NELEC = False


plt.rcParams.update({'font.size': 14})
# NOTE: rebinds `colors`, shadowing the `matplotlib.colors` module imported above;
# from here on `colors` is the list of colour-cycle entries indexed by phi bin.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

# When True, hits are binned in thxw and dqdx is divided by a per-bin correction factor.
THXW_CORRECT = True
# When True, fid() additionally rejects every hit flagged tpcEE.
cut_TPCEE = False

In [None]:
# Load the hit table stored under key "phit" from the HDF5 file `f` chosen in the config cell.
data = pd.read_hdf(f, key="phit")

In [None]:
# Keep only hits with rr < 30 (rr is plotted later as "Residual Range [cm]").
# NOTE: rebinds `data`, so the unfiltered table is no longer available after this cell.
data = data[data.rr < 30].copy()
data

In [None]:
# Flatten tuple (multi-level) column names into single strings by joining their
# non-empty parts with "_"; names that are not tuples are kept unchanged.
data.columns = ["_".join([cc for cc in c if cc]) if isinstance(c, tuple) else c for c in data.columns]

In [None]:
# Combined TPC index: 2 * cryostat plus the integer half of the tpc number.
# The next cell interprets the values 0..3 as EE, EW, WE, WW.
data["itpc"] = data.cryostat*2 + data.tpc // 2

In [None]:
# One boolean flag column per TPC, derived from the combined index itpc.
data["tpcEE"] = data.itpc == 0
data["tpcEW"] = data.itpc == 1
data["tpcWE"] = data.itpc == 2
data["tpcWW"] = data.itpc == 3

In [None]:
def fid(data, iny=20, inz=50, inxcathode=15, inxanode=10, is_mc=None, cut_tpc_ee=None):
    """Return a boolean mask of the hits that lie inside the fiducial volume.

    Parameters
    ----------
    data : DataFrame-like
        Must provide columns ``x``, ``y`` and ``z``. ``tpcWW`` is also read when
        running on data (not MC) and ``tpcEE`` when the TPC EE veto is enabled.
    iny, inz : float
        Inset applied to both y faces / both z faces of the volume.
    inxcathode, inxanode : float
        Inset applied to the x boundary on the cathode side / anode side of each
        of the four TPC x ranges hard-coded below.
    is_mc : bool, optional
        When False, extra detector regions are vetoed. Defaults to the
        notebook-level ``isMC`` switch when left as None.
    cut_tpc_ee : bool, optional
        When True, all hits flagged ``tpcEE`` are rejected. Defaults to the
        notebook-level ``cut_TPCEE`` switch when left as None.

    Returns
    -------
    Boolean mask aligned with ``data`` (a pandas Series for DataFrame input).
    """
    # Fall back on the notebook configuration so existing calls `fid(data)` behave as before.
    if is_mc is None:
        is_mc = isMC
    if cut_tpc_ee is None:
        cut_tpc_ee = cut_TPCEE

    ymax = 134
    ymin = -180

    zmin = -900
    zmax = 900

    # per-TPC xmin, xmax
    xmin0 = -358.49
    xmax0 = -210.29
    xmin1 = -210.14
    xmax1 = -61.94

    xmin2 = 61.94
    xmax2 = 210.14
    xmin3 = 210.29
    xmax3 = 358.49

    # The outermost and innermost x boundaries get the anode inset, the two
    # boundaries near |x| ~ 210 get the cathode inset.
    in_x = ((data.x > xmin0 + inxanode) & (data.x < xmax0 - inxcathode)) |\
           ((data.x > xmin1 + inxcathode) & (data.x < xmax1 - inxanode)) |\
           ((data.x > xmin2 + inxanode) & (data.x < xmax2 - inxcathode)) |\
           ((data.x > xmin3 + inxcathode) & (data.x < xmax3 - inxanode))

    # Named `mask` (not `fid`) so the local does not shadow this function.
    mask = (data.y > ymin + iny) & (data.y < ymax - iny)\
        & (data.z < zmax - inz) & (data.z > zmin + inz)\
        & in_x

    if not is_mc:
        # Cut out some problem regions in the detector
        mask = mask & (np.abs(data.z) > 10)

        # TPC EW -- not actually that bad, so no veto is applied there.
        # (A former veto was: data.tpcEW & (data.z > 700) & (data.y < 0).)
        # The old code kept a `bad_tpcEW = False` placeholder and applied `~` to it,
        # which is bitwise inversion of a Python bool (-1) and is deprecated.

        # TPC WW
        bad_tpcWW = data.tpcWW & (data.y > 80) & (data.z > 0)

        mask = mask & ~bad_tpcWW

    if cut_tpc_ee: # is TPC EE borked?
        mask = mask & ~data.tpcEE

    return mask

In [None]:
# Store the fiducial-volume mask (default insets) as a boolean column for later selections.
data["fid"] = fid(data)

In [None]:
# Calibration constants, keyed by TPC name ("EE", "EW", "WE", "WW") for data
# or by "MC" for simulation. They are turned into a per-hit CAL column later.
CALS = {}

# Divide out the gain in the input files
if not isMC:
#     with open(datadir + "tpc_ratio_physicsdata.txt") as f:
#     with open(datadir + "tpc_ratio_Run1.txt") as f:
#         for line in f:
#             TPC, CAL = line.rstrip("\n").split(" ")
#             CALS[TPC] = float(CAL)
#     print(CALS)
    
    # Gain
#     gain_EE_old = 80.32
#     gain_EW_old = 79.82
#     gain_WE_old = 82.24
#     gain_WW_old = 81.68

#     CALS["EE"] = CALS["EE"] / gain_EE_old
#     CALS["EW"] = CALS["EW"] / gain_EW_old
#     CALS["WE"] = CALS["WE"] / gain_WE_old
#     CALS["WW"] = CALS["WW"] / gain_WW_old
    
    # With the file-based constants disabled above, every TPC gets a unit factor.
    CALS["EE"] = 1
    CALS["EW"] = 1
    CALS["WE"] = 1
    CALS["WW"] = 1

else:
    CALS["MC"] = 1.
    # When dqdx is built from truth electrons, scale by 1/78.1
    # (presumably the electrons-per-ADC gain -- TODO confirm).
    if TRUE_NELEC:
        CALS["MC"] = 1/78.1

# Display the constants in use.
CALS

In [None]:
# Nominal (uncalibrated) dQ/dx. Both branches copy the file's dqdx column; only MC with
# TRUE_NELEC replaces it by truth electrons per unit pitch.
# NOTE: a later cell overwrites data.dqdx, so re-running this cell after that one would
# copy the already-calibrated values into dqdx_nom.
if isMC:
#     data["dqdx_nom"] = data.integral / data.pitch
    data["dqdx_nom"] = data.dqdx
    if TRUE_NELEC:
        data["dqdx_nom"] = data.truth_h_nelec / data.pitch

else:
    data["dqdx_nom"] = data.dqdx
    
# phi: angle in degrees between the direction and the x axis, folded into [0, 90] by the abs.
data["phi"] = np.arccos(np.abs(data.dir_x))*180/np.pi
# thxw: |arctan(dir_x * pitch / 0.3)| in degrees.
# NOTE(review): 0.3 is presumably the wire spacing in cm -- confirm.
data["thxw"] = np.abs(np.arctan(data.dir_x*data.pitch/0.3)*180/np.pi)

In [None]:
# Hit time: offset t by 850 and scale by 0.4.
# NOTE(review): presumably converts readout ticks to microseconds after a trigger offset -- confirm.
data["thit"] = (data.t - 850)*0.4

In [None]:
# Per-hit calibration factor; dqdx_nom is multiplied by it to obtain the calibrated dqdx.
# Initialised as float so that fractional factors can be assigned without changing the
# column dtype (pandas warns when a float is written into an integer column).
data["CAL"] = 0.0
if not isMC:
    # One constant per TPC; hits whose itpc is not 0..3 keep CAL = 0.
    data.loc[data.itpc == 0, "CAL"] = CALS["EE"]
    data.loc[data.itpc == 1, "CAL"] = CALS["EW"]
    data.loc[data.itpc == 2, "CAL"] = CALS["WE"]
    data.loc[data.itpc == 3, "CAL"] = CALS["WW"]
    #data["CAL"] = 1
else:
    data["hsum"] = data.integral
    data["tsum"] = data.integral
    # MC factor grows exponentially with hit time (scale 3e3 in the units of thit).
    # NOTE(review): looks like an electron-lifetime correction -- confirm.
    # The former intermediate `data.CAL = CALS["MC"]` was overwritten here before
    # being read, so it has been dropped.
    data["CAL"] = CALS["MC"]*np.exp(data.thit/3e3)

In [None]:
# Calibrated dQ/dx: nominal value times the per-hit calibration factor (overwrites the dqdx column).
data.dqdx = data.dqdx_nom * data.CAL

In [None]:
# Whether to do angle correction
if THXW_CORRECT:
    # Bin edges in thxw (degrees); 8 edges define 7 bins.
    thxws = np.array([5, 10, 20, 30, 40, 50, 60, 70])
    if "thxw_bin" in data.columns:
        del data["thxw_bin"]
    # searchsorted - 1 gives the index of the bin containing each hit; values outside
    # the edges map to -1 or len(thxws)-1 and are removed by the selection below.
    data["thxw_bin"] = np.searchsorted(thxws, data.thxw.values) - 1
    data = data[(data.thxw_bin >= 0 ) & (data.thxw_bin < (len(thxws)-1))].copy()

    # One multiplicative factor per thxw bin.
    if isMC:
        correction = np.array([1.        , 0.99720325, 0.99156045, 0.98546707, 0.9793954 ,
               0.97420377, 0.97335549])
    else:
        correction = np.array([1.        , 0.99610562, 0.99034266, 0.98329818, 0.97787762,
               0.97512796, 0.97599466])
    assert len(correction) == len(thxws) - 1, "need exactly one correction factor per thxw bin"

    # Recompute from dqdx_nom * CAL instead of dividing dqdx in place: the result on a
    # fresh run is unchanged, but re-running this cell no longer applies the correction twice.
    data.dqdx = data.dqdx_nom * data.CAL / correction[data.thxw_bin.values]

In [None]:
# Sanity check: distinct correction factors actually applied.
# Requires the THXW_CORRECT branch to have run (it defines `correction` and thxw_bin).
np.unique(correction[data.thxw_bin])

In [None]:
# Sanity check: dqdx_nom divided by (dqdx / CAL); with the angle correction applied this
# ratio is the per-hit correction factor.
data.dqdx_nom / (data.dqdx / data.CAL)

In [None]:
# Residual-range bin edges, listed in DESCENDING order.
# NOTE: the first assignment is immediately overwritten by the second; only the second is used.
rrs = np.array([25, 20, 15, 12.5, 10, 8, 6, 5, 4, 3.5, 3, 2.5, 2, 1.75, 1.5, 1.25, 1]) #, 2, 1])
rrs = np.array([25, 20, 17, 14, 12, 10, 9, 8, 7, 6.25, 5.5, 4.75, 4, 3.5, 3, 2.5, 2])
# Because the edges descend, the lower edge of each bin is the NEXT entry.
rrlos = rrs[1:]
rrhis = rrs[:-1]
rrvals = (rrlos + rrhis) / 2
# Half bin widths; negative here (descending edges), so plots take np.abs(rrdiff).
rrdiff = np.diff(rrs) / 2

# phi bin edges in degrees, ascending.
phis = np.array([30, 40, 50, 60, 70, 80, 85])
phi_los = phis[:-1]
phi_his = phis[1:]
phi_vals = (phi_los + phi_his) / 2
phi_diff = np.diff(phis) / 2

# One legend label per phi bin.
philabels=["$%.0f^{\\circ} < \\phi < %.0f^{\\circ}$" % t for t in zip(phi_los, phi_his)]
# Vertical anchor of legends placed above the axes.
lgnd_y = 1.45

In [None]:
def gaus(x, A, sigma, mu):
    """Unnormalised Gaussian with peak height A, centre mu and width sigma, evaluated at x."""
    offset = x - mu
    exponent = -offset**2/(2*sigma**2)
    return A*np.exp(exponent)

def landau_gaus(X, A, eta, sigma, mpv):
    """Fit model: landau.landau.gauss_landau evaluated at X.

    The parameter order (A, eta, sigma, mpv) is the order used for curve_fit;
    the arguments are reordered to (mpv, eta, sigma, A) for the library call.
    sigma is capped at 100 * eta before the call.
    """
    capped_sigma = np.minimum(sigma, 100*eta)
    return landau.landau.gauss_landau(X, mpv, eta, capped_sigma, A)

def opt_to_mpv(popt, perr):
    """Convert fit results into a most-probable value and its uncertainty.

    popt : fitted parameters; popt[1] and popt[-1] are combined as
           popt[-1] - 0.22278 * popt[1].
    perr : covariance matrix of the fit (indexed like popt), despite the name.

    Returns (mpv, mpv_err), where mpv_err is the propagated uncertainty
    including the covariance between the two parameters.
    """
    shift = 0.22278
    variances = np.diag(perr)
    cross_terms = perr[-1,1] + perr[1,-1]

    mpv = popt[-1] - shift*popt[1]
    mpv_err = np.sqrt(variances[-1] + (shift)**2*variances[1] - shift*cross_terms)
    return mpv, mpv_err

In [None]:
# Number of hits passing the fiducial-volume selection.
data.fid.sum()

In [None]:
# Fit the dQ/dx distribution in every (residual range, phi) bin with landau_gaus and
# store the resulting MPV and its uncertainty.
# Outputs: dqdx_mean / dqdx_mean_err, arrays of shape (number of rr bins, number of phi bins).
dqdx_mean = []
dqdx_mean_err = []
var = data.dqdx

hist_bins = np.linspace(0, 5e3, 50)

ifig = 0
for i, (rrlo, rrhi) in enumerate(zip(rrlos, rrhis)):
    dqdx_mean.append([])
    dqdx_mean_err.append([])
    # One figure per residual-range bin, holding the histograms of all phi bins.
    plt.figure(ifig)
    for j, (plo, phi) in enumerate(zip(phi_los, phi_his)):
        whenplt = data.fid &\
            (data.rr > rrlo) & (data.rr < rrhi) &\
            (data.phi > plo) & (data.phi < phi)

        _,bins,_ = plt.hist(var[whenplt], bins=hist_bins,
                     label=philabels[j], color=colors[j],
                    histtype="step", density=False)
        # Histogram the same quantity that was drawn (previously `data.dqdx` was used
        # here while `var` was plotted; they are the same column).
        Nfit,bins = np.histogram(var[whenplt], bins=bins)
        centers = (bins[:-1] + bins[1:]) / 2

        # Fit only bins within 8 of the peak bin that hold more than one entry.
        where_fit = (np.abs(np.arange(Nfit.size) - np.argmax(Nfit)) < 8) & (Nfit > 1)

        # Initial guess in landau_gaus order: A, eta, sigma, mpv.
        p0 = [np.max(Nfit)*200, 25, 150, centers[np.argmax(Nfit)]]

        # curve_fit cannot constrain more parameters than data points: record NaN for
        # such (nearly) empty bins instead of aborting the whole loop.
        if np.count_nonzero(where_fit) < len(p0):
            print("Skipping fit for %.2f < rr < %.2f, %s: only %i usable histogram bins"
                  % (rrlo, rrhi, philabels[j], np.count_nonzero(where_fit)))
            dqdx_mean[-1].append(np.nan)
            dqdx_mean_err[-1].append(np.nan)
            continue

        # Poisson errors on the bin contents, floored at 1.
        popt, pcov = curve_fit(landau_gaus, centers[where_fit], Nfit[where_fit],
                               p0=p0,
                               sigma=np.maximum(np.sqrt(Nfit), 1)[where_fit], absolute_sigma=True, maxfev=100_000)
        mean, meanerr = opt_to_mpv(popt, pcov)
        plt.plot(centers, landau_gaus(centers, *popt), color=colors[j])

        dqdx_mean[-1].append(mean)
        dqdx_mean_err[-1].append(meanerr)

    ifig += 1
    plt.legend(ncol=2, loc='upper center', bbox_to_anchor=(0.5, lgnd_y))

    # Move the annotation to the left for low residual range.
    txt_x = 0.5 if rrlo > 5 else 0.05

    plt.text(txt_x, 0.5, "Protons\n%.1f < R.R. < %.1f cm\n0.3 < pitch < 1 cm\n$5^\\circ < \\theta_{xw} < 70^\\circ$" % (rrlo, rrhi),
             transform=plt.gca().transAxes)
    plt.xlabel("dQ/dx [ADC/cm]")
    plt.ylabel("Hits")

    #mean_list = ["%.1f < |$\\hat{x}$| < %.1f: %.0f ke$^-$/cm" % (dlo, dhi, m/1e3) for (dlo, dhi, m) in zip(dir_xlos, dir_xhis, dqdx_mean[-1])]
    #plt.text(txt_x, 0.05, "Mean dQ/dx:\n" + "\n".join(mean_list),
    #         transform=plt.gca().transAxes, fontsize=10)
    if dosave:
        # NOTE(review): the bin edges are truncated to integers in the file name, so
        # e.g. the 4.00-4.75 bin is saved as "4RR4"; finer binning could make two bins
        # share one file name. Left unchanged to keep existing output names.
        plt.savefig(savedir + "dqdx_%iRR%i_hits.pdf" % (int(rrlo), int(rrhi)),
                   bbox_inches="tight")

dqdx_mean = np.array(dqdx_mean)
dqdx_mean_err = np.array(dqdx_mean_err)


# Alias (not a copy) of the fitted MPVs before any further correction.
dqdx_mean_uncorr = dqdx_mean

In [None]:
# if isMC:
#     debias = np.array([0.97060827, 0.97295713, 0.97659747, 0.98638114, 0.99418072, 1.        ])
# else:
#     debias = np.array([0.97052393, 0.97282666, 0.97705924, 0.98331222, 0.99248394, 1.        ])
# dqdx_mean = dqdx_mean_uncorr / debias

In [None]:
# Use the fitted MPVs as they are: the thxw correction was already applied hit by hit,
# so no extra debiasing factor is divided out here. This rebinds the name to the same array.
dqdx_mean = dqdx_mean_uncorr # THXW Correction applied

In [None]:
# Display the residual-range half bin widths (negative because the edges are descending).
rrdiff

In [None]:
# Fitted dQ/dx MPV versus residual range, one series per phi bin.
for i, label in enumerate(philabels):
    # rrdiff is negative (descending edges), hence the abs for the horizontal error bars.
    plt.errorbar(rrvals, dqdx_mean[:, i], xerr=np.abs(rrdiff), yerr=dqdx_mean_err[:, i], 
                 linestyle="none", label=label)
# Use the shared legend anchor instead of a second hard-coded 1.45.
plt.legend(ncol=2, loc='upper center', bbox_to_anchor=(0.5, lgnd_y))
plt.xlabel("Residual Range [cm]")
# The MPVs come from fits to the histograms whose x axis is labelled "dQ/dx [ADC/cm]",
# so the same unit is shown here (the label previously read e^-/cm).
plt.ylabel("dQ/dx [ADC/cm]")
if dosave:
    plt.savefig(savedir + "dqdx_RR_hits.pdf",
               bbox_inches="tight")

In [None]:
# MPV of every phi bin relative to the last phi bin, versus residual range.
# The error bars scale only the numerator's uncertainty by the reference MPV.
reference_mpv = dqdx_mean[:, -1]
for i, label in enumerate(philabels):
    ratio = dqdx_mean[:, i] / reference_mpv
    ratio_err = dqdx_mean_err[:, i]/ reference_mpv
    plt.errorbar(rrvals, ratio, xerr=np.abs(rrdiff), yerr=ratio_err,
                 linestyle="none", label=label)
plt.legend(ncol=2, loc='upper center', bbox_to_anchor=(0.5, lgnd_y))
plt.xlabel("Residual Range [cm]")
plt.ylabel("dQ/dx Angular Ratio")
if dosave:
    ratio_plot_path = savedir + "dqdxratio_RR_hits.pdf"
    plt.savefig(ratio_plot_path, bbox_inches="tight")

In [None]:
# Empty summary table that the next cell fills with one row per (rr, phi) bin.
# NOTE: the rows appended there also carry "tlo" and "thi", which are not listed here.
mpv_df = pd.DataFrame(
    columns = [
        "rr",
        "rrlo",
        "rrhi",
        "pitch",
        "pitchlo",
        "pitchhi",
        "mpv",
        "mpv_err",
        "phi",
        "philo",
        "phihi",
        "tdrift"
    ]
)

In [None]:
# Build the MPV summary table: one row per (residual range, phi) bin.
# Rows are collected in a list and turned into a DataFrame once, instead of growing
# mpv_df with pd.concat inside the loop. Besides being faster, this makes the cell
# idempotent: re-running it rebuilds the table instead of appending duplicate rows.
mpv_records = []
for i, (rrlo, rrhi) in enumerate(zip(rrlos, rrhis)):
    # Selection shared by all phi bins of this residual-range bin.
    in_rr_bin = data.fid & (data.rr > rrlo) & (data.rr < rrhi)
    for j, (plo, phi) in enumerate(zip(phi_los, phi_his)):
        when = in_rr_bin & (data.phi > plo) & (data.phi < phi)

        mpv = dqdx_mean[i, j]
        mpv_err = dqdx_mean_err[i, j]
        # Mean pitch of the selected hits, with a +/- one standard deviation band.
        selected_pitch = data.pitch[when]
        pitch = np.mean(selected_pitch)
        pitch_std = np.std(selected_pitch)

        mpv_records.append({
            "rr": (rrlo + rrhi) / 2.,
            "rrlo": rrlo,
            "rrhi": rrhi,
            "pitch": pitch,
            "pitchlo": pitch - pitch_std,
            "pitchhi": pitch + pitch_std,
            "mpv": mpv,
            "mpv_err": mpv_err,
            "phi": (plo + phi) / 2,
            "philo": plo,
            "phihi": phi,
            # Fixed values: this notebook does not bin in drift time.
            "tdrift": 500,
            "tlo": 0,
            "thi": 1000,
        })

# Column order follows the record keys, matching the order the row-by-row concat produced.
mpv_df = pd.DataFrame(mpv_records)
        

In [None]:
# Convert object-typed columns to concrete dtypes where pandas can infer them.
mpv_df = mpv_df.infer_objects()

In [None]:
# Write the MPV table next to the input file, under HDF key "df".
# NOTE(review): a later cell displays a path ending in "fakeEMB-mpvs.df" (without the "2")
# -- confirm which file name is intended.
if dosave:
     mpv_df.to_hdf(datadir + "mcnuphase2F-fakeEMB2-mpvs.df", key="df")

In [None]:
# Display the MPV summary table.
mpv_df

In [None]:
# Display an output path.
# NOTE(review): this name lacks the "2" in "fakeEMB2" used by the cell that writes the
# table, so it is not the file that was saved -- confirm.
datadir + "mcnuphase2F-fakeEMB-mpvs.df"

In [None]:
# Display the output directory in use.
datadir