In [None]:
#%matplotlib notebook
%matplotlib inline
#%config InlineBackend.figure_formats=['svg']
#%config InlineBackend.figure_formats=['pdf']

import math

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import scipy.stats
from matplotlib.colors import LogNorm
from matplotlib.ticker import FuncFormatter

matplotlib.rcParams['figure.figsize'] = (12, 9)

#import ipywidgets
#from ipywidgets import interact

#import sys
#sys.path.append("/Users/jdecock/git/pub/jdhp-sap/sap-cta-data-pipeline/utils/")
#import common_functions as common

## Delta psi

In [None]:
# --- Configuration -----------------------------------------------------------

# One CSV per data set: "_c" is plotted as "crop", "_nc" as "no crop" further down.
CSV_FILE_PATH_C = "../xps/2017_05_15_cropped_fits_files/2017_05_15_cropped_fits_files.csv"
CSV_FILE_PATH_NC = "../xps/2017_05_12/2017_05_12.csv"

# Values of the "Type" column selecting the rows of each cleaning method.
#WAVELET_LABEL = "WT-K-k-C1-m3-n4-s3"
WAVELET_LABEL = "WT-K-k-C1-m3-n4-s2-2-3-3"

TAILCUT_LABEL = "Tailcut-5-10"

PART = 0         # 0 for gamma, 1 for protons
BORDER = 0       # 0 all images, 1 images on border only, 2 images not on border only

# npe thresholds: drawn as horizontal lines on the scatter plots and used to
# split the histograms into a "faint" and a "bright" range.
FAINT_BRIGHT_BORDER = 100
LOW_CUT = 50
HIGH_CUT = 2000


# --- Helpers -----------------------------------------------------------------

def _select_rows(full_df, type_label, part):
    """Return the rows of `full_df` with `Type == type_label` and `Part == part`."""
    # A single combined mask gives the same rows as df[mask_a][mask_b] but
    # avoids pandas' "Boolean Series key will be reindexed" UserWarning.
    return full_df[(full_df.Type == type_label) & (full_df.Part == part)]


def _abs_delta_psi(psi_ref, psi_est):
    """Return |fmod((psi_ref - psi_est) * 180 / pi, 90)|.

    The 180/pi factor converts radians to degrees (assumes hPsi is stored in
    radians -- TODO confirm).
    NOTE(review): fmod(., 90) maps a 100 degree difference to 10 degrees; if
    psi describes an undirected axis the separation would be 80 degrees.
    Confirm that this folding is the intended one.
    """
    return np.abs(np.fmod((psi_ref - psi_est) * 180. / np.pi, 90.))


# --- Load --------------------------------------------------------------------

full_df_c = pd.read_csv(CSV_FILE_PATH_C)
full_df_nc = pd.read_csv(CSV_FILE_PATH_NC)

ref_c = _select_rows(full_df_c, 'Ref', PART)
ref_nc = _select_rows(full_df_nc, 'Ref', PART)

tc_c = _select_rows(full_df_c, TAILCUT_LABEL, PART)
tc_nc = _select_rows(full_df_nc, TAILCUT_LABEL, PART)

wt_c = _select_rows(full_df_c, WAVELET_LABEL, PART)
wt_nc = _select_rows(full_df_nc, WAVELET_LABEL, PART)

# --- Merge -------------------------------------------------------------------
# One row per image "Id": Tailcut columns get the "_tc" suffix, wavelet columns
# the "_wt" suffix, and the reference columns keep their plain names.
# Outer joins keep images missing from one of the inputs (their columns are NaN).
tc_wt_c = pd.merge(tc_c, wt_c, on="Id", how="outer", suffixes=('_tc', '_wt'))  #.dropna(how='any')
df_c = pd.merge(tc_wt_c, ref_c, on="Id", how="outer")
tc_wt_nc = pd.merge(tc_nc, wt_nc, on="Id", how="outer", suffixes=('_tc', '_wt'))  #.dropna(how='any')
df_nc = pd.merge(tc_wt_nc, ref_nc, on="Id", how="outer")

# --- Delta psi ---------------------------------------------------------------
# Absolute difference between the reference hPsi and the one of each method.
df_c["delta_tc"] = _abs_delta_psi(df_c['hPsi'], df_c['hPsi_tc'])
df_c["delta_wt"] = _abs_delta_psi(df_c['hPsi'], df_c['hPsi_wt'])
df_nc["delta_tc"] = _abs_delta_psi(df_nc['hPsi'], df_nc['hPsi_tc'])
df_nc["delta_wt"] = _abs_delta_psi(df_nc['hPsi'], df_nc['hPsi_wt'])

# --- Optional selection on the reference "border" column ---------------------
# NOTE(review): the labels below are the original ones; confirm against the
# producer of the CSV that border == 0 really means "not contained".
if BORDER == 1:
    df_c = df_c[df_c.border == 0]  # NOT CONTAINED
    df_nc = df_nc[df_nc.border == 0]  # NOT CONTAINED
elif BORDER == 2:
    df_c = df_c[df_c.border > 0]   # CONTAINED
    df_nc = df_nc[df_nc.border > 0]   # CONTAINED

In [None]:
#df_c[df.peSum > 50][df.peSum < 55].loc[:,['hPsi', 'hPsi_tc', 'hPsi_wt']]

In [None]:
#df_c["delta_tc"].isnull().sum()

In [None]:
# %load /Users/jdecock/git/pub/jdhp/snippets/python/matplotlib/hist2d_scatter_plot_logscale_xy.py

# Scatter plot of delta psi (x) against log10(npe) (y) for the Tailcut cleaning,
# "crop" data on the left and "no crop" data on the right. Each point is
# coloured with the number of points falling in its 2D histogram bin.

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))

x1 = df_c['delta_tc']
y1 = np.log10(df_c['peSum_tc'])
x2 = df_nc['delta_tc']
y2 = np.log10(df_nc['peSum_tc'])

xbins1 = np.linspace(0, 90, 90)
ybins1 = np.linspace(0, 6, 150)

xbins2 = np.linspace(0, 90, 90)
ybins2 = np.linspace(0, 6, 150)

hist1, xedges1, yedges1 = np.histogram2d(x1, y1, bins=(xbins1, ybins1))
hist2, xedges2, yedges2 = np.histogram2d(x2, y2, bins=(xbins2, ybins2))

# See http://stackoverflow.com/questions/27156381/python-creating-a-2d-histogram-from-a-numpy-matrix
#
# np.digitize returns i when edges[i-1] <= v < edges[i], i.e. a 1-based bin
# number, whereas the histogram array is 0-based: subtract 1 before looking up
# the count, otherwise every point takes the colour of the neighbouring bin.
# Out-of-range (and NaN) values are clipped onto the first/last bin.

xidx1 = np.clip(np.digitize(x1, xedges1) - 1, 0, hist1.shape[0]-1)
yidx1 = np.clip(np.digitize(y1, yedges1) - 1, 0, hist1.shape[1]-1)
c1 = hist1[xidx1, yidx1]

xidx2 = np.clip(np.digitize(x2, xedges2) - 1, 0, hist2.shape[0]-1)
yidx2 = np.clip(np.digitize(y2, yedges2) - 1, 0, hist2.shape[1]-1)
c2 = hist2[xidx2, yidx2]

scatter_style = dict(s=5,
                     marker='o',
                     #cmap='gnuplot2',
                     linewidth=0,
                     alpha=1)

sc1 = ax1.scatter(x1, y1, c=c1, **scatter_style)
sc2 = ax2.scatter(x2, y2, c=c2, **scatter_style)

fig.colorbar(sc1, ax=ax1)
fig.colorbar(sc2, ax=ax2)


def func_formatter(x, pos):
    """Format the tick value n as "10^n" (the y axis holds log10 values)."""
    # '{:g}' keeps non-integer ticks exact (2.5 -> 10^2.5) instead of
    # truncating them to the integer below as int(x) would.
    return r'$10^{{{:g}}}$'.format(x)


for ax in (ax1, ax2):
    # Use "10^n" instead "n" as ticks label
    ax.yaxis.set_major_formatter(FuncFormatter(func_formatter))

    # npe cuts (dashed) and faint/bright border (dotted)
    ax.axhline(y=np.log10(LOW_CUT), linewidth=2, linestyle='--', color='red', alpha=0.8)
    ax.axhline(y=np.log10(HIGH_CUT), linewidth=2, linestyle='--', color='red', alpha=0.8)
    ax.axhline(y=np.log10(FAINT_BRIGHT_BORDER), linewidth=2, linestyle=':', color='red', alpha=0.8)

    ax.set_xlabel(r"$\Delta\psi$", fontsize=20)
    ax.set_ylabel("npe", fontsize=20)

    #ax.legend(prop={'size': 11}, loc='best', fancybox=True, framealpha=0.5)

    ax.set_xlim(0, 90)
    ax.set_ylim(0, 5)

ax1.set_title(TAILCUT_LABEL + " (crop)", fontsize=20)
ax2.set_title(TAILCUT_LABEL + " (no crop)", fontsize=20)

title = r"$\Delta\psi$ [{}]".format("Gamma" if PART == 0 else "Proton")
fig.suptitle(title, fontsize=20)

In [None]:
# %load /Users/jdecock/git/pub/jdhp/snippets/python/matplotlib/hist2d_scatter_plot_logscale_xy.py

# Scatter plot of delta psi (x) against log10(npe) (y) for the wavelet cleaning,
# "crop" data on the left and "no crop" data on the right. Each point is
# coloured with the number of points falling in its 2D histogram bin.

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 9))

x1 = df_c['delta_wt']
y1 = np.log10(df_c['peSum_wt'])
x2 = df_nc['delta_wt']
y2 = np.log10(df_nc['peSum_wt'])

xbins1 = np.linspace(0, 90, 90)
ybins1 = np.linspace(0, 6, 150)

xbins2 = np.linspace(0, 90, 90)
ybins2 = np.linspace(0, 6, 150)

hist1, xedges1, yedges1 = np.histogram2d(x1, y1, bins=(xbins1, ybins1))
hist2, xedges2, yedges2 = np.histogram2d(x2, y2, bins=(xbins2, ybins2))

# See http://stackoverflow.com/questions/27156381/python-creating-a-2d-histogram-from-a-numpy-matrix
#
# np.digitize returns i when edges[i-1] <= v < edges[i], i.e. a 1-based bin
# number, whereas the histogram array is 0-based: subtract 1 before looking up
# the count, otherwise every point takes the colour of the neighbouring bin.
# Out-of-range (and NaN) values are clipped onto the first/last bin.

xidx1 = np.clip(np.digitize(x1, xedges1) - 1, 0, hist1.shape[0]-1)
yidx1 = np.clip(np.digitize(y1, yedges1) - 1, 0, hist1.shape[1]-1)
c1 = hist1[xidx1, yidx1]

xidx2 = np.clip(np.digitize(x2, xedges2) - 1, 0, hist2.shape[0]-1)
yidx2 = np.clip(np.digitize(y2, yedges2) - 1, 0, hist2.shape[1]-1)
c2 = hist2[xidx2, yidx2]

scatter_style = dict(s=5,
                     marker='o',
                     #cmap='gnuplot2',
                     linewidth=0,
                     alpha=1)

sc1 = ax1.scatter(x1, y1, c=c1, **scatter_style)
sc2 = ax2.scatter(x2, y2, c=c2, **scatter_style)

fig.colorbar(sc1, ax=ax1)
fig.colorbar(sc2, ax=ax2)


def func_formatter(x, pos):
    """Format the tick value n as "10^n" (the y axis holds log10 values)."""
    # '{:g}' keeps non-integer ticks exact (2.5 -> 10^2.5) instead of
    # truncating them to the integer below as int(x) would.
    return r'$10^{{{:g}}}$'.format(x)


for ax in (ax1, ax2):
    # Use "10^n" instead "n" as ticks label
    ax.yaxis.set_major_formatter(FuncFormatter(func_formatter))

    # npe cuts (dashed) and faint/bright border (dotted)
    ax.axhline(y=np.log10(LOW_CUT), linewidth=2, linestyle='--', color='red', alpha=0.8)
    ax.axhline(y=np.log10(HIGH_CUT), linewidth=2, linestyle='--', color='red', alpha=0.8)
    ax.axhline(y=np.log10(FAINT_BRIGHT_BORDER), linewidth=2, linestyle=':', color='red', alpha=0.8)

    ax.set_xlabel(r"$\Delta\psi$", fontsize=20)
    ax.set_ylabel("npe", fontsize=20)

    #ax.legend(prop={'size': 11}, loc='best', fancybox=True, framealpha=0.5)

    ax.set_xlim(0, 90)
    ax.set_ylim(0, 5)

ax1.set_title(WAVELET_LABEL + " (crop)", fontsize=20)
ax2.set_title(WAVELET_LABEL + " (no crop)", fontsize=20)

title = r"$\Delta\psi$ [{}]".format("Gamma" if PART == 0 else "Proton")
fig.suptitle(title, fontsize=20)

## Delta psi ratio

In [None]:
# Plot delta psi #####################

def plot_ratio(ax, res_tuple_c, res_tuple_nc):
    """Plot the bin-by-bin ratio "no crop" counts / "crop" counts on `ax`.

    Parameters
    ----------
    ax : matplotlib axes
        Axes receiving the ratio points, their error bars and a reference
        line at y=1.
    res_tuple_c : tuple
        ``(counts, bin_edges, patches)`` of the "crop" histogram (the layout
        returned by ``Axes.hist``). Its bin edges define the x positions.
    res_tuple_nc : tuple
        Same layout for the "no crop" histogram. Only its counts are used; its
        bin edges are assumed to be identical to those of `res_tuple_c`.

    Notes
    -----
    Bins where the "crop" histogram is empty have no defined ratio: their
    ratio is set to NaN (so no point is drawn) and their error to 0.
    """
    val_of_bins_c, bins_c, _patches_c = res_tuple_c
    val_of_bins_nc, _bins_nc, _patches_nc = res_tuple_nc

    counts_c = np.asarray(val_of_bins_c, dtype=float)
    counts_nc = np.asarray(val_of_bins_nc, dtype=float)
    edges_of_bins = np.asarray(bins_c)

    has_counts = counts_c != 0

    # `where=` without `out=` leaves the masked entries uninitialised, so the
    # output arrays are pre-filled explicitly.
    ratio = np.full(counts_c.shape, np.nan)
    np.divide(counts_nc, counts_c, out=ratio, where=has_counts)

    # Compute error on ratio (null if cannot be computed)
    # This is wrong as it's made for Gaussian distributions and here we have Poisson distribution
    error = np.zeros(counts_c.shape)
    np.divide(counts_nc * np.sqrt(counts_c) + counts_c * np.sqrt(counts_nc),
              np.power(counts_c, 2),
              out=error,
              where=has_counts)

    # The plotted quantity is nc / c, hence "No crop/Crop".
    ax.set_ylabel('Ratio (No crop/Crop)', fontsize=20)
    ax.axhline(y=1, linewidth=2, linestyle='--', color='gray', alpha=0.5)

    bincenter = 0.5 * (edges_of_bins[1:] + edges_of_bins[:-1])
    ax.errorbar(bincenter, ratio, yerr=error, fmt='o', color='k', elinewidth=3, capsize=4, capthick=3, linewidth=6)
    ax.plot(bincenter, ratio, 'ok', linewidth=6)

In [None]:
################################

# Tailcut: delta psi histograms (top row) and their bin-by-bin ratio (bottom
# row) for the faint (left) and bright (right) npe ranges.

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(16, 9))

#BINS = 15                   # <- The one used for the postdoc presentation
# Bin edges 0, 5, ..., 90. The stop value is exclusive in np.arange, so it has
# to be 90 + step for the last bin (85-90) to exist.
BINS = np.arange(0, 90 + 5, 5)

################################

NPE_MIN, NPE_MAX = LOW_CUT, FAINT_BRIGHT_BORDER

# Rows with NPE_MIN < peSum_tc <= NPE_MAX (one combined mask instead of chained
# boolean indexing, which triggers a pandas reindexing UserWarning).
df1 = df_c.loc[(df_c.peSum_tc > NPE_MIN) & (df_c.peSum_tc <= NPE_MAX), 'delta_tc']
df2 = df_nc.loc[(df_nc.peSum_tc > NPE_MIN) & (df_nc.peSum_tc <= NPE_MAX), 'delta_tc']

res_tuple_tc_c1 = ax1.hist(df1,
                         label="TC crop num={}".format(len(df1)),
                         bins=BINS,
                         linewidth=2, alpha=.5, color='blue', histtype="step")

res_tuple_tc_nc1 = ax1.hist(df2,
                         label="TC full num={}".format(len(df2)),
                         bins=BINS,
                         linewidth=2, alpha=.5, color='red',  histtype="step")

ax1.set_title("{} to {} NPE".format(NPE_MIN, NPE_MAX), fontsize=20)
ax1.set_xlabel("delta_psi", fontsize=20)
ax1.set_ylabel("counts", fontsize=20)
ax1.legend(prop={'size': 18}, loc='best', fancybox=True, framealpha=0.5)

plot_ratio(ax3, res_tuple_tc_c1, res_tuple_tc_nc1)

################################

NPE_MIN, NPE_MAX = FAINT_BRIGHT_BORDER, HIGH_CUT

df1 = df_c.loc[(df_c.peSum_tc > NPE_MIN) & (df_c.peSum_tc <= NPE_MAX), 'delta_tc']
df2 = df_nc.loc[(df_nc.peSum_tc > NPE_MIN) & (df_nc.peSum_tc <= NPE_MAX), 'delta_tc']

res_tuple_tc_c2 = ax2.hist(df1,
                         label="TC crop num={}".format(len(df1)),
                         bins=BINS,
                         linewidth=2, alpha=.5, color='blue', histtype="step")

res_tuple_tc_nc2 = ax2.hist(df2,
                         label="TC full num={}".format(len(df2)),
                         bins=BINS,
                         linewidth=2, alpha=.5, color='red',  histtype="step")

ax2.set_title("{} to {} NPE".format(NPE_MIN, NPE_MAX), fontsize=20)
ax2.set_xlabel("delta_psi", fontsize=20)
ax2.set_ylabel("counts", fontsize=20)
ax2.legend(prop={'size': 18}, loc='best', fancybox=True, framealpha=0.5)

plot_ratio(ax4, res_tuple_tc_c2, res_tuple_tc_nc2)

#ax.set_yscale('log')

suptitle_str = "{} / {} [{}]".format(TAILCUT_LABEL + " (crop)", TAILCUT_LABEL + " (full)", "Gamma" if PART == 0 else "Proton")

if BORDER == 1:
    suptitle_str += " (NOT CONTAINED)"
elif BORDER == 2:
    suptitle_str += " (CONTAINED)"

fig.suptitle(suptitle_str, fontsize=20)

# Hand-tuned y ranges for the current data set.
ax1.set_ylim(top=1200)
ax2.set_ylim(top=5500)
ax3.set_ylim(top=2, bottom=0.2)
ax4.set_ylim(top=2.4, bottom=0.3)

In [None]:
################################

# Wavelet: delta psi histograms (top row) and their bin-by-bin ratio (bottom
# row) for the faint (left) and bright (right) npe ranges.

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(16, 9))

#BINS = 15                   # <- The one used for the postdoc presentation
# Bin edges 0, 5, ..., 90. The stop value is exclusive in np.arange, so it has
# to be 90 + step for the last bin (85-90) to exist.
BINS = np.arange(0, 90 + 5, 5)

################################

NPE_MIN, NPE_MAX = LOW_CUT, FAINT_BRIGHT_BORDER

# Rows with NPE_MIN < peSum_wt <= NPE_MAX (one combined mask instead of chained
# boolean indexing, which triggers a pandas reindexing UserWarning).
df1 = df_c.loc[(df_c.peSum_wt > NPE_MIN) & (df_c.peSum_wt <= NPE_MAX), 'delta_wt']
df2 = df_nc.loc[(df_nc.peSum_wt > NPE_MIN) & (df_nc.peSum_wt <= NPE_MAX), 'delta_wt']

res_tuple_wt_c1 = ax1.hist(df1,
                         label="WT crop num={}".format(len(df1)),
                         bins=BINS,
                         linewidth=2, alpha=.5, color='blue', histtype="step")

res_tuple_wt_nc1 = ax1.hist(df2,
                         label="WT full num={}".format(len(df2)),
                         bins=BINS,
                         linewidth=2, alpha=.5, color='red',  histtype="step")

ax1.set_title("{} to {} NPE".format(NPE_MIN, NPE_MAX), fontsize=20)
ax1.set_xlabel("delta_psi", fontsize=20)
ax1.set_ylabel("counts", fontsize=20)
ax1.legend(prop={'size': 18}, loc='best', fancybox=True, framealpha=0.5)

plot_ratio(ax3, res_tuple_wt_c1, res_tuple_wt_nc1)

################################

NPE_MIN, NPE_MAX = FAINT_BRIGHT_BORDER, HIGH_CUT

df1 = df_c.loc[(df_c.peSum_wt > NPE_MIN) & (df_c.peSum_wt <= NPE_MAX), 'delta_wt']
df2 = df_nc.loc[(df_nc.peSum_wt > NPE_MIN) & (df_nc.peSum_wt <= NPE_MAX), 'delta_wt']

res_tuple_wt_c2 = ax2.hist(df1,
                         label="WT crop num={}".format(len(df1)),
                         bins=BINS,
                         linewidth=2, alpha=.5, color='blue', histtype="step")

res_tuple_wt_nc2 = ax2.hist(df2,
                         label="WT full num={}".format(len(df2)),
                         bins=BINS,
                         linewidth=2, alpha=.5, color='red',  histtype="step")

ax2.set_title("{} to {} NPE".format(NPE_MIN, NPE_MAX), fontsize=20)
ax2.set_xlabel("delta_psi", fontsize=20)
ax2.set_ylabel("counts", fontsize=20)
ax2.legend(prop={'size': 18}, loc='best', fancybox=True, framealpha=0.5)

plot_ratio(ax4, res_tuple_wt_c2, res_tuple_wt_nc2)

#ax.set_yscale('log')

suptitle_str = "{} / {} [{}]".format(WAVELET_LABEL + " (crop)", WAVELET_LABEL + " (full)", "Gamma" if PART == 0 else "Proton")

if BORDER == 1:
    suptitle_str += " (NOT CONTAINED)"
elif BORDER == 2:
    suptitle_str += " (CONTAINED)"

fig.suptitle(suptitle_str, fontsize=20)

# Hand-tuned y ranges for the current data set.
ax1.set_ylim(top=1700)
ax2.set_ylim(top=5500)
ax3.set_ylim(top=2, bottom=0.2)
ax4.set_ylim(top=2.4, bottom=0.3)