### This notebook makes a couple of plots of p(z) for individual objects

The usual suspects

In [None]:
import tables_io
import numpy as np
import matplotlib.pyplot as plt
import qp

Change this to be the root of the current PZ working area

In [None]:
# Root directory of the PZ working area; the data path read below is built from it.
pz_dir = '/global/cfs/cdirs/lsst/groups/PZ/DP1'

Get the data. Change `algo` to switch between the outputs of different algorithms.

In [None]:
# `algo` is interpolated into the file name below, so it selects which
# algorithm's output file is read.
algo = 'tpz'
data = qp.read(f'{pz_dir}/data/sandbox_data/5063/{algo}_5063_gold_baseline_v0.hdf5')
# Alternative inputs, currently disabled. NOTE: `cat` is only defined if one of
# the tables_io.read lines is uncommented.
#data = qp.read(f'{pz_dir}/projects/dp1/data/gold_baseline/output_estimate_{algo}.hdf5')
#cat = tables_io.read(f'{pz_dir}/data/dp1_v29.0.0/5063/object.parquet')
#cat = tables_io.read(f'{pz_dir}/data/test/dp1_matched_test.hdf5')

In [None]:
#cat['objectId'][23]

Simple function to print some values for a particular object

In [None]:
def print_obj(cat, idx, col_names=None):
    """Print ``<column name> <value>`` for one row of a catalog, one column per line.

    Parameters
    ----------
    cat
        Catalog indexable as ``cat[name][idx]``.  It must also expose
        ``column_names`` when ``col_names`` is not given.
    idx
        Row index of the object to print.
    col_names : list of str, optional
        Columns to print.  Defaults to every name in ``cat.column_names``.
    """
    selected = list(cat.column_names) if col_names is None else col_names
    for name in selected:
        value = cat[name][idx]
        print(name, value)

In [None]:
# Columns to inspect: PSF flux in each of the six bands, extendedness in griz,
# and the object identifier.
col_names = (
    [f"{band}_psfFlux" for band in 'ugrizy']
    + [f"{band}_extendedness" for band in 'griz']
    + ['objectId']
)
#print_obj(cat, 23, col_names)

Function to make a "nice_plot"

In [None]:
def nice_plot(subdata, grid=None):
    """Plot the CDF and peak-normalized PDF of one p(z) with its summary statistics.

    Vertical gray lines mark the median (dashed), the 16% / 84% quantiles
    (dash-dot) and the 2.5% / 97.5% quantiles (dotted).  The mode, mean,
    standard deviation and the two quantile ranges are written on the axes.

    Parameters
    ----------
    subdata
        Object providing ``cdf(grid)``, ``pdf(grid)`` and an ``ancil`` mapping
        with the keys ``z_mode``, ``z_mean``, ``z_median``, ``z_std``,
        ``z_q2p5``, ``z_q16``, ``z_q84`` and ``z_97p5``; element ``[0]`` of
        each ancil entry is used.  Presumably a single-object qp ensemble such
        as ``data[idx]`` -- TODO confirm.
    grid : array-like, optional
        Redshift values at which the CDF and PDF are evaluated.  Defaults to
        301 evenly spaced points on [0, 3].

    Returns
    -------
    matplotlib.figure.Figure
        The newly created figure.
    """
    if grid is None:
        grid = np.linspace(0., 3., 301)

    # Pull every statistic out once, as a scalar, so the vertical lines and
    # the text annotations are guaranteed to show the same numbers.
    ancil = subdata.ancil
    zmode = ancil['z_mode'][0]
    zmean = ancil['z_mean'][0]
    zmedian = ancil['z_median'][0]
    zstd = ancil['z_std'][0]
    zq16 = ancil['z_q16'][0]
    zq84 = ancil['z_q84'][0]
    zq025 = ancil['z_q2p5'][0]
    # NOTE(review): this key has no 'q' prefix, unlike the other quantile
    # keys; kept exactly as in the original -- confirm against the file schema.
    zq975 = ancil['z_97p5'][0]

    cdfs = subdata.cdf(grid)
    pdfs = subdata.pdf(grid)
    # Scale the PDF to a peak of 1 so it shares the y-range of the CDF; an
    # all-zero PDF is plotted as-is rather than divided by zero.
    peak = pdfs.max()
    scaled_pdfs = pdfs / peak if peak > 0 else pdfs

    fig, ax = plt.subplots()
    ax.plot(grid, cdfs, label='cdf')
    ax.plot(grid, scaled_pdfs, label='pdf')

    quantile_lines = (
        (zq025, 'dotted'),
        (zq16, 'dashdot'),
        (zmedian, 'dashed'),
        (zq84, 'dashdot'),
        (zq975, 'dotted'),
    )
    for z_value, line_style in quantile_lines:
        ax.axvline(x=z_value, linestyle=line_style, color='gray')

    ax.set_xlabel('z')
    ax.legend()

    notes = (
        f"mode: {zmode:0.3f}",
        f"mean: {zmean:0.3f}",
        f"std: {zstd:0.3f}",
        f"q16 -- q84: {zq16:0.3f} -- {zq84:0.3f}",
        f"q2.5 -- q97.5: {zq025:0.3f} -- {zq975:0.3f}",
    )
    # Place the text in axes-fraction coordinates: with data coordinates the
    # annotations are clipped away whenever the plotted grid does not contain
    # the hard-coded anchor point (z = 0.4).
    for row, text in enumerate(notes):
        ax.annotate(text, xy=(0.4, 0.8 - 0.05 * row), xycoords='axes fraction')
    return fig

Find a good p(z) with a narrow pdf

In [None]:
# Index (along axis 0) of the smallest z_std among the first 20000 objects.
np.argmin(data[0:20000].ancil['z_std'], axis=0)

Plot it

In [None]:
# NOTE(review): 11556 is hard-coded; presumably it is the index found by the
# argmin search above for the current `algo` -- re-check it when switching inputs.
# The grid is narrowed to 0.1 <= z <= 0.6 (101 points) instead of the default.
fig = nice_plot(data[11556], grid=np.linspace(0.1, 0.6, 101))

In [None]:
# Display the ancillary table of object 534, the object plotted further down.
data[534].ancil

In [None]:
# Relative path: the file is written into the current working directory.
fig.savefig('pdf.pdf')

Grab an arbitrary, not-so-great pdf

In [None]:
# Uses the default grid; rebinds `fig`, so the previous figure must already be saved.
fig = nice_plot(data[534])

In [None]:
# Relative path: the file is written into the current working directory.
fig.savefig('bad_pdf.pdf')

Some checks on the mean and standard deviation; the Mixture Model in qp seems to have issues.

In [None]:
# 301 evenly spaced points on [0, 3], i.e. a spacing of 0.01.
grid = np.linspace(0., 3., 301)

In [None]:
# Alias, not a copy: `qp_dstn` refers to the same object as `data`.
qp_dstn = data

In [None]:
def my_mean_std(qp_dstn, grid):
    """Estimate per-object mean and standard deviation from PDFs sampled on a grid.

    Each PDF value is used as the weight of its grid point; the weights are
    normalized by their sum along the grid axis, so no bin width enters the
    calculation.

    Parameters
    ----------
    qp_dstn
        Object whose ``pdf(grid)`` returns a 2-D array with the grid along axis 1.
    grid : array-like
        1-D grid of values at which the PDFs are evaluated.

    Returns
    -------
    means, stds : numpy.ndarray
        One weighted mean and one weighted standard deviation per row of
        ``qp_dstn.pdf(grid)``.
    """
    weights = qp_dstn.pdf(grid)
    totals = weights.sum(axis=1)
    means = (weights * grid).sum(axis=1) / totals
    # Row i holds the offset of every grid point from the mean of object i.
    offsets = np.asarray(grid)[np.newaxis, :] - means[:, np.newaxis]
    variances = (offsets * offsets * weights).sum(axis=1) / totals
    return means, np.sqrt(variances)


In [None]:
# Grid-based mean and standard deviation for every object in qp_dstn.
means, stds = my_mean_std(qp_dstn, grid)

In [None]:
# qp_dstn.mode(grid)

In [None]:
# qp's own mean, for comparison with `means` from my_mean_std.
qp_dstn.mean()

In [None]:
# qp's own standard deviation, for comparison with `stds` from my_mean_std.
qp_dstn.std()