### This notebook shows how making a cut on the standard deviation of p(z) helps clean up the outlier rate

The usual imports

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import qp
import tables_io
from rail.plotting.pz_plotters import PZPlotterPointEstimateVsTrueHist2D

Change this to be the root of the current PZ working area

In [None]:
# Candidate roots of the PZ working area, in order of preference.
# The last entry is the fallback used when none of them exists on this machine.
PZ_DIR_CANDIDATES = [
    '/global/cfs/cdirs/lsst/groups/PZ/DP1',
    '/global/u2/e/echarles/dx',
    '/Users/echarles/pz',
]

# An explicit PZ_DIR environment variable wins; otherwise take the first
# candidate that is an existing directory, so the notebook runs unchanged on
# any of the known machines instead of always using a single hard-coded path.
pz_dir = os.environ.get('PZ_DIR') or next(
    (candidate for candidate in PZ_DIR_CANDIDATES if os.path.isdir(candidate)),
    PZ_DIR_CANDIDATES[-1],
)

Get the data. Change `algo` to switch between the different algorithms.

In [None]:
# Name of the estimator whose output file is loaded below
algo = 'knn'

# Matched test table; its 'redshift' column is used as the reference redshift
d = tables_io.read(f"{pz_dir}/data/test/dp1_matched_test.hdf5")
redshifts = d['redshift']

# p(z) estimates written by the selected algorithm
pz = qp.read(f"{pz_dir}/projects/dp1/data/gold_dp1_optimize/output_estimate_{algo}.hdf5")

This function computes the standard deviation of each object's PDF numerically on a grid

In [None]:
def calc_std(qp_dstn, grid):
    """Estimate the standard deviation of each PDF by summing over a grid.

    The PDF values on the grid are used directly as weights, with no
    bin-width factor, so the result matches the integral definition only
    for an evenly spaced grid.

    Parameters
    ----------
    qp_dstn
        Object exposing ``pdf(grid)``; assumed to return a 2D array with one
        row per object and one column per grid point -- TODO confirm.
    grid
        1D sequence of points at which the PDFs are evaluated.

    Returns
    -------
    numpy.ndarray
        Column array (one row per object, trailing axis of length 1) holding
        the standard deviation of each PDF. Rows whose PDF sums to zero on
        the grid come out as NaN.
    """
    density = qp_dstn.pdf(grid)
    # Per-object normalisation, kept as a column so it broadcasts along the grid
    total = density.sum(axis=1, keepdims=True)
    centroid = (density * grid).sum(axis=1, keepdims=True) / total
    # Offset of every grid point from each object's mean
    offset = np.expand_dims(grid, 0) - centroid
    variance = (offset * offset * density).sum(axis=1, keepdims=True) / total
    return np.sqrt(variance)

Compute the standard deviations of p(z) using a grid from 0 to 4.

In [None]:
# 401 evenly spaced evaluation points covering 0 to 4 inclusive (step 0.01)
grid = np.linspace(0.0, 4.0, num=401)

In [None]:
# Per-object p(z) standard deviation, evaluated on the grid defined above
std = calc_std(qp_dstn=pz, grid=grid)

Original version of the plot

In [None]:
# 2D histogram of the 'zmode' point estimate against the reference redshift,
# using every object (no selection applied)
plotter = PZPlotterPointEstimateVsTrueHist2D()
out = plotter._make_2d_hist_plot(
    'plot',
    redshifts,
    np.squeeze(pz.ancil['zmode']),
)
# Uncomment to write the figure to disk:
# out.figure.savefig('tpz_scatter_orig.pdf')

Version of the plot with cleaned p(z)

In [None]:
# Keep only objects whose p(z) standard deviation is below 0.15
mask = np.squeeze(std < 0.15)
plotter = PZPlotterPointEstimateVsTrueHist2D()
out = plotter._make_2d_hist_plot(
    'plot',
    redshifts[mask],
    np.squeeze(pz.ancil['zmode'])[mask],
)
# Uncomment to write the figure to disk:
# out.figure.savefig('tpz_scatter_clean.pdf')

For comparison, here is a spec-z vs. photo-z scatter plot for faint galaxies only

In [None]:
# Faint-galaxy selection: keep objects with i_cModelMag above 23.5
mask = np.squeeze(d['i_cModelMag'] > 23.5)
plotter = PZPlotterPointEstimateVsTrueHist2D()
out = plotter._make_2d_hist_plot('plot', redshifts[mask], np.squeeze(pz.ancil['zmode'])[mask])
# Name the output after the estimator actually loaded (`algo`) rather than a
# hard-coded 'tpz', so figures made with different algorithms neither
# overwrite each other nor carry the wrong label.
out.figure.savefig(f'{algo}_scatter_faint.pdf')