### This notebook shows how to compare different flux measures


Usual imports.

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import qp
import tables_io
from rail.raruma import plotting_functions as raruma_plot
from rail.raruma import utility_functions as raruma_util

In [None]:
from rail.estimation.algos.k_nearneigh import KNearNeighInformer, KNearNeighEstimator
from rail.core.data import Hdf5Handle, DataStore, DATA_STORE
from rail.utils.catalog_utils import RomanRubinCatalogConfig
# Handle on the RAIL data store (only referenced by commented-out code in the cells shown here).
DS = DATA_STORE()
# Set the class-level flag so existing data-store entries can be replaced when cells are re-run.
DataStore.allow_overwrite = True
# Apply the Roman-Rubin catalog configuration by its tag.
# NOTE(review): presumably this sets the shared band / column naming defaults used by RAIL — confirm.
RomanRubinCatalogConfig.apply(RomanRubinCatalogConfig.tag)

Change this to be the root of the current PZ working area

In [None]:
# Root of the current PZ working area.
# Set the PZ_DIR environment variable to point somewhere else without editing
# the notebook, for example:
#   /global/cfs/cdirs/lsst/groups/PZ/DP1
#   /global/u2/e/echarles/dx      (fallback if the shared area fails)
# When PZ_DIR is not set, the original local default is used.
pz_dir = os.environ.get('PZ_DIR', '/Users/echarles/pz/')

Read the test / training data

In [None]:
# Load the matched train / test catalogs (in that order) from the PZ working area.
train, test = (
    tables_io.read(catalog_path)
    for catalog_path in (
        f"{pz_dir}/data/train/dp1_matched_v4_train.hdf5",
        f"{pz_dir}/data/test/dp1_matched_v4_test.hdf5",
    )
)
# Previously computed KNN estimates, read as a qp object.
knn = qp.read(
    f"{pz_dir}/projects/dp1_v4/data/gold_dp1_6band_paper/output_estimate_knn.hdf5"
)
#d.keys()
#train = tables_io.sliceObj(d, slice(0, -1, 10))
#test = tables_io.sliceObj(d, slice(1, -1, 10))

In [None]:
# 2D histogram (log colour scale) of the reference redshift stored with the
# KNN estimates against the i-band PSF magnitude of the test catalog.
_ = plt.hist2d(
    x=knn.ancil['redshift'],
    y=test['i_psfMag'],
    bins=(np.linspace(0, 4, 101), np.linspace(16, 26, 101)),
    norm='log',
    cmap='gray',
)
_ = plt.xlabel(r"$z_{ref}$")
_ = plt.ylabel("i [mag]")

In [None]:
#dh = DS.read_file('input', Hdf5Handle, "/Users/echarles/pz/data/test/dp1_ecdfs_matched_specgold_test.hdf5")

In [None]:
#dh.data

In [None]:
# knn_inform = KNearNeighInformer.make_stage(name='knn_inform')
# knn_inform.inform(train)
# knn_inform.finalize()
# knn_estimate = KNearNeighEstimator.make_stage(name='knn_estimate', connections=dict(model=knn_inform.io.model))

Build the targets and features for each of the different flux measures (GAaP 1.0, GAaP 3.0, PSF, Sersic, CModel)

In [None]:
# Targets and features from the '{band}_gaap1p0Mag' columns, for the train
# catalog first and the test catalog second.
(train_targets, train_features), (test_targets, test_features) = (
    raruma_util.prepare_data_total_mag_and_colors(catalog, '{band}_gaap1p0Mag', 'ugrizy')
    for catalog in (train, test)
)

In [None]:
# Targets and features from the '{band}_gaap3p0Mag' columns, for the train
# catalog first and the test catalog second.
(
    (train_gaap3p0_targets, train_gaap3p0_features),
    (test_gaap3p0_targets, test_gaap3p0_features),
) = (
    raruma_util.prepare_data_total_mag_and_colors(catalog, '{band}_gaap3p0Mag', 'ugrizy')
    for catalog in (train, test)
)

In [None]:
# Targets and features from the '{band}_psfMag' columns, for the train
# catalog first and the test catalog second.
(
    (train_psf_targets, train_psf_features),
    (test_psf_targets, test_psf_features),
) = (
    raruma_util.prepare_data_total_mag_and_colors(catalog, '{band}_psfMag', 'ugrizy')
    for catalog in (train, test)
)

In [None]:
# Targets and features from the '{band}_sersicMag' columns, for the train
# catalog first and the test catalog second.
(
    (train_sersic_targets, train_sersic_features),
    (test_sersic_targets, test_sersic_features),
) = (
    raruma_util.prepare_data_total_mag_and_colors(catalog, '{band}_sersicMag', 'ugrizy')
    for catalog in (train, test)
)

In [None]:
# Targets and features from the '{band}_cModelMag' columns, for the train
# catalog first and the test catalog second.
(
    (train_cmodel_targets, train_cmodel_features),
    (test_cmodel_targets, test_cmodel_features),
) = (
    raruma_util.prepare_data_total_mag_and_colors(catalog, '{band}_cModelMag', 'ugrizy')
    for catalog in (train, test)
)

Make some plots

In [None]:
# First feature column (GAaP 1.0) on x; GAaP 3.0 minus GAaP 1.0 for that column on y.
_ = plt.scatter(
    x=train_features[:, 0],
    y=train_gaap3p0_features[:, 0] - train_features[:, 0],
)

In [None]:
# 2D histogram (log colour scale) of the training targets against the
# CModel minus GAaP 1.0 difference in the first feature column.
_ = plt.hist2d(
    x=train_targets,
    y=train_cmodel_features[:, 0] - train_features[:, 0],
    bins=(100, 100),
    norm='log',
    cmap='gray',
)

In [None]:
# Histograms of (PSF - GAaP 1.0) for each of the six feature columns,
# one panel per column on a 3x2 grid.
fig, axes = plt.subplots(3, 2)

for i in range(6):
    # Fill the grid column by column: features 0-2 run down the first
    # column of panels, features 3-5 down the second.
    panel_col, panel_row = divmod(i, 3)
    axes[panel_row][panel_col].hist(train_psf_features[:,i]-train_features[:,i], bins=np.linspace(-2, 2, 101))

# tight_layout only accounts for what exists when it is called, so run it
# after the histograms (and their tick labels) have been drawn.
fig.tight_layout()

In [None]:
# Histograms of (Sersic - GAaP 1.0) for each of the six feature columns,
# one panel per column on a 3x2 grid.
fig, axes = plt.subplots(3, 2)

for i in range(6):
    # Fill the grid column by column: features 0-2 run down the first
    # column of panels, features 3-5 down the second.
    panel_col, panel_row = divmod(i, 3)
    axes[panel_row][panel_col].hist(train_sersic_features[:,i]-train_features[:,i], bins=np.linspace(-2, 2, 101))

# tight_layout only accounts for what exists when it is called, so run it
# after the histograms (and their tick labels) have been drawn.
fig.tight_layout()

In [None]:
# Histograms of (GAaP 3.0 - GAaP 1.0) for each of the six feature columns,
# one panel per column on a 3x2 grid.
fig, axes = plt.subplots(3, 2)

for i in range(6):
    # Fill the grid column by column: features 0-2 run down the first
    # column of panels, features 3-5 down the second.
    panel_col, panel_row = divmod(i, 3)
    axes[panel_row][panel_col].hist(train_gaap3p0_features[:,i]-train_features[:,i], bins=np.linspace(-2, 2, 101))

# tight_layout only accounts for what exists when it is called, so run it
# after the histograms (and their tick labels) have been drawn.
fig.tight_layout()

In [None]:
# Histograms of (CModel - GAaP 1.0) for each of the six feature columns,
# one panel per column on a 3x2 grid.
fig, axes = plt.subplots(3, 2)

for i in range(6):
    # Fill the grid column by column: features 0-2 run down the first
    # column of panels, features 3-5 down the second.
    panel_col, panel_row = divmod(i, 3)
    axes[panel_row][panel_col].hist(train_cmodel_features[:,i]-train_features[:,i], bins=np.linspace(-2, 2, 101))

# tight_layout only accounts for what exists when it is called, so run it
# after the histograms (and their tick labels) have been drawn.
fig.tight_layout()

In [None]:
# First feature column (GAaP 1.0) on x; PSF minus GAaP 1.0 for that column on y.
_ = plt.scatter(
    x=train_features[:, 0],
    y=train_psf_features[:, 0] - train_features[:, 0],
)

In [None]:
# First feature column (GAaP 1.0) on x; Sersic minus GAaP 1.0 for that column on y.
_ = plt.scatter(
    x=train_features[:, 0],
    y=train_sersic_features[:, 0] - train_features[:, 0],
)
# Restrict the y-range to the +-2 window.
_ = plt.ylim(-2, 2.)

In [None]:
# First feature column (GAaP 1.0) on x; CModel minus GAaP 1.0 for that column on y.
_ = plt.scatter(
    x=train_features[:, 0],
    y=train_cmodel_features[:, 0] - train_features[:, 0],
)
# Restrict the y-range to the +-2 window.
_ = plt.ylim(-2, 2.)

In [None]:
# GAaP 3.0 minus GAaP 1.0 in feature column 3 (x) against the same difference
# in feature column 2 (y), coloured by the training targets.
_ = plt.scatter(
    x=train_gaap3p0_features[:, 3] - train_features[:, 3],
    y=train_gaap3p0_features[:, 2] - train_features[:, 2],
    c=train_targets,
)
# Restrict the y-range to the +-2 window.
_ = plt.ylim(-2, 2.)

In [None]:
# Combined feature matrices: GAaP 3.0, GAaP 1.0 and PSF feature columns placed
# side by side (the inputs are 2-D, so this is a column-wise concatenation).
train_xx_features = np.concatenate(
    (train_gaap3p0_features, train_features, train_psf_features),
    axis=1,
)
test_xx_features = np.concatenate(
    (test_gaap3p0_features, test_features, test_psf_features),
    axis=1,
)

In [None]:
# Plot the distribution of the training targets; the return value is discarded.
# NOTE(review): the targets are presumably the reference redshifts — confirm against raruma_util.
_ = raruma_plot.plot_true_nz(train_targets)

Look at the effect on regression

In [None]:
from sklearn import preprocessing
from sklearn.decomposition import PCA

In [None]:
# Standardizer fitted on the GAaP 1.0 training features.
scaler = preprocessing.StandardScaler()
scaler.fit(train_features)
# Unwhitened PCA keeping as many components as there are feature columns
# (constructed only; fitted in a later cell).
pca = PCA(
    whiten=False,
    n_components=train_features.shape[-1],
)

In [None]:
# Standardizer fitted on the combined (GAaP 3.0 + GAaP 1.0 + PSF) training features.
scaler_xx = preprocessing.StandardScaler()
scaler_xx.fit(train_xx_features)
# Unwhitened PCA keeping as many components as there are combined feature
# columns (constructed only; fitted in a later cell).
pca_xx = PCA(
    whiten=False,
    n_components=train_xx_features.shape[-1],
)

In [None]:
# Standardize the training features with the train-fitted scaler and clip the
# result to [-5, 5] so extreme outliers cannot dominate.
scaled_train = scaler.transform(train_features).clip(-5, 5)
# Fit the PCA on the training sample and keep the projected features.
# (`pca.fit` alone returns the estimator itself, not the transformed data.)
pca_train = pca.fit_transform(scaled_train)

In [None]:
# Standardize the combined training features with the train-fitted scaler and
# clip the result to [-5, 5] so extreme outliers cannot dominate.
scaled_xx_train = scaler_xx.transform(train_xx_features).clip(-5, 5)
# Fit the PCA on the training sample and keep the projected features.
# (`pca_xx.fit` alone returns the estimator itself, not the transformed data.)
pca_xx_train = pca_xx.fit_transform(scaled_xx_train)

In [None]:
# Standardize the test features with the scaler fitted on the training sample
# and clip the result to [-5, 5].
scaled_test = scaler.transform(test_features).clip(-5, 5)
# Project the test sample with the PCA already fitted on the training sample.
# Re-fitting here would overwrite the training fit with one learned from the
# test data, and would return the estimator rather than the projection.
pca_test = pca.transform(scaled_test)

In [None]:
# Standardize the combined test features with the scaler fitted on the
# training sample and clip the result to [-5, 5].
scaled_xx_test = scaler_xx.transform(test_xx_features).clip(-5, 5)
# Project the test sample with the PCA already fitted on the training sample.
# Re-fitting here would overwrite the training fit with one learned from the
# test data, and would return the estimator rather than the projection.
pca_xx_test = pca_xx.transform(scaled_xx_test)

In [None]:
from sklearn.ensemble import (HistGradientBoostingRegressor, ExtraTreesRegressor, AdaBoostRegressor)
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import HuberRegressor, LinearRegression, QuantileRegressor
from sklearn.svm import NuSVR
from sklearn.neighbors import KNeighborsRegressor, RadiusNeighborsRegressor

In [None]:
# One instance of each candidate regressor, all with default hyper-parameters.
# Only `knr` is passed to run_it / run_it_xx in the cells visible below; the
# others are available for swapping in.
hbr = HistGradientBoostingRegressor()
etr = ExtraTreesRegressor()
abr = AdaBoostRegressor()
gpr = GaussianProcessRegressor()
isr = IsotonicRegression()
hur = HuberRegressor()
lir = LinearRegression()
qur = QuantileRegressor()
nsr = NuSVR()
knr = KNeighborsRegressor()
rnr = RadiusNeighborsRegressor()

In [None]:
def run_it(reg):
    """Run a regressor on the scaled GAaP 1.0 features and plot the results.

    The regressor is passed to ``raruma_util.run_regression`` together with
    the notebook-level ``scaled_train``, ``train_targets`` and ``scaled_test``
    arrays. Two diagnostic plots are then drawn against ``test_targets``.

    Parameters
    ----------
    reg
        Regressor instance handed to ``raruma_util.run_regression``.

    Returns
    -------
    None
    """
    preds = raruma_util.run_regression(reg, scaled_train, train_targets, scaled_test)
    # Replace NaN predictions by -0.4 for the truth-vs-prediction plot only.
    # `nan=` must be given by keyword: the second positional argument of
    # np.nan_to_num is `copy`, so a positional -0.4 would leave NaNs mapped to 0.0.
    _ = raruma_plot.plot_true_predict_fancy(test_targets, np.nan_to_num(preds, nan=-0.4))
    # The summary statistics receive the raw predictions, NaNs included.
    _ = raruma_plot.plot_biweight_stats_v_redshift(test_targets, preds)

In [None]:
def run_it_xx(reg):
    """Run a regressor on the scaled combined features and plot the results.

    Same as the single-flux run, but uses the notebook-level
    ``scaled_xx_train`` / ``scaled_xx_test`` arrays (GAaP 3.0, GAaP 1.0 and
    PSF features stacked side by side) with ``train_targets``. Two diagnostic
    plots are then drawn against ``test_targets``.

    Parameters
    ----------
    reg
        Regressor instance handed to ``raruma_util.run_regression``.

    Returns
    -------
    None
    """
    preds = raruma_util.run_regression(reg, scaled_xx_train, train_targets, scaled_xx_test)
    # Replace NaN predictions by -0.4 for the truth-vs-prediction plot only.
    # `nan=` must be given by keyword: the second positional argument of
    # np.nan_to_num is `copy`, so a positional -0.4 would leave NaNs mapped to 0.0.
    _ = raruma_plot.plot_true_predict_fancy(test_targets, np.nan_to_num(preds, nan=-0.4))
    # The summary statistics receive the raw predictions, NaNs included.
    _ = raruma_plot.plot_biweight_stats_v_redshift(test_targets, preds)

In [None]:
# K-nearest-neighbours regression on the scaled GAaP 1.0 features only.
run_it(knr)

In [None]:
# Same regressor on the combined (GAaP 3.0 + GAaP 1.0 + PSF) scaled features, for comparison.
run_it_xx(knr)