Run on NERSC

In [None]:
import os, sys, warnings, time
import numpy as np
import matplotlib.pyplot as plt
from astropy.table import Table
import astropy.stats as astats
import scipy
import fitsio

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_predict, train_test_split, KFold

In [None]:
import sklearn
# Print the scikit-learn version so the run records which release produced the results.
print(sklearn.__version__)

In [None]:
# Global matplotlib defaults: extra-large text everywhere and a white figure background.
params = {
    setting: 'x-large'
    for setting in ('legend.fontsize', 'axes.labelsize', 'axes.titlesize',
                    'xtick.labelsize', 'ytick.labelsize')
}
params['figure.facecolor'] = 'w'
plt.rcParams.update(params)

In [None]:
def mass_plot(mass_pred, mass_true, label='Result'):
    """Draw a three-panel comparison of predicted and true log stellar masses.

    Panels, left to right: predicted vs. true mass, residual vs. true mass
    (with the binned median residual of the non-outliers), and histograms of
    both samples. The two scatter panels plot only every 10th object.

    An object is an outlier when ``|mass_pred - mass_true|`` exceeds
    ``max(3 * NMAD, 0.5)``, where NMAD is ``astats.mad_std`` of the residuals.

    Args:
        mass_pred (array): Predicted log stellar masses.
        mass_true (array): Reference log stellar masses, same length.
        label (str): Currently unused; only the disabled ``savefig`` call at
            the end of the function refers to it.
    """
    n_obj = len(mass_pred)
    residual = mass_pred - mass_true

    # Test MSE (mean squared error)
    mse = np.sum(residual**2)/len(mass_true)

    # Outlier definition: beyond max(3*NMAD, 0.5) from the one-to-one line
    nmad = astats.mad_std(mass_true-mass_pred)
    threshold = np.maximum(3*nmad, 0.5)
    outlier = np.abs(residual) > threshold
    outrate = np.sum(outlier)/n_obj

    # Marker opacity shrinks as the sample grows. Matplotlib rejects alpha > 1,
    # which the raw formula produces for samples below 80k objects, so every
    # use is capped at 1.
    alpha_level = 0.5*(40000/n_obj)

    def capped_alpha(scale):
        return min(1.0, alpha_level*scale)

    # Thinned (every 10th) outlier / non-outlier samples used by both scatter panels
    true_out = mass_true[outlier][::10]
    pred_out = mass_pred[outlier][::10]
    true_in = mass_true[~outlier][::10]
    pred_in = mass_pred[~outlier][::10]

    xrange = (8, 12)
    yrange = (8, 12)
    diagonal = np.arange(20)

    plt.figure(figsize=(18*1.5, 5.5*1.5))

    # Panel 1: predicted vs. true mass
    ax1 = plt.subplot(131)
    ax1.scatter(true_out, pred_out, s=0.5, c='r', alpha=capped_alpha(2))
    ax1.scatter(true_in, pred_in, s=0.5, alpha=capped_alpha(1))
    ax1.plot(diagonal, diagonal, 'k-.', linewidth=0.5)
    ax1.plot(diagonal, diagonal+threshold, 'k-.', linewidth=1)
    ax1.plot(diagonal, diagonal-threshold, 'k-.', linewidth=1)
    ax1.set_title(f'Mean squared error = {mse:.4f}\n'
                  f'$f_{{outlier}}$ = {outrate*100:.2f}%')
    ax1.set_xlim(xrange)
    ax1.set_ylim(yrange)
    ax1.set_xlabel('Log Stellar Mass ($M_{Sun}$)', fontsize=20)
    ax1.set_ylabel('Log Predicted Stellar Mass ($M_{Sun}$)', fontsize=20)
    ax1.grid(alpha=0.6)

    # Panel 2: residual vs. true mass, plus the binned median residual of non-outliers
    ax2 = plt.subplot(132)
    ax2.scatter(true_out, pred_out-true_out, s=0.5, c='r', alpha=capped_alpha(4))
    ax2.scatter(true_in, pred_in-true_in, s=0.5, alpha=capped_alpha(2))
    masses = scipy.stats.binned_statistic(true_in, true_in, bins=20, statistic='median')
    residuals = scipy.stats.binned_statistic(true_in, pred_in-true_in,
                                             bins=20, statistic='median')
    ax2.plot(masses.statistic, residuals.statistic, 'r-')

    ax2.hlines(xmin=xrange[0], xmax=xrange[1], y=0, linestyle='-.', linewidth=0.5)
    ax2.hlines(xmin=xrange[0], xmax=xrange[1], y=threshold, linestyle='-.', linewidth=1)
    ax2.hlines(xmin=xrange[0], xmax=xrange[1], y=-threshold, linestyle='-.', linewidth=1)
    ax2.set_xlim(xrange)
    ax2.set_xlabel('Log Stellar Mass ($M_{Sun}$)', fontsize=20)
    ax2.set_ylabel('Mass residual (dex)', fontsize=20)
    # Raw strings: "\s" and "\m" are not valid Python escape sequences.
    ax2.set_title(r'$\sigma_{\mathrm{NMAD}}$ = ' + f'{nmad:.4f} dex\n'
                  + r'$\mathrm{f_{outlier}}$ = ' + f'{outrate*100:.2f}%', fontsize=20)
    ax2.grid(alpha=0.6)
    ax2.set_ylim(-1.2, 1.2)

    # Panel 3: histograms of predicted and true masses on a shared binning
    hist_bins = [7+x*0.06 for x in range(100)]
    ax3 = plt.subplot(133)
    ax3.hist(mass_pred, bins=hist_bins, histtype='step', label='Prediction')
    ax3.hist(mass_true, bins=hist_bins, histtype='step', label='Test')
    ax3.legend(fontsize=14)
    ax3.set_xlabel('Log Stellar Mass ($M_{Sun}$)', fontsize=18)
    ax3.set_ylabel('Number', fontsize=18)

    # plt.savefig(label+'_plot.png')

__Load data__

In [None]:
# Directory holding the two matched catalogue files read below.
cat_dir =  "/global/cfs/cdirs/desi/users/rongpu/truth/dr9.0/south/matched/"

decals_filename = 'ls-dr9.0-mcat_v4_3-match.fits'
mgc_filename = 'mcat_v4_3-match.fits'

# Only these DECaLS columns are read from disk.
decals_columns = ['FLUX_G', 'FLUX_R', 'FLUX_Z', 'FLUX_W1', 'FLUX_W2', 'MW_TRANSMISSION_G', 'MW_TRANSMISSION_R', 'MW_TRANSMISSION_Z', 'MW_TRANSMISSION_W1', 'MW_TRANSMISSION_W2', 'NOBS_G', 'NOBS_R', 'NOBS_Z', 'MASKBITS', 'z_phot_median']

decals_loc = os.path.join(cat_dir, decals_filename)
mgc_loc = os.path.join(cat_dir, mgc_filename)

# DECaLS catalog
decals = fitsio.read(decals_loc, columns=decals_columns)
decals = Table(decals)
# Matched MGC catalog (original note: "matched best_ukwide catalog")
# NOTE(review): later cells index decals and mgc with the same masks, so the two
# tables are presumably row-matched on disk -- confirm.
mgc = Table.read(mgc_loc)

print(len(mgc))
print(' '.join(mgc.colnames))

# Add MGC photo-z value
mgc_zadd1_full_loc = '/global/cfs/cdirs/desi/users/rongpu/truth/parent/S82_zadd1_v1_0--pcat_v4_0.fits.gz'
mgc_zadd1 = fitsio.read(mgc_zadd1_full_loc, columns=['OBJID', 'ZREIS', 'ZREIS_ERR'])
mgc_zadd1 = Table(mgc_zadd1)
# `allobjects` is used as a row index into the parent table to line it up with mgc.
allobjects = np.load('/global/cfs/cdirs/desi/users/rongpu/truth/dr9.0/south/allobjects/ls-dr9.0-mcat_v4_3.npy')
mgc_zadd1 = mgc_zadd1[allobjects]
# Abort unless the selected rows match mgc in both length and OBJID, row by row.
if len(mgc_zadd1)!=len(mgc) or (not np.all(mgc_zadd1['OBJID']==mgc['OBJID'])):
    raise ValueError('mgc_zadd1 is not line-matched with mgc')

# Copy the redshift and its error onto the matched MGC table.
mgc['ZREIS'] = mgc_zadd1['ZREIS']
mgc['ZREIS_ERR'] = mgc_zadd1['ZREIS_ERR']

In [None]:
# Extinction-corrected magnitudes for every DECaLS band (magnitude errors are not computed).
# Warnings from log10 of non-positive fluxes are silenced; those objects end up non-finite.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    for band in ('g', 'r', 'z', 'w1', 'w2'):
        decals[band+'mag'] = 22.5 - 2.5*np.log10(
            decals['FLUX_'+band.upper()]/decals['MW_TRANSMISSION_'+band.upper()])

# Keep only objects with at least two exposures in each of g, r and z
mask = (decals['NOBS_G'] >= 2) & (decals['NOBS_R'] >= 2) & (decals['NOBS_Z'] >= 2)
print(np.sum(mask)/len(mask))
# Reject objects that have MASKBITS bit 1, 8 or 9 set
mask_bad = (decals['MASKBITS'] & (2**1 | 2**8 | 2**9)) > 0
mask &= ~mask_bad
print(np.sum(mask)/len(mask))
decals, mgc = decals[mask], mgc[mask]

# Require finite grzW1W2 magnitudes and remove stars with the colour cut
# (r - W1) > 1.75*(r - z) - 1.1
x = decals['rmag']-decals['zmag']
y = decals['rmag']-decals['w1mag']
mask = np.isfinite(decals['gmag'])
for band in ('r', 'z', 'w1', 'w2'):
    mask = mask & np.isfinite(decals[band+'mag'])
mask = mask & (y > 1.75*x-1.1)
decals, mgc = decals[mask], mgc[mask]

# Show the star-galaxy separation on every 10th object of the pre-cut sample
x, y = x[::10], y[::10]
mask = (y > 1.75*x-1.1)
plt.figure(figsize=(10, 7))
plt.plot(x[mask], y[mask], '.', alpha=0.2, ms=0.1)
plt.plot(x[~mask], y[~mask], '.', alpha=0.2, ms=0.1)
plt.axis([-0.2, 3., -3, 5])
plt.show()

# Attach the MGC redshift and stellar-mass columns to the DECaLS table
for target, source in (('redshift', 'ZREIS'),
                       ('redshift_err', 'ZREIS_ERR'),
                       ('mass_opt', 'MASS_OPT_ZREIS'),
                       ('mass_opt_err', 'MASSERR_OPT_ZREIS')):
    decals[target] = mgc[source]

In [None]:
frac_keep = 0.1
np.random.seed(532)

# Drop data with really small redshift which may be some nearby stars, not galaxies,
# and keep only objects with zmag < 21.
# log10 of a non-positive redshift gives -inf/nan, which compares False and so is
# dropped as intended; errstate only silences the resulting RuntimeWarnings.
with np.errstate(divide='ignore', invalid='ignore'):
    mask = (np.log10(decals['redshift'])>-2.5) & (decals['zmag'] < 21)
print(np.sum(mask)/len(mask))
# Also require mass_opt > 5
mask &= decals['mass_opt'] > 5
print(np.sum(mask)/len(mask))
# NOTE: only decals is filtered from here on; mgc keeps its previous rows, so the
# two tables are no longer row-matched after this cell.
decals=decals[mask]

# Randomly keep a fraction frac_keep of the remaining objects
if frac_keep<1:
    randnum=np.random.rand(len(decals))
    decals=decals[randnum < frac_keep]

# Size of the final sample
print(len(decals))

In [None]:
# Sky positions of the mgc table, with RA wrapped into [-180, 180).
# NOTE: mgc does not carry the redshift/magnitude/mass cuts or the random
# subsampling applied to decals in the preceding cell.
plt.figure(figsize=(15, 5))
plt.plot((mgc['RA']+180)%360-180, mgc['DEC'], '.', ms=0.5, alpha=0.1)
plt.xlabel('RA (wrapped to [-180, 180))')
plt.ylabel('DEC')
plt.show()

In [None]:
from astropy.cosmology import FlatLambdaCDM
# Flat LambdaCDM cosmology (H0=70, Om0=0.3); later cells call cosmo.distmod() on it.
cosmo = FlatLambdaCDM(H0=70, Om0=0.3)

__Train and test with DECaLS photo-z's__

This is how the full stellar masses are computed.

In [None]:
# Feature matrix: four adjacent-band colours (g-r, r-z, z-W1, W1-W2) plus the
# DECaLS photo-z column z_phot_median.
data = np.column_stack((decals['gmag']-decals['rmag'], decals['rmag']-decals['zmag'],
                             decals['zmag']-decals['w1mag'], decals['w1mag']-decals['w2mag'],
                            decals['z_phot_median']))

# Regression target: mass_opt minus 0.4*(4.45 - (zmag - distance modulus)), with the
# distance modulus evaluated at the DECaLS photo-z.
# NOTE(review): 4.45 is presumably a solar absolute magnitude, which would make the
# target a log mass-to-light ratio (as the name m_to_l suggests) -- confirm.
distmod = cosmo.distmod(decals['z_phot_median'].value)
m_to_l = decals['mass_opt'] - 0.4*(4.45-(decals['zmag'] - distmod.value ))

In [None]:
# Out-of-fold predictions: every object is predicted by a forest that never saw it.
n_folds = 5
kf = KFold(random_state=1456, shuffle=True, n_splits=n_folds)
np.random.seed(123)
m_to_l_predict = np.zeros(len(decals))
kf_index = 0

for kf_index, (idx_train, idx_test) in enumerate(kf.split(data), start=1):
    print('Fold %d'%kf_index)
    regrf = RandomForestRegressor(n_jobs=4, random_state=1456, max_depth=18, n_estimators=60)
    regrf.fit(data[idx_train], m_to_l[idx_train])
    m_to_l_predict[idx_test] = regrf.predict(data[idx_test])

# Undo the target transformation to get back to stellar mass
mass_opt_predict = m_to_l_predict + 0.4*(4.45-(decals['zmag'] - distmod.value ))

In [None]:
import matplotlib.pyplot as plt
import mpl_scatter_density
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
from matplotlib import cm
from matplotlib import colors
from matplotlib.colors import ListedColormap
from astropy.visualization import LogStretch
from astropy.visualization.mpl_normalize import ImageNormalize
from scipy.special import softmax
from scipy.stats import gaussian_kde


def hodges_lehmann(data, max_pairs=1e6, random_seed=200):
    """The Hodges-Lehmann estimator.

    Returns the median of the pairwise means of ``data``. When the number of
    distinct pairs is at most ``max_pairs``, every pair ``i < j`` is used
    together with the self-pairs (the data values themselves). Otherwise
    ``max_pairs`` index pairs are drawn at random with replacement.

    Adapted from code written by Rongpu Zhou.

    Args:
        data (1D array): Data set for which the estimator is being
            computed.
        max_pairs (int): If number of pairs is larger than this,
            randomly sample pairs.
        random_seed (int): Seed for randomly sampling pairs. The draw uses a
            private generator, so the global NumPy random state is left
            untouched. ``None`` draws from the global state instead.

    Returns:
        float: H-L estimate

    Raises:
        ValueError: If ``data`` is empty.
    """
    data = np.asarray(data)
    max_pairs = int(max_pairs)
    n_data = len(data)

    if n_data == 0:
        raise ValueError("Must pass in non-empty array.")

    n_pairs = n_data * (n_data - 1) // 2

    if n_pairs <= max_pairs:
        # non-identical indices (i < j); yields two empty arrays for a single
        # data point, so that case simply reduces to the value itself
        ind1, ind2 = np.triu_indices(n_data, k=1)
        pair_means = np.mean([data[ind1], data[ind2]], axis=0)

        #  identical indices
        pair_means = np.concatenate([pair_means, data])

    else:
        # RandomState(seed) reproduces the numbers that np.random.seed(seed)
        # followed by np.random.choice would give, without reseeding globally.
        rng = np.random if random_seed is None else np.random.RandomState(random_seed)

        ind1, ind2 = rng.choice(n_data, size=(max_pairs, 2)).transpose()
        pair_means = np.mean([data[ind1], data[ind2]], axis=0)

    return np.median(pair_means)


def better_step(bin_edges, y, ax=None, **kwargs):
    """Plot a histogram-style step curve from bin edges and bin heights.

    Each bin is drawn as a flat segment from its left edge to its right edge
    at the bin's height; consecutive segments are joined by the line itself.
    Any extra keyword arguments go straight to matplotlib's ``plot``.

    Args:
        bin_edges: The bin edges; one element longer than ``y``.
        y: The bin heights.
        ax (Optional): Axis to draw on. Defaults to the current axis.

    Returns:
        The axis that was drawn on.
    """
    target = plt.gca() if ax is None else ax

    # x: left and right edge of every bin, in order
    step_x = []
    for left, right in zip(bin_edges[:-1], bin_edges[1:]):
        step_x += [left, right]

    # y: each height repeated once per edge
    step_y = []
    for height in y:
        step_y += [height, height]

    target.plot(step_x, step_y, **kwargs)
    return target



In [None]:
class Metrics(object):
    """Summary metrics and diagnostic plots for stellar-mass predictions.

    Only objects whose true mass lies in ``[mass_min, mass_max]`` are kept;
    all metrics and plots refer to that subset.

    Args:
        mass_predict (array): Predicted log stellar masses.
        mass_true (array): Reference ("true") log stellar masses, same length.
        mass_min (float, optional): Lower bound on ``mass_true``, also the
            lower plot limit. ``None`` applies no lower cut and uses the
            smallest retained true mass as the plot limit.
        mass_max (float, optional): Upper bound on ``mass_true``, also the
            upper plot limit. ``None`` applies no upper cut and uses the
            largest retained true mass as the plot limit.
        outlier_threshold (float, optional): Absolute residual above which an
            object counts as an outlier. ``None`` uses
            ``max(3 * sigma_nmad, 0.5)``, the rule ``mass_plot`` applies.
        mag (array, optional): Per-object magnitudes aligned with the inputs;
            stored after applying the same mass cut.
        **kwargs: Accepted and ignored.

    Attributes set by the constructor: ``mass_predict``, ``mass_true``,
    ``mass_min``, ``mass_max``, ``outlier_threshold``, ``mag``, ``delta_m``
    (predicted minus true), ``sigma_nmad``, ``bias`` (mean of ``delta_m``),
    ``n_outlier`` and ``percent_outlier``.
    """

    def __init__(
        self,
        mass_predict,
        mass_true,
        mass_min=None,
        mass_max=None,
        outlier_threshold=None,
        mag=None,
        **kwargs,
    ):
        # Start from "keep everything" so that a missing bound means "no cut"
        # rather than a TypeError from comparing against None.
        mass_mask = np.ones(len(mass_true), dtype=bool)
        if mass_min is not None:
            mass_mask &= np.asarray(mass_true >= mass_min)
        if mass_max is not None:
            mass_mask &= np.asarray(mass_true <= mass_max)

        self.mass_predict = mass_predict[mass_mask]
        self.mass_true = mass_true[mass_mask]

        # The bounds double as axis limits, so fall back to the data range.
        has_data = len(self.mass_true) > 0
        if mass_min is None and has_data:
            mass_min = np.min(self.mass_true)
        if mass_max is None and has_data:
            mass_max = np.max(self.mass_true)
        self.mass_min = mass_min
        self.mass_max = mass_max

        # Keep mag aligned with the filtered masses.
        self.mag = None if mag is None else np.asarray(mag)[mass_mask]

        self.delta_m = (self.mass_predict - self.mass_true)
        # Normalized median absolute deviation
        self.sigma_nmad = 1.4826 * np.median(
            np.abs(self.delta_m - np.median(self.delta_m))
        )
        self.bias = np.mean(self.delta_m)

        if outlier_threshold is None:
            # fmax ignores a NaN sigma_nmad (empty sample) and returns 0.5
            outlier_threshold = float(np.fmax(3 * self.sigma_nmad, 0.5))
        self.outlier_threshold = outlier_threshold

        # Number of objects larger than outlier threshold
        self.n_outlier = np.sum(np.abs(self.delta_m) > self.outlier_threshold)

        # Outlier percentage
        self.percent_outlier = self.n_outlier * 100.0 / len(self.mass_true)

    def _gaussian(self, x, mean=0, sigma=1):
        """Normal probability density with the given mean and sigma, evaluated at x."""
        return np.exp((-0.5 * ((x - mean) / sigma) ** 2)) / np.sqrt(2 * np.pi) / sigma

    def phot_vs_spec(self, show=False, ax=None, fig=None, **kwargs):
        """Density plot of predicted vs. true mass with a summary text box.

        Args:
            show (bool): Call ``plt.show()`` before returning.
            ax (Optional): Axis to draw on. It must provide ``scatter_density``
                (the "scatter_density" projection). A new figure and axis are
                created when omitted.
            fig (Optional): Figure that receives the colorbar. Defaults to the
                figure owning ``ax``.
            **kwargs: Passed to ``plt.subplots`` when a new figure is created.

        Returns:
            tuple: ``(fig, ax)``.
        """

        if ax is None:
            fig, ax = plt.subplots(
                subplot_kw={"projection": "scatter_density"}, **kwargs,
            )
        elif fig is None:
            # The colorbar below needs a figure even if only an axis was given
            fig = ax.figure

        print(f"Normalized MAD: {self.sigma_nmad:.6f}")
        print(f"{self.outlier_threshold:.2f} outliers: {self.percent_outlier:.6f}%")

        # Outlier boundaries on either side of the one-to-one line
        x = np.linspace(self.mass_min, self.mass_max, 10)
        outlier_upper = x + self.outlier_threshold
        outlier_lower = x - self.outlier_threshold
        ax.plot(x, outlier_upper, "k--")
        ax.plot(x, outlier_lower, "k--")

        # Define new cmap viridis_white: viridis with its lowest entry replaced
        # by white. plt.get_cmap is used because matplotlib.cm.get_cmap was
        # removed in matplotlib 3.9.
        viridis = plt.get_cmap("viridis", 256)
        newcolors = viridis(np.linspace(0, 1, 256))
        white = np.array([1, 1, 1, 1])
        newcolors[:1, :] = white
        viridis_white = ListedColormap(newcolors, name="viridis_white")

        # # Plot scatter density
        # norm = ImageNormalize(vmin=20, vmax=140, stretch=LogStretch())

        scatter_density = ax.scatter_density(
            self.mass_true,
            self.mass_predict,
            cmap=viridis_white,
            dpi=60,
            downres_factor=1,
            vmin=0.8,
            vmax=40,
            # norm=norm,
        )
        cbar = fig.colorbar(scatter_density, fraction=0.046, pad=0.04,)
        cbar.ax.tick_params(labelsize=20)
        cbar.set_label(label="Number of galaxies per pixel", fontsize=30)

        # One-to-one line
        ax.plot(x, x, linewidth=1.5, color="grey")

        ax.set_xlim([self.mass_min, self.mass_max])
        ax.set_ylim([self.mass_min, self.mass_max])
        ax.set_xlabel(r"True $\log_{10}(\mathrm{M}_{\star}[\mathrm{M}_{\odot}])$", fontsize=40)
        ax.set_ylabel(r"Predicted $\log_{10}(\mathrm{M}_{\star}[\mathrm{M}_{\odot}])$", fontsize=40)
        ax.yaxis.grid(alpha=0.8, ls="--")
        ax.xaxis.grid(alpha=0.8)
        ax.set_aspect("equal")
        # Hide the first x tick label
        xticks = ax.xaxis.get_major_ticks()
        xticks[0].label1.set_visible(False)
        ax.tick_params(axis="both", which="major", labelsize=25)
        ax.tick_params(axis="both", which="minor", labelsize=25)

        textstr = "\n".join(
            (
                r"$\sigma_{\mathrm{NMAD}}=%.3f$" % (self.sigma_nmad,),
                r"$\mathrm{f}_{\mathrm{outlier}}=%.2f$" % (self.percent_outlier,) + "%",
                r"$\langle \Delta \log_{10}(\mathrm{M}_{\star}) \rangle=%.4f$"
                % (self.bias),
            )
        )

        # these are matplotlib.patch.Patch properties
        props = dict(boxstyle="round, pad=0.5", facecolor="white", alpha=1)

        # place a text box in upper left in axes coords
        ax.text(
            0.05,
            0.95,
            textstr,
            transform=ax.transAxes,
            fontsize=25,
            verticalalignment="top",
            bbox=props,
        )

        if show:
            plt.show()

        return fig, ax

    def metrics_vs_mass(self, show=False, ax=None, fig=None, **kwargs):
        """Plot σ_NMAD and bias as a function of true mass.

        The sample is split into 10 equal-population bins of true mass. For
        each bin the bias is the Hodges-Lehmann estimate of the residuals and
        the scatter is their normalized median absolute deviation. Both are
        drawn as step curves over a density plot of residual vs. true mass.

        Args:
            show (bool): Call ``plt.show()`` before returning.
            ax (Optional): Axis to draw on. It must provide ``scatter_density``.
                A new figure and axis are created when omitted.
            fig (Optional): Figure to return. Defaults to the figure owning
                ``ax``.
            **kwargs: Passed to ``plt.subplots`` when a new figure is created.

        Returns:
            tuple: ``(fig, ax)``.
        """
        num_bins = 10
        _, bins = pd.qcut(self.mass_true, num_bins, retbins=True)  # equal population bins
        #         bins = np.linspace(self.mass_min, self.mass_max, numbins+1) #equal width bins
        bias_bin = np.zeros(num_bins)
        sigma_nmad_bins = np.zeros(num_bins)

        for i in range(num_bins):
            lower, upper = bins[i], bins[i + 1]
            if i == num_bins - 1:
                # Close the last bin on the right so the maximum is not dropped
                mask = (self.mass_true >= lower) & (self.mass_true <= upper)
            else:
                mask = (self.mass_true >= lower) & (self.mass_true < upper)

            if not np.any(mask):
                # Nothing to estimate from; leave a gap in the step curves
                bias_bin[i] = np.nan
                sigma_nmad_bins[i] = np.nan
                continue

            bin_delta = self.delta_m[mask]
            bias_bin[i] = hodges_lehmann(bin_delta, max_pairs=1e6)
            sigma_nmad_bins[i] = 1.4826 * np.median(
                (np.abs(bin_delta - np.median(bin_delta)))
            )

        if ax is None:
            fig, ax = plt.subplots(
                subplot_kw={"projection": "scatter_density"}, **kwargs
            )
        elif fig is None:
            fig = ax.figure

        ax.scatter_density(
            self.mass_true,
            self.delta_m,
            cmap="Greys",
            dpi=30,
            downres_factor=1,
            alpha=0.6,
        )

        better_step(bins, bias_bin, ax=ax, color="C1",
                    linewidth=3, label=r"$\langle \Delta \log_{10}(\mathrm{M}_{\star}) \rangle$")
        better_step(
            bins,
            sigma_nmad_bins,
            ax=ax,
            color="C0",
            ls="--",
            linewidth=3,
            label=r"$\sigma_{\mathrm{NMAD}}$",
        )

        ax.set_ylabel(r"$\Delta \log_{10}(\mathrm{M}_{\star})$", fontsize=40)
        ax.set_xlabel(r"True $\log_{10}(\mathrm{M}_{\star}[\mathrm{M}_{\odot}])$", fontsize=40)
        ax.axhline(0, linestyle="--", color="black", lw=3)
        ax.tick_params(axis="both", which="major", labelsize=25)
        ax.tick_params(axis="both", which="minor", labelsize=25)
        ax.set_xlim(self.mass_min, self.mass_max)
        ax.set_ylim(-0.35,0.35)
        ax.grid()
        ax.legend(loc="lower left", fontsize=30)

        if show:
            plt.show()

        return fig, ax

#     def metrics_vs_mag(self, show=False, ax=None, fig=None, **kwargs):
#         """σ_NMAD and bias as a function of magnitude"""
#         num_bins = 10
#         _, bins = pd.qcut(self.mag, num_bins, retbins=True)  # equal population bins
#         bias_bin = np.zeros(num_bins)
#         z_bins_mean = np.zeros(num_bins)
#         sigma_nmad_bins = np.zeros(num_bins)

#         for i in range(num_bins):
#             mask = (self.mag >= bins[i]) & (self.mag < bins[i + 1])
#             bias_bin[i] = hodges_lehmann(self.delta_z_norm[mask], max_pairs=1e6)
#             sigma_nmad_bins[i] = 1.4826 * np.median(
#                 (np.abs(self.delta_z_norm[mask] - np.median(self.delta_z_norm[mask])))
#             )

#         if ax is None:
#             fig, ax = plt.subplots(
#                 subplot_kw={"projection": "scatter_density"}, **kwargs
#             )
#         norm = ImageNormalize(vmin=0.0, vmax=300, stretch=LogStretch())
#         ax.scatter_density(
#             self.mag,
#             self.delta_z_norm,
#             cmap="Greys",
#             dpi=15,
#             downres_factor=1,
#             alpha=0.6,
#             #             norm=norm,
#         )
#         #         ax.plot(
#         #             self.mass_true, self.delta_z_norm,ls ="", marker=".", alpha=1, color="k", markersize=0.2
#         #         )

#         better_step(bins, bias_bin, ax=ax, color="C1", linewidth=3, label="bias")
#         better_step(
#             bins,
#             sigma_nmad_bins,
#             ax=ax,
#             color="C0",
#             ls="--",
#             linewidth=3,
#             label=r"$\sigma_{\mathrm{NMAD}}$",
#         )
#         print(sigma_nmad_bins)

#         # plot lines of constant mass_predict
#         mass_predict_fixed = np.linspace(self.mass_min, self.mass_max, 5)
#         x = np.linspace(self.mass_min, self.mass_max, 100)
#         for it in mass_predict_fixed:
#             ax.plot(x, (it - x) / (1 + x), "--", color="gray", lw=1, alpha=0.5)

#         ax.set_ylabel(r"$\dfrac{\Delta z}{1 + z_\mathrm{spec}}$", fontsize=40)
#         ax.set_xlabel("$r$ magnitude", fontsize=40)
#         ax.axhline(0, linestyle="--", color="black", lw=3)
#         ax.tick_params(axis="both", which="major", labelsize=25)
#         ax.tick_params(axis="both", which="minor", labelsize=25)
#         ax.set_xlim(15, 17.9)
#         ax.set_ylim(-0.013, 0.013)
#         ax.grid()
#         ax.legend(loc="lower left", fontsize=30)

#         if show:
#             plt.show()

#         return fig, ax

#     def error_dist(self, show=False, ax=None, fig=None, **kwargs):
#         """Histogram of normalized redshift residuals."""
#         print("Bias: {:.6f}".format(self.bias))
#         print("Sigma MAD: {:.6f}".format(self.sigma_nmad))

#         if ax is None:
#             fig, ax = plt.subplots(**kwargs)

#         pop, bins, patches = ax.hist(
#             self.delta_z_norm,
#             #             range=(-1 * self.outlier_threshold, self.outlier_threshold),
#             bins=150,
#             histtype="stepfilled",
#             color="C0",
#             alpha=0.5,
#             density=True,
#         )

#         bin_width = bins[1] - bins[0]
#         x = np.linspace(bins.min(), bins.max(), 501)
#         ax.plot(
#             x,
#             self._gaussian(x, self.bias, self.sigma_nmad) * pop.sum() * bin_width,
#             c="C1",
#             ls="-",
#             lw=3,
#         )
#         ax.grid(alpha=0.5)
#         ax.set_ylabel("Relative Frequency", fontsize=40)
#         ax.set_xlabel(r"$\dfrac{\Delta z}{1 + z_\mathrm{spec}}$", fontsize=40)
#         ax_pad = 0.01
#         ax.axvspan(
#             self.outlier_threshold,
#             self.outlier_threshold + ax_pad,
#             color="gray",
#             alpha=0.3,
#             lw=0,
#         )
#         ax.axvspan(
#             -1 * self.outlier_threshold - ax_pad,
#             -1 * self.outlier_threshold,
#             color="gray",
#             alpha=0.3,
#             lw=0,
#         )
#         ax.set_xlim(
#             [-1 * self.outlier_threshold - ax_pad, self.outlier_threshold + ax_pad]
#         )
#         ax.tick_params(axis="both", which="major", labelsize=25)
#         ax.tick_params(axis="both", which="minor", labelsize=25)
#         yticks = ax.yaxis.get_major_ticks()
#         yticks[0].label1.set_visible(False)
#         if show:
#             plt.show()

#         return fig, ax


In [None]:
# Metrics for true masses in [8.5, 12], counting |residual| > 0.5 as an outlier
metrics = Metrics(
    mass_predict=mass_opt_predict,
    mass_true=decals['mass_opt'],
    mass_min=8.5,
    mass_max=12,
    outlier_threshold=0.5,
)

In [None]:
# Predicted-vs-true density plot, written to mass_comparison.pdf
fig, ax = metrics.phot_vs_spec(figsize=(12, 12), show=True)
fig.savefig("mass_comparison.pdf", bbox_inches="tight", dpi=300)

In [None]:
# Bias and scatter per mass bin, written to metric_v_mass.pdf
fig, ax = metrics.metrics_vs_mass(figsize=(12,8), show=True)
fig.savefig("metric_v_mass.pdf", bbox_inches="tight", dpi=300)

In [None]:
# Three-panel summary for the photo-z trained and tested model
mass_plot(mass_pred=mass_opt_predict, mass_true=decals['mass_opt'])

__Train and test with MGC redshifts (photo-z's)__

In [None]:
# Feature matrix: four adjacent-band colours (g-r, r-z, z-W1, W1-W2) plus the
# MGC redshift column.
data = np.column_stack((decals['gmag']-decals['rmag'], decals['rmag']-decals['zmag'],
                             decals['zmag']-decals['w1mag'], decals['w1mag']-decals['w2mag'],
                            decals['redshift']))
# Distance modulus evaluated at the MGC redshift.
distmod = cosmo.distmod(decals['redshift'].value)

# Regression target: mass_opt minus 0.4*(4.45 - (zmag - distance modulus)).
# NOTE(review): 4.45 is presumably a solar absolute magnitude, which would make the
# target a log mass-to-light ratio (as the name m_to_l suggests) -- confirm.
m_to_l = decals['mass_opt'] - 0.4*(4.45-(decals['zmag'] - distmod.value ))

In [None]:
# Out-of-fold predictions: every object is predicted by a forest that never saw it.
n_folds = 5
kf = KFold(random_state=1456, shuffle=True, n_splits=n_folds)
np.random.seed(123)
m_to_l_predict = np.zeros(len(decals))
kf_index = 0

for kf_index, (idx_train, idx_test) in enumerate(kf.split(data), start=1):
    print('Fold %d'%kf_index)
    regrf = RandomForestRegressor(n_jobs=4, random_state=1456, max_depth=18, n_estimators=60)
    regrf.fit(data[idx_train], m_to_l[idx_train])
    m_to_l_predict[idx_test] = regrf.predict(data[idx_test])

# Undo the target transformation to get back to stellar mass
mass_opt_predict = m_to_l_predict + 0.4*(4.45-(decals['zmag'] - distmod.value ))

In [None]:
# Metrics for true masses in [8.5, 12], counting |residual| > 0.5 as an outlier
metrics = Metrics(
    mass_predict=mass_opt_predict,
    mass_true=decals['mass_opt'],
    mass_min=8.5,
    mass_max=12,
    outlier_threshold=0.5,
)
fig, ax = metrics.phot_vs_spec(figsize=(12, 12), show=True)

In [None]:
# Bias and scatter per mass bin for the MGC-redshift model
fig, ax = metrics.metrics_vs_mass(figsize=(12,8), show=True)

In [None]:
# Three-panel summary for the MGC-redshift trained and tested model
mass_plot(mass_pred=mass_opt_predict, mass_true=decals['mass_opt'])

__Train with MGC redshifts and test with DECaLS photo-z's__

In [None]:
# MGC redshifts:
data = np.column_stack((decals['gmag']-decals['rmag'], decals['rmag']-decals['zmag'],
                             decals['zmag']-decals['w1mag'], decals['w1mag']-decals['w2mag'],
                            decals['redshift']))
# DECaLS redshifts:
data1 = np.column_stack((decals['gmag']-decals['rmag'], decals['rmag']-decals['zmag'],
                             decals['zmag']-decals['w1mag'], decals['w1mag']-decals['w2mag'],
                            decals['z_phot_median']))

distmod = cosmo.distmod(decals['redshift'].value)
m_to_l = decals['mass_opt'] - 0.4*(4.45-(decals['zmag'] - distmod.value ))

In [None]:
# Out-of-fold predictions: fit on the MGC-redshift features, predict from the photo-z features.
n_folds = 5
kf = KFold(random_state=1456, shuffle=True, n_splits=n_folds)
np.random.seed(123)
m_to_l_predict = np.zeros(len(decals))
kf_index = 0

for kf_index, (idx_train, idx_test) in enumerate(kf.split(data), start=1):
    print('Fold %d'%kf_index)
    regrf = RandomForestRegressor(n_jobs=4, random_state=1456, max_depth=18, n_estimators=60)
    regrf.fit(data[idx_train], m_to_l[idx_train])
    m_to_l_predict[idx_test] = regrf.predict(data1[idx_test])

# Convert back to stellar mass with the distance modulus at the DECaLS photo-z
distmod = cosmo.distmod(decals['z_phot_median'].value)
mass_opt_predict = m_to_l_predict + 0.4*(4.45-(decals['zmag'] - distmod.value ))

In [None]:
# Three-panel summary: trained with MGC redshifts, tested with DECaLS photo-z's
mass_plot(mass_pred=mass_opt_predict, mass_true=decals['mass_opt'])

### Train with LS photo-z's and test with MGC redshifts

In [None]:
# MGC redshifts:
data = np.column_stack((decals['gmag']-decals['rmag'], decals['rmag']-decals['zmag'],
                             decals['zmag']-decals['w1mag'], decals['w1mag']-decals['w2mag'],
                            decals['redshift']))
# DECaLS redshifts:
data1 = np.column_stack((decals['gmag']-decals['rmag'], decals['rmag']-decals['zmag'],
                             decals['zmag']-decals['w1mag'], decals['w1mag']-decals['w2mag'],
                            decals['z_phot_median']))

distmod = cosmo.distmod(decals['z_phot_median'].value)
m_to_l = decals['mass_opt'] - 0.4*(4.45-(decals['zmag'] - distmod.value ))

In [None]:
# Out-of-fold predictions: fit on the photo-z features, predict from the MGC-redshift features.
n_folds = 5
kf = KFold(random_state=1456, shuffle=True, n_splits=n_folds)
np.random.seed(123)
m_to_l_predict = np.zeros(len(decals))
kf_index = 0

for kf_index, (idx_train, idx_test) in enumerate(kf.split(data), start=1):
    print('Fold %d'%kf_index)
    regrf = RandomForestRegressor(n_jobs=4, random_state=1456, max_depth=18, n_estimators=60)
    regrf.fit(data1[idx_train], m_to_l[idx_train])
    m_to_l_predict[idx_test] = regrf.predict(data[idx_test])

# Convert back to stellar mass with the distance modulus at the MGC redshift
distmod = cosmo.distmod(decals['redshift'].value)
mass_opt_predict = m_to_l_predict + 0.4*(4.45-(decals['zmag'] - distmod.value ))

In [None]:
# Three-panel summary: trained with DECaLS photo-z's, tested with MGC redshifts
mass_plot(mass_pred=mass_opt_predict, mass_true=decals['mass_opt'])