# Figure that illustrates the training data in comparison to NSA

In [1]:
import numpy as np 
from sedflow import obs as Obs
from sedflow import train as Train

In [2]:
from IPython.display import IFrame
# --- plotting --- 
import corner as DFM
import matplotlib as mpl
import matplotlib.pyplot as plt
# Select the PDF backend; the figure produced by this notebook is written to
# a PDF file and displayed from disk rather than rendered interactively.
mpl.use('PDF')

# Global figure styling, applied in a single update so that all the settings
# are visible at a glance.
mpl.rcParams.update({
    # text rendering
    'text.usetex': True,
    'font.family': 'serif',
    # axes frame and default x-margin
    'axes.linewidth': 1.5,
    'axes.xmargin': 1,
    # x ticks
    'xtick.labelsize': 'x-large',
    'xtick.major.size': 5,
    'xtick.major.width': 1.5,
    # y ticks (kept identical to the x ticks)
    'ytick.labelsize': 'x-large',
    'ytick.major.size': 5,
    'ytick.major.width': 1.5,
    # legends are drawn without a surrounding box
    'legend.frameon': False,
})

## read in training data

In [3]:
# Load the version-1 'flow' training sample.
#   x_train : theta_sps, the SPS model parameters (one column per parameter)
#   y_train : [u, g, r, i, z, sigma_u, sigma_g, sigma_r, sigma_i, sigma_z, z]
#             i.e. five magnitudes, their five uncertainties, and the
#             redshift in the last column
x_train, y_train = Train.load_data('train', version=1, sample='flow', params='thetas_sps')

In [4]:
# Gather the quantities shown in the corner plot into an (N, 6) array, one
# column per plotted dimension.
train_data = np.column_stack((
    x_train[:, 0],   # first SPS parameter  (labelled log M_* in the figure)
    x_train[:, 1],   # second SPS parameter (labelled beta_1 in the figure)
    y_train[:, -1],  # redshift
    y_train[:, 1],   # g magnitude
    y_train[:, 2],   # r magnitude
    y_train[:, 4],   # z magnitude
))

## read in NSA data

In [5]:
# Obs.NSA() returns a pair; only the first element is kept (the catalog, later
# accessed through keys such as 'NMGY', 'NMGY_IVAR' and 'Z'). The second
# element is not needed here and is discarded.
nsa, _ = Obs.NSA()

In [6]:
# Redshift of every NSA galaxy.
zred_nsa = nsa['Z']

# Fluxes and inverse variances with the first two photometric columns
# dropped, leaving the five bands that are compared to the training-set
# u, g, r, i, z magnitudes.
# NOTE(review): the two dropped columns are presumably the GALEX FUV/NUV
# bands -- confirm against the NSA catalog schema.
flux_nsa = nsa['NMGY'][:, 2:]
ivar_nsa = nsa['NMGY_IVAR'][:, 2:]

# Magnitudes from the fluxes, and magnitude uncertainties from the flux
# uncertainties (sigma_flux = ivar**-0.5).
mags_nsa = Train.flux2mag(flux_nsa)
sigs_nsa = Train.sigma_flux2mag(np.power(ivar_nsa, -0.5), flux_nsa)

  return 22.5 - 2.5 * np.log10(flux)


In [7]:
# Row index of the single NSA galaxy whose redshift and g, r, z magnitudes are
# drawn as reference lines on the corner plot.
i_nsa = 25

In [8]:
# NSA counterpart of the training array, with the same six-column layout.
# The first two columns have no observed equivalent, so they are filled with
# ones purely to keep the column indices aligned; they are never plotted.
obs_data = np.column_stack((
    np.ones(len(mags_nsa)),  # placeholder for the first SPS parameter
    np.ones(len(mags_nsa)),  # placeholder for the second SPS parameter
    zred_nsa,                # redshift
    mags_nsa[:, 1],          # g magnitude
    mags_nsa[:, 2],          # r magnitude
    mags_nsa[:, 4],          # z magnitude
))

In [9]:
# Axis limits of the corner plot, one (min, max) pair per dimension in the
# order: log M_*, beta_1, redshift, g, r, z. The same limits are reused as the
# binning range of the NSA histograms drawn on the diagonal panels.
ranges = [(8., 12.5), (0., 1.), (0., 0.06), (10., 22), (10., 22), (10., 22.)]

In [14]:
# Corner plot of the training set (thinned to every 100th sample) with the
# NSA catalog overlaid on the observable dimensions (redshift, g, r, z).
#
# The contour levels are passed as a direct keyword: corner.corner collects
# its extra keyword arguments and forwards them to hist2d, so `levels` must
# not be nested inside a `hist2d_kwargs` dict.
fig = DFM.corner(
    train_data[::100],
    labels=[r'$\log M_*$', r'$\beta_1$', r'redshift', r'$g$', r'$r$', r'$z$'],
    label_kwargs={'fontsize': 25},
    hist_kwargs={'density': True},
    range=ranges,
    levels=[0.68, 0.95],
)

# Mark the selected NSA galaxy. None skips the two SPS-parameter dimensions,
# for which there is no observed value.
highlighted = [
    None,
    None,
    zred_nsa[i_nsa],
    mags_nsa[i_nsa, 1],
    mags_nsa[i_nsa, 2],
    mags_nsa[i_nsa, 4],
]
DFM.overplot_lines(fig, highlighted, color='C3')

# corner returns a flat list of ndim * ndim axes; view it as a square grid.
ndim = int(np.sqrt(len(fig.axes)))
axes = np.array(fig.axes).reshape((ndim, ndim))

# Dimensions 0 and 1 are SPS parameters that only hold placeholders in
# obs_data, so the NSA overlay starts at the third dimension.
first_obs = 2

# Diagonal panels: normalized NSA histograms over the same axis ranges.
for i in range(first_obs, ndim):
    ax = axes[i, i]
    h = ax.hist(obs_data[:, i], color='C0', density=True, range=ranges[i],
                histtype='stepfilled', alpha=0.5, bins=20)
    # h[0] holds the bin heights; leave 10% headroom above the tallest bin.
    ax.set_ylim(0., 1.1 * h[0].max())

# Lower-triangle panels among the observable dimensions: NSA scatter points
# (rasterized to keep the PDF small).
for yi in range(first_obs + 1, ndim):
    for xi in range(first_obs, yi):
        ax = axes[yi, xi]
        ax.scatter(obs_data[:, xi], obs_data[:, yi], color='C0', s=1, rasterized=True)

# Legend placed in an upper-triangle panel; the empty fill_between calls draw
# nothing and exist only to create the legend handles.
ax = axes[2, ndim-2]
ax.fill_between([], [], [], color='k', label='Training Set')
ax.fill_between([], [], [], color='C0', label='NSA Catalog')
ax.legend(handletextpad=0.2, markerscale=10, fontsize=25)
fig.savefig('paper/figs/training.pdf', bbox_inches='tight')

In [15]:
# Embed the saved PDF inline so the rendered figure can be inspected.
IFrame("paper/figs/training.pdf", width=600, height=600)