# Comparison of training data to observations

In [1]:
import os
import numpy as np 

In [2]:
from IPython.display import IFrame
# --- plotting ---
# Select the non-interactive PDF backend *before* matplotlib.pyplot is
# imported (corner may pull pyplot in as well). Older matplotlib releases
# ignore a late mpl.use() call with a warning, so the order matters.
import matplotlib as mpl
mpl.use('PDF')
import corner as DFM
import matplotlib.pyplot as plt
# Global figure styling shared by every plot in this notebook.
mpl.rcParams['text.usetex'] = True  # text is typeset with LaTeX
mpl.rcParams['font.family'] = 'serif'
mpl.rcParams['axes.linewidth'] = 1.5
# NOTE(review): xmargin = 1 pads autoscaled x-limits by the full data span on
# each side; the corner plot below sets explicit ranges — confirm intended.
mpl.rcParams['axes.xmargin'] = 1
mpl.rcParams['xtick.labelsize'] = 'x-large'
mpl.rcParams['xtick.major.size'] = 5
mpl.rcParams['xtick.major.width'] = 1.5
mpl.rcParams['ytick.labelsize'] = 'x-large'
mpl.rcParams['ytick.major.size'] = 5
mpl.rcParams['ytick.major.width'] = 1.5
mpl.rcParams['legend.frameon'] = False

## load training data

In [3]:
# Directory holding the sedflow_p.*.npy files that the cells below load.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
dat_dir = '/scratch/network/chhahn/sedflow/'

In [4]:
# Read the four training-set arrays from dat_dir, in the order
# theta_sps, encoded, ivar.encoded, zred (one .npy file each).
train_theta, train_enc_spec, train_enc_ivar, train_zred = (
    np.load(os.path.join(dat_dir, fname))
    for fname in (
        'sedflow_p.train.v0.1.theta_sps.npy',
        'sedflow_p.train.v0.1.encoded.npy',
        'sedflow_p.train.v0.1.ivar.encoded.npy',
        'sedflow_p.train.v0.1.zred.npy',
    )
)

In [5]:
# Columns of train_data, left to right:
#   log M*, beta1, Aspec, h_1, Aivar, z
train_data = np.hstack((
    train_theta[:, :2],
    train_enc_spec[:, :2],
    train_enc_ivar[:, :1],
    train_zred[:, np.newaxis],  # add a trailing axis so z stacks as a column
))

## load observations

In [6]:
# Read the three observation arrays from dat_dir, in the order
# encoded, ivar.encoded, zred (one .npy file each).
obs_enc_spec, obs_enc_ivar, obs_zred = (
    np.load(os.path.join(dat_dir, fname))
    for fname in (
        'sedflow_p.obs.v0.1.encoded.npy',
        'sedflow_p.obs.v0.1.ivar.encoded.npy',
        'sedflow_p.obs.v0.1.zred.npy',
    )
)

In [7]:
# Columns of obs_data, left to right: the first two encoded-spectrum columns,
# the first encoded-ivar column, then the redshift.
obs_data = np.hstack((
    obs_enc_spec[:, :2],
    obs_enc_ivar[:, :1],
    obs_zred[:, np.newaxis],  # add a trailing axis so z stacks as a column
))

In [12]:
# (min, max) limits, one pair per train_data column and in the same order
# (log M*, beta1, Aspec, h_1, Aivar, z). Passed to DFM.corner as `range` and
# reused as the histogram range for the overplotted observations.
ranges = [(8., 12.5), (0., 1.), (0., 5e2), (-3., 3.), (0., 10), (0.0, 0.025)]

In [13]:
# Corner plot of the training data, thinned to every 100th row, with the
# observations overlaid on the panels the two data sets have in common.
#
# `levels` is passed directly to DFM.corner: corner forwards extra keyword
# arguments to hist2d as-is, so wrapping it as hist2d_kwargs={'levels': ...}
# never reached hist2d's `levels` argument and the requested 68%/95% contours
# were not the ones drawn.
fig = DFM.corner(train_data[::100],
                 labels=[r'$\log M_*$', r'$\beta_1$', r'$A_{\rm spec}$', r'$h_1$', r'$A_{\rm ivar}$', r'($z$) redshift'],
                 label_kwargs={'fontsize': 25},
                 hist_kwargs={'density': True},
                 range=ranges,
                 levels=[0.68, 0.95])

# Recover the (ndim x ndim) grid of panels from the flat list of figure axes.
ndim = int(np.sqrt(len(fig.axes)))
axes = np.array(fig.axes).reshape((ndim, ndim))

# obs_data has no columns for the first two train_data parameters
# (log M*, beta1), so panel index i corresponds to obs_data column i - n_theta.
n_theta = 2

# Diagonal panels: overlay the normalised 1-D histogram of the observations.
for i in range(n_theta, ndim):
    ax = axes[i, i]
    counts, _, _ = ax.hist(obs_data[:, i - n_theta], color='C0', density=True,
                           range=ranges[i], histtype='stepfilled', alpha=0.5, bins=20)
    # Rescale the y-axis to the peak of the observed histogram.
    ax.set_ylim(0., 1.1 * counts.max())

# Lower-triangle panels: scatter the observations wherever both the x and y
# quantities exist in obs_data.
for yi in range(n_theta, ndim):
    for xi in range(n_theta, yi):
        ax = axes[yi, xi]
        ax.scatter(obs_data[:, xi - n_theta], obs_data[:, yi - n_theta],
                   color='C0', s=1, rasterized=True)

# The legend goes in an upper-triangle panel (presumably left empty by
# corner); the empty fill_between calls exist only to create legend handles.
ax = axes[2, ndim-2]
ax.fill_between([], [], [], color='k', label='Training Data')
ax.fill_between([], [], [], color='C0', label='SDSS')
ax.legend(handletextpad=0.2, markerscale=10, fontsize=25)
fig.savefig('../../paper2/figs/training.pdf', bbox_inches='tight')

In [14]:
# Embed the PDF written by fig.savefig in the previous cell so the figure can
# be viewed from the notebook.
IFrame("../../paper2/figs/training.pdf", width=600, height=600)