In [3]:
import numpy as np
import scipy.stats
import h5py
import matplotlib.pyplot as plt

In [7]:
# Location of the HDF5 file holding the small test data set.
test_data_path = '/arc/home/aydanmckay/green2020-stellar-model/green2020_test_data_small.h5'

# Copy every dataset into memory so nothing depends on the file staying open
# once the `with` block exits.
with h5py.File(test_data_path, mode='r') as h5_file:
    r_var = h5_file['r_var'][:]  # The variance of the inferred reddening
    r_fit = h5_file['r_fit'][:]  # The reddening inferred using the trained model
    d = h5_file['data'][:]       # All the data needed to train or test the model

The columns of the `data` array have the following meanings:

* `atm_param` : Spectroscopic labels, $\hat{\theta}$. `shape = (# of stars, 3)`.
* `atm_param_cov` : Covariance of spectroscopic labels, $C_{\theta}$. `shape = (# of stars, 3, 3)`.
* `atm_param_p` : Like `atm_param`, but in normalized coordinates.
* `atm_param_cov_p` : Like `atm_param_cov`, but in normalized coordinates.
* `r` : Reddening estimate, $\hat{E}$. `shape = (# of stars,)`.
* `r_err` : Uncertainty in reddening estimate, $\sigma_E$.
* `mag` : Observed magnitudes, $\hat{m}$. `shape = (# of stars, 13)`.
* `mag_err` : Uncertainties in observed magnitudes, $\vec{\sigma}_m$.
* `parallax` : Observed parallaxes, $\hat{\varpi}$. `shape = (# of stars,)`.
* `parallax_err` : Uncertainties in observed parallaxes, $\sigma_{\varpi}$.
* `atm_source` : The survey that the spectroscopic labels come from. `shape = (# of stars,)`.
* `r_source` : The source of the reddening estimates. `shape = (# of stars,)`.

In [58]:
# Plot a histogram of the reddening column and of each of the 13 `mag` columns,
# annotating every panel with the mean, standard deviation and skewness.
#
# NOTE(review): the axis labels below describe residuals ("estimated - ..."),
# but the arrays that are histogrammed are the raw `d['r']` and `d['mag']`
# columns, not differences against a model prediction -- confirm which one is
# intended and adjust either the data or the labels.


def _annotated_hist(ax, values, symbol, xlabel, **hist_kwargs):
    """Draw a 50-bin histogram of `values` on `ax` and annotate it.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    values : 1-D array of samples; NaNs are ignored in the statistics.
    symbol : LaTeX fragment naming the quantity (used after ``\\Delta``).
    xlabel : full x-axis label string.
    **hist_kwargs : forwarded to ``ax.hist`` (e.g. ``range=``).

    The annotation shows ``mean +/- std`` and the standardized skewness
    ``mu_3 / sigma^3``.
    """
    ax.hist(values, bins=50, **hist_kwargs)
    mean = np.nanmean(values)
    std = np.nanstd(values)
    third_moment = float(scipy.stats.moment(values, moment=3, nan_policy='omit'))
    # Standardized skewness divides the third central moment by sigma**3
    # (equivalently variance**1.5).  An explicit zero check replaces an
    # additive epsilon, which would bias the ratio when sigma**3 is tiny.
    skew = third_moment / std**3 if std > 0 else float('nan')
    txt = r'$\Delta ' + symbol + r' = {:+.3f} \pm {:.3f}$'.format(mean, std)
    txt += '\n' + r'$\tilde{{\mu}}_3 = {:+.3f}$'.format(skew)
    ax.text(0.05, 0.95, txt, ha='left', va='top', transform=ax.transAxes)
    ax.set_xlabel(xlabel, fontsize=10)


dr = d['r']
# One 1-D array per photometric column (transpose puts the column axis first).
ds = list(d['mag'].T)
# Individual names kept so cells that refer to dm1..dm13 keep working.
dm1,dm2,dm3,dm4,dm5,dm6,dm7,dm8,dm9,dm10,dm11,dm12,dm13 = ds
names = ['G','(BP-G)','(RP-G)','(g-G)','(r-G)','(i-G)','(z-G)','(y-G)','(J-G)','(H-G)','(K_s-G)','(W_1-G)','(W_2-G)']

fig = plt.figure(figsize=(12,18))

# Panel 1: reddening, restricted to a fixed range.
ax = fig.add_subplot(5,3,1)
_annotated_hist(
    ax, dr, 'E',
    r'$\Delta E \ \left( \mathrm{estimated} - \mathrm{Bayestar19} \right)$',
    range=(-0.15, 0.15),
)

# Panels 2-14: one per `mag` column.  A distinct loop variable is used so the
# reddening array `dr` is not overwritten.
for it,(dm,name) in enumerate(zip(ds,names)):
    ax = fig.add_subplot(5,3,it+2)
    _annotated_hist(
        ax, dm, name,
        r'$\Delta '+name+r'\ \left( \mathrm{estimated} - \mathrm{observed} \right)$',
    )

fig.savefig('/arc/home/aydanmckay/yo.svg', dpi=150)
plt.close(fig)