In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tables

%matplotlib inline

In [None]:
# Global matplotlib styling: serif text everywhere, and 12pt for tick labels
# and axis labels.
plt.rc('font', family='serif')
for rc_group in ('xtick', 'ytick', 'axes'):
    plt.rc(rc_group, labelsize=12)

In [None]:
# Open the sample file read-only and fetch the '/spectra' node that the
# loading cells iterate over.
# NOTE(review): the handle is never closed in the visible cells -- call
# h5file.close() once the last cell that reads from `h5node` has run.
h5file = tables.open_file('sample_short.h5', mode='r')
h5node = h5file.get_node('/spectra')

# Spectra

In [None]:
# Stack the 'spectrum' field of every second row (step=2) into one array,
# one spectrum per row.
spectra = np.array([row['spectrum'] for row in h5node.iterrows(step=2)])

In [None]:
# Preprocess: add signal-dependent Gaussian noise to every spectrum, then
# rescale each noisy spectrum so that its maximum is 1.
#
# NOTE: this cell overwrites `spectra`. Re-running it without re-running the
# loading cell first stacks a second layer of noise on top of the first.

# Dedicated, seeded generator so that Restart & Run All reproduces the noise.
rng = np.random.default_rng(0)

sqrt = np.sqrt(spectra)
sums = spectra.sum(axis=1)
sqrtsums = sqrt.sum(axis=1)

# Per-spectrum noise amplitude. The noise std of a pixel is As * sqrt(flux),
# and E|N(0, sigma)| = sigma * sqrt(2 / pi), so this choice makes the expected
# summed |noise| equal to 2% of the summed flux of each spectrum.
As = .02 * sums / (np.sqrt(2 / np.pi) * sqrtsums)

# As[:, np.newaxis] broadcasts across the pixel axis; no explicit repeat needed.
noise = rng.normal(scale=As[:, np.newaxis] * sqrt)

# Sanity check: realised |noise| / flux ratio per spectrum (should be ~0.02).
print(np.abs(noise).sum(axis=1) / sums)

noisy = spectra + noise
spectra = noisy / np.max(noisy, axis=1)[:, np.newaxis]

In [None]:
# Wavelength axis for the plots: a 2**15-sample window, starting at sample
# 700000, cut out of a uniform 1569128-point grid running from 500 to 55000.
window_start = 700000
window_length = 2**15
lambdas = np.linspace(500, 55000, 1569128)[window_start:window_start + window_length]

In [None]:
# Overlay the first five preprocessed spectra on a common wavelength axis.
fig, ax = plt.subplots(figsize=(16, 6))

for spectrum in spectra[:5]:
    ax.plot(lambdas, spectrum)

# Raw strings keep the LaTeX backslashes intact: '\l' and '\A' are invalid
# escape sequences in a normal string literal and trigger a SyntaxWarning on
# recent Python versions.
ax.set_xlabel(r'$\lambda [\AA]$')
ax.set_ylabel('Normalized flux')
#ax.set_ylabel(r'Flux [$erg/s/cm^2/\AA$]')

plt.show()

In [None]:
# TODO: focus on the dip (plot spectra with different metallicities)

In [None]:
# Gram matrix of the spectra: entry (i, j) is the inner product of
# spectrum i with spectrum j.
s2 = np.matmul(spectra, spectra.T)

In [None]:
# Full SVD of the Gram matrix. Because s2 = spectra @ spectra.T, its singular
# values `s` are the squared singular values of `spectra` itself.
u, s, vh = np.linalg.svd(s2, full_matrices=True, compute_uv=True)

In [None]:
# Share of the total singular-value mass carried by each of the leading
# components, in percent.
n_shown = 7
component_index = range(1, 1 + n_shown)
percent_explained = 100 * s[:n_shown] / np.sum(s)

plt.bar(component_index, percent_explained)

plt.xlabel('singular value index')
plt.xticks(component_index)

plt.ylabel('% variability explained')
plt.yticks(range(0, 100, 10))
plt.ylim(0, 100)

plt.show()

# Same numbers as the bars, rounded to one decimal place.
print(np.round(percent_explained, 1))

# Labels

In [None]:
N = 16000  # NOTE(review): not referenced by any cell visible here -- confirm it is still needed

# One row per table entry, columns in the order listed in `label_fields`.
# NOTE(review): this reads every row (step=1) whereas `spectra` was built from
# every second row (step=2), so the rows of `labels` do not line up one-to-one
# with the rows of `spectra` -- confirm this is intended.
label_fields = ('MH_ratio', 'T_eff', 'alphaM_ratio', 'log_g')
labels = np.array([
    tuple(row[field] for field in label_fields)
    for row in h5node.iterrows(step=1)
])

In [None]:
# Split the label matrix into one 1-D array per stellar parameter
# (same column order as used when `labels` was built).
MH_ratios, T_effs, alphaM_ratios, log_gs = np.transpose(labels)

In [None]:
# 2x2 grid of histograms, one per stellar label.
#
# The axis labels are raw strings on purpose: in a normal string literal
# '\a' is the BEL control character, so '$[ \alpha / H]$' never reached
# matplotlib as '\alpha', and '\l' (in '\log') is an invalid escape sequence.
# NOTE(review): the columns are named MH_ratio / alphaM_ratio but the axis
# text says [Fe/H] / [alpha/H] -- label text kept as written, confirm wording.
hist_panels = [
    (MH_ratios, r'$[Fe/H]$'),
    (T_effs, r'$T_{eff} [K]$'),
    (alphaM_ratios, r'$[ \alpha / H]$'),
    (log_gs, r'$\log g$'),
]

plt.figure(figsize=(6, 6))
for panel_index, (values, xlabel) in enumerate(hist_panels, start=1):
    plt.subplot(2, 2, panel_index)
    plt.hist(values)
    plt.xlabel(xlabel)

plt.tight_layout()
plt.show()

In [None]:
# Lower-triangular grid of 2-D histograms, one panel per pair of labels:
# column j on the x axis against column i on the y axis, for every j < i.
plt.figure(figsize=(9, 9))

# Raw strings so the LaTeX commands survive: in a normal string literal '\a'
# is the BEL control character (which corrupted '\alpha') and '\l' is an
# invalid escape sequence.
axes_labels = [r'$[Fe/H]$', r'$T_{eff}$', r'$[\alpha / H]$', r'$\log g$']

for i in range(4):
    for j in range(i):
        # Row (i - 1), column j of the 3x3 grid; subplot indices are 1-based.
        plt.subplot(3, 3, 3*(i-1)+j+1)
        plt.hist2d(labels[:, j], labels[:, i], cmap='inferno')
        plt.xlabel(axes_labels[j])
        plt.ylabel(axes_labels[i])

plt.tight_layout()
plt.show()