In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tables

%matplotlib inline

In [None]:
# Global matplotlib styling, applied group by group through plt.rc:
# serif fonts, 8 pt tick/axis/legend text, thin lines, and 300 dpi
# figures with automatic layout.
RC_SETTINGS = {
    'font': {'family': 'serif'},
    'xtick': {'labelsize': 8},
    'ytick': {'labelsize': 8},
    'axes': {'labelsize': 8},
    'figure': {'autolayout': True, 'dpi': 300},
    'lines': {'linewidth': 1},
    'legend': {'fontsize': 8},
}
for rc_group, rc_options in RC_SETTINGS.items():
    plt.rc(rc_group, **rc_options)

In [None]:
# Open the HDF5 file read-only and grab the '/spectra' node.
# The file handle is kept open on purpose: later cells iterate over h5node.
h5file = tables.open_file('sample_real_stars.h5', mode='r')
h5node = h5file.get_node(where='/spectra')

# Spectra

In [None]:
# Sub-sample the table: visit every (8000 // N)-th row and keep only its
# 'spectrum' column, stacking the results into a single array.
# NOTE(review): 8000 is presumably the number of rows in the table -- confirm.
N = 2000
row_step = 8000 // N
spectra = np.array([row['spectrum'] for row in h5node.iterrows(step=row_step)])

In [None]:
# Preprocess: add Gaussian noise to every spectrum, then peak-normalise.
#
# The per-sample noise standard deviation is A * sqrt(flux). Because
# E|N(0, sigma)| = sigma * sqrt(2 / pi), choosing
#     A = 0.02 * sum(flux) / (sqrt(2 / pi) * sum(sqrt(flux)))
# makes the expected ratio sum(|noise|) / sum(flux) equal to 2 % per spectrum.
#
# NOTE: this cell overwrites `spectra`, so re-running it without reloading
# the raw spectra adds noise on top of already noisy, normalised data.
NOISE_SEED = 0  # fixed seed so the injected noise is reproducible
rng = np.random.default_rng(NOISE_SEED)

sqrt = np.sqrt(spectra)
sums = spectra.sum(axis=1)
sqrtsums = sqrt.sum(axis=1)  # reuse `sqrt` instead of recomputing it
As = .02 * sums / (np.sqrt(2 / np.pi) * sqrtsums)

# Broadcasting the (n_spectra, 1) amplitudes against `sqrt` avoids
# materialising a repeated full-size amplitude array.
noise = rng.normal(scale=As[:, np.newaxis] * sqrt)

# Diagnostic: realised noise fraction per spectrum (should be close to 0.02).
print(np.abs(noise).sum(axis=1) / sums)

# Normalise each noisy spectrum by its own maximum.
noisy = spectra + noise
spectra = noisy / noisy.max(axis=1)[:, np.newaxis]

In [None]:
# First attempt at a wavelength axis: one evenly spaced value per sample,
# running from 500 to 55000.
# Known issue: this grid does not look quite right -- the H-alpha line
# shows up slightly displaced when located on it.
n_samples = spectra.shape[1]
lambdas = np.linspace(start=500, stop=55000, num=n_samples)

In [None]:
# Display the length of the second axis of `spectra` (samples per spectrum).
spectra.shape[1]

In [None]:
# Build the wavelength grid step by step, one value per spectrum sample.
# The step size depends on the current wavelength:
#   [500, 3000]     -> fixed step of 0.1
#   (3000, 25000]   -> step = wavelength / 650000
#   (25000, 55000]  -> step = wavelength / 250000
# Beyond 55000 the wavelength stops advancing and is simply repeated.
current_lambda = 500
lambdas = [current_lambda]
for _ in range(spectra.shape[1] - 1):
    if 500 <= current_lambda <= 3000:
        delta = .1
    elif 3000 < current_lambda <= 25000:
        delta = current_lambda / 650000
    elif 25000 < current_lambda <= 55000:
        delta = current_lambda / 250000
    else:
        delta = 0
    current_lambda += delta
    lambdas.append(current_lambda)

In [None]:
# Visual check: plot the current `lambdas` values against their index.
plt.plot(lambdas)

In [None]:
# Piecewise wavelength grid assembled from three segments:
#   500-3000    : fixed 0.1 step (np.arange, end point excluded)
#   3000-25000  : 5 * 257355 evenly spaced samples (both end points included)
#   25000-55000 : 257355 evenly spaced samples (both end points included)
# NOTE(review): 25000 is therefore present in both the second and third
# segment, and the final two samples are dropped -- presumably so the grid
# length matches spectra.shape[1]; confirm.
SEGMENT_SAMPLES = 257355
lambdas1 = np.arange(500, 3000, .1)
lambdas2 = np.linspace(3000, 25000, num=5 * SEGMENT_SAMPLES)
lambdas3 = np.linspace(25000, 55000, num=SEGMENT_SAMPLES)
lambdas = np.concatenate((lambdas1, lambdas2, lambdas3))
lambdas = lambdas[:-2]

In [None]:
# Visual check: plot the current `lambdas` values against their index.
plt.plot(lambdas)

In [None]:
# Plot the first five spectra in three stacked panels: the full range,
# a 2**15-sample window starting at index 700000, and a narrow window
# covering indices 712000-714000.
zoom_windows = [
    slice(None),
    slice(700000, 700000 + 2**15),
    slice(712000, 714000),
]

plt.figure(figsize=(6, 6))
for panel_number, window in enumerate(zoom_windows, start=1):
    plt.subplot(len(zoom_windows), 1, panel_number)
    for spectrum in spectra[:5]:
        plt.plot(lambdas[window], spectrum[window], alpha=.75)

    # Raw string: '\A' is not a valid Python escape sequence, so the
    # backslash must be kept literally for the LaTeX \AA symbol.
    plt.xlabel(r'Wavelength $[\AA]$')
    plt.ylabel('Normalized flux')

plt.show()

In [None]:
# TODO: focus on the dip (plot spectra with different metallicities)

In [None]:
# Gram matrix of the spectra: entry (a, b) is the dot product of
# spectrum a with spectrum b.
s2 = np.matmul(spectra, spectra.T)

In [None]:
# Singular value decomposition of the Gram matrix (default options spelled out):
# u and vh hold the singular vectors, s the singular values.
u, s, vh = np.linalg.svd(s2, full_matrices=True, compute_uv=True)

In [None]:
# Each singular value as a percentage of the sum of all singular values.
percent_of_total = 100 * s / np.sum(s)
n_bars = 5
bar_positions = range(1, 1 + n_bars)

# Bar chart of the leading percentages on a logarithmic y axis.
plt.figure(figsize=(3, 3))
plt.bar(bar_positions, percent_of_total[:n_bars])

plt.xlabel('singular value index')
plt.xticks(bar_positions)

plt.ylabel('% variability explained')
plt.ylim(.01, 100)
plt.yscale('log')

plt.show()

# Numeric summary of the first seven percentages, rounded to one decimal.
print(np.round(percent_of_total[:7], 1))

# Labels

In [None]:
# Collect the four label columns for every (8000 // N)-th row of the table;
# each row of `labels` is (MH_ratio, T_eff, alphaM_ratio, log_g).
N = 8000
LABEL_COLUMNS = ('MH_ratio', 'T_eff', 'alphaM_ratio', 'log_g')
labels = np.array([
    tuple(row[column] for column in LABEL_COLUMNS)
    for row in h5node.iterrows(step=8000 // N)
])

In [None]:
# Split the label array into one 1-D array per column.
MH_ratios, T_effs, alphaM_ratios, log_gs = np.transpose(labels)

In [None]:
# One histogram per label, laid out on a 2x2 grid.
# NOTE(review): the alphaM_ratio panel is labelled [alpha / H] -- confirm
# that this axis label matches the quantity stored in the column.
histogram_panels = [
    (MH_ratios, r'$[Fe/H]$'),
    (T_effs, r'$T_{eff} [K]$'),
    (alphaM_ratios, r'$[ \alpha / H]$'),
    (log_gs, r'$\log g$'),
]

plt.figure(figsize=(4, 4))
for panel_number, (values, axis_label) in enumerate(histogram_panels, start=1):
    plt.subplot(2, 2, panel_number)
    plt.hist(values)
    plt.xlabel(axis_label)

plt.tight_layout()

In [None]:
# Pairwise 2-D histograms of the four labels, arranged as the lower
# triangle of a 3x3 grid: row (i - 1) shows label i on the y axis and
# column j shows label j on the x axis, for every pair with j < i.
plt.figure(figsize=(9, 9))
# All labels are raw strings so LaTeX backslashes (\alpha, \log) are never
# interpreted as (invalid) Python escape sequences.
axes_labels = [r'$[Fe/H]$', r'$T_{eff}$', r'$[\alpha / H]$', r'$\log g$']
for i in range(4):
    for j in range(i):
        # Subplot numbers are 1-based and run row by row.
        plt.subplot(3, 3, 3*(i-1)+j+1)
        plt.hist2d(labels[:, j], labels[:, i], cmap='inferno')
        plt.xlabel(axes_labels[j])
        plt.ylabel(axes_labels[i])

plt.tight_layout()
plt.show()