In [1]:
import calpit

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
import torch
import torch.nn as nn
from sbi.inference import SNPE
from sbi import utils as utils
from astropy.io import fits
from astropy.table import Table, Column
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load the two input catalogues into in-memory astropy Tables.
# Each FITS file is opened in a context manager so its file handle is released
# as soon as the needed HDUs have been copied into Tables.
# NOTE(review): these are absolute, user-specific paths; consider a configurable data directory.

# Lower-SNR cut (catalogue from Mairead)
f1 = "/raid/users/heigerm/catalogues/sptab_spspectra_rvtab_lowsnr.fits"
with fits.open(f1) as HDUlist1:
    # DESI
    sp_tab1 = Table(HDUlist1['SPTAB'].data)
    # APOGEE table, addressed by position here (the second file addresses it by name).
    # NOTE(review): confirm that HDU index 4 of this file is the APOGEE extension.
    apogee_tab1 = Table(HDUlist1[4].data)
    # DESI SP Spectra
    spectra1 = Table(HDUlist1['SPECTRA_SP'].data)

# Original 7k sample (SNR larger than 50)
f2 = "/raid/users/heigerm/catalogues/sp_x_apogee_x_spspectra_rvtab.fits"
with fits.open(f2) as HDUlist2:
    # DESI
    sp_tab2 = Table(HDUlist2['SPTAB'].data)
    # APOGEE
    apogee_tab2 = Table(HDUlist2['APOGEEDR17'].data)
    # DESI SP Spectra
    spectra2 = Table(HDUlist2['SPECTRA_SP'].data)

In [4]:
# Cross-match
from astropy.coordinates import SkyCoord
from astropy import units as u

# Sky positions of the low-SNR spectra and of the APOGEE entries from the same file.
spectra_coords = SkyCoord(ra=spectra1['TARGET_RA']*u.degree, dec=spectra1['TARGET_DEC']*u.degree)
apogee_coords = SkyCoord(ra=apogee_tab1['RA']*u.degree, dec=apogee_tab1['DEC']*u.degree)

# Maximum on-sky separation for a spectrum/APOGEE pair to count as the same object.
tolerance = 1 * u.arcsec

# Nearest APOGEE entry (index into apogee_tab1) and its separation, for every spectrum.
nearest_apogee_idx, sky_separation, _ = spectra_coords.match_to_catalog_sky(apogee_coords)
matches_within_tolerance = sky_separation < tolerance

# Keep only the matched rows. The same boolean mask is applied to spectra1 and sp_tab1,
# which relies on those two tables being row-aligned.
spectra1_matched = spectra1[matches_within_tolerance]
sp_tab1_matched = sp_tab1[matches_within_tolerance]
apogee_tab1_matched = apogee_tab1[nearest_apogee_idx[matches_within_tolerance]]

In [5]:
from astropy.table import Table, Column, unique, vstack

# APOGEE columns carried forward from both samples.
apogee_columns = ['APOGEE_ID', 'RA', 'DEC', 'FE_H', 'FE_H_ERR']

apogee_tab1_selected = apogee_tab1_matched[apogee_columns]
apogee_tab2_selected = apogee_tab2[apogee_columns]

# Stack the low-SNR rows first, then the original sample.
apogee_tab_combined = vstack([apogee_tab1_selected, apogee_tab2_selected])

In [6]:
# Columns present in both spectra tables, kept in the order of spectra1_matched.
# An ordered list is used instead of a set so the column order of the stacked table
# is the same on every run (set iteration order of strings varies between sessions).
spectra2_colnames = set(spectra2.colnames)
common_cols = [name for name in spectra1_matched.colnames if name in spectra2_colnames]

# Select only the common columns from each table, then stack low-SNR rows first.
spectra1_common = spectra1_matched[common_cols]
spectra2_common = spectra2[common_cols]
spectra_combined = vstack([spectra1_common, spectra2_common])

In [7]:
# Columns present in both stellar-parameter tables, kept in the order of sp_tab1_matched.
# An ordered list is used instead of a set so the column order of the stacked table
# is the same on every run (set iteration order of strings varies between sessions).
sp_tab2_colnames = set(sp_tab2.colnames)
common_cols = [name for name in sp_tab1_matched.colnames if name in sp_tab2_colnames]

# Select only the common columns from each table, then stack low-SNR rows first.
sp1_common = sp_tab1_matched[common_cols]
sp2_common = sp_tab2[common_cols]
sp_combined = vstack([sp1_common, sp2_common])

In [8]:
# Drop rows whose [Fe/H] label or its uncertainty is unusable:
# NaN, exactly zero, or (for the label) larger than 10.
feh = apogee_tab_combined['FE_H']
feh_err = apogee_tab_combined['FE_H_ERR']

bad_label = np.isnan(feh) | (feh > 10) | (feh == 0)
bad_error = (feh_err == 0) | np.isnan(feh_err)
abnormal_rows = np.where(bad_label | bad_error)[0]

# Boolean keep-mask: True everywhere except at the abnormal row indices.
mask = np.ones(len(apogee_tab_combined), dtype=bool)
mask[abnormal_rows] = False

# The three combined tables are filtered with the same mask so they stay row-aligned.
apogee_tab_masked = apogee_tab_combined[mask]
spectra_masked = spectra_combined[mask]
sp_masked = sp_combined[mask]

In [9]:
# Spectra normalization: for every spectrum, merge the B/R/Z arms into one
# wavelength-ordered array and scale the flux as (flux - median) / IQR.
#
# Results are collected in preallocated 1-D object arrays (one array per spectrum)
# and the Table is built once at the end, instead of growing it with add_row()
# inside the loop.
n_spectra = len(spectra_masked)
flux_per_spectrum = np.empty(n_spectra, dtype=object)
wavelength_per_spectrum = np.empty(n_spectra, dtype=object)

for i, row in enumerate(spectra_masked):
    # Combine and sort flux and wavelength from all arms
    combined_flux = np.concatenate([row['flx_B'], row['flx_R'], row['flx_Z']])
    combined_wavelength = np.concatenate([row['B_WAVELENGTH'], row['R_WAVELENGTH'], row['Z_WAVELENGTH']])
    sort_order = np.argsort(combined_wavelength)
    combined_flux, combined_wavelength = combined_flux[sort_order], combined_wavelength[sort_order]

    # Normalize flux with the median and inter-quartile range of the whole spectrum
    global_median = np.median(combined_flux)
    q25, q75 = np.percentile(combined_flux, [25, 75])
    IQR = q75 - q25
    # NOTE(review): a spectrum with IQR == 0 or NaN flux yields inf/NaN here; not guarded.
    normalized_flux = (combined_flux - global_median) / IQR

    flux_per_spectrum[i] = normalized_flux
    wavelength_per_spectrum[i] = combined_wavelength

gb_combined_spectra = Table(
    [flux_per_spectrum, wavelength_per_spectrum],
    names=['combined_flux', 'combined_wavelength'],
    dtype=['object', 'object'],
)

In [10]:
# Parameters: the APOGEE [Fe/H] value of every retained row.
theta = np.array(apogee_tab_masked["FE_H"])

# Input spectra: turn the per-row flux arrays into a single float array,
# one row per spectrum.
flux = np.array(gb_combined_spectra['combined_flux'])
X = np.array([np.asarray(spectrum, dtype=float) for spectrum in flux])

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the spectra for testing; fixed random_state for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X, theta, test_size=0.2, random_state=42
)

# Convert to PyTorch tensors. The labels get a trailing axis so each one is a
# length-1 parameter vector.
X_train = torch.Tensor(X_train)
X_test = torch.Tensor(X_test)
y_train = torch.Tensor(y_train).unsqueeze(-1)
y_test = torch.Tensor(y_test).unsqueeze(-1)

In [12]:
# Grid of [Fe/H] values on which the densities are evaluated: 401 points from -3 to 1
# (spacing 0.01). linspace fixes the number of points and both end points exactly,
# unlike arange with a float step and a padded stop value.
z_grid = np.linspace(-3, 1, 401)
# Same grid as a float32 column tensor, one parameter value per row.
Z_grid = torch.tensor(z_grid, dtype=torch.float32).unsqueeze(-1)

In [13]:
# Train a neural posterior estimator for [Fe/H] given a spectrum.
# No prior is passed to SNPE here.
# NOTE(review): the recorded output of this cell warns that, for a one-dimensional
# output space, this flow ("maf") is limited to Gaussians.
inference = SNPE(density_estimator="maf")
# Labels are passed first and spectra second
# (presumably sbi's (theta, x) argument order; confirm against the sbi docs).
inference.append_simulations(y_train, X_train)
# Training is stochastic and no seed is set in this notebook.
density_estimator = inference.train()
posterior = inference.build_posterior(density_estimator)

# TODO: save the trained posterior so it does not have to be retrained

  warn("In one-dimensional output space, this flow is limited to Gaussians")


 Neural network successfully converged after 104 epochs.

In [14]:
def _pdf_on_grid(spectra):
    """Return the sbi posterior density on Z_grid for each spectrum in `spectra`.

    One row per spectrum, one column per grid point; values are exp(log_prob).
    """
    return np.array([np.exp(posterior.log_prob(Z_grid, spectra[i])) for i in range(len(spectra))])


# pdf from sbi for training
cde_calib = _pdf_on_grid(X_train)
# TODO: np.save
# pdf from sbi for testing
cde_test = _pdf_on_grid(X_test)

In [15]:
# Area under each training-set density on z_grid (trapezoidal rule).
# (Open question left by the author: "or Z?")
norm = np.trapz(cde_calib, z_grid)
# Rows that integrate to exactly zero are divided by 1 instead, so they stay all-zero.
norm = np.where(norm == 0, 1, norm)
cde_calib = cde_calib / norm[:, None]

In [16]:
# Area under each test-set density on z_grid (trapezoidal rule).
norm = np.trapz(cde_test, z_grid)
# Rows that integrate to exactly zero are divided by 1 instead, so they stay all-zero.
norm = np.where(norm == 0, 1, norm)
cde_test = cde_test / norm[:, None]

In [17]:
# Inspect the normalised test-set densities: one row per test spectrum, one column per z_grid point.
cde_test

array([[8.65442007e-03, 9.65019393e-03, 1.07477908e-02, ...,
        1.46945982e-24, 1.02318291e-24, 7.11590699e-25],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        3.47157633e-41, 7.34560553e-42, 1.53442160e-42],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        9.22338879e-12, 5.46613566e-12, 3.22288966e-12],
       [1.68473970e-29, 3.48275120e-29, 7.17126519e-29, ...,
        2.14694394e-39, 9.22571707e-40, 3.94881805e-40],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
        8.08361604e-31, 1.49366541e-31, 2.70551339e-32]])

In [18]:
# MLP used by CalPit. Its input width is the number of flux values per spectrum plus one
# extra input (presumably the coverage level CalPit appends; confirm against the calpit docs).
# The spectrum length is read from X_train instead of being hard-coded, so the model
# keeps matching the data if the wavelength grid changes.
n_flux_values = X_train.shape[1]
nn_model = calpit.nn.models.MLP(
    input_dim=1 + n_flux_values,
    hidden_layers=[4000, 2000, 1000, 500, 100, 50],
    output_dim=1,
)

In [19]:
# Wrap the MLP in a CalPit object; the recorded output of this cell is its parameter summary.
calpit_model = calpit.CalPit(model=nn_model)
# Note: this is a lot of parameters (the recorded summary reports 65,714,707 trainable).

+------------------+------------+
|     Modules      | Parameters |
+------------------+------------+
| layers.0.weight  |  55152000  |
|  layers.0.bias   |    4000    |
| layers.1.weight  |     1      |
| layers.2.weight  |  8000000   |
|  layers.2.bias   |    2000    |
| layers.3.weight  |     1      |
| layers.4.weight  |  2000000   |
|  layers.4.bias   |    1000    |
| layers.5.weight  |     1      |
| layers.6.weight  |   500000   |
|  layers.6.bias   |    500     |
| layers.7.weight  |     1      |
| layers.8.weight  |   50000    |
|  layers.8.bias   |    100     |
| layers.9.weight  |     1      |
| layers.10.weight |    5000    |
|  layers.10.bias  |     50     |
| layers.11.weight |     1      |
| layers.12.weight |     50     |
|  layers.12.bias  |     1      |
+------------------+------------+
Total Trainable Params: 65714707


In [None]:
# Train the CalPit model on the training spectra, their [Fe/H] labels and the
# normalised sbi densities evaluated on z_grid.
# NOTE(review): cde_calib was computed on the same X_train that the sbi posterior was trained on.
trained_model = calpit_model.fit(X_train, # input features (training spectra)
                                 y_train, # true labels ([Fe/H]) for the training spectra
                                 cde_calib, # initial CDEs/pdfs for the training spectra
                                 z_grid, # grid on which the CDEs/pdfs are evaluated
                                 n_epochs=1000, # maximum number of epochs to train
                                 patience = 20,
                                 batch_size = 128,
                                 lr = 1e-6,
                                 ) # See the calpit documentation for additional training hyperparameters
# TODO: plot train_loss; it should decrease over time. If it does not, check y_train.
# TODO: use all data


[   1/1000] | train_loss: 0.71936 |valid_bce: 0.69420 | 
Validation loss decreased (inf --> 0.694198).  Saving model ...
[   2/1000] | train_loss: 0.68918 |valid_bce: 0.68659 | 
Validation loss decreased (0.694198 --> 0.686587).  Saving model ...
[   3/1000] | train_loss: 0.68672 |valid_bce: 0.68901 | 
EarlyStopping counter: 1 out of 20
[   4/1000] | train_loss: 0.68534 |valid_bce: 0.68550 | 
Validation loss decreased (0.686587 --> 0.685500).  Saving model ...
[   5/1000] | train_loss: 0.68376 |valid_bce: 0.67829 | 
Validation loss decreased (0.685500 --> 0.678294).  Saving model ...
[   6/1000] | train_loss: 0.68055 |valid_bce: 0.67799 | 
Validation loss decreased (0.678294 --> 0.677986).  Saving model ...
[   7/1000] | train_loss: 0.68072 |valid_bce: 0.67800 | 
EarlyStopping counter: 1 out of 20
[   8/1000] | train_loss: 0.67731 |valid_bce: 0.68824 | 
EarlyStopping counter: 2 out of 20
[   9/1000] | train_loss: 0.67873 |valid_bce: 0.67753 | 
Validation loss decreased (0.677986 --> 0.

In [1]:
# NOTE(review): this cell needs the imports and the trained calpit_model from the cells above
# in the same kernel session; the recorded NameError comes from running it on a fresh kernel.
cov_grid = np.linspace(0,1,201) # The grid of (mis)-coverage values on which to evaluate the conditional PIT distribution

pit_conditional_test = calpit_model.predict(x_test = X_test, cov_grid=cov_grid) # Predict the local PIT distribution for the whole test set

NameError: name 'np' is not defined

In [None]:
# Inspect the values returned by calpit_model.predict for X_test on cov_grid.
pit_conditional_test

In [None]:
# P-P plots of the predicted local PIT distribution for 12 randomly chosen
# test spectra, drawn (with a fixed seed) from the first 100 test objects.
SEED = 299792458

rng = np.random.default_rng(SEED)
n_candidates = len(X_test[:100])
random_idx = rng.choice(n_candidates, 12, replace=False)

fig, axs = plt.subplots(3,4, figsize=(13, 8))
axs = np.ravel(axs)

# End points of the dashed y = x reference line.
diagonal = np.linspace(0, 1, 10)

for count, index in enumerate(random_idx):
    panel = axs[count]
    panel.scatter(cov_grid, pit_conditional_test[index], s=1)
    panel.plot(diagonal, diagonal, color="k", ls="--")
    panel.set_xlim(0, 1)
    panel.set_ylim(0, 1)
    panel.set_aspect("equal")

fig.suptitle("Local PIT Distribution", fontsize=20)

# Shared axis labels, placed on the figure rather than on each panel.
fig.text(0.35,0.01,"Expected Cumulative Probability",fontsize=20)
fig.text(0.,0.2,"Empirical Cumulative Probability", rotation=90, fontsize=20)

In [None]:
# Get the recalibrated CDEs for the first n_recal test objects.
# The spectra and their initial CDEs are sliced identically so that row i of
# cde_test belongs to row i of x_test (previously only x_test was sliced).
n_recal = 100
recal_cde_test = calpit_model.transform(
    x_test=X_test[:n_recal],
    cde_test=cde_test[:n_recal],
    y_grid=z_grid,
)

In [None]:
# Normalize the recalibrated CDEs on z_grid with calpit's helper
# (the initial CDEs were normalised by hand with np.trapz in earlier cells).
new_cde =calpit.utils.normalize(recal_cde_test,z_grid) # Normalize the CDEs

In [None]:
# Inspect the normalised recalibrated CDEs.
new_cde

In [None]:
# Before/after comparison of the [Fe/H] densities for the same 12 test objects
# used in the P-P plots (random_idx indexes into the first 100 test objects).
import matplotlib.lines as mlines

fig, axs = plt.subplots(3,4, figsize=(14,8))
axs = np.ravel(axs)

for count, index in enumerate(random_idx):
    # Original (normalised) sbi density.
    axs[count].plot(z_grid, cde_test[index], c="C0")
    # Recalibrated density: plot the normalised version (new_cde) so both curves
    # are on the same scale; previously the un-normalised recal_cde_test was drawn.
    # NOTE(review): assumes calpit.utils.normalize returns an array indexed like recal_cde_test.
    axs[count].plot(z_grid, new_cde[index], c="C1")
    # y_test has a trailing length-1 axis, so extract the scalar label explicitly.
    axs[count].axvline(y_test[index].item(), 0, 1, c="k", ls="--", lw=1)
    # The inferred quantity is [Fe/H] (theta comes from the FE_H column), not redshift.
    axs[count].set_xlabel(r"[Fe/H]")
    axs[count].set_ylabel(r"$\hat{f}(\mathrm{[Fe/H]})$")

plt.tight_layout()

# Proxy artists for a single figure-level legend.
blue_line = mlines.Line2D([], [], color='C0', marker="", alpha=1, label='Original', ls="-")
orange_line = mlines.Line2D([], [], color='C1', marker='', alpha=1, label='Recalibrated', ls="-")
black_line = mlines.Line2D([], [], color='k', marker='', alpha=1, label='True\n[Fe/H]', ls="--")

fig.legend(bbox_to_anchor=[0.13,0.3], loc='upper left', handles=[blue_line,orange_line,black_line ], ncol=1, prop={'size': 12}, frameon=False, handlelength=1.0, handletextpad=0.2)
