**Name:** Alex Medina

**File:** Spectrum Generator With Correct TP Profile

## **SECTION 0:** Setup

In [1]:
# Begin
import os
os.environ['picaso_refdata'] = r'C:\Users\Alex\Desktop\Picaso\picaso\reference' # THIS MUST GO BEFORE YOUR IMPORT STATEMENT
os.environ['PYSYN_CDBS'] = r'C:\Users\Alex\Desktop\Picaso\grp\redcat\trds' # This is for the stellar data discussed below.

#General
import numpy as np
import pandas as pd
import astropy.units as u
import matplotlib.pyplot as plt
from pathlib import Path
from itertools import product

# Picaso
from picaso import justdoit as jdi
from picaso import justplotit as jpi

# Virga
from virga import justdoit as vj
from virga import justplotit as cldplt

# Other
from bokeh.models import Legend
from bokeh.palettes import Category10
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.io import output_notebook
output_notebook()

## **SECTION 1:** Helper methods

In [2]:
def compute_mmw(profile_dict, fallback=2.36):
    """
    Computing mmw from Sonora profile if present; else use solar mmw.
    """
    mu = profile_dict.get("mu") or profile_dict.get("MU")
    if mu is None:
        return float(fallback) # Solar mmw
    mu = np.asarray(mu, float)
    if mu.ndim == 0:
        return float(mu)
    return float(np.mean(mu))

In [3]:
# Default axis assumptions for the pickle:
# arr.shape == (n_Teff=301, n_g=41, n_var=5, n_layers=91)
# We’ll provide overridable grids for Teff and g so you can adjust if needed.

def default_teff_grid():
    """
    500–2000 K in 5 K steps -> 301 points (500, 505, ..., 2000).
    """
    return np.linspace(500.0, 2000.0, 301)

def default_g_grid():
    """
    10–2010 m/s^2 in 50 m/s^2 steps -> 41 points (10, 60, ..., 2010).
    Advisor note: 'cloudy goes down to 10 m/s^2 to extrapolate' fits this.
    """
    return np.arange(10.0, 2010.0 + 1e-6, 50.0)

def load_tp_pickle(pickle_path):
    """
    Load the TP pickle (NumPy array). Allows .pickle/.npy saved with np.save.
    Returns the array without modifying anything.
    """
    p = Path(pickle_path)
    if not p.exists():
        raise FileNotFoundError(f"TP pickle not found: {p}")
    # allow_pickle=True covers both np.save pickles and plain .npy
    arr = np.load(p, allow_pickle=True)
    if arr.ndim != 4:
        raise ValueError(f"Expected 4D array, got shape {arr.shape}")
    return arr

def _nearest_index(val, grid):
    """
    Return index of nearest grid point to val.
    """
    grid = np.asarray(grid, float)
    return int(np.argmin(np.abs(grid - float(val))))

def _upsample_1d(y, factor):
    """
    Simple 1D linear upsampling by integer factor (>=1) along last axis.
    """
    if factor <= 1:
        return y
    n = y.shape[-1]
    x = np.arange(n)
    x_new = np.linspace(0, n - 1, n * factor)
    return np.interp(x_new, x, y)

def tprofile_from_pickle(arr, Teff_K,g_mps2, *, var_index_for_temperature=0,
                         teff_grid=None, g_grid=None, temperature_scale=1000.0,
                         upsample_factor=1):
    """
    Extract a temperature profile from the 4D pickle for the requested (Teff, g).

    Parameters
    ----------
    arr : np.ndarray
        4D array of shape (n_Teff, n_g, n_var, n_layers).
    Teff_K : float
        Target effective temperature in K.
    g_mps2 : float
        Target surface gravity in m/s^2.
    var_index_for_temperature : int
        Which 'var' in the 3rd axis holds temperature. Default 0.
        (All 5 look kK-like; you can change this if your advisor specifies.)
    teff_grid, g_grid : array-like or None
        Grids mapping axis indices to physical values. Defaults assume
        Teff=500..2000 step 5 K, g=10..2010 step 50 m/s^2.
    temperature_scale : float
        Scale to convert stored values to Kelvin. Defaults to *kK → K* (×1000).
    upsample_factor : int
        If >1, upsample the 91-layer profile by this factor (advisor note:
        “Multiply 91 layers by some multiplication factor”).

    Returns
    -------
    T_layers_K : 1D np.ndarray
        Temperature profile in Kelvin (length 91*upsample_factor).
        Note: This is just temperature vs. a *layer index*. The pressure grid
        isn't embedded in the file; we'll map it to PICASO's P grid later.
    """
    if teff_grid is None:
        teff_grid = default_teff_grid()
    if g_grid is None:
        g_grid = default_g_grid()

    iT = _nearest_index(Teff_K, teff_grid)
    iG = _nearest_index(g_mps2, g_grid)

    # Grab the “temperature-like” variable and convert to K
    T_kK = np.array(arr[iT, iG, var_index_for_temperature, :], dtype=float)
    T_K = temperature_scale * T_kK

    # Optional vertical refinement
    T_K = _upsample_1d(T_K, upsample_factor)

    return T_K

def remap_temperature_to_pressure_grid(T_src, P_target_bar):
    """
    We don't have a pressure grid in the pickle. The simplest physically
    sensible remap is to preserve vertical order and stretch/compress by
    *index fraction* onto the target pressure grid (monotone-in-index map).

    If later you identify a pressure variable in the pickle, swap this for an
    interpolation in logP space using that variable.
    """
    T_src = np.asarray(T_src, float)
    n_src = T_src.size
    if n_src < 2:
        return np.full_like(np.asarray(P_target_bar, float), T_src[0])

    # Map source indices [0..n_src-1] → fractional height [0..1]
    x_src = np.linspace(0.0, 1.0, n_src)

    # Target is sized to the PICASO pressure grid but also mapped to [0..1]
    n_tgt = np.asarray(P_target_bar, float).size
    x_tgt = np.linspace(0.0, 1.0, n_tgt)

    # Interpolate T over fractional height
    T_tgt = np.interp(x_tgt, x_src, T_src)
    return T_tgt

## **SECTION 2:** Main function

In [4]:
def bd_spectrum(waverange, gravity, Teff, kzz, fsed, mh, R,
                tp_var_index_for_temperature=0,
                tp_temperature_scale=1000.0,
                tp_upsample_factor=1, *,
                cloud_list, opacity_db, sonora_db, virga_db, tp_pickle_path,
                tp_teff_grid=None, tp_g_grid=None):
    
    """
    Compute a BD emission spectrum with Virga clouds.
    """

    # Opacity & inputs
    opa = jdi.opannection(wave_range=list(waverange), filename_db=opacity_db)
    bd = jdi.inputs(calculation="browndwarf")

    # Basic inputs
    bd.phase_angle(0)
    bd.gravity(gravity, gravity_unit=u.Unit('m/s**2'))
    bd.sonora(sonora_db, Teff)

    # Inject Kzz (must match pressure grid length)
    prof = bd.inputs['atmosphere']['profile']
    P_target_bar = np.asarray(prof["pressure"], float)

    # If a TP pickle is provided, overwrite the temperature profile
    arr = load_tp_pickle(tp_pickle_path)
    
    # Pull 1-D temperature (length 91 * upsample_factor) from the pickle
    T_layers_K = tprofile_from_pickle(arr, Teff, gravity,
                                      var_index_for_temperature=tp_var_index_for_temperature,
                                      teff_grid=tp_teff_grid, g_grid=tp_g_grid,
                                      temperature_scale=tp_temperature_scale,
                                      upsample_factor=tp_upsample_factor)
    
    # Map that 91*(factor) “by-layer” temperature onto PICASO's pressure grid
    T_on_P = remap_temperature_to_pressure_grid(T_layers_K, P_target_bar)

    # Install into the PICASO profile
    bd.inputs["atmosphere"]["profile"]["temperature"] = [float(v) for v in T_on_P]

    # Re-grab updated T and compute MMW, inject with kzz
    prof = bd.inputs['atmosphere']['profile']
    T_now = np.asarray(prof["temperature"], float)
    mmw   = compute_mmw(prof)

    # Inject Kzz (length must match P grid)
    bd.inputs["atmosphere"]["profile"]["kz"] = [float(kzz)] * len(P_target_bar)

    # Continue with clouds as before
    bd.virga(cloud_list, virga_db, fsed=float(fsed), mh=mh, mmw=mmw)

    out = bd.spectrum(opa, full_output=True)

    # Convert to F_nu, then regrid to constant R in wavenumber
    wn, th = out["wavenumber"], out["thermal"]  # cm^-1 and erg/cm^2/s/cm
    # Convert to F_nu on a wavelength grid first
    wm = 1e4 / wn
    flamy    = th * 1e-8  # per angstrom instead of per cm
    sp = jdi.psyn.ArraySpectrum(wm, flamy, 
                                waveunits='um',
                                fluxunits='FLAM')
    sp.convert('um')
    sp.convert('Fnu')  # erg/cm^2/s/Hz

    wav_um, F_nu = sp.wave, sp.flux # micron and erg/cm^2/s/Hz
    out['fluxnu'] = F_nu

    # Regrid at constant resolving power in WAVENUMBER space
    k_cm1_in = 1e4 / wav_um
    k_cm1_rg, F_nu_rg = jdi.mean_regrid(k_cm1_in, F_nu, R=R)  # returns k in cm^-1

    # Convert back to wavelength (micron) for plotting
    lam_um_rg = 1e4 / k_cm1_rg
    # Ensure ascending wavelength for plotting
    order = np.argsort(lam_um_rg)
    lam_um_rg = lam_um_rg[order]
    F_nu_rg   = F_nu_rg[order]

    out['regridx'] = lam_um_rg
    out['regridy'] = F_nu_rg
    return lam_um_rg, F_nu_rg

## **SECTION 3:** Configuration

In [5]:
# To see what clouds are availible
vj.available()

['Al2O3',
 'CH4',
 'CaAl12O19',
 'CaTiO3',
 'Cr',
 'Fe',
 'H2O',
 'KCl',
 'Mg2SiO4',
 'MgSiO3',
 'MnS',
 'NH3',
 'Na2S',
 'SiO2',
 'TiO2',
 'ZnS']

In [6]:
# Directories
input_dir  = r"C:\Users\Alex\Desktop\Picaso\data\sonora" # Sonora db
virga_dir  = r"C:\Users\Alex\Desktop\Picaso\data\virga" # Virga
opacit_dir = None # Opacity db
# WILL CHANGE ONCE RUNNING ON NEWTON AND TP CORRECT
# r"/groups/tkaralidi/opacity_500k_for_R5000_egpoutput.db"
#output_dir = r"C:\Users\Alex\Desktop\Picaso\outputs" # Output store
output_dir = Path(r"C:\Users\Alex\Desktop\Picaso\outputs")
pickle_path = r"C:\Users\Alex\Desktop\Picaso\NN_project\bobcat_to_diamondback.pickle"

# Wavelength and resolution
wav_range       = (0.3, 3.0) # microns
res_R           = 300 
# WILL CHANGE ONCE RUNNING ON NEWTON AND TP CORRECT
# res_R = 5000

# Cloud microphysics
MH              = 1.0   # [M/H] metallicity factor ~ solar
clouds          = ['Cr','MgSiO3','MnS']

# Grid space
Teff_range      = (700, 2000)     # K
gravity_range   = (50.0, 1500.0)  # m/s^2
logg_range      = (1.0, 10.0)
fsed_range      = (1.0, 4.0)      # dimensionless
Kzz_range       = (1e8, 3e10)     # cm^2/s

# Sampling
seed            = 43
N_spectra       = 5 # Number of spectra to generate for sampling

# Knobs
save_per_case   = True   # Also save per-spectrum files with friendly names
VERBOSE         = True

## **SECTION 4:** Execute ONE Model

In [7]:
# Since just ONE model, we need just ONE parameter set
Teff_one      = 1500     # K
gravity_one   = 100      # m/s^2
fsed_one      = 1.0      # dimensionless
kzz_one   = 1e10                   # cm^2/s

# Print parameters
if VERBOSE:
    print("Chosen single-model parameters:")
    print(f"Teff  = {Teff_one:.1f} K")
    print(f"g     = {gravity_one:.1f} m/s^2")
    print(f"f_sed = {fsed_one:.2f}")
    print(f"Kzz   = {kzz_one:.3e} cm^2/s")
    print(f"[M/H] = {MH:.2f}")
    print(f"R     = {res_R:d}")
    print(f"Waverange (micron): {wav_range[0]}–{wav_range[1]}")

# Run ONE model (bd_spectrum returns wavenumber [cm^-1] and Fnu regridded at constant R in wavenumber)
wm, Fnu = bd_spectrum(waverange=wav_range,
                      gravity=gravity_one,
                      Teff=Teff_one,
                      kzz=kzz_one,
                      fsed=fsed_one,
                      mh=MH,
                      R=res_R,
                      tp_var_index_for_temperature=0,
                      tp_temperature_scale=1000.0,
                      tp_upsample_factor=1,
                      cloud_list=clouds,
                      opacity_db=opacit_dir,
                      sonora_db=input_dir,
                      virga_db=virga_dir,
                      tp_pickle_path=pickle_path)

# For plotting
one_model_meta = {"Teff_K": Teff_one, "g_mps2": gravity_one, "fsed": fsed_one, "Kzz_cm2s": kzz_one,
                  "MH": MH, "R": res_R, "wav_um_min": float(wm.min()), "wav_um_max": float(wm.max())}

Chosen single-model parameters:
Teff  = 1500.0 K
g     = 100.0 m/s^2
f_sed = 1.00
Kzz   = 1.000e+10 cm^2/s
[M/H] = 1.00
R     = 300
Waverange (micron): 0.3–3.0
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid


In [8]:
# Single plot

src = ColumnDataSource(data=dict(lam_um=wm, Fnu=Fnu))

title_txt = (f"BD Spectrum (Thermal Emission): "
             f"Teff={one_model_meta['Teff_K']:.0f} K, "
             f"g={one_model_meta['g_mps2']:.0f} m s^-2, "
             f"f_sed={one_model_meta['fsed']:.2f}, "
             f"Kzz={one_model_meta['Kzz_cm2s']:.1e} cm^2 s^-2")

p = figure(title=title_txt,
           x_axis_label="Wavelength (micron)",
           y_axis_label="Fnu (erg cm^-2 s^-1 Hz^-1)",
           sizing_mode="stretch_width",
           height=420,
           tools="pan,wheel_zoom,box_zoom,reset,save")

# Choose a simple line color from Category10
color = Category10[3][0]
p.line('lam_um', 'Fnu', source=src, line_width=2, color=color)

show(p)

## **SECTION 5:** Execute MULTIPLE Models

In [9]:
# Parameter space
# CAN EDIT TO PROPER PARAMETER SPACE
# AN ARRAY OF SPACED TEFF, GRAV, FSED, AND KZZ
Teff_p   = [1500]
grav_p   = [100]
fsed_p   = [1.0, 2.0]
kzz_p    = [1e9, 1e10]

grid = list(product(Teff_p, grav_p, fsed_p, kzz_p))
N_spectra = len(grid)

# Parameters
# WILL ADD A SPACING PRINT TO SEE HOW MANY SAMPLES PER
if VERBOSE:
    print(f"Sampling N={N_spectra} spectra at:")
    print(f"Teff_min={Teff_p[0]:.0f} K, Teff_max={Teff_p[-1]:.0f} K")
    print(f"grav_min={grav_p[0]:.0f} m s^-2, grav_max={grav_p[-1]:.0f} m s^-2")
    print(f"fsed_min={fsed_p[0]:.0f}, fsed_max={fsed_p[-1]:.0f}")
    print(f"Kzz_min={kzz_p[0]:.1e}, cm^2 s^-1, {kzz_p[-1]:.1e} cm^2 s^-1")

def run_bd_point(Teff, g, fsed, kzz):
    out = bd_spectrum(waverange=wav_range,
                      gravity=float(g),
                      Teff=float(Teff),
                      kzz=float(kzz),
                      fsed=float(fsed),
                      mh=MH,
                      R=res_R,
                      tp_var_index_for_temperature=0,
                      tp_temperature_scale=1000.0,
                      tp_upsample_factor=1,
                      cloud_list=clouds,
                      opacity_db=opacit_dir,
                      sonora_db=input_dir,
                      virga_db=virga_dir,
                      tp_pickle_path=pickle_path)
    if not (isinstance(out, tuple) and len(out) >= 2):
        raise RuntimeError("bd_spectrum must return at least (x, Fnu).")
    lam_um = np.asarray(out[0], float)
    Fnu    = np.asarray(out[1], float)
    idx    = np.argsort(lam_um)
    return lam_um[idx], Fnu[idx]

multi_specs = []
lam_ref = None
for (Teff_i, g_i, fsed_i, kzz_i) in grid:
    lam_i, F_i = run_bd_point(Teff_i, g_i, fsed_i, kzz_i)

    if lam_ref is None:
        lam_ref = lam_i.copy()
    else:
        # If needed, interpolate to the reference wavelength grid
        if (lam_i.shape != lam_ref.shape) or (not np.allclose(lam_i, lam_ref, rtol=0, atol=1e-12)):
            F_i = np.interp(lam_ref, lam_i, F_i)
            lam_i = lam_ref

    multi_specs.append(dict(
        lam_um=lam_i,
        Fnu=F_i,
        Teff=float(Teff_i),
        g=float(g_i),
        fsed=float(fsed_i),
        kzz=float(kzz_i)
    ))

multi_meta = {
    "R":        res_R,
    "MH":       float(MH),
    "N":        int(N_spectra),
    "lam_range": (float(lam_ref[0]), float(lam_ref[-1])),
    "Teff_grid": np.array(Teff_p, float),
    "g_grid":    np.array(grav_p, float),
    "fsed_grid": np.array(fsed_p, float),
    "kzz_grid":  np.array(kzz_p, float),
}

Sampling N=4 spectra at:
Teff_min=1500 K, Teff_max=1500 K
grav_min=100 m s^-2, grav_max=100 m s^-2
fsed_min=1, fsed_max=2
Kzz_min=1.0e+09, cm^2 s^-1, 1.0e+10 cm^2 s^-1
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid
Not doing sublayer as cloud deck at the bottom of pressure grid


In [10]:
# In the future will have a large range of TEFF AND GRAV
# SO WILL BE ABLE TO CHANGE

uniq_T = sorted({float(s["Teff"]) for s in multi_specs})
uniq_g = sorted({float(s["g"])    for s in multi_specs})

def fmt_range(vals, unit="", fmt="{:.0f}"):
    if len(vals) == 1:
        return f"{fmt.format(vals[0])}{unit}"
    return f"{fmt.format(min(vals))} to {fmt.format(max(vals))}{unit} (n={len(vals)})"

Teff_txt = fmt_range(uniq_T, unit=" K", fmt="{:.0f}")
g_txt    = fmt_range(uniq_g, unit=" m s^-2", fmt="{:.0f}")

title_txt = (f"BD spectra (thermal emisson):"
             f"N={multi_meta['N']},"
             f"Teff={Teff_txt}"
             f"g={g_txt}")

# Palette that can scale beyond 10 lines (repeats after 20)
# WILL ONLY BE RELEVANT FOR THIS BUT NOT FOR FUTURE
def choose_palette(n):
    if n <= 3:   # Category10[3] is more distinct than slicing [10]
        base = Category10[3]
        return base[:n]
    if n <= 10:
        return Category10[10][:n]
    if n <= 20:
        return Category20[20][:n]
    # Repeat
    reps = int(np.ceil(n/20))
    return (Category20[20] * reps)[:n]

palette = choose_palette(len(multi_specs))

# Figure
p = figure(title=title_txt,
           x_axis_label="Wavelength (micron)",
           y_axis_label="Fν (erg cm^-2 s^-1 Hz^-1)",
           sizing_mode="stretch_width",
           height=520,
           tools="pan,wheel_zoom,box_zoom,reset,save")

# If you later include multiple Teff/g, include them in the legend only when varied
vary_T = len(uniq_T) > 1
vary_g = len(uniq_g) > 1

for i, s in enumerate(multi_specs):
    src = ColumnDataSource(dict(lam_um=s["lam_um"], Fnu=s["Fnu"]))
    parts = []
    if vary_T: parts.append(f"T={s['Teff']:.0f} K")
    if vary_g: parts.append(f"g={s['g']:.0f} m s⁻²")
    parts.append(f"f_sed={s['fsed']:.2g}")
    parts.append(f"Kzz={s['kzz']:.1e}")
    label = ", ".join(parts)

    p.line('lam_um', 'Fnu', source=src, line_width=2,
           color=palette[i % len(palette)], legend_label=label)

p.legend.location = "top_right"
p.legend.click_policy = "hide"
p.legend.label_text_font_size = "10pt"

show(p)

## **SECTION 6:** Generating and Saving Multple Models in .NPZ

In [11]:
# Build arrays X (params) and Y (spectra), and confirm a common wavelength grid
N = len(multi_specs)
lam_ref = multi_specs[0]["lam_um"].astype(float)
L = lam_ref.size

# [Teff, g, f_sed, Kzz]
X = np.zeros((N, 4), dtype=float)
Y = np.zeros((N, L), dtype=float)

for i, s in enumerate(multi_specs):
    lam_i = s["lam_um"].astype(float)
    Fnu_i = s["Fnu"].astype(float)

    # If any spectrum differs in wavelength grid, interpolate to lam_ref
    if lam_i.shape != lam_ref.shape or not np.allclose(lam_i, lam_ref, rtol=0, atol=0):
        Fnu_i = np.interp(lam_ref, lam_i, Fnu_i)

    X[i, :] = [s["Teff"], s["g"], s["fsed"], s["kzz"]]
    Y[i, :] = Fnu_i

# Build a readable master filename (supports single or multiple Teff/g)
uniq_T = sorted({float(s["Teff"]) for s in multi_specs})
uniq_g = sorted({float(s["g"])    for s in multi_specs})

def _fmt_range(vals, key):
    if len(vals) == 1:
        return f"{int(vals[0])}"
    return f"{int(min(vals))}-{int(max(vals))}"

T_tag = _fmt_range(uniq_T, "T")
G_tag = _fmt_range(uniq_g, "G")

# Master file name (includes clouds tag)
master_name = f"T{T_tag}_G{G_tag}_N{N}_allclouds.npz"
master_path = output_dir / master_name

# Save master with a few helpful extras
np.savez_compressed(
    master_path,
    x=X,                       # (N,4): [Teff, g, f_sed, Kzz]
    y=Y,                       # (N,L): spectra on common lam_ref
    wavelength_um=lam_ref,     # (L,)
    clouds=np.array(clouds),   # list of cloud species used
    MH=float(MH),
    R=int(res_R),
)
print(f"Saved master: {master_path}")

# Save per-case files (short, readable names + allclouds tag)
if save_per_case:
    for i in range(N):
        Teff_i, g_i, fsed_i, kzz_i = X[i]
        fname = (
            f"T{int(Teff_i)}_G{int(g_i)}_fsed{fsed_i:.2f}_"
            f"Kzz{float(kzz_i):.2e}_allclouds.npz"
        )
        fpath = output_dir / fname
        np.savez_compressed(
            fpath,
            x=X[i],                   # (4,)
            y=Y[i],                   # (L,)
            wavelength_um=lam_ref,    # (L,)
            clouds=np.array(clouds),
            MH=float(MH),
            R=int(res_R),
        )
    print(f"Saved {N} per-case files to {output_dir}")

Saved master: C:\Users\Alex\Desktop\Picaso\outputs\T1500_G100_N4_allclouds.npz
Saved 4 per-case files to C:\Users\Alex\Desktop\Picaso\outputs


## **SECTION 7:** Sample Stats

In [12]:
def inspect_npz(npz_path, show_table=True, max_rows=10):
    """
    Print shapes, input ranges, and flux stats for a saved .npz.
    Works for both per-case (x=(4,), y=(L,)) and master (x=(N,4), y=(N,L)).
    """
    npz_path = Path(npz_path)
    d = np.load(npz_path)

    # Normalize to batch form
    lam = d["wavelength_um"].astype(float)
    x_raw = d["x"]
    y_raw = d["y"]

    if x_raw.ndim == 1:   # per-case
        X = x_raw[None, :].astype(float)   # (1,4)
        Y = y_raw[None, :].astype(float)   # (1,L)
    else:                 # master
        X = x_raw.astype(float)            # (N,4)
        Y = y_raw.astype(float)            # (N,L)

    # Shapes
    print(f"X shape: {X.shape}  (columns: Teff[K], g[m/s^2], f_sed[-], Kzz[cm^2/s])")
    print(f"Y shape: {Y.shape}  (flux_nu per spectrum)")
    print(f"λ shape: {lam.shape} (micron)\n")

    # Input ranges
    Teff = X[:, 0]; g = X[:, 1]; fsed = X[:, 2]; Kzz = X[:, 3]
    print("Input ranges:")
    print(f"  Teff [K]    : {np.min(Teff):.1f} to {np.max(Teff):.1f}")
    print(f"  g [m/s^2]   : {np.min(g):.2f} to {np.max(g):.2f}")
    print(f"  f_sed [-]   : {np.min(fsed):.2f} to {np.max(fsed):.2f}")
    print(f"  Kzz [cm^2/s]: {np.min(Kzz):.3e} to {np.max(Kzz):.3e}\n")

    # Flux stats across all spectra
    ymin = np.min(Y)
    ymax = np.max(Y)
    print("Flux stats across all spectra:")
    print(f"  min: {ymin:.6e}  max: {ymax:.6e}\n")

    # Optional small table of inputs
    if show_table:
        df = pd.DataFrame(
            X,
            columns=["Teff_K", "g_mps2", "f_sed", "Kzz_cm2s"],
            index=pd.Index(range(X.shape[0]), name="spectrum_idx"),
        )
        # Show a compact view
        print(df.head(max_rows).to_string())

    return X, Y, lam

In [13]:
# Examples
# Per-case file:
inspect_npz(r"C:\Users\Alex\Desktop\Picaso\outputs\T1500_G100_fsed1.00_Kzz1.00e+09_allclouds.npz")

# Master file:
#inspect_npz(r"C:\Users\Alex\Desktop\Picaso\outputs\T1700_G100_MH1.00_R300_N5.npz", show_table=True, max_rows=10)

X shape: (1, 4)  (columns: Teff[K], g[m/s^2], f_sed[-], Kzz[cm^2/s])
Y shape: (1, 691)  (flux_nu per spectrum)
λ shape: (691,) (micron)

Input ranges:
  Teff [K]    : 1500.0 to 1500.0
  g [m/s^2]   : 100.00 to 100.00
  f_sed [-]   : 1.00 to 1.00
  Kzz [cm^2/s]: 1.000e+09 to 1.000e+09

Flux stats across all spectra:
  min: 7.787852e-21  max: 1.039810e-06

              Teff_K  g_mps2  f_sed      Kzz_cm2s
spectrum_idx                                     
0             1500.0   100.0    1.0  1.000000e+09


(array([[1.5e+03, 1.0e+02, 1.0e+00, 1.0e+09]]),
 array([[7.78785204e-21, 8.67852697e-21, 9.82206681e-21, 9.53805974e-20,
         1.24280143e-20, 1.38258564e-20, 1.55545508e-20, 1.75550818e-20,
         1.99896309e-20, 3.10171223e-20, 3.59406638e-19, 2.88489652e-20,
         3.21787671e-20, 3.63682511e-20, 4.12956835e-20, 4.70814603e-20,
         5.39408354e-20, 6.22117110e-20, 7.24963521e-20, 8.64723995e-20,
         2.12419431e-19, 1.97160308e-18, 1.54907439e-19, 1.98431888e-19,
         2.75631855e-19, 4.28629567e-19, 8.11762163e-19, 2.36818807e-18,
         7.92175640e-17, 1.99769062e-17, 2.48437024e-18, 1.21928522e-18,
         8.48010098e-19, 6.98301005e-19, 6.33448765e-19, 6.16229085e-19,
         6.29370435e-19, 6.62797904e-19, 7.15040115e-19, 7.92768761e-19,
         9.63069350e-19, 6.85267193e-17, 1.50877873e-18, 1.21442316e-18,
         1.30903916e-18, 1.44921320e-18, 1.61667775e-18, 1.80935725e-18,
         2.02838018e-18, 2.27598341e-18, 2.55504815e-18, 2.86890591e-18,
   

## **SECTION 8:** Plot

In [14]:
def plot_npz_file(npz_path, idx=None, title=None):
    """
    Plot a single .npz file.
    - Per-case file: ignores idx (since it stores one spectrum).
    - Master file: set idx to choose which row to plot.
    """
    d = np.load(npz_path, allow_pickle=True)
    lam = d["wavelength_um"].astype(float)

    # Master vs per-case
    if d["y"].ndim == 2:
        if idx is None:
            idx = 0
        F = d["y"][idx].astype(float)
        x_row = d["x"][idx].astype(float)
    else:
        F = d["y"].astype(float)
        x_row = d["x"].astype(float)

    if title is None:
        Teff, g, fsed, kzz = x_row
        title = f"{Path(npz_path).name} — Teff={Teff:.0f}, g={g:.0f}, f_sed={fsed:.2f}, Kzz={kzz:.2e}"

    p = figure(title=title,
               x_axis_label="Wavelength (micron)",
               y_axis_label="Fnu (erg cm^-2 s^-1 Hz^-1)",
               sizing_mode="stretch_width", height=420,
               tools="pan,wheel_zoom,box_zoom,reset,save")
    src = ColumnDataSource(dict(lam_um=lam, Fnu=F))
    p.line('lam_um', 'Fnu', source=src, line_width=2)
    show(p)

def overlay_npz_from_folder(folder, max_plots=5):
    """
    Overlay up to max_plots spectra from per-case files OR randomly sample from master files.
    """
    folder = Path(folder)
    paths = sorted(folder.glob("*.npz"))
    if not paths:
        print("No .npz files found.")
        return

    p = figure(title=f"Overlay check — up to {max_plots} spectra",
               x_axis_label="Wavelength (micron)",
               y_axis_label="Fnu (erg cm^-2 s^-1 Hz^-1)",
               sizing_mode="stretch_width", height=520,
               tools="pan,wheel_zoom,box_zoom,reset,save")

    pal = Category10[max(3, min(10, max_plots))]
    plotted = 0

    for path in paths:
        if plotted >= max_plots:
            break
        d = np.load(path, allow_pickle=True)
        lam = d["wavelength_um"].astype(float)
        y = d["y"]

        # Per-case
        if y.ndim == 1:
            F = y.astype(float)
            p.line(lam, F, line_width=2, color=pal[plotted % len(pal)],
                   legend_label=path.name)
            plotted += 1

        # Master: sample a few rows
        else:
            rows = min(y.shape[0], max_plots - plotted)
            for i in range(rows):
                F = y[i].astype(float)
                p.line(lam, F, line_width=2, color=pal[plotted % len(pal)],
                       legend_label=f"{path.name} [i={i}]")
                plotted += 1
                if plotted >= max_plots:
                    break

    p.legend.location = "top_right"
    p.legend.click_policy = "hide"
    show(p)

In [15]:
# For a single file
plot_npz_file(r"C:\Users\Alex\Desktop\Picaso\outputs\T1500_G100_fsed1.00_Kzz1.00e+09_allclouds.npz")

# For an area of the master path
plot_npz_file(rf"{master_path}", idx=1)

# Overall
overlay_npz_from_folder(r"C:\Users\Alex\Desktop\Picaso\outputs", max_plots=6)