**Name:** Alex Medina

**File:** Improvements on test run

Before anything, I need these inputs. On my local machine (Windows) the configuration is as such but in an environment like on Newton I need these not

In [1]:
import os
os.environ['picaso_refdata'] = r'C:\Users\Alex\Desktop\Picaso\picaso\reference' #THIS MUST GO BEFORE YOUR IMPORT STATEMENT
os.environ['PYSYN_CDBS'] = r'C:\Users\Alex\Desktop\Picaso\grp\redcat\trds' #this is for the stellar data discussed below.

In [2]:
#General
import numpy as np
import astropy.units as u
import pandas as pd
import argparse
from pathlib import Path
import matplotlib.pyplot as plt

# Picaso
from picaso import justdoit as jdi
from picaso import justplotit as jpi

# Virga
from virga import justdoit as vj
from virga import justplotit as cldplt

# Other
from bokeh.plotting import show, figure
from bokeh.models import Legend
from bokeh.palettes import Category10
from bokeh.io import output_notebook
output_notebook()

Having successfully gone through basic method with BD case, I am ready to improve upon it. In particular, I plan to generalize the parameters, strict grid checks, store metadata, braoden cloud species, and again save with MARGE in mind. Additionally, I will be including some toy exercies to test the full cpabilities of this script.

In [4]:
def bd_spectrum(
    wavmin, wavmax, logg_cgs, Teff,
    species, fsed, mh, mmw, R, kz_value,
    sonora_db, virga_db, clouds,
    tp_recycle=None, max_retries=2,
    return_lambda=True,
):
    """
    Compute a BD emission spectrum with optional Virga clouds.

    Parameters
    ----------
    logg_cgs : float
        log10(g) in cgs units (cm s^-2).
        Internally converted to SI for PICASO.
    kz_value : float
        Kzz [cm^2/s]; will be broadcast to the pressure grid.
    return_lambda : bool
        If True returns (lambda_um, flux_nu_R). Otherwise returns (wn_cm^-1, flux_nu_R).
    """
    # Opacity & inputs
    wave_range = [wavmin, wavmax]
    opa = jdi.opannection(wave_range=wave_range)
    bd  = jdi.inputs(calculation='browndwarf')

    # Geometry & gravity
    bd.phase_angle(0)
    g_si = (10.0**logg_cgs) / 100.0  # cgs -> m/s^2
    bd.gravity(g_si, gravity_unit=u.Unit('m/s**2'))

    # Atmosphere from Sonora
    bd.sonora(sonora_db, Teff)

    # Inject Kzz (must match pressure grid length)
    p = bd.inputs['atmosphere']['profile']['pressure']
    bd.inputs['atmosphere']['profile']['kz'] = [float(kz_value)] * len(p)

    # Optionally recycle a TP profile on retry
    if tp_recycle is not None:
        bd.inputs['atmosphere']['profile']['temperature'] = tp_recycle

    # Clouds
    if clouds:
        bd.virga(species, virga_db, fsed=fsed, mh=mh, mmw=mmw)

    # Retry loop with TP recycle handoff
    last_exc = None
    for _ in range(max_retries + 1):
        try:
            out = bd.spectrum(opa, full_output=True)
            break
        except Exception as e:
            last_exc = e
            tp_recycle = bd.inputs['atmosphere']['profile'].get('temperature')
    else:
        raise RuntimeError(f"PICASO failed after retries: {last_exc}")

    # Convert to F_nu, then regrid to constant R in wavenumber
    wn_cm, flam_per_cm = out['wavenumber'], out['thermal']     # [cm^-1], [erg/cm^2/s/cm]
    lam_um = 1e4 / wn_cm
    flamy  = flam_per_cm * 1e-8                                # erg/cm^2/s/Å

    sp = jdi.psyn.ArraySpectrum(lam_um, flamy, waveunits='um', fluxunits='FLAM')
    sp.convert('um')
    sp.convert('Fnu')  # erg/cm^2/s/Hz
    lam_um, fnu = sp.wave, sp.flux

    wn_R, fnu_R = jdi.mean_regrid(1e4/lam_um, fnu, R=R)        # constant R in wavenumber
    if return_lambda:
        # Return λ grid that matches wn_R (invert back)
        lam_R = 1e4 / wn_R
        return lam_R, fnu_R
    else:
        return wn_R, fnu_R


Configuration

In [5]:
# =====================
# Configuration
# =====================
sonora_directory = r"C:\Users\Alex\Desktop\Picaso\data\sonora"
virga_directory  = r"C:\Users\Alex\Desktop\Picaso\data\virga"
output_npz       = "test_out2.npz"

use_clouds = True

# Wavelength and resolution
wavelength_min  = 1.0
wavelength_max  = 2.5
res_R           = 300

# Cloud microphysics (global constants; species picked per-Teff below)
metallicity     = 1.0   # [M/H] ~ solar
mean_mol_weight = 2.2   # H2-dominated

# =====================
# Sampling ranges
# =====================
def sample_params(
    n, rng,
    teff_range=(700, 1400),
    logg_range=(4.0, 5.5),         # log10(g_cgs)
    fsed_range=(0.5, 5.0),
    kzz_range=(1e7, 1e11)          # cm^2/s
):
    Teff  = rng.uniform(*teff_range, size=n)
    logg  = rng.uniform(*logg_range, size=n)            # log g (cgs)
    fsed  = rng.uniform(*fsed_range, size=n)
    logkz = rng.uniform(np.log10(kzz_range[0]), np.log10(kzz_range[1]), size=n)
    kz    = 10.0**logkz
    return Teff, logg, fsed, kz

def pick_cloud_species(Teff):
    # Cool/cold: sulfides + chlorides; hotter: metals/silicates (simple heuristic)
    return ['Na2S','KCl'] if Teff < 1100 else ['MnS','Fe']

Building the new dataset that:
- Samples parameters
- Runs spectra and retries
- Stores metadata
- Logs flux in parallel array

In [6]:
import json

def build_dataset(n_spectra, seed, cfg):
    rng = np.random.default_rng(seed)

    Teff, logg, fsed, kz = sample_params(
        n_spectra, rng,
        teff_range=cfg['teff_range'],
        logg_range=cfg['logg_range'],
        fsed_range=cfg['fsed_range'],
        kzz_range=cfg['kzz_range']
    )

    # First run: choose species based on Teff[0]
    species0 = pick_cloud_species(Teff[0]) if cfg['use_clouds'] else []
    lam0, y0 = bd_spectrum(
        cfg['wavmin'], cfg['wavmax'], logg[0], Teff[0],
        tuple(species0), fsed[0], cfg['mh'], cfg['mmw'],
        cfg['R'], kz[0], cfg['sonora'], cfg['virga'], cfg['use_clouds'],
        return_lambda=True
    )

    nlam = len(lam0)
    # X = [Teff_K, logg_cgs, fsed, Kzz_cm2s]
    X = np.column_stack([Teff, logg, fsed, kz]).astype(np.float32)
    Y = np.empty((n_spectra, nlam), dtype=np.float32)
    Y[0] = y0.astype(np.float32)
    lam = lam0.astype(np.float32)

    failures = []
    species_used = [species0]

    for i in range(1, n_spectra):
        try:
            sp_i = pick_cloud_species(Teff[i]) if cfg['use_clouds'] else []
            lami, yi = bd_spectrum(
                cfg['wavmin'], cfg['wavmax'], logg[i], Teff[i],
                tuple(sp_i), fsed[i], cfg['mh'], cfg['mmw'],
                cfg['R'], kz[i], cfg['sonora'], cfg['virga'], cfg['use_clouds'],
                return_lambda=True
            )
            if len(lami) != nlam or not np.allclose(lami, lam, atol=1e-8, rtol=0):
                raise RuntimeError("Wavelength grid changed")
            Y[i] = yi.astype(np.float32)
            species_used.append(sp_i)
        except Exception as e:
            failures.append((int(i), str(e)))
            # fallback: copy previous (keeps shapes consistent; logged in metadata)
            Y[i] = Y[i-1]
            species_used.append(species_used[-1])

    # Log-flux variant (clipped to avoid -inf)
    Ylog = np.log(np.clip(Y, 1e-99, None)).astype(np.float32)

    # Simple global stats (handy for MARGE normalization later)
    stats = {
        'X_mean': X.mean(axis=0).tolist(),
        'X_std' : X.std(axis=0).tolist(),
        'Y_mean': float(Y.mean()),
        'Y_std' : float(Y.std()),
        'Ylog_mean': float(Ylog.mean()),
        'Ylog_std' : float(Ylog.std())
    }

    # Pack + save
    np.savez_compressed(
        cfg['out_npz'],
        X=X, Y=Y, Ylog=Ylog, wavelength_um=lam,
        inputs=np.array(['Teff_K','logg_cgs','fsed','Kzz_cm2s'], dtype=object),
        species=np.array([tuple(s) for s in species_used], dtype=object)
    )
    with open(cfg['out_npz'].replace('.npz', '_meta.json'), 'w') as f:
        json.dump({
            'seed': seed,
            'ranges': {
                'teff_range': cfg['teff_range'],
                'logg_range': cfg['logg_range'],
                'fsed_range': cfg['fsed_range'],
                'kzz_range': cfg['kzz_range']
            },
            'use_clouds': cfg['use_clouds'],
            'R': cfg['R'],
            'wavmin': cfg['wavmin'],
            'wavmax': cfg['wavmax'],
            'failures': failures,
            'stats': stats
        }, f, indent=2)

    print(f"Saved {cfg['out_npz']}: X{X.shape}, Y{Y.shape}, λ{lam.shape}. "
          f"Clouds={cfg['use_clouds']}. Failures={len(failures)}")
    return {'X': X, 'Y': Y, 'Ylog': Ylog, 'lam': lam, 'stats': stats, 'failures': failures}

In [7]:
cfg = {
    'sonora': sonora_directory,
    'virga' : virga_directory,
    'wavmin': wavelength_min,
    'wavmax': wavelength_max,
    'R'     : res_R,
    'mh'    : metallicity,
    'mmw'   : mean_mol_weight,
    'use_clouds': use_clouds,
    'teff_range': (700, 1400),
    'logg_range': (4.0, 5.5),     # log10(g_cgs)
    'fsed_range': (0.5, 5.0),
    'kzz_range' : (1e7, 1e11),
    'out_npz'   : output_npz
}

_ = build_dataset(n_spectra=10, seed=2, cfg=cfg)

Saved test_out2.npz: X(10, 4), Y(10, 275), λ(275,). Clouds=True. Failures=9


In [8]:
from bokeh.plotting import figure, show
from bokeh.models import Legend
from bokeh.palettes import Category10

# Load the file you just wrote
data = np.load(output_npz, allow_pickle=True)
X = data['X']             # (n, 4): Teff, logg_cgs, fsed, Kzz_cm2s
Y = data['Y']             # (n, nlam)
lam = data['wavelength_um']

n_spectra = Y.shape[0]
palette = Category10[10]
colors = [palette[i % len(palette)] for i in range(n_spectra)]

p = figure(width=800, height=420,
           x_axis_label="Wavelength (μm)", y_axis_label="Flux (Fν)",
           title="Sample Spectra")

items = []
for i in range(n_spectra):
    teff_i = X[i,0]; fsed_i = X[i,2]
    lbl = f"Teff={teff_i:.0f}K, f_sed={fsed_i:.2f}"
    r = p.line(lam, Y[i], line_width=2, color=colors[i])
    items.append((lbl, [r]))

legend = Legend(items=items, location="center")
p.add_layout(legend, 'right')
p.legend.label_text_font_size = "8pt"
show(p)

In [9]:
def regrid_to_reference(wn_in, fnu_in, wn_ref=None, R=None):
    """
    If wn_ref is provided -> force regrid onto that grid (identical across runs).
    Else -> build a reference grid with constant R and return it with flux.
    """
    if wn_ref is not None:
        wn, fy = jdi.mean_regrid(wn_in, fnu_in, newx=wn_ref)
        return wn, fy
    else:
        wn, fy = jdi.mean_regrid(wn_in, fnu_in, R=R)
        return wn, fy

def bd_spectrum_safe(*args, clouds=True, **kwargs):
    """
    Wrapper: try with clouds; on failure, retry cloud-free once.
    Returns (lam_um, fnu, used_clouds, err_msg)
    """
    err = None
    try:
        lam, fnu = bd_spectrum(*args, clouds=clouds, **kwargs)
        return lam, fnu, clouds, err
    except Exception as e:
        err = f"cloud run failed: {e}"
        try:
            lam, fnu = bd_spectrum(*args, clouds=False, **kwargs)
            return lam, fnu, False, err
        except Exception as e2:
            raise RuntimeError(f"cloud-free retry also failed: {e2} | first: {err}")

In [10]:
# Small baseline set away from the tricky Teff band
rng = np.random.default_rng(0)
Teff  = rng.uniform(800, 1100, size=6)
logg  = rng.uniform(4.5, 5.2,  size=6)

fsed  = np.full_like(Teff, 2.0)     # fixed
kz    = np.full_like(Teff, 1e9)     # cm^2/s fixed
clouds = False

# First model -> build reference grid
lam0, f0, _, _ = bd_spectrum_safe(
    wavelength_min, wavelength_max, logg[0], Teff[0],
    tuple([]), fsed[0], metallicity, mean_mol_weight, res_R, kz[0],
    sonora_directory, virga_directory, return_lambda=True, clouds=clouds
)
wn_ref = 1e4/lam0  # reference grid in wavenumber

# Pack spectra on the same grid
Ys = []
fail = []
for i in range(len(Teff)):
    lam, fnu, used_clouds, err = bd_spectrum_safe(
        wavelength_min, wavelength_max, logg[i], Teff[i],
        tuple([]), fsed[i], metallicity, mean_mol_weight, res_R, kz[i],
        sonora_directory, virga_directory, return_lambda=True, clouds=clouds
    )
    wn_i = 1e4/lam
    wn_out, f_out = regrid_to_reference(wn_i, fnu, wn_ref=wn_ref)
    Ys.append(f_out)
    if err: fail.append((i, err))
lam_ref = 1e4/wn_ref
Y = np.vstack(Ys)

print(f"Toy0 — cloud-free: n={len(Teff)}, failures={len(fail)}")
for i, msg in fail[:3]:
    print(f"  sample {i}: {msg}")

Toy0 — cloud-free: n=6, failures=0


In [11]:
from bokeh.plotting import figure, show
from bokeh.models import Legend
from bokeh.palettes import Category10

palette = Category10[10]
colors = [palette[i % len(palette)] for i in range(len(Teff))]

p = figure(width=800, height=420,
           x_axis_label="Wavelength (μm)", y_axis_label="Flux (Fν)",
           title="Toy 0 — Cloud-free baseline")

items = []
for i in range(len(Teff)):
    lbl = f"Teff={Teff[i]:.0f}K, logg={logg[i]:.2f}"
    r = p.line(lam_ref, Y[i], line_width=2, color=colors[i])
    items.append((lbl, [r]))

legend = Legend(items=items, location="center")
p.add_layout(legend, 'right')
p.legend.label_text_font_size = "8pt"
show(p)


In [12]:
# One representative point (away from 1300–1500 K)
Teff1, logg1 = 1000.0, 5.0
fsed1, kz1   = 2.0, 1e9
sp_cool = ['Na2S','KCl']

# Cloud-free
lam_cf, f_cf, _, _ = bd_spectrum_safe(
    wavelength_min, wavelength_max, logg1, Teff1,
    tuple([]), fsed1, metallicity, mean_mol_weight, res_R, kz1,
    sonora_directory, virga_directory, return_lambda=True, clouds=False
)
# Cloudy (retry to cloud-free if it fails happens inside)
lam_cl, f_cl, used_clouds, err = bd_spectrum_safe(
    wavelength_min, wavelength_max, logg1, Teff1,
    tuple(sp_cool), fsed1, metallicity, mean_mol_weight, res_R, kz1,
    sonora_directory, virga_directory, return_lambda=True, clouds=True
)

# Regrid onto lam_cf grid (via wn_ref)
wn_ref = 1e4/lam_cf
wn_cl  = 1e4/lam_cl
_, f_cl_r = regrid_to_reference(wn_cl, f_cl, wn_ref=wn_ref)
lam_ref = lam_cf

from bokeh.plotting import figure, show
p = figure(width=800, height=420, x_axis_label="Wavelength (μm)", y_axis_label="Flux (Fν)",
           title=f"Toy 1 — Clouds on/off @ Teff={Teff1:.0f}K, logg={logg1:.2f}, fsed={fsed1}, Kzz=1e9")
r1 = p.line(lam_ref, f_cf, line_width=2, color="black", legend_label="Cloud-free")
r2 = p.line(lam_ref, f_cl_r, line_width=2, color="firebrick", legend_label=("Cloudy" if used_clouds else "Cloud step failed → cloud-free"))
p.legend.location = "top_right"
show(p)


In [13]:
Teff2, logg2 = 1000.0, 5.0
kz2 = 1e9
fseds = [0.5, 1.0, 2.0, 3.0, 5.0]
sp_cool = ['Na2S','KCl']

# Reference grid from first run
lam0, f0, used0, err0 = bd_spectrum_safe(
    wavelength_min, wavelength_max, logg2, Teff2,
    tuple(sp_cool), fseds[0], metallicity, mean_mol_weight, res_R, kz2,
    sonora_directory, virga_directory, return_lambda=True, clouds=True
)
wn_ref = 1e4/lam0

rows = []
for fs in fseds:
    lam, f, used, err = bd_spectrum_safe(
        wavelength_min, wavelength_max, logg2, Teff2,
        tuple(sp_cool), fs, metallicity, mean_mol_weight, res_R, kz2,
        sonora_directory, virga_directory, return_lambda=True, clouds=True
    )
    _, f_r = regrid_to_reference(1e4/lam, f, wn_ref=wn_ref)
    rows.append((fs, f_r, used, err))
lam_ref = 1e4/wn_ref

from bokeh.plotting import figure, show
from bokeh.palettes import Category10
p = figure(width=800, height=420, x_axis_label="Wavelength (μm)", y_axis_label="Flux (Fν)",
           title=f"Toy 2 — Vary fsed @ Teff={Teff2:.0f}K, logg={logg2:.2f}, Kzz=1e9")
palette = Category10[10]
items = []
for i, (fs, f_r, used, err) in enumerate(rows):
    lbl = f"fsed={fs} " + ("" if used else "(cloud step failed→cf)")
    r = p.line(lam_ref, f_r, line_width=2, color=palette[i % 10])
    items.append((lbl, [r]))
from bokeh.models import Legend
legend = Legend(items=items, location="center")
p.add_layout(legend, 'right')
p.legend.label_text_font_size = "8pt"
show(p)


In [14]:
Teff3, logg3, fsed3 = 1000.0, 5.0, 1.0
kzzs = [1e7, 1e8, 1e9, 1e10, 1e11]
sp_cool = ['Na2S','KCl']

# Reference grid
lam0, f0, used0, err0 = bd_spectrum_safe(
    wavelength_min, wavelength_max, logg3, Teff3,
    tuple(sp_cool), fsed3, metallicity, mean_mol_weight, res_R, kzzs[0],
    sonora_directory, virga_directory, return_lambda=True, clouds=True
)
wn_ref = 1e4/lam0

rows = []
for kzval in kzzs:
    lam, f, used, err = bd_spectrum_safe(
        wavelength_min, wavelength_max, logg3, Teff3,
        tuple(sp_cool), fsed3, metallicity, mean_mol_weight, res_R, kzval,
        sonora_directory, virga_directory, return_lambda=True, clouds=True
    )
    _, f_r = regrid_to_reference(1e4/lam, f, wn_ref=wn_ref)
    rows.append((kzval, f_r, used, err))
lam_ref = 1e4/wn_ref

from bokeh.plotting import figure, show
from bokeh.palettes import Category10
p = figure(width=800, height=420, x_axis_label="Wavelength (μm)", y_axis_label="Flux (Fν)",
           title=f"Toy 3 — Vary Kzz @ Teff={Teff3:.0f}K, logg={logg3:.2f}, fsed={fsed3}")
palette = Category10[10]
items = []
for i, (kzval, f_r, used, err) in enumerate(rows):
    lbl = f"Kzz={kzval:.0e} " + ("" if used else "(cloud step failed→cf)")
    r = p.line(lam_ref, f_r, line_width=2, color=palette[i % 10])
    items.append((lbl, [r]))
from bokeh.models import Legend
legend = Legend(items=items, location="center")
p.add_layout(legend, 'right')
p.legend.label_text_font_size = "8pt"
show(p)
