**Name:** Alex Medina

**File:** Comprehensive Test

In [1]:
import os
os.environ['picaso_refdata'] = r'C:\Users\Alex\Desktop\Picaso\picaso\reference' #THIS MUST GO BEFORE YOUR IMPORT STATEMENT
os.environ['PYSYN_CDBS'] = r'C:\Users\Alex\Desktop\Picaso\grp\redcat\trds' #this is for the stellar data discussed below.

#General
import numpy as np
import pandas as pd
import astropy.units as u
import matplotlib.pyplot as plt
import sys
import os
import json
from pathlib import Path

# Picaso
from picaso import justdoit as jdi
from picaso import justplotit as jpi

# Virga
from virga import justdoit as vj
from virga import justplotit as cldplt

# Other
from bokeh.models import Legend
from bokeh.plotting import show, figure
from bokeh.palettes import Category10
from bokeh.io import output_notebook
output_notebook()

**SECTION 1:** Helper methods

In [2]:
# FOR FUTURE INTERPOLATING PICKLE VALUES
def interpl(P_bar, T_K, **kwargs):
    """
    TP override/modifier (FUTURE):
    - TODAY: return inputs unchanged.
    - LATER: drop in your interpolation-to-higher-res or apply a pickle modifier.
    """
    return P_bar, T_K

In [3]:
# Selecting clouds instead of harcoding cloud species
# Select using virga condensation

def autoselect_clouds(P_bar, T_K, mmw, p_cutoff=1e-2):
    """
    Pick condensates present somewhere above ~10 mbar (P >= p_cutoff).
    """

    cloud_species = ["Al2O3","Cr","Fe","H2O","KCl","Mg2SiO4",
                     "MgSiO3","MnS","NH3","Na2S","ZnS"]
    
    P = np.asarray(P_bar, float)
    T = np.asarray(T_K,   float)

    sel = []
    for sp in cloud_species:
        # Returns condensation T at each P
        _, Tcond = vj.condensation_t(sp, 1.0, mmw, pressure=P)

        # Builds a boolean mask of “valid” levels to check
        # Only deeper than p_cut
        # Where T and Tcond are both finite
        mask = (P >= p_cutoff) & np.isfinite(T) & np.isfinite(Tcond)

        # Test if there exists at least 1 level where atmosphere is
        # Cold enough to condense
        # I.e. if there are any T < Tcond
        # The condensate can forn in the column
        if mask.any() and np.any(T[mask] <= Tcond[mask]):
            # Cloud species added to profile
            sel.append(sp)
    return sel

# Cloud selection depends on MMW, but I feel that I am making loops here
def compute_mmw(profile_dict, fallback=2.36):
    """
    Computing mmw from Sonora profile if present; else use solar mmw.
    """
    mu = profile_dict.get("mu") or profile_dict.get("MU")
    if mu is None:
        return float(fallback)
    mu = np.asarray(mu, float)
    if mu.ndim == 0:
        return float(mu)
    return float(np.mean(mu))

def sample_params(n, *, Teff_min, Teff_max, g_min, g_max, fsed_min, fsed_max, kz_min, kz_max, seed=42):
    rng = np.random.default_rng(seed)   # <-- create ONCE outside any loop
    Teff = rng.uniform(Teff_min, Teff_max, size=n)
    g    = rng.uniform(g_min,   g_max,   size=n)
    fsed = rng.uniform(fsed_min, fsed_max, size=n)
    kz   = 10 ** rng.uniform(np.log10(kz_min), np.log10(kz_max), size=n)  # log-uniform
    return Teff, g, fsed, kz

# To randomly sample gravity, fsed and kzz
def rng_uniform(rng, lo, hi, size):
    return rng.uniform(lo, hi, size=size)

def rng_log_uniform(rng, lo, hi, size):
    # Sample uniformly in log10 space
    return 10.0 ** rng.uniform(np.log10(lo), np.log10(hi), size=size)

def sanitize_float_for_name(x, fmt="{:.2e}"):
    # "6.38e+09" -> "6.38e09", "." ok on Windows, remove '+'
    s = fmt.format(float(x)).replace("+", "")
    return s

In [4]:
# To have a specific filename structure
def make_case_filename(Teff, gravity, kzz, fsed, output_dir, ext=".npz"):
    # Whether cloud or clear case
    cloud_tag = "clouds"
    base = f"T{int(round(Teff))}G{int(round(gravity))}{cloud_tag}"

    base += f"_Kzz{float(kzz):.2e}"
    base += f"_fsed{float(fsed):.2f}"

    return os.path.join(output_dir, base + ext)

# Save all files in one place
def make_master_filename(n_spectra, waverange, R, output_dir, clouds=True, ext=".npz"):
    cloud_tag = "clouds" if clouds else "clear"
    # Encode basic config in the filename
    wmin, wmax = waverange
    base = f"N{n_spectra}_R{int(R)}_{wmin:.2f}-{wmax:.2f}um_{cloud_tag}"
    return os.path.join(output_dir, base + ext)

**SECTION 2:** Main function

In [5]:
def bd_spectrum(waverange, gravity, Teff, kzz, fsed, mh, R,
                opacity_db, sonora_db, virga_db):
    """
    Compute a BD emission spectrum with optional Virga clouds.
    """

    # Opacity & inputs
    opa = jdi.opannection(wave_range=list(waverange), filename_db=opacity_db)
    bd = jdi.inputs(calculation="browndwarf")

    # Basic inputs
    bd.phase_angle(0)
    bd.gravity(gravity, gravity_unit=u.Unit('m/s**2'))
    bd.sonora(sonora_db, Teff)


    # Inject Kzz (must match pressure grid length)
    prof = bd.inputs['atmosphere']['profile']
    P = np.asarray(prof["pressure"], float)
    T = np.asarray(prof["temperature"], float)
    mmw = compute_mmw(prof)
    
    bd.inputs["atmosphere"]["profile"]["kz"] = [float(kzz)] * len(P)
    
    # Clouds
    cloud_list = autoselect_clouds(P, T, mmw)
    bd.virga(cloud_list, virga_db, fsed=float(fsed), mh=mh, mmw=mmw)

    out = bd.spectrum(opa, full_output=True)

    # Convert to F_nu, then regrid to constant R in wavenumber
    wn, th = out["wavenumber"], out["thermal"] # [cm^-1], [erg/cm^2/s/cm]
    wl = 1e4/wn
    flamy  = th * 1e-8                                # erg/cm^2/s/Å

    sp = jdi.psyn.ArraySpectrum(wl, flamy,
                                waveunits='um',
                                fluxunits='FLAM')
    
    sp.convert('um')
    sp.convert('Fnu')  # erg/cm^2/s/Hz
    wn, th = sp.wave, sp.flux

    # regrid in wavenumber to constant resolving power R
    out['fluxnu'] = th

    wn, th = jdi.mean_regrid(1e4/wn, th, R=R) #wavenumber, erg/cm2/s/Hz
    out['regridy'] =  th
    out['regridx'] = wn
    
    return wn, th

**SECTION 3:** Configuration

In [6]:
# For sampling

# Directories
input_dir  = r"C:\Users\Alex\Desktop\Picaso\data\sonora" # Sonora db
virga_dir  = r"C:\Users\Alex\Desktop\Picaso\data\virga" # Virga
opacit_dir = None # Opacity db
# Will change once runninng on Newton
# r"/groups/tkaralidi/opacity_500k_for_R5000_egpoutput.db"
output_dir = r"C:\Users\Alex\Desktop\Picaso\outputs" # Output store

# Ensure output dir
os.makedirs(output_dir, exist_ok=True)

use_clouds = True # Clouds on/off

# Wavelength and resolution
wav_range       = (0.3, 3.0) # microns
res_R           = 300 # Can change once opacity and TP change

# Cloud microphysics
MH              = 1.0   # [M/H] metallicity factor ~ solar

# Sampling
N_spectra       = 5 # Number of spectra to generate for sampling
seed            = 2

Teff_range      = (700, 1400)     # K
gravity_range   = (50.0, 1500.0)  # m/s^2
fsed_range      = (0.5, 5.0)      # dimensionless
Kzz_range       = (1e8, 3e10)     # cm^2/s (log-uniform typical for BD)

# Knobs
save_per_case   = True   # Also save per-spectrum files with friendly names
kzz_loguniform  = True   # Log-uniform sampling for Kzz (typical)
VERBOSE         = True

**SECTION 4:** Execute

In [7]:
def run_grid(
    n_spectra,
    Teff_min, Teff_max,
    g_min, g_max,
    fsed_min, fsed_max,
    kz_min, kz_max,
    outdir,
    R, wavmin, wavmax,
    sonora_db, virga_db,
    seed=123
):
    os.makedirs(outdir, exist_ok=True)

    Teff_arr, g_arr, fsed_arr, kz_arr = sample_params(
        n_spectra,
        Teff_min=Teff_min, Teff_max=Teff_max,
        g_min=g_min, g_max=g_max,
        fsed_min=fsed_min, fsed_max=fsed_max,
        kz_min=kz_min, kz_max=kz_max,
        seed=seed
    )

    # Storage
    X_rows = []
    Y_rows = []
    lam_ref = None

    for i in range(n_spectra):
        Teff_i = float(Teff_arr[i])
        g_i    = float(g_arr[i])
        fsed_i = float(fsed_arr[i])
        kz_i   = float(kz_arr[i])

        # ---- compute spectrum (clouds-only) ----
        # Must return wavelength (µm) shape (nlam,), flux (nlam,)
        lam_um, flux = bd_spectrum(
            wavmin=wavmin, wavmax=wavmax,
            gravity=g_i, Teff=Teff_i,
            species=None,    # your cloud list is inside bd_spectrum for clouds-only
            fsed=fsed_i, mh=1.0, mmw=2.2,
            R=R, kz_value=kz_i,
            sonora_db=sonora_db, virga_db=virga_db,
            clouds=True
        )

        lam_um = np.asarray(lam_um, float).reshape(-1)     # <-- ensure (nlam,)
        flux   = np.asarray(flux,   float).reshape(-1)

        if lam_ref is None:
            lam_ref = lam_um.copy()
        else:
            if lam_um.shape != lam_ref.shape or not np.allclose(lam_um, lam_ref, rtol=5e-7, atol=1e-8):
                raise RuntimeError("Wavelength grid mismatch vs first case — check endpoints/R.")

        # Build X row [Teff[K], g[m/s^2], f_sed[-], Kz[cm^2/s]]
        X_rows.append([Teff_i, g_i, fsed_i, kz_i])
        Y_rows.append(flux)

        # Unique, informative filename (no + in exponent)
        f_T   = f"T{int(round(Teff_i))}"
        f_G   = f"G{int(round(g_i))}"
        f_Kzz = f"Kzz{sanitize_float_for_name(kz_i, fmt='{:.2e}')}"
        f_fs  = f"fsed{fsed_i:.2f}"
        fname = f"{f_T}{f_G}clouds_{f_Kzz}_{f_fs}.npz"
        fpath = os.path.join(outdir, fname)

        # If parameters collide (identical), avoid overwrite by suffixing an index
        if os.path.exists(fpath):
            base, ext = os.path.splitext(fpath)
            fpath = f"{base}_idx{i+1}{ext}"

        np.savez(
            fpath,
            x=np.array([[Teff_i, g_i, fsed_i, kz_i]], dtype=float),         # (1,4)
            y=np.array([flux], dtype=float),                                # (1,nlam)
            wavelength_um=lam_ref.astype(float)                             # (nlam,)
        )
        print(f"[OK] Saved case {i+1} -> {fpath}")

    # Stack and one final dataset file (optional)
    X = np.array(X_rows, dtype=float)                   # (n,4)
    Y = np.vstack(Y_rows).astype(float)                 # (n,nlam)
    bigpath = os.path.join(outdir, f"dataset_T{int(Teff_min)}-{int(Teff_max)}_G{int(g_min)}-{int(g_max)}_R{int(R)}.npz")
    np.savez(bigpath, x=X, y=Y, wavelength_um=lam_ref.astype(float))
    print(f"[OK] Saved combined dataset -> {bigpath}")

    # Quick diagnostics
    print(f"Keys: ['x','y','wavelength_um']")
    print(f"X shape: {X.shape} (columns: Teff[K], g[m/s^2], f_sed[-], Kz[cm^2/s])")
    print(f"Y shape: {Y.shape} (flux_nu per spectrum)")
    print(f"lam shape: {lam_ref.shape} (micron)")
    print(f"Input ranges:")
    print(f"  Teff [K]     : {X[:,0].min()} to {X[:,0].max()}")
    print(f"  g [m/s^2]    : {X[:,1].min()} to {X[:,1].max()}")
    print(f"  f_sed [-]    : {X[:,2].min()} to {X[:,2].max()}")
    print(f"  Kz [cm^2/s]  : {X[:,3].min()} to {X[:,3].max()}")
    print(f"Flux stats across all spectra: min={Y.min()} max={Y.max()}")

In [9]:
if __name__ == "__main__":
    outdir = r"C:\Users\Alex\Desktop\Picaso\outputs"
    sonora_db = r"C:\Users\Alex\Desktop\Picaso\data\sonora"
    virga_db  = r"C:\Users\Alex\Desktop\Picaso\data\virga"

    run_grid(
        n_spectra=5,              # how many random cases to generate
        Teff_min=700, Teff_max=1400,
        g_min=300,  g_max=1000,   # m/s^2
        fsed_min=1, fsed_max=4,
        kz_min=1e7, kz_max=1e10,  # cm^2/s
        outdir=outdir,
        R=300, wavmin=3.0, wavmax=5.0,
        sonora_db=sonora_db, virga_db=virga_db,
        seed=42
    )


TypeError: bd_spectrum() got an unexpected keyword argument 'wavmin'

**SECTION 5:** Visualizations

In [None]:
# Path the the .NPZ folder

npz_path = Path(r"C:\Users\Alex\Desktop\Picaso\outputs\T700G429clouds_Kzz6.38e+09_fsed3.03.npz")
data = np.load(npz_path)

# The file stores:
# x -> inputs [Teff, g, f_sed, Kz] with shape (n_spectra, 4)
# y -> spectra (flux_nu) with shape (n_spectra, n_lambda)
# wavelength_um -> wavelength grid in microns (n_lambda,)
print("Keys:", list(data.keys()))
X = data["x"]
Y = data["y"]
lam = data["wavelength_um"]

n_spectra, n_inputs = X.shape
print(f"X shape: {X.shape}  (columns: Teff[K], g[m/s^2], f_sed[-], Kz[cm^2/s])")
print(f"Y shape: {Y.shape}  (flux_nu per spectrum)")
print(f"lam shape: {lam.shape}  (micron)")

print("\nInput ranges:")
print("  Teff [K]      :", float(X[:,0].min()), "to", float(X[:,0].max()))
print("  g [m/s^2]     :", float(X[:,1].min()), "to", float(X[:,1].max()))
print("  f_sed [-]     :", float(X[:,2].min()), "to", float(X[:,2].max()))
print("  Kz [cm^2/s]   :", float(X[:,3].min()), "to", float(X[:,3].max()))

print("\nFlux stats across all spectra:")
print("  min:", float(Y.min()), " max:", float(Y.max()))

Keys: ['x', 'y', 'wavelength_um']
X shape: (1, 4)  (columns: Teff[K], g[m/s^2], f_sed[-], Kz[cm^2/s])
Y shape: (1, 691)  (flux_nu per spectrum)
lam shape: ()  (micron)

Input ranges:
  Teff [K]      : 700.0 to 700.0
  g [m/s^2]     : 429.33758544921875 to 429.33758544921875
  f_sed [-]     : 3.030195474624634 to 3.030195474624634
  Kz [cm^2/s]   : 6378708480.0 to 6378708480.0

Flux stats across all spectra:
  min: 2.7680091654342758e-22  max: 1.6605585528850497e-07
