In [3]:
from urllib.request import urlopen
import json
import pandas as pd

In [32]:
# Fetch the transient
response = urlopen('https://blast.ncsa.illinois.edu/api/transient/?name=2004ef&format=json')
data = json.loads(response.read())

transient = data[0]  # unpack the first (and only) result

transient.keys()

print("\n--- Transient Information ---\n")

for key, value in transient.items():
    print(f"{key:30}: {value}")

print("\n--- Host Information ---\n")

for key, value in transient['host'].items():
    print(f"{key:30}: {value}")

print(len(transient.keys()), "keys in transient")



--- Transient Information ---

id                            : 2624
ra_deg                        : 340.54174583333327
dec_deg                       : 19.994555555555557
name                          : 2004ef
public_timestamp              : None
redshift                      : 0.03002
spectroscopic_class           : SN Ia
milkyway_dust_reddening       : 0.047228389531373975
image_trim_status             : processed
progress                      : 100
software_version              : 1.2.0
host                          : {'id': 12156, 'ra_deg': 340.54385948, 'dec_deg': 19.9969434, 'name': '2004ef', 'redshift': 0.030988, 'redshift_err': None, 'photometric_redshift': 0.01854147685274371, 'photometric_redshift_err': None, 'milkyway_dust_reddening': 0.04736369989812374, 'software_version': None}

--- Host Information ---

id                            : 12156
ra_deg                        : 340.54385948
dec_deg                       : 19.9969434
name                          : 2004ef
redshi

In [None]:
url = 'https://blast.ncsa.illinois.edu/api/transient/get/2004ef?format=json'
payload = json.loads(urlopen(url).read())  # ← just one dict
print(type(payload))  # Just to confirm — should say <class 'dict'>
for key, value in payload.items():
    print(f"{key:30}: {value}")

print(len(payload.keys()), "keys in payload")

<class 'dict'>
transient_id                  : 2624
transient_ra_deg              : 340.54174583333327
transient_dec_deg             : 19.994555555555557
transient_name                : 2004ef
transient_public_timestamp    : None
transient_redshift            : 0.03002
transient_spectroscopic_class : SN Ia
transient_milkyway_dust_reddening: 0.047228389531373975
transient_image_trim_status   : processed
transient_progress            : 100
transient_software_version    : 1.2.0
transient_host                : {'id': 12156, 'ra_deg': 340.54385948, 'dec_deg': 19.9969434, 'name': '2004ef', 'redshift': 0.030988, 'redshift_err': None, 'photometric_redshift': 0.01854147685274371, 'photometric_redshift_err': None, 'milkyway_dust_reddening': 0.04736369989812374, 'software_version': None}
host_name                     : 2004ef
host_ra_deg                   : 340.54385948
host_dec_deg                  : 19.9969434
host_redshift                 : 0.030988
host_milkyway_dust_reddening  : 0.0473636998

In [34]:
from urllib.request import urlopen
import json
import re
import pandas as pd

BASE = "https://blast.ncsa.illinois.edu"

def fetch_payload(name):
    url = f"{BASE}/api/transient/get/{name}?format=json"
    return json.loads(urlopen(url).read())

def extract_host_meta(payload):
    meta = payload.get("transient_host", {}) or {}
    out = {
        "host_ra_deg": meta.get("ra_deg"),
        "host_dec_deg": meta.get("dec_deg"),
        "host_redshift": meta.get("redshift"),
        "host_name": meta.get("name"),
    }
    # also useful SN-level bits
    out.update({
        "sn_name": payload.get("transient_name"),
        "sn_ra_deg": payload.get("transient_ra_deg"),
        "sn_dec_deg": payload.get("transient_dec_deg"),
        "sn_class": str(payload.get("transient_spectroscopic_class", "")).strip(),
        "sn_redshift": payload.get("transient_redshift"),
    })
    return out

# Grab any keys that look like aperture photometry (global/local), mags/fluxes + errors
AP_PHOT_RE = re.compile(r'^(?P<scope>global|local)_aperture_(?P<rest>.+)$', re.I)

def extract_aperture_photometry(payload):
    rows = []
    for k, v in payload.items():
        m = AP_PHOT_RE.match(k)
        if not m:
            continue
        # Split the tail and try to infer band & quantity
        parts = m.group("rest").split("_")
        # We’re looking for patterns like: SDSS_g_magnitude(_error) or 2MASS_H_flux(_error)
        if len(parts) < 2:
            continue
        # quantity is usually at the end: magnitude / flux / magnitude_error / flux_error
        quantity = parts[-1]
        band = None
        # try extracting band as the last token before quantity; sometimes there's survey + band
        # e.g., SDSS_g_magnitude -> survey=SDSS, band=g
        if len(parts) >= 2:
            band = parts[-2]
        survey = None
        if len(parts) >= 3:
            survey = "_".join(parts[:-2])  # everything before band+quantity
        rows.append({
            "scope": m.group("scope").lower(),   # global/local
            "survey": survey,
            "band": band,
            "quantity": quantity,                 # flux, magnitude, flux_error, magnitude_error
            "value": v
        })
    if not rows:
        return pd.DataFrame(columns=["scope", "survey", "band", "quantity", "value"])
    df = pd.DataFrame(rows)
    # Pivot into wide form: one row per (scope, survey, band)
    wide = df.pivot_table(index=["scope", "survey", "band"], columns="quantity", values="value", aggfunc="first").reset_index()
    return wide

# Pull SED-derived host parameters like log_mass/log_sfr/... with percentiles
PARAM_KEYS = ("host_log_mass", "host_log_sfr", "host_log_ssfr", "host_log_age")

def extract_host_sed_params(payload):
    out = {}
    for k, v in payload.items():
        if not any(p in k for p in PARAM_KEYS):
            continue
        # Keep both global/local; keep percentile suffixes like _16/_50/_84
        out[k] = v
    # Make a tidier view grouped by (scope, param, percentile)
    rows = []
    for k, v in out.items():
        scope = "global" if "global_aperture" in k else ("local" if "local_aperture" in k else None)
        # find which param
        param = next((p for p in PARAM_KEYS if p in k), None)
        # percentile if present (16/50/84)
        perc = None
        m = re.search(r"_(16|50|84)\b", k)
        if m:
            perc = int(m.group(1))
        rows.append({"scope": scope, "param": param, "percentile": perc, "value": v, "key": k})
    if not rows:
        return pd.DataFrame(columns=["scope", "param", "percentile", "value", "key"])
    df = pd.DataFrame(rows)
    # Also make a wide table with 16/50/84 columns if available
    wide = df.pivot_table(index=["scope", "param"], columns="percentile", values="value", aggfunc="first").reset_index()
    wide = wide.rename(columns={16: "p16", 50: "p50", 84: "p84"})
    return df.sort_values(["scope", "param", "percentile"]), wide

# ---- Example usage on SN 2004ef ----
payload = fetch_payload("2004ef")

host_meta = extract_host_meta(payload)
phot_wide = extract_aperture_photometry(payload)
sed_long, sed_wide = extract_host_sed_params(payload)

print("HOST META\n", host_meta, "\n")
print("APERTURE PHOTOMETRY (wide)\n", phot_wide.head(), "\n")
print("SED PARAMS (wide)\n", sed_wide.head(), "\n")

HOST META
 {'host_ra_deg': 340.54385948, 'host_dec_deg': 19.9969434, 'host_redshift': 0.030988, 'host_name': '2004ef', 'sn_name': '2004ef', 'sn_ra_deg': 340.54174583333327, 'sn_dec_deg': 19.994555555555557, 'sn_class': 'SN Ia', 'sn_redshift': 0.03002} 

APERTURE PHOTOMETRY (wide)
 quantity   scope   survey  band   16   50   84  \
0         global    2MASS     H  NaN  NaN  NaN   
1         global    2MASS     J  NaN  NaN  NaN   
2         global    2MASS     K  NaN  NaN  NaN   
3         global  2MASS_H  flux  NaN  NaN  NaN   
4         global  2MASS_H    is  NaN  NaN  NaN   

quantity                                           aperture arcsec  \
0         {'id': 117687, 'ra_deg': 340.54377179750253, '...    NaN   
1         {'id': 117692, 'ra_deg': 340.54377179750253, '...    NaN   
2         {'id': 117694, 'ra_deg': 340.54377179750253, '...    NaN   
3                                                       NaN    NaN   
4                                                       NaN    NaN 

In [37]:
import re, pandas as pd

# ---- from your payload variable ----
p = payload  # rename for brevity

# 1) Photometry table (mag/err where present)
phot_rows = []
pattern = re.compile(r'^(?P<scope>local|global)_aperture_(?P<survey>[A-Za-z0-9]+)_(?P<band>[A-Za-z0-9]+)_(?P<quantity>magnitude|magnitude_error|flux|flux_error)$')
for k, v in p.items():
    m = pattern.match(k)
    if not m: 
        continue
    d = m.groupdict()
    phot_rows.append({**d, "value": v})

phot = (pd.DataFrame(phot_rows)
          .pivot_table(index=["scope","survey","band"], columns="quantity", values="value", aggfunc="first")
          .reset_index())

# Optional: focus on familiar sets
want = (
    (phot["survey"].isin(["SDSS","PanSTARRS","DES"])) & 
    (phot["band"].isin(list("ugrizy")))
)
phot_sel = phot[want].sort_values(["scope","survey","band"])

# 2) SED host parameters (mass/SFR; p16/p50/p84) for local+global
def collect_param(param):
    rows = []
    for scope in ("local","global"):
        r = {}
        for pctl in ("16","50","84"):
            key = f"{scope}_aperture_host_{param}_{pctl}"
            if key in p: 
                r[pctl] = p[key]
        if r:
            rows.append({"scope":scope, "param":param, "p16":r.get("16"), "p50":r.get("50"), "p84":r.get("84")})
    return rows

sed = pd.DataFrame(
    collect_param("log_mass") +
    collect_param("log_sfr") +
    collect_param("log_ssfr") +
    collect_param("log_age")
).sort_values(["scope","param"])

# 3) A tiny meta dict you’ll probably want to keep
meta = {
    "sn_name": p.get("transient_name"),
    "sn_ra_deg": p.get("transient_ra_deg"),
    "sn_dec_deg": p.get("transient_dec_deg"),
    "sn_z": p.get("transient_redshift"),
    "host_ra_deg": p.get("host_ra_deg"),
    "host_dec_deg": p.get("host_dec_deg"),
    "host_z": p.get("host_redshift"),
    "ebv_mw": p.get("host_milkyway_dust_reddening"),
}
print(meta)
print(sed)


{'sn_name': '2004ef', 'sn_ra_deg': 340.54174583333327, 'sn_dec_deg': 19.994555555555557, 'sn_z': 0.03002, 'host_ra_deg': 340.54385948, 'host_dec_deg': 19.9969434, 'host_z': 0.030988, 'ebv_mw': 0.04736369989812374}
    scope     param        p16        p50        p84
7  global   log_age   6.275111   8.004165  11.589696
1  global  log_mass  10.803862  10.949363  11.082343
3  global   log_sfr  -1.253714  -0.096142   0.952807
5  global  log_ssfr -12.359298 -11.008361  -9.931248
6   local   log_age   6.390808   7.884289  10.741411
0   local  log_mass   9.092002   9.222703   9.349392
2   local   log_sfr  -1.707018  -1.142086  -0.620095
4   local  log_ssfr -10.976228 -10.369217  -9.823326


In [None]:
# assume `payload` is your /api/transient/get/<name>?format=json dict

host = {
    k: payload[k]
    for k in ("host_name","host_ra_deg","host_dec_deg","host_redshift","host_milkyway_dust_reddening")
}

# flatten all host SED parameters (local & global)
sed_local  = {k: v for k, v in payload.items()  if k.startswith("local_aperture_host_")}
sed_global = {k: v for k, v in payload.items()  if k.startswith("global_aperture_host_")}

# grab photometry by band for both apertures
def collect_phot(ap_prefix="local_aperture_"):
    bands = {}
    for k, v in payload.items():
        if k.startswith(ap_prefix) and any(s in k for s in ("_flux","_magnitude")):
            # e.g., local_aperture_SDSS_g_magnitude  -> band = "SDSS_g", field="magnitude"
            parts = k[len(ap_prefix):].split("_", 2)  # ["SDSS", "g", "magnitude..."]
            if len(parts) == 3:
                band = f"{parts[0]}_{parts[1]}"
                field = parts[2]
                bands.setdefault(band, {})[field] = v
    return bands

phot_local  = collect_phot("local_aperture_")
phot_global = collect_phot("global_aperture_")

# SN identifying info
sn_info = {
    "name": payload.get("transient_name"),
    "ra_deg": payload.get("transient_ra_deg"),
    "dec_deg": payload.get("transient_dec_deg"),
    "redshift": payload.get("transient_redshift"),
    "spectroscopic_class": payload.get("transient_spectroscopic_class"),
    "photometric_class": payload.get("transient_photometric_class"),  # may be absent/None
}



In [39]:
from urllib.request import urlopen
import json
import re
import pandas as pd
from collections import defaultdict

BASE = "https://blast.ncsa.illinois.edu"

# --------------------------- helpers ---------------------------

def fetch_payload(name: str):
    url = f"{BASE}/api/transient/get/{name}?format=json"
    with urlopen(url) as r:
        return json.loads(r.read())

def is_snia(payload: dict) -> bool:
    """Return True if BLAST thinks this is (probably) a Type Ia."""
    sc = (payload.get("transient_spectroscopic_class") or "").strip().lower()
    pc = (payload.get("transient_photometric_class") or "").strip().lower()
    return ("ia" in sc) or ("ia" in pc)

def _get(payload, key, default=None):
    return payload.get(key, default)

def extract_catalog_row(payload: dict) -> dict:
    """Flatten the bits you care about into a single row."""
    row = {
        # SN id / position / z / classes
        "sn_name": _get(payload, "transient_name"),
        "sn_ra_deg": _get(payload, "transient_ra_deg"),
        "sn_dec_deg": _get(payload, "transient_dec_deg"),
        "sn_redshift": _get(payload, "transient_redshift"),
        "sn_spectroscopic_class": (payload.get("transient_spectroscopic_class") or "").strip(),
        "sn_photometric_class": (payload.get("transient_photometric_class") or "").strip(),
        "sn_processing_status": _get(payload, "transient_processing_status"),

        # Host basics
        "host_name": _get(payload, "host_name"),
        "host_ra_deg": _get(payload, "host_ra_deg"),
        "host_dec_deg": _get(payload, "host_dec_deg"),
        "host_redshift": _get(payload, "host_redshift"),
        "host_EBV_MW": _get(payload, "host_milkyway_dust_reddening"),
    }

    # SED medians (p50) for both local & global (add p16/p84 if you want)
    params = ["log_mass", "log_sfr", "log_ssfr", "log_age"]
    for scope in ("local", "global"):
        for p in params:
            key = f"{scope}_aperture_host_{p}_50"
            row[f"{scope}_{p}_50"] = _get(payload, key)
        # optional quality-ish extra
        row[f"{scope}_mass_surviving_ratio"] = _get(payload, f"{scope}_aperture_host_mass_surviving_ratio")

    return row

_PHOT_RE = re.compile(
    r'^(?P<scope>local|global)_aperture_(?P<survey>[A-Za-z0-9]+)_(?P<band>[A-Za-z0-9]+)_(?P<quantity>magnitude|magnitude_error|flux|flux_error|is_validated)$',
    re.I
)

def extract_photometry_rows(payload: dict):
    """Return long-form rows for photometry with mag/flux + errors."""
    rows = defaultdict(dict)
    base_id = {
        "sn_name": _get(payload, "transient_name"),
        "host_name": _get(payload, "host_name"),
    }

    for k, v in payload.items():
        m = _PHOT_RE.match(k)
        if not m:
            continue
        scope, survey, band, quantity = m.group("scope","survey","band","quantity")
        idx = (scope.lower(), survey, band)
        if "scope" not in rows[idx]:
            rows[idx].update(base_id)
            rows[idx]["scope"] = scope.lower()
            rows[idx]["survey"] = survey
            rows[idx]["band"] = band
        rows[idx][quantity] = v

    # normalize booleans, missing values
    out = []
    for r in rows.values():
        r.setdefault("magnitude", None)
        r.setdefault("magnitude_error", None)
        r.setdefault("flux", None)
        r.setdefault("flux_error", None)
        # convert is_validated to bool if present
        if "is_validated" in r and isinstance(r["is_validated"], str):
            r["is_validated"] = r["is_validated"].lower() == "true"
        out.append(r)
    return out

# --------------------------- main routine ---------------------------

def build_blash_catalog(sn_names, require_snia=True,
                        catalog_csv="blast_hosts_catalog.csv",
                        photometry_csv="blast_hosts_photometry.csv"):
    cat_rows, phot_rows = [], []

    for name in sn_names:
        try:
            payload = fetch_payload(name)
        except Exception as e:
            print(f"[WARN] {name}: fetch failed: {e}")
            continue

        if require_snia and not is_snia(payload):
            print(f"[SKIP] {name}: not Ia by BLAST labels")
            continue

        cat_rows.append(extract_catalog_row(payload))
        phot_rows.extend(extract_photometry_rows(payload))

    cat_df = pd.DataFrame(cat_rows)
    phot_df = pd.DataFrame(phot_rows)

    # Save
    if not cat_df.empty:
        cat_df.to_csv(catalog_csv, index=False)
        print(f"[OK] wrote {catalog_csv} ({len(cat_df)} rows)")
    else:
        print("[INFO] catalog is empty")

    if not phot_df.empty:
        phot_df.sort_values(["sn_name","scope","survey","band"]).to_csv(photometry_csv, index=False)
        print(f"[OK] wrote {photometry_csv} ({len(phot_df)} rows)")
    else:
        print("[INFO] photometry is empty")

    return cat_df, phot_df

# --------------------------- example ---------------------------

if __name__ == "__main__":
    # Replace with your list
    targets = ["2004ef", "2018gv", "2011fe", "2014J"]
    catalog, phot = build_blash_catalog(targets)

[OK] wrote blast_hosts_catalog.csv (4 rows)
[OK] wrote blast_hosts_photometry.csv (192 rows)


In [40]:
from urllib.request import urlopen
import json
import re
import pandas as pd
from collections import defaultdict

BASE = "https://blast.ncsa.illinois.edu"

# --------------------------- helpers ---------------------------

def fetch_payload(name: str):
    url = f"{BASE}/api/transient/get/{name}?format=json"
    with urlopen(url) as r:
        return json.loads(r.read())

def is_snia(payload: dict) -> bool:
    """Return True if BLAST thinks this is (probably) a Type Ia."""
    sc = (payload.get("transient_spectroscopic_class") or "").strip().lower()
    pc = (payload.get("transient_photometric_class") or "").strip().lower()
    return ("ia" in sc) or ("ia" in pc)

def _get(payload, key, default=None):
    return payload.get(key, default)

def extract_catalog_row(payload: dict) -> dict:
    """Flatten the bits you care about into a single row."""
    row = {
        # SN id / position / z / classes
        "sn_name": _get(payload, "transient_name"),
        "sn_ra_deg": _get(payload, "transient_ra_deg"),
        "sn_dec_deg": _get(payload, "transient_dec_deg"),
        "sn_redshift": _get(payload, "transient_redshift"),
        "sn_spectroscopic_class": (payload.get("transient_spectroscopic_class") or "").strip(),
        "sn_photometric_class": (payload.get("transient_photometric_class") or "").strip(),
        "sn_processing_status": _get(payload, "transient_processing_status"),

        # Host basics
        "host_name": _get(payload, "host_name"),
        "host_ra_deg": _get(payload, "host_ra_deg"),
        "host_dec_deg": _get(payload, "host_dec_deg"),
        "host_redshift": _get(payload, "host_redshift"),
        "host_EBV_MW": _get(payload, "host_milkyway_dust_reddening"),
    }

    # SED medians (p50) for both local & global (add p16/p84 if you want)
    params = ["log_mass", "log_sfr", "log_ssfr", "log_age"]
    for scope in ("local", "global"):
        for p in params:
            key = f"{scope}_aperture_host_{p}_50"
            row[f"{scope}_{p}_50"] = _get(payload, key)
        # optional quality-ish extra
        row[f"{scope}_mass_surviving_ratio"] = _get(payload, f"{scope}_aperture_host_mass_surviving_ratio")

    return row

_PHOT_RE = re.compile(
    r'^(?P<scope>local|global)_aperture_(?P<survey>[A-Za-z0-9]+)_(?P<band>[A-Za-z0-9]+)_(?P<quantity>magnitude|magnitude_error|flux|flux_error|is_validated)$',
    re.I
)

def extract_photometry_rows(payload: dict):
    """Return long-form rows for photometry with mag/flux + errors."""
    rows = defaultdict(dict)
    base_id = {
        "sn_name": _get(payload, "transient_name"),
        "host_name": _get(payload, "host_name"),
    }

    for k, v in payload.items():
        m = _PHOT_RE.match(k)
        if not m:
            continue
        scope, survey, band, quantity = m.group("scope","survey","band","quantity")
        idx = (scope.lower(), survey, band)
        if "scope" not in rows[idx]:
            rows[idx].update(base_id)
            rows[idx]["scope"] = scope.lower()
            rows[idx]["survey"] = survey
            rows[idx]["band"] = band
        rows[idx][quantity] = v

    # normalize booleans, missing values
    out = []
    for r in rows.values():
        r.setdefault("magnitude", None)
        r.setdefault("magnitude_error", None)
        r.setdefault("flux", None)
        r.setdefault("flux_error", None)
        # convert is_validated to bool if present
        if "is_validated" in r and isinstance(r["is_validated"], str):
            r["is_validated"] = r["is_validated"].lower() == "true"
        out.append(r)
    return out

# --------------------------- main routine ---------------------------

def build_blash_catalog(sn_names, require_snia=True,
                        catalog_csv="blast_hosts_catalog.csv",
                        photometry_csv="blast_hosts_photometry.csv"):
    cat_rows, phot_rows = [], []

    for name in sn_names:
        try:
            payload = fetch_payload(name)
        except Exception as e:
            print(f"[WARN] {name}: fetch failed: {e}")
            continue

        if require_snia and not is_snia(payload):
            print(f"[SKIP] {name}: not Ia by BLAST labels")
            continue

        cat_rows.append(extract_catalog_row(payload))
        phot_rows.extend(extract_photometry_rows(payload))

    cat_df = pd.DataFrame(cat_rows)
    phot_df = pd.DataFrame(phot_rows)

    # Save
    if not cat_df.empty:
        cat_df.to_csv(catalog_csv, index=False)
        print(f"[OK] wrote {catalog_csv} ({len(cat_df)} rows)")
    else:
        print("[INFO] catalog is empty")

    if not phot_df.empty:
        phot_df.sort_values(["sn_name","scope","survey","band"]).to_csv(photometry_csv, index=False)
        print(f"[OK] wrote {photometry_csv} ({len(phot_df)} rows)")
    else:
        print("[INFO] photometry is empty")

    return cat_df, phot_df

# --------------------------- example ---------------------------

if __name__ == "__main__":
    # Replace with your list
    targets = ["2004ef", "2018gv", "2011fe", "2014J"]
    catalog, phot = build_blash_catalog(targets)

[OK] wrote blast_hosts_catalog.csv (4 rows)
[OK] wrote blast_hosts_photometry.csv (192 rows)


In [41]:
from urllib.request import urlopen
import json, re
import pandas as pd

BASE = "https://blast.ncsa.illinois.edu"

# Choose exactly which photometry to include as columns
PHOT_TO_KEEP = {
    "SDSS":  list("ugriz"),
    "PanSTARRS": list("grizy"),
    "DES":   list("griz"),
    "2MASS": ["J","H","K"],
    "GALEX": ["FUV","NUV"],
    "WISE":  ["W1","W2","W3","W4"],
}
# which photometry fields to flatten
PHOT_FIELDS = ["magnitude","magnitude_error","flux","flux_error","is_validated"]

def fetch_payload(name: str):
    url = f"{BASE}/api/transient/get/{name}?format=json"
    with urlopen(url) as r:
        return json.loads(r.read())

def is_snia(payload):
    s = (payload.get("transient_spectroscopic_class") or "").lower()
    p = (payload.get("transient_photometric_class") or "").lower()
    return ("ia" in s) or ("ia" in p)

def base_row(payload: dict) -> dict:
    row = {
        # SN identifiers
        "sn_name": payload.get("transient_name"),
        "sn_ra_deg": payload.get("transient_ra_deg"),
        "sn_dec_deg": payload.get("transient_dec_deg"),
        "sn_redshift": payload.get("transient_redshift"),
        "sn_spectroscopic_class": (payload.get("transient_spectroscopic_class") or "").strip(),
        "sn_photometric_class": (payload.get("transient_photometric_class") or "").strip(),
        "sn_processing_status": payload.get("transient_processing_status"),
        # Host basics
        "host_name": payload.get("host_name"),
        "host_ra_deg": payload.get("host_ra_deg"),
        "host_dec_deg": payload.get("host_dec_deg"),
        "host_redshift": payload.get("host_redshift"),
        "host_EBV_MW": payload.get("host_milkyway_dust_reddening"),
    }
    # SED medians (add p16/p84 if you want by duplicating this block)
    for scope in ("local","global"):
        for p in ("log_mass","log_sfr","log_ssfr","log_age"):
            row[f"{scope}_{p}_50"] = payload.get(f"{scope}_aperture_host_{p}_50")
        row[f"{scope}_mass_surviving_ratio"] = payload.get(f"{scope}_aperture_host_mass_surviving_ratio")
    return row

# compile once
_PHOT_RE = re.compile(
    r'^(?P<scope>local|global)_aperture_(?P<survey>[A-Za-z0-9]+)_(?P<band>[A-Za-z0-9]+)_(?P<what>magnitude|magnitude_error|flux|flux_error|is_validated)$',
    re.I
)

def flatten_photometry(payload: dict, row: dict, phot_to_keep=PHOT_TO_KEEP):
    """Add photometry columns in-place to `row` following: 
       <scope>_<survey>_<band>_<what>"""
    # pre-create columns (so every row has same schema)
    for scope in ("local","global"):
        for survey, bands in phot_to_keep.items():
            for b in bands:
                for w in PHOT_FIELDS:
                    row.setdefault(f"{scope}_{survey}_{b}_{w}", None)

    for k, v in payload.items():
        m = _PHOT_RE.match(k)
        if not m:
            continue
        scope, survey, band, what = m.group("scope","survey","band","what")
        # normalize survey tag to our keys (e.g., SDSS vs Sdss)
        survey_norm = survey  # BLAST’s tags already match (SDSS, DES, etc.)
        # only keep what we asked for
        if survey_norm not in phot_to_keep or band not in phot_to_keep[survey_norm]:
            continue
        row[f"{scope}_{survey_norm}_{band}_{what}"] = v

def one_row_for(name: str, require_snia=True, phot_to_keep=PHOT_TO_KEEP):
    payload = fetch_payload(name)
    if require_snia and not is_snia(payload):
        return None
    row = base_row(payload)
    flatten_photometry(payload, row, phot_to_keep)
    return row

def build_catalog(sn_names, require_snia=True):
    rows = []
    for n in sn_names:
        try:
            r = one_row_for(n, require_snia=require_snia)
            if r:
                rows.append(r)
            else:
                print(f"[skip] {n}: not Ia by BLAST labels")
        except Exception as e:
            print(f"[warn] {n}: {e}")
    df = pd.DataFrame(rows)
    # consistent column order: identifiers → SED → photometry
    id_cols = ["sn_name","sn_ra_deg","sn_dec_deg","sn_redshift",
               "sn_spectroscopic_class","sn_photometric_class","sn_processing_status",
               "host_name","host_ra_deg","host_dec_deg","host_redshift","host_EBV_MW"]
    sed_cols = [c for c in df.columns if c.startswith("local_") or c.startswith("global_")]
    # put identifiers first
    df = df[id_cols + sorted([c for c in sed_cols if "log_" in c or "mass_surviving_ratio" in c]) +
            sorted([c for c in sed_cols if any(x in c for x in ("magnitude","flux","is_validated"))])]
    return df

# ===== Example =====
if __name__ == "__main__":
    targets = ["2004ef","2018gv","2011fe","2014J"]   # your list here
    df = build_catalog(targets)
    df.to_csv("blast_one_row_per_pair.csv", index=False)
    print("Wrote blast_one_row_per_pair.csv with", len(df), "rows and", len(df.columns), "columns")

Wrote blast_one_row_per_pair.csv with 4 rows and 252 columns


In [45]:
import pandas as pd

path = "/Users/pittsburghgraduatestudent/first_paper_blast_webapi/blast_one_row_per_pair.csv"

df = pd.read_csv(path)

print(df.columns)     # column names


Index(['sn_name', 'sn_ra_deg', 'sn_dec_deg', 'sn_redshift',
       'sn_spectroscopic_class', 'sn_photometric_class',
       'sn_processing_status', 'host_name', 'host_ra_deg', 'host_dec_deg',
       ...
       'local_WISE_W3_flux', 'local_WISE_W3_flux_error',
       'local_WISE_W3_is_validated', 'local_WISE_W3_magnitude',
       'local_WISE_W3_magnitude_error', 'local_WISE_W4_flux',
       'local_WISE_W4_flux_error', 'local_WISE_W4_is_validated',
       'local_WISE_W4_magnitude', 'local_WISE_W4_magnitude_error'],
      dtype='object', length=252)


In [46]:
import pandas as pd

pd.set_option("display.max_columns", None)  # show all columns
pd.set_option("display.width", None)        # don't wrap columns to the next line

print(df.columns.tolist())  # as a Python list

['sn_name', 'sn_ra_deg', 'sn_dec_deg', 'sn_redshift', 'sn_spectroscopic_class', 'sn_photometric_class', 'sn_processing_status', 'host_name', 'host_ra_deg', 'host_dec_deg', 'host_redshift', 'host_EBV_MW', 'global_log_age_50', 'global_log_mass_50', 'global_log_sfr_50', 'global_log_ssfr_50', 'global_mass_surviving_ratio', 'local_log_age_50', 'local_log_mass_50', 'local_log_sfr_50', 'local_log_ssfr_50', 'local_mass_surviving_ratio', 'global_2MASS_H_flux', 'global_2MASS_H_flux_error', 'global_2MASS_H_is_validated', 'global_2MASS_H_magnitude', 'global_2MASS_H_magnitude_error', 'global_2MASS_J_flux', 'global_2MASS_J_flux_error', 'global_2MASS_J_is_validated', 'global_2MASS_J_magnitude', 'global_2MASS_J_magnitude_error', 'global_2MASS_K_flux', 'global_2MASS_K_flux_error', 'global_2MASS_K_is_validated', 'global_2MASS_K_magnitude', 'global_2MASS_K_magnitude_error', 'global_DES_g_flux', 'global_DES_g_flux_error', 'global_DES_g_is_validated', 'global_DES_g_magnitude', 'global_DES_g_magnitude_error

In [47]:
for col in df.columns:
    print(col)

sn_name
sn_ra_deg
sn_dec_deg
sn_redshift
sn_spectroscopic_class
sn_photometric_class
sn_processing_status
host_name
host_ra_deg
host_dec_deg
host_redshift
host_EBV_MW
global_log_age_50
global_log_mass_50
global_log_sfr_50
global_log_ssfr_50
global_mass_surviving_ratio
local_log_age_50
local_log_mass_50
local_log_sfr_50
local_log_ssfr_50
local_mass_surviving_ratio
global_2MASS_H_flux
global_2MASS_H_flux_error
global_2MASS_H_is_validated
global_2MASS_H_magnitude
global_2MASS_H_magnitude_error
global_2MASS_J_flux
global_2MASS_J_flux_error
global_2MASS_J_is_validated
global_2MASS_J_magnitude
global_2MASS_J_magnitude_error
global_2MASS_K_flux
global_2MASS_K_flux_error
global_2MASS_K_is_validated
global_2MASS_K_magnitude
global_2MASS_K_magnitude_error
global_DES_g_flux
global_DES_g_flux_error
global_DES_g_is_validated
global_DES_g_magnitude
global_DES_g_magnitude_error
global_DES_i_flux
global_DES_i_flux_error
global_DES_i_is_validated
global_DES_i_magnitude
global_DES_i_magnitude_error
glob