In [None]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import sys, os
# Put the directory three levels above the current working directory on the
# import path so that the `util` package (../../../util) can be imported below.
# Inserted at position 1 so that sys.path[0] keeps its place.
CODE_ROOT = os.path.abspath(os.path.join(os.getcwd(), *([os.pardir] * 3)))
if sys.path.count(CODE_ROOT) == 0:
    sys.path.insert(1, CODE_ROOT)

from util.spectra_helpers import SpectraCustomHDF5

In [None]:
# Which simulation suite, which boxes ("gp<N>") and which sightline to load.
suite_to_use = "L25n256_suite"
index_list = [24, 35, 40]
spec_num = 512

# Collect one flux array per box, in the same order as `index_list`.
# `wavelengths` is overwritten on every iteration, so after the loop it holds
# the grid returned for the last box; the plotting cell reuses it for all boxes.
# NOTE(review): this presumes every box shares the same wavelength grid - confirm.
fluxes = []
for box_index in index_list:
    spectra = SpectraCustomHDF5(
        f"/vera/ptmp/gc/jerbo/training_data/{suite_to_use}/gp{box_index}_spectra.hdf5"
    )
    wavelengths, box_flux = spectra.get_single_spectrum(spec_num)
    fluxes.append(box_flux)


In [None]:
# Stacked, gap-free panels: one simulated spectrum per entry in `fluxes`.
fig, ax = plt.subplots(figsize=(13, 10))

# The axes created by plt.subplots only serves as an invisible backdrop:
# strip its ticks and frame so that only the GridSpec panels are visible.
ax.set_xticks([])
ax.set_yticks([])
for frame_edge in ax.spines.values():
    frame_edge.set_visible(False)

n_panels = len(fluxes)
gs = gridspec.GridSpec(n_panels, 1, hspace=0)
axes = []

for i, panel_flux in enumerate(fluxes):
    ax = fig.add_subplot(gs[i, 0])

    ax.plot(wavelengths, panel_flux, label="simulated spectra")
    # Tag the panel with its box number at the lower-left of the data range.
    ax.annotate(f"Box {index_list[i]}", (wavelengths[0], min(panel_flux)))
    # ax.plot(wavelengths, patched_spectra[i], color="black", label="original spectrum")
    # ax.set_xlim([left_cut, right_cut])
    ax.set_ylabel("Relative Flux")
    ax.legend(loc="upper left")
    ax.tick_params(axis='x', direction='in')

    # Only the bottom panel carries the x label and x tick labels.
    if i == n_panels - 1:
        ax.set_xlabel(r"Wavelength [$\AA$]")
    else:
        ax.tick_params(labelbottom=False)

    axes.append(ax)

# fig.supxlabel(r"Wavelength [$\AA$]")
# fig.supylabel("Relative Flux")

# plt.savefig("plots/noise_spectra.pdf", format="PDF")
plt.show()

In [None]:
from astropy.table import Table

lya_cat = Table.read("data/BOSSLyaDR9_cat.fits")

# Print the (PLATE, MJD, FIBERID) triplets of the first few catalogue rows with
# SNR above the threshold, formatted so they can be pasted into a Python list.
snr_threshold = 10
# The previous loop stopped once `count > 5`, i.e. after six rows (its comment
# claimed ten); the limit is now explicit and the output is unchanged.
max_printed = 6

count = 0
for row in lya_cat:
    if row["SNR"] > snr_threshold:
        # Single quotes inside the f-string: reusing the outer quote character
        # is a SyntaxError on Python versions before 3.12.
        print(f"({row['PLATE']}, {row['MJD']}, {row['FIBERID']}), ")

        count += 1
        if count >= max_printed:
            break

In [None]:
import os
import requests
import numpy as np
import matplotlib.pyplot as plt
from astropy.io import fits

# Spectra to fetch, identified by (plate, mjd, fiber).
pmf_list = [(4216, 55477, 718),
            (4296, 55499, 630),
            (4216, 55477, 746),
            ]

# --- BOSS DR9 reduction version that contains per-object spectra ---
redux_version = "v5_4_45"

# Local cache directory for the downloaded FITS files; created if missing so
# that the first download does not fail with FileNotFoundError.
data_dir = "data"
os.makedirs(data_dir, exist_ok=True)

# Seconds to wait on the SAS server before giving up (requests has no default
# timeout and would otherwise block indefinitely on a stalled connection).
download_timeout = 30

# One entry per spectrum in `pmf_list`, restricted to the selected window.
wavelengths_boss_specs = []
fluxes_boss_specs = []
for plate, mjd, fiber in pmf_list:
    # Convert fiber to zero-padded 4-digit format
    fiber_str = f"{fiber:04d}"

    # --- Construct the SAS URL and the local cache path ---
    filename = f"speclya-{plate}-{mjd}-{fiber_str}.fits"
    url = (
        "https://data.sdss.org/sas/dr9/boss/spectro/redux/"
        f"{redux_version}/spectra/lya/{plate}/{filename}"
    )
    file_path = os.path.join(data_dir, filename)

    # --- Download the file if needed ---
    if not os.path.exists(file_path):
        print("Downloading:", url)
        r = requests.get(url, timeout=download_timeout)
        r.raise_for_status()
        with open(file_path, "wb") as f:
            f.write(r.content)
        print("Saved to:", file_path)
    else:
        print("File already exists locally.")

    # --- Read FITS and extract data ---
    with fits.open(file_path) as hdul:
        data = hdul[1].data

        print(hdul[1].columns.names)

        # BOSS wavelength grid is in log10(Å)
        loglam   = data["LOGLAM"]
        flux     = data["FLUX"]
        ivar     = data["IVAR"]
        cont     = data["CONT"]
        resid    = data["RESID"]
        dla_corr = data["DLA_CORR"]

    # Convert to angstroms
    wavelength = 10**loglam

    # Flux normalised by continuum * residual, with the DLA correction applied.
    F = flux * dla_corr / (cont * resid)

    # Keep pixels between 3500 and 3900 Å that have positive inverse variance.
    mask = (
        (wavelength >= 3500.0) &
        (wavelength <= 3900.0) &
        (ivar > 0)
    )

    wave_sel = wavelength[mask]
    F_sel    = F[mask]

    wavelengths_boss_specs.append(wave_sel)
    fluxes_boss_specs.append(F_sel)

In [None]:
# Stacked, gap-free panels: one BOSS spectrum per entry in `fluxes_boss_specs`.
fig, ax = plt.subplots(figsize=(13, 10))

# The axes created by plt.subplots only serves as an invisible backdrop:
# strip its ticks and frame so that only the GridSpec panels are visible.
ax.set_xticks([])
ax.set_yticks([])
for spine in ax.spines.values():
    spine.set_visible(False)

n_panels = len(fluxes_boss_specs)
gs = gridspec.GridSpec(n_panels, 1, hspace=0)
axes = []

for i in range(n_panels):
    ax = fig.add_subplot(gs[i, 0])

    # These arrays hold downloaded BOSS spectra, so the legend must not reuse
    # the "simulated spectra" label from the simulation plot.
    ax.plot(wavelengths_boss_specs[i], fluxes_boss_specs[i], label="BOSS DR9 spectrum")
    # ax.plot(wavelengths, patched_spectra[i], color="black", label="original spectrum")
    # ax.set_xlim([left_cut, right_cut])
    ax.set_ylabel("Relative Flux")
    ax.legend(loc="upper left")
    ax.tick_params(axis='x', direction='in')

    # Only the bottom panel carries the x label and x tick labels.
    if i == n_panels - 1:
        ax.set_xlabel(r"Wavelength [$\AA$]")
    else:
        ax.tick_params(labelbottom=False)

    axes.append(ax)

# fig.supxlabel(r"Wavelength [$\AA$]")
# fig.supylabel("Relative Flux")

# plt.savefig("plots/noise_spectra.pdf", format="PDF")
plt.show()

In [None]:
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import requests
from astropy.table import Table
from astropy.io import fits

# -----------------------------
# Configuration
# -----------------------------
catalog_file = "data/BOSSLyaDR9_cat.fits"   # local DR9 Lyα catalog
data_dir = "data"                           # cache directory for downloaded spectra
snr_column = "SNR_LYAF"                     # catalog column used for the SNR cut
snr_min = 9.0
snr_max = 11.0
n_spectra = 100                             # maximum number of spectra to draw
random_seed = 42                            # fixed for a reproducible draw; None -> new draw each run
redux_version = "v5_4_45"
base_url = "https://data.sdss.org/sas/dr9/boss/spectro/redux"

lya_cat = Table.read(catalog_file)

# -----------------------------
# Filter catalog by SNR ~ 10
# -----------------------------
if snr_column not in lya_cat.colnames:
    # Name the column that was actually checked.
    raise ValueError(f"Catalog does not contain '{snr_column}' column.")

filtered_cat = lya_cat[(lya_cat[snr_column] >= snr_min) & (lya_cat[snr_column] <= snr_max)]
print(f"Number of spectra with SNR ~10: {len(filtered_cat)}")

# -----------------------------
# Pick up to `n_spectra` random spectra (without replacement)
# -----------------------------
rng = random.Random(random_seed)
n_draw = min(n_spectra, len(filtered_cat))
selected = []
for i in rng.sample(range(len(filtered_cat)), k=n_draw):
    row = filtered_cat[i]
    selected.append((row["PLATE"], row["MJD"], row["FIBERID"]))

# -----------------------------
# Download (if needed), read and overplot each spectrum
# -----------------------------
os.makedirs(data_dir, exist_ok=True)
fig, ax = plt.subplots(figsize=(10, 5))
n_plotted = 0

for plate, mjd, fiber in selected:
    fiber_str = f"{fiber:04d}"
    filename = f"speclya-{plate}-{mjd}-{fiber_str}.fits"
    # Cache under data_dir rather than the working directory, so files fetched
    # by the earlier download cell are reused.
    file_path = os.path.join(data_dir, filename)
    url = f"{base_url}/{redux_version}/spectra/lya/{plate}/{filename}"

    # Download only when the file is not cached locally
    if not os.path.exists(file_path):
        try:
            r = requests.get(url, timeout=10)
            r.raise_for_status()
        except requests.HTTPError as e:
            # Not necessarily a 404, so report the actual HTTP error.
            print(f"HTTP error from SAS, skipping {filename}: {e}")
            continue
        except requests.RequestException as e:
            print(f"Error downloading {filename}: {e}")
            continue
        with open(file_path, "wb") as f:
            f.write(r.content)
        print(f"Downloaded {filename}")

    # Read FITS file; a single unreadable file should not abort the whole plot.
    try:
        with fits.open(file_path) as hdul:
            data = hdul[1].data
            loglam   = data["LOGLAM"]
            flux     = data["FLUX"]
            ivar     = data["IVAR"]
            cont     = data["CONT"]
            resid    = data["RESID"]
            dla_corr = data["DLA_CORR"]

        # LOGLAM is log10 of the wavelength, so exponentiate to get Å.
        wavelength = 10 ** loglam
        # Flux normalised by continuum * residual, with the DLA correction applied.
        F = flux * dla_corr / (cont * resid)

        # Keep pixels between 3500 and 3900 Å that have positive inverse variance.
        mask = (wavelength >= 3500.0) & (wavelength <= 3900.0) & (ivar > 0)
        ax.plot(wavelength[mask], F[mask], alpha=0.6, lw=0.8)
        n_plotted += 1

    except Exception as e:
        print(f"Error reading {filename}: {e}")
        continue

ax.axhline(1.0, color="black", ls="--", lw=0.8)
ax.set_xlabel("Wavelength [Å]")
ax.set_ylabel("Transmission F")
# Report how many spectra were actually drawn instead of a hard-coded count.
ax.set_title(f"SDSS DR9 Lyα Forest (SNR ~10, {n_plotted} spectra)")
fig.tight_layout()
plt.show()
