In [1]:
# Install necessary packages (uncomment if needed)
# %pip install astroquery astropy

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# For SDSS queries and FITS handling
from astroquery.sdss import SDSS
from astropy.io import fits

# TensorFlow via its tf.keras namespace (using alternative import)
import tensorflow as tf
print("TensorFlow version:", tf.__version__)
Sequential = tf.keras.models.Sequential
Conv1D = tf.keras.layers.Conv1D
MaxPooling1D = tf.keras.layers.MaxPooling1D
Flatten = tf.keras.layers.Flatten
Dense = tf.keras.layers.Dense

# Other ML utilities
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA, FastICA, NMF

# Set random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)


TensorFlow version: 2.18.0


In [3]:
# Query SDSS for a sample of galaxy spectra (here TOP 10)
query = "SELECT TOP 10 specobjid, plate, mjd, fiberid FROM SpecObj WHERE class='GALAXY'"
galaxy_table = SDSS.query_sql(query)

if galaxy_table is None or len(galaxy_table) == 0:
    raise Exception("No galaxy spectra were returned. Please check your query and network connection.")

print("SDSS query returned:")
print(galaxy_table)


SDSS query returned:
     specobjid      plate  mjd  fiberid
------------------- ----- ----- -------
3312424264937793536  2942 54521      97
3312465221745928192  2942 54521     246
3312414919088957440  2942 54521      63
3556869618884780032  3159 54802     552
3516337221215035392  3123 54741     552
3613183857425227776  3209 54906     622
3312497932216854528  2942 54521     365
3312484463199414272  2942 54521     316
3312482539054065664  2942 54521     309
 713889571598788608   634 52164     251


In [5]:
spectra_list = []
spec_ids = []
wavelength_list = []  # to store the wavelength array (should be the same for all spectra)

for row in galaxy_table:
    plate = row['plate']
    mjd = row['mjd']
    fiberid = row['fiberid']
    # Download the spectrum using SDSS.get_spectra
    spec = SDSS.get_spectra(plate=plate, mjd=mjd, fiberID=fiberid)
    
    if spec is None or len(spec) == 0:
        continue
    
    # spec is a list of HDUList objects; we take the first one.
    hdu = spec[0]
    data = hdu[1].data  # second HDU contains the spectral data
    flux = data['flux']  # flux array
    header = hdu[1].header
    # Construct the wavelength array.
    # SDSS spectra have wavelengths computed as: wavelength = 10^(COEFF0 + COEFF1 * i)
    n = len(flux)
    loglam0 = header['COEFF0']
    loglam1 = header['COEFF1']
    wavelength = 10 ** (loglam0 + loglam1 * np.arange(n))
    
    spectra_list.append(flux)
    spec_ids.append(row['specobjid'])
    if len(wavelength_list) == 0:
        wavelength_list = wavelength  # assume all spectra share the same wavelength grid

print(f"Downloaded {len(spectra_list)} spectra.")


KeyError: "Keyword 'COEFF0' not found."