In [32]:
from astropy.io import fits
import os
import glob
import pandas as pd

## 1. Opening a random .fits file from Unzipped JADES spectra

In [33]:
# Specify the path to your FITS file
fits_file = '/Users/aryanahaghjoo/Documents/GitHub/super_resolution/data/JADES/JADES_spectra_unzipped/hlsp_jades_jwst_nirspec_clear-prism/hlsp_jades_jwst_nirspec_goods-n-mediumhst-00000604_clear-prism_v1.0_s2d.fits'

# Open the FITS file
with fits.open(fits_file) as hdul:
    # Print the FITS file structure
    hdul.info()
    
    # Access the primary HDU (Header Data Unit)
    primary_hdu = hdul[0]
    
    # Print the header
    print("\nHeader of the Primary HDU:")
    print(repr(primary_hdu.header))
    
    # Access the data (if it's an image)
    if primary_hdu.data is not None:
        print("\nData shape:", primary_hdu.data.shape)

    # If there's an extension (e.g., table), access it
    if len(hdul) > 1:
        table_hdu = hdul[1]
        print("\nHeader of Extension HDU:")
        print(repr(table_hdu.header))
        print("\nData in Extension HDU (First 5 Rows):")
        print(table_hdu.data[:5])

Filename: /Users/aryanahaghjoo/Documents/GitHub/super_resolution/data/JADES/JADES_spectra_unzipped/hlsp_jades_jwst_nirspec_clear-prism/hlsp_jades_jwst_nirspec_goods-n-mediumhst-00000604_clear-prism_v1.0_s2d.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU      34   ()      
  1  FLUX          1 ImageHDU         9   (674, 27)   float64   
  2  FLUX_ERR      1 ImageHDU         9   (674, 27)   float64   
  3  WAVELENGTH    1 ImageHDU         8   (674,)   float64   
  4  RA            1 ImageHDU         9   (674, 27)   float64   
  5  DEC           1 ImageHDU         9   (674, 27)   float64   
  6  ASDF          1 BinTableHDU      9   0R x 0C   []   

Header of the Primary HDU:
SIMPLE  =                    T / conforms to FITS standard                      
BITPIX  =                    8 / array data type                                
NAXIS   =                    0 / number of array dimensions                     
EXTEND  =               

## 2. Extracting Spectrum from "hlsp_jades_jwst_nirspec_clear-prism"

In [35]:
import os
import glob
import pandas as pd
from astropy.io import fits
from tqdm import tqdm

# Directory containing your FITS files with 1D spectra
fits_dir = '/Users/aryanahaghjoo/Documents/GitHub/super_resolution/data/JADES/JADES_spectra_unzipped/hlsp_jades_jwst_nirspec_clear-prism'
# Collect both lowercase and uppercase file extensions
fits_files = glob.glob(os.path.join(fits_dir, '*.fits')) + glob.glob(os.path.join(fits_dir, '*.FITS'))

data_list = []

for file in tqdm(fits_files, desc="Processing FITS files"):
    with fits.open(file) as hdul:
        # Search for the EXTRACT1D extension
        extract1d_hdu = None
        for hdu in hdul:
            if hdu.header.get('EXTNAME', '').strip().upper() == 'EXTRACT1D':
                extract1d_hdu = hdu
                break

        # Skip files that are not 1D spectra
        if extract1d_hdu is None:
            continue

        # Confirm the extension is a table with columns
        if not hasattr(extract1d_hdu, 'columns'):
            print(f"Warning: 'EXTRACT1D' in {os.path.basename(file)} is not a table. Skipping file.")
            continue

        table_data = extract1d_hdu.data
        available_columns = [col.upper() for col in extract1d_hdu.columns.names]
        required_columns = ['WAVELENGTH', 'FLUX', 'FLUX_ERR']
        if not all(col in available_columns for col in required_columns):
            print(f"Warning: Missing required columns in {os.path.basename(file)}. Skipping file.")
            continue

        # Extract data columns
        wavelength = table_data['WAVELENGTH']
        flux = table_data['FLUX']
        flux_err = table_data['FLUX_ERR']

        data_list.append({
            'file_name': os.path.basename(file),
            'WAVELENGTH': wavelength,
            'FLUX': flux,
            'FLUX_ERR': flux_err
        })

# Create a DataFrame where each row corresponds to a file
df = pd.DataFrame(data_list)
#print(df.head())

Processing FITS files: 100%|██████████| 7504/7504 [00:07<00:00, 1068.74it/s]


In [None]:
df
data/JADES/JADES_spectra_unzipped/hlsp_jades_jwst_nirspec_clear-prism

Unnamed: 0,file_name,WAVELENGTH,FLUX,FLUX_ERR
0,hlsp_jades_jwst_nirspec_goods-s-mediumjwst1180...,"[0.6025500540697117, 0.6051543430096333, 0.607...","[nan, 1.6558568411742087e-20, 1.32674916986150...","[nan, 1.9065509910426825e-20, 1.82963763546714..."
1,hlsp_jades_jwst_nirspec_goods-n-mediumhst-0002...,"[0.6025500540697117, 0.6051543430096333, 0.607...","[nan, 2.01184890526121e-20, 4.176809182277331e...","[nan, 1.7326892567333e-20, 1.6103023576710562e..."
2,hlsp_jades_jwst_nirspec_goods-s-mediumhst-0000...,"[0.6025500540697117, 0.6051543430096333, 0.607...","[nan, 2.5894512549419576e-20, 2.87361198826301...","[nan, 4.630180953582602e-20, 4.404882103455821..."
3,hlsp_jades_jwst_nirspec_goods-s-mediumhst-0004...,"[0.6025500540697117, 0.6051543430096333, 0.607...","[nan, 2.4834252066419896e-20, 2.27124545700521...","[nan, 3.01149165917404e-20, 2.5857642973558054..."
4,hlsp_jades_jwst_nirspec_goods-n-mediumjwst-000...,"[0.6025500540697117, 0.6051543430096333, 0.607...","[1.7628827558841336e-20, 2.129762538605096e-20...","[4.5999273483926965e-20, 1.7484532328649386e-2..."
...,...,...,...,...
3747,hlsp_jades_jwst_nirspec_goods-s-mediumhst-0000...,"[0.6025500540697117, 0.6051543430096333, 0.607...","[-6.019837867354967e-19, -2.866506782807916e-1...","[7.4240623455505e-19, 3.7582540039241757e-19, ..."
3748,hlsp_jades_jwst_nirspec_goods-s-mediumjwst-000...,"[0.6025500540697117, 0.6051543430096333, 0.607...","[nan, 2.63448617724546e-20, 4.533356832237835e...","[nan, 2.1416511199229643e-20, 2.02147796931549..."
3749,hlsp_jades_jwst_nirspec_goods-s-mediumjwst-000...,"[0.6025500540697117, 0.6051543430096333, 0.607...","[nan, 2.303869901869643e-17, -4.56541470833669...","[nan, 3.290906060147686e-18, 2.796700111192446..."
3750,hlsp_jades_jwst_nirspec_goods-n-mediumjwst-000...,"[0.6025500540697117, 0.6051543430096333, 0.607...","[nan, -1.8616483994452673e-20, -2.899215344097...","[nan, 1.6998804342523424e-20, 1.55563771351816..."
