## 1. Keck Data

In [7]:
import os
import glob
import pandas as pd
import numpy as np
from astropy.io import fits

# Specify your folder path containing the FITS files
folder_path = '/Users/aryanahaghjoo/Documents/GitHub/super_resolution/data/co_1dspec'
fits_files = glob.glob(os.path.join(folder_path, '*.fits'))
print(f"Found {len(fits_files)} FITS files.")

dfs = []
for file in fits_files:
    try:
        with fits.open(file) as hdul:
            # Print HDU info for debugging (uncomment next line if needed)
            # hdul.info()
            
            # Try to locate the first HDU with data
            data_found = False
            for hdu in hdul:
                if hdu.data is not None:
                    header = hdu.header
                    data = hdu.data
                    data_found = True
                    break
            
            if not data_found:
                print(f"No data found in {file}.")
                continue
            
            # If the data is in a table (binary table HDU), you might need to convert it differently.
            if hasattr(data, 'dtype') and data.dtype.names is not None:
                # For binary table data, convert to a pandas DataFrame directly
                df = pd.DataFrame(data.byteswap().newbyteorder())
                df['source_file'] = os.path.basename(file)
                dfs.append(df)
                print(f"Processed table from {file} successfully.")
            # Otherwise, assume 1D spectral data stored as an array
            elif data.ndim == 1:
                # Construct wavelength array from header keywords (if available)
                crval1 = header.get('CRVAL1', 0)  # starting wavelength
                cdelt1 = header.get('CDELT1', 1)  # wavelength increment
                naxis1 = header.get('NAXIS1', len(data))
                wavelength = crval1 + cdelt1 * np.arange(naxis1)
                
                df = pd.DataFrame({
                    'wavelength': wavelength,
                    'flux': data
                })
                df['source_file'] = os.path.basename(file)
                dfs.append(df)
                #print(f"Processed 1D spectrum from {file} successfully.")
            else:
                print(f"Data in {file} is not 1D and not a recognized table format (shape: {data.shape}). Skipping.")
    except Exception as e:
        print(f"Error reading {file}: {e}")

if dfs:
    combined_df = pd.concat(dfs, ignore_index=True)
    print("Combined DataFrame head:")
    print(combined_df.head())
else:
    print("No DataFrames were created from the FITS files. Double-check the file structure using fits.open(filename).info().")

Found 1580 FITS files.
Combined DataFrame head:
   wavelength          flux                 source_file
0  11423.0000  1.557751e-17  co1_04.J.19812.ell.1d.fits
1  11424.3028  2.043431e-17  co1_04.J.19812.ell.1d.fits
2  11425.6056 -2.512184e-18  co1_04.J.19812.ell.1d.fits
3  11426.9084 -8.121936e-18  co1_04.J.19812.ell.1d.fits
4  11428.2112 -3.723886e-17  co1_04.J.19812.ell.1d.fits


In [8]:
combined_df #3million rows

Unnamed: 0,wavelength,flux,source_file
0,11423.0000,1.557751e-17,co1_04.J.19812.ell.1d.fits
1,11424.3028,2.043431e-17,co1_04.J.19812.ell.1d.fits
2,11425.6056,-2.512184e-18,co1_04.J.19812.ell.1d.fits
3,11426.9084,-8.121936e-18,co1_04.J.19812.ell.1d.fits
4,11428.2112,-3.723886e-17,co1_04.J.19812.ell.1d.fits
...,...,...,...
3410466,11339.6030,-1.758134e-18,co1_02.Y.8093.ell.1d.fits
3410467,11340.6885,-2.401424e-19,co1_02.Y.8093.ell.1d.fits
3410468,11341.7740,-1.495338e-16,co1_02.Y.8093.ell.1d.fits
3410469,11342.8595,-1.206990e-16,co1_02.Y.8093.ell.1d.fits


## 2. HST Data

In [9]:
import os
import pandas as pd
from astropy.io import fits
from astropy.table import Table

# Change this to your Hubble data folder path
folder_path = '/Users/aryanahaghjoo/Documents/GitHub/super_resolution/data/cosmos_3dhst_v4.1.5_catalogs'

# List of file extensions to process as table data
table_exts = ['dat', 'cat', 'fout', 'rf', 'str']

dfs = []

# Loop over files in the folder
for filename in os.listdir(folder_path):
    filepath = os.path.join(folder_path, filename)
    ext = filename.split('.')[-1].lower()

    try:
        if ext == 'fits':
            print(f"Processing FITS file: {filename}")
            # First try reading the file directly as a table
            try:
                table = Table.read(filepath)
            except Exception as e:
                # If that fails, open the FITS file and search for an HDU with data
                table = None
                with fits.open(filepath) as hdul:
                    for hdu in hdul:
                        if hdu.data is not None:
                            try:
                                table = Table(hdu.data)
                                break
                            except Exception as e2:
                                continue
            if table is None:
                print(f"WARNING: No table found in {filename}. Skipping.")
                continue
            df = table.to_pandas()
            df['source_file'] = filename
            dfs.append(df)

        elif ext in table_exts:
            print(f"Processing ASCII table: {filename}")
            try:
                # Adjust the format parameter if necessary (e.g., 'ascii.fixed_width')
                table = Table.read(filepath, format='ascii')
            except Exception as e:
                print(f"Error reading {filename} as ASCII: {e}")
                continue
            df = table.to_pandas()
            df['source_file'] = filename
            dfs.append(df)
        else:
            print(f"Skipping unrecognized file type: {filename}")
    except Exception as ex:
        print(f"Error processing {filename}: {ex}")

if dfs:
    combined_df = pd.concat(dfs, ignore_index=True)
    print("Combined DataFrame head:")
    print(combined_df.head())
else:
    print("No data could be loaded into DataFrames.")

Processing ASCII table: cosmos_3dhst.v4.1.5.duplicates_2d.dat
Processing ASCII table: cosmos_3dhst.v4.1.5.z_max_grism.rf
Processing FITS file: cosmos_3dhst.v4.1.5.linefit.concat.fits
Processing ASCII table: cosmos_3dhst.v4.1.5.zbest.fout
Processing FITS file: cosmos_3dhst.v4.1.5.zfit.concat.fits
Processing ASCII table: cosmos_3dhst.v4.1.5.zfit.concat.dat
Processing FITS file: cosmos_3dhst.v4.1.5.zbest.fits
Skipping unrecognized file type: cosmos_3dhst.v4.1.5.zbest.sfr
Processing ASCII table: cosmos_3dhst.v4.1.5.duplicates_zfit.dat
Processing ASCII table: cosmos_3dhst.v4.1.5.zbest.rf
Processing ASCII table: cosmos_3dhst.v4.1.5.IR.cat
Processing ASCII table: cosmos_3dhst.v4.1.5.z_max_grism.fout
Processing FITS file: cosmos_3dhst.v4.1.5.linefit.linematched.fits
Processing FITS file: cosmos_3dhst.v4.1.5.zfit.linematched.fits
Processing ASCII table: cosmos_3dhst.v4.1.5.zbest.dat
Skipping unrecognized file type: cosmos_3dhst.v4.1.5.z_max_grism.sfr
Processing ASCII table: cosmos_3dhst.v4.1.5.

In [10]:
combined_df # 450 Krows

Unnamed: 0,col1,col2,col3,col4,col5,source_file,id,z_max_grism,DM,L153,...,col6,col7,col8,col9,col10,col11,z_grism_l95,z_grism_l68,z_grism_u68,z_grism_u95
0,1.0,00000,00000,00000,00000,cosmos_3dhst.v4.1.5.duplicates_2d.dat,,,,,...,,,,,,,,,,
1,2.0,00000,00000,00000,00000,cosmos_3dhst.v4.1.5.duplicates_2d.dat,,,,,...,,,,,,,,,,
2,3.0,00000,00000,00000,00000,cosmos_3dhst.v4.1.5.duplicates_2d.dat,,,,,...,,,,,,,,,,
3,4.0,00000,00000,00000,00000,cosmos_3dhst.v4.1.5.duplicates_2d.dat,,,,,...,,,,,,,,,,
4,5.0,00000,00000,00000,00000,cosmos_3dhst.v4.1.5.duplicates_2d.dat,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
458542,,,,,,cosmos_3dhst.v4.1.5.zfit.linematched.dat,,-1.0,,,...,,,,,,,-1.0,-1.0,-1.0,-1.0
458543,,,,,,cosmos_3dhst.v4.1.5.zfit.linematched.dat,,-1.0,,,...,,,,,,,-1.0,-1.0,-1.0,-1.0
458544,,,,,,cosmos_3dhst.v4.1.5.zfit.linematched.dat,,-1.0,,,...,,,,,,,-1.0,-1.0,-1.0,-1.0
458545,,,,,,cosmos_3dhst.v4.1.5.zfit.linematched.dat,,-1.0,,,...,,,,,,,-1.0,-1.0,-1.0,-1.0
