In [1]:
import json
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from astropy.io import fits
from tqdm.auto import tqdm

# Collecting Information from FITS Files

In [2]:
# visualisation for the extracted spectral data
def plot_spec_orders(spec_df_dict, plot_label, exp_filename):
    """Plot each spectral order in its own panel and save the figure to disk.

    Parameters
    ----------
    spec_df_dict : dict
        Maps a spectral order number to a DataFrame that provides the
        columns 'wvl_angstrom', 'flux' and 'flux_err'.
    plot_label : str
        Text used as the title above the first panel.
    exp_filename : str
        Destination path handed to ``Figure.savefig``.

    Notes
    -----
    The figure always has 40 stacked panels. Orders are drawn from the
    highest order number downwards; because the orders are zipped with the
    panels, surplus panels stay empty and orders beyond the 40th are not drawn.
    The figure is closed after saving, nothing is returned.
    """
    fig, axes = plt.subplots(
        40, 1, figsize=(8, 80), dpi=300, gridspec_kw={'hspace': .14}
    )

    # iterate over the *argument*, not over a notebook-level variable, so the
    # function works for any dictionary it is given
    for (spec_order, axis) in zip(sorted(spec_df_dict.keys(), reverse=True), axes):
        df = spec_df_dict[spec_order]

        axis.errorbar(
            df['wvl_angstrom'], y=df['flux'],
            yerr=df['flux_err'], fmt='o',
            c='tab:blue', ecolor='darkgrey', mfc='None',
            ms=1, elinewidth=.6, mew=.6
        )
        axis.text(
            s=f'Order {spec_order}', x=.03, y=.9,
            va='top', ha='left', transform=axis.transAxes,
            font='monospace', fontsize=8
        )
        # raw f-string: keeps the LaTeX backslashes literal without relying on
        # invalid escape sequences; '{{' / '}}' still produce literal braces
        axis.text(
            s=rf'avg. flux/$\sigma_\mathrm{{flux}}$={np.nanmean(df["flux"] / df["flux_err"]):.1f}',
            x=.97, y=.9, va='top', ha='right', transform=axis.transAxes,
            font='monospace', fontsize=8
        )
        axis.axhline(0, c='k', lw=.4, ls=':', alpha=.8)
        axis.set_ylabel('Flux', font='monospace', fontsize=10)

        # y-range: 99th flux percentile rounded up to a multiple of 6; an order
        # without any finite flux would otherwise pass NaN to set_ylim, which
        # raises, so fall back to the smallest range used by this layout
        flux_p99 = np.nanpercentile(df['flux'], 99)
        if np.isfinite(flux_p99):
            plot_upper_threshold = np.ceil(flux_p99 / 6) * 6
        else:
            plot_upper_threshold = 6.0
        y_major_locator_base = max(6, (plot_upper_threshold // 4) // 6 * 6)
        y_minor_locator_base = y_major_locator_base / 3
        y_upper_lim = np.ceil(plot_upper_threshold / y_minor_locator_base) * y_minor_locator_base
        # leave one minor interval below zero so the zero line is visible
        axis.set_ylim(-y_minor_locator_base, y_upper_lim)
        axis.yaxis.set_major_locator(
            plt.MultipleLocator(base=y_major_locator_base, offset=y_minor_locator_base))
        axis.yaxis.set_minor_locator(
            plt.MultipleLocator(base=y_minor_locator_base))

        # x-range: data extent snapped outwards to multiples of 5 plus a margin of 5
        axis.set_xlim(np.floor(df['wvl_angstrom'].min() / 5) * 5 - 5,
                      np.ceil(df['wvl_angstrom'].max() / 5) * 5 + 5)
        axis.xaxis.set_major_locator(plt.MultipleLocator(base=25))
        axis.xaxis.set_minor_locator(plt.MultipleLocator(base=5))

        axis.tick_params(axis='both', right=True, top=True,
                         labelsize=8, direction='in', length=5, which='major')
        axis.tick_params(axis='both', right=True, top=True,
                         labelsize=8, direction='in', length=4, which='minor')

    axes[-1].set_xlabel(r'$\lambda$ [$\AA$]', font='monospace', fontsize=10)

    # set the title of the plot
    axes[0].set_title(f'{plot_label}', font='monospace', fontsize=12)
    fig.savefig(exp_filename, bbox_inches='tight')
    # close explicitly: this function is called once per file inside a loop
    plt.close(fig)

In [3]:
# observation identifier used in input paths and output names
# (presumably the night as YYMMDD -- confirm)
obs_date = 230614
combined_img_fits_file_dir = f'../data/raw-combined/{obs_date}/'
spec_fits_file_dir = f'../data/extracted/{obs_date}/'

# path to plots and summary files
exp_path = '../results/'
fig_exp_path = os.path.join(exp_path, f'figures/{obs_date}')
file_exp_path = os.path.join(exp_path, f'spec_files/{obs_date}')

# create the output directories; exist_ok=True makes re-running this cell a
# no-op and avoids the check-then-create race of a separate isdir() test
for path in [exp_path, fig_exp_path, file_exp_path]:
    os.makedirs(path, exist_ok=True)

# full orders ranging from 299 to 338
full_orders = range(299, 339)

## Collecting Information from Combined FITS Images

In [4]:
# For every combined image, record the source frames named in its HISTORY cards.
combined_img_info_dict = {}

# One pattern drives both the file filter and the sort key, so every file that
# passes the filter is guaranteed to yield a number to sort by (a separate key
# regex could return None for such a file and crash on `.group`).
combined_img_name_pattern = re.compile(r'combined(\d+)')
combined_img_fits_filenames = sorted(
    (name for name in os.listdir(combined_img_fits_file_dir)
     if combined_img_name_pattern.search(name)),
    key=lambda name: int(combined_img_name_pattern.search(name).group(1)))

for fits_img_filename in tqdm(combined_img_fits_filenames):
    fits_img_path = os.path.join(combined_img_fits_file_dir, fits_img_filename)
    print(f'processing \"{fits_img_path}\"...')

    # import FITS header info (only the primary HDU header is read here)
    with fits.open(fits_img_path) as fits_img_file:
        fits_img_header = fits_img_file[0].header

    # the HISTORY cards are flattened to text and searched for frame names of
    # the form 'icm....a.fits'; the list is stored as a JSON string
    combined_img_info_dict[fits_img_filename] = {
        'src_img_fits_combined_img': json.dumps(
            re.findall(r'icm.*?\.a\.fits', str(fits_img_header['HISTORY']))),
    }

# one row per combined image, indexed by its file name
combined_img_info_df = pd.DataFrame(combined_img_info_dict).T

  0%|          | 0/24 [00:00<?, ?it/s]

processing "data/raw-combined/230614/combined1-3.fits"...
processing "data/raw-combined/230614/combined11-13.fits"...
processing "data/raw-combined/230614/combined21-23.fits"...
processing "data/raw-combined/230614/combined24-30.fits"...
processing "data/raw-combined/230614/combined31-37.fits"...
processing "data/raw-combined/230614/combined38-41.fits"...
processing "data/raw-combined/230614/combined49-54.fits"...
processing "data/raw-combined/230614/combined55-61.fits"...
processing "data/raw-combined/230614/combined62-63.fits"...
processing "data/raw-combined/230614/combined64-66.fits"...
processing "data/raw-combined/230614/combined67-70.fits"...
processing "data/raw-combined/230614/combined71-74.fits"...
processing "data/raw-combined/230614/combined82-85.fits"...
processing "data/raw-combined/230614/combined86-89.fits"...
processing "data/raw-combined/230614/combined90-94.fits"...
processing "data/raw-combined/230614/combined95-100.fits"...
processing "data/raw-combined/230614/comb

## Collecting Information from Extracted Spectra FITS Files

In [5]:
# For every extracted-spectrum FITS file: split the data cube into one
# DataFrame per spectral order, plot it, store it as HDF5, and collect header
# metadata together with per-order S/N statistics.
spec_info_dict = {}

# The same pattern filters the files, provides the sort key and (below) builds
# the label, so every selected file has a number to sort by; a separate key
# regex could return None for a selected file and crash on `.group`.
spec_name_pattern = re.compile(r'combined_spectra(\d+)')
spec_fits_filenames = sorted(
    (name for name in os.listdir(spec_fits_file_dir)
     if spec_name_pattern.match(name)),
    key=lambda name: int(spec_name_pattern.match(name).group(1)))

# placeholder for orders that are absent from a given file
missing_snr = {'mean': np.nan, 'std': np.nan}

for spec_fits_filename in tqdm(spec_fits_filenames):
    # e.g. 'combined_spectra49-54.fits' -> 'spec49-54'
    spec_label = re.sub(
        r'combined_spectra(\d+)', r'spec\1', spec_fits_filename.split('.')[0])
    spec_fits_path = os.path.join(spec_fits_file_dir, spec_fits_filename)
    print(f'processing \"{spec_fits_path}\"...')

    # import FITS data and header info
    # memmap=False reads the array into memory, so using it after the file is
    # closed does not depend on a memory map that outlives the `with` block
    with fits.open(spec_fits_path, memmap=False) as fits_file:
        fits_spec_data = fits_file[0].data
        fits_spec_header = fits_file[0].header

    # 'ORDERS' is a comma-separated list; its position gives the cube index
    orders = [int(s) for s in fits_spec_header['ORDERS'].split(',')]

    # extract spectral data from each order
    # cube layout as indexed below: [order index, (wavelength, flux, error), pixel]
    spec_df_by_order_dict = {}
    snr_by_order_dict = {}
    for idx, order in enumerate(orders):
        wvl_um = fits_spec_data[idx, 0, :]
        order_df = pd.DataFrame({
            'wvl_um': wvl_um,
            'wvl_angstrom': wvl_um * 1e4,
            'flux': fits_spec_data[idx, 1, :],
            'flux_err': fits_spec_data[idx, 2, :]
        })
        spec_df_by_order_dict[order] = order_df

        # per-pixel flux / flux_err with NaN entries removed
        # NOTE(review): infinities (flux_err == 0) are not removed -- confirm intended
        order_snr = order_df['flux'] / order_df['flux_err']
        order_snr = order_snr[~np.isnan(order_snr)]
        has_snr = len(order_snr) > 0
        snr_by_order_dict[order] = {
            'mean': np.mean(order_snr) if has_snr else np.nan,
            'std': np.std(order_snr) if has_snr else np.nan
        }

    # plot the spectral data
    plot_spec_orders(
        spec_df_by_order_dict,
        f'{obs_date}-{spec_label} ({fits_spec_header["OBJECT"]})',
        os.path.join(fig_exp_path, f'{obs_date}-{spec_label}.png')
    )

    # collect information from FITS header
    spec_info_dict[spec_fits_filename] = {
        'object': fits_spec_header['OBJECT'],
        'extracted_spec_fits': spec_fits_filename,
        'combined_img_fits': fits_spec_header['AIMAGE'],
        'src_img_fits_spec': json.dumps(
            re.findall(r'icm.*?\.a\.fits', str(fits_spec_header['HISTORY']))),
        'flat_fits': fits_spec_header['FLAT'],
        'wvl_cal_fits': fits_spec_header['WAVECAL'],
        'obs_date': obs_date,
        'obs_humidity': fits_spec_header['TCS_HUM'],
        'obs_air_temp_deg': fits_spec_header['TCS_AIRT'],
        'obs_mean_wind_speed_mph': fits_spec_header['TCS_WMSP'],
        'obs_wind_direction_deg': fits_spec_header['TCS_WDIR'],
        'obs_elevation_deg': fits_spec_header['TCS_EL'],
        'obs_azimuth_deg': fits_spec_header['TCS_AZ'],
        'obs_ra_hms_fk5': fits_spec_header['RA'],
        'obs_dec_hms_fk5': fits_spec_header['DEC'],
        'obs_avg_mjd': fits_spec_header['AVE_MJD'],
        'obs_total_exp_time': fits_spec_header['TOTITIME'],
        'obs_exp_time': fits_spec_header['ITIME'],
        'obs_avg_air_mass': fits_spec_header['AVE_AM'],
        'num_img_combined': fits_spec_header['NIMCOMB'],
        # per-order columns are emitted for every order in `full_orders`, with
        # a placeholder when the header/file has nothing for that order
        **{f'ap_pos_order{order}': fits_spec_header.get(f'APOSO{order}', None)
           for order in full_orders},
        **{f'extn_range_order{order}': json.dumps(
            list(map(int, fits_spec_header.get(f'XROR{order}', '0,0').split(','))))
           for order in full_orders},
        **{f'snr_mean_order{order}': snr_by_order_dict.get(order, missing_snr)['mean']
           for order in full_orders},
        **{f'snr_std_order{order}': snr_by_order_dict.get(order, missing_snr)['std']
           for order in full_orders}
    }

    # save the DataFrames as hdf5 files (one key per order, file overwritten)
    with pd.HDFStore(os.path.join(file_exp_path, f'{obs_date}-{spec_label}.h5'), 'w') as store:
        for order, spec_df in spec_df_by_order_dict.items():
            store[f'order{order}'] = spec_df

# one row per extracted-spectrum file, indexed by its file name
spec_info_df = pd.DataFrame(spec_info_dict).T

  0%|          | 0/18 [00:00<?, ?it/s]

processing "data/extracted/230614/combined_spectra49-54.fits"...
processing "data/extracted/230614/combined_spectra55-61.fits"...
processing "data/extracted/230614/combined_spectra62-63.fits"...
processing "data/extracted/230614/combined_spectra64-66.fits"...
processing "data/extracted/230614/combined_spectra67-70.fits"...
processing "data/extracted/230614/combined_spectra71-74.fits"...
processing "data/extracted/230614/combined_spectra82-85.fits"...
processing "data/extracted/230614/combined_spectra86-89.fits"...
processing "data/extracted/230614/combined_spectra90-94.fits"...
processing "data/extracted/230614/combined_spectra95-100.fits"...
processing "data/extracted/230614/combined_spectra101-104.fits"...
processing "data/extracted/230614/combined_spectra112-113.fits"...
processing "data/extracted/230614/combined_spectra114-115.fits"...
processing "data/extracted/230614/combined_spectra116-118.fits"...
processing "data/extracted/230614/combined_spectra119-122.fits"...
processing "da

## Merging the Summary DataFrames

In [6]:
# Attach the combined-image log to each extracted spectrum. The left join keeps
# spectra whose combined image has no entry; their right-hand column is NaN.
merged_summary_df = pd.merge(
    spec_info_df, combined_img_info_df, left_on='combined_img_fits', right_index=True, how='left')

# check if 'src_img_fits_spec' and 'src_img_fits_combined_img' are consistent, if available
# (element-wise comparison; a missing value on either side compares as False)
merged_summary_df['is_src_img_fits_log_consistent'] = (
    merged_summary_df['src_img_fits_spec'] == merged_summary_df['src_img_fits_combined_img'])

# Prefer the combined-image log. When both logs agree the values are identical,
# so this matches the previous per-row selection; additionally, a spectrum
# without a combined-image entry now falls back to its own log instead of NaN.
merged_summary_df['src_img_fits'] = (
    merged_summary_df['src_img_fits_combined_img']
    .fillna(merged_summary_df['src_img_fits_spec']))


def _object_group(object_name):
    """Map a header OBJECT value to its summary group label."""
    if 'Telluric' in object_name:
        return 'Telluric_STD'
    if 'RVSTD' in object_name:
        return 'RV_STD'
    # remaining objects are grouped by the first character of their name
    return f'LP_2442_gp{object_name[0]}'


merged_summary_df['group'] = merged_summary_df['object'].apply(_object_group)

merged_summary_df[[
    'src_img_fits', 'is_src_img_fits_log_consistent',
    'src_img_fits_spec', 'src_img_fits_combined_img'
]]

Unnamed: 0,src_img_fits,is_src_img_fits_log_consistent,src_img_fits_spec,src_img_fits_combined_img
combined_spectra49-54.fits,"[""icm.2023A023.230614.fname.00049.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00049.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00049.a.fits"", ""ic..."
combined_spectra55-61.fits,"[""icm.2023A023.230614.fname.00055.a.fits"", ""ic...",False,[],"[""icm.2023A023.230614.fname.00055.a.fits"", ""ic..."
combined_spectra62-63.fits,"[""icm.2023A023.230614.fname.00062.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00062.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00062.a.fits"", ""ic..."
combined_spectra64-66.fits,"[""icm.2023A023.230614.fname.00064.a.fits"", ""ic...",False,[],"[""icm.2023A023.230614.fname.00064.a.fits"", ""ic..."
combined_spectra67-70.fits,"[""icm.2023A023.230614.fname.00067.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00067.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00067.a.fits"", ""ic..."
combined_spectra71-74.fits,"[""icm.2023A023.230614.fname.00071.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00071.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00071.a.fits"", ""ic..."
combined_spectra82-85.fits,"[""icm.2023A023.230614.fname.00082.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00082.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00082.a.fits"", ""ic..."
combined_spectra86-89.fits,"[""icm.2023A023.230614.fname.00086.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00086.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00086.a.fits"", ""ic..."
combined_spectra90-94.fits,"[""icm.2023A023.230614.fname.00090.a.fits"", ""ic...",False,[],"[""icm.2023A023.230614.fname.00090.a.fits"", ""ic..."
combined_spectra95-100.fits,"[""icm.2023A023.230614.fname.00095.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00095.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00095.a.fits"", ""ic..."


In [7]:
# adjust columns order: identifiers, source-frame logs, observation metadata,
# then the per-order columns (aperture position, extraction range, S/N)
available_cols = list(merged_summary_df.columns)

leading_cols = [
    'group', 'object', 'extracted_spec_fits', 'combined_img_fits', 'flat_fits', 'wvl_cal_fits',
    'src_img_fits', 'is_src_img_fits_log_consistent', 'src_img_fits_spec', 'src_img_fits_combined_img',
]
obs_cols = [col for col in available_cols if 'obs_' in col]
ap_pos_cols = [col for col in available_cols if 'ap_pos_order' in col]
extn_range_cols = [col for col in available_cols if 'extn_range_order' in col]

# interleave mean/std so both statistics of an order sit next to each other
snr_mean_cols = sorted(col for col in available_cols if 'snr_mean_order' in col)
snr_std_cols = sorted(col for col in available_cols if 'snr_std_order' in col)
snr_cols = []
for mean_col, std_col in zip(snr_mean_cols, snr_std_cols):
    snr_cols.extend((mean_col, std_col))

merged_summary_df = merged_summary_df[
    leading_cols + obs_cols + ['num_img_combined']
    + ap_pos_cols + extn_range_cols + snr_cols
]
merged_summary_df

Unnamed: 0,group,object,extracted_spec_fits,combined_img_fits,flat_fits,wvl_cal_fits,src_img_fits,is_src_img_fits_log_consistent,src_img_fits_spec,src_img_fits_combined_img,...,snr_mean_order334,snr_std_order334,snr_mean_order335,snr_std_order335,snr_mean_order336,snr_std_order336,snr_mean_order337,snr_std_order337,snr_mean_order338,snr_std_order338
combined_spectra49-54.fits,LP_2442_gp1,1c6ba8a3ab4,combined_spectra49-54.fits,combined49-54.fits,flat42-46.fits,wavecal47-48.fits,"[""icm.2023A023.230614.fname.00049.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00049.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00049.a.fits"", ""ic...",...,52.358773,19.579994,51.859705,20.035172,51.293148,19.977491,49.217707,19.311716,40.985025,18.745352
combined_spectra55-61.fits,LP_2442_gp1,1d657d06808,combined_spectra55-61.fits,combined55-61.fits,flat42-46.fits,wavecal47-48.fits,"[""icm.2023A023.230614.fname.00055.a.fits"", ""ic...",False,[],"[""icm.2023A023.230614.fname.00055.a.fits"", ""ic...",...,55.498565,20.82173,55.102963,21.213036,54.548106,21.052929,52.119279,20.344811,42.580629,19.777881
combined_spectra62-63.fits,LP_2442_gp2,2336a99d19a,combined_spectra62-63.fits,combined62-63.fits,flat42-46.fits,wavecal47-48.fits,"[""icm.2023A023.230614.fname.00062.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00062.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00062.a.fits"", ""ic...",...,64.022771,21.055058,64.234949,21.275507,63.987408,21.228378,62.874706,21.230294,58.811137,21.033809
combined_spectra64-66.fits,LP_2442_gp2,23c0cb58007,combined_spectra64-66.fits,combined64-66.fits,flat42-46.fits,wavecal47-48.fits,"[""icm.2023A023.230614.fname.00064.a.fits"", ""ic...",False,[],"[""icm.2023A023.230614.fname.00064.a.fits"", ""ic...",...,57.306749,19.553841,57.298781,20.137227,56.936397,20.052341,55.688451,19.772311,50.796886,19.51004
combined_spectra67-70.fits,LP_2442_gp2,218094e1fdf,combined_spectra67-70.fits,combined67-70.fits,flat42-46.fits,wavecal47-48.fits,"[""icm.2023A023.230614.fname.00067.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00067.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00067.a.fits"", ""ic...",...,47.822319,17.668776,47.561072,17.98612,47.320676,18.028415,45.828948,17.665787,39.757675,17.170064
combined_spectra71-74.fits,Telluric_STD,TelluricSTD,combined_spectra71-74.fits,combined71-74.fits,flat42-46.fits,wavecal47-48.fits,"[""icm.2023A023.230614.fname.00071.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00071.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00071.a.fits"", ""ic...",...,146.493434,45.93249,148.06165,46.40192,147.128053,45.922774,145.862089,46.664551,140.012326,46.705143
combined_spectra82-85.fits,LP_2442_gp3,3c1c5ba6fe8,combined_spectra82-85.fits,combined82-85.fits,flat75-79.fits,wavecal80-81.fits,"[""icm.2023A023.230614.fname.00082.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00082.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00082.a.fits"", ""ic...",...,61.008207,21.039967,60.887588,21.520396,60.51742,21.468749,59.021606,21.424972,52.745841,21.091524
combined_spectra86-89.fits,LP_2442_gp3,355363b9228,combined_spectra86-89.fits,combined86-89.fits,flat75-79.fits,wavecal80-81.fits,"[""icm.2023A023.230614.fname.00086.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00086.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00086.a.fits"", ""ic...",...,54.607011,19.256496,54.386599,19.641533,53.976116,19.547927,52.292955,19.621717,46.338986,19.167658
combined_spectra90-94.fits,LP_2442_gp3,3eb70968365,combined_spectra90-94.fits,combined90-94.fits,flat75-79.fits,wavecal80-81.fits,"[""icm.2023A023.230614.fname.00090.a.fits"", ""ic...",False,[],"[""icm.2023A023.230614.fname.00090.a.fits"", ""ic...",...,51.087831,18.70227,50.62593,19.114862,50.190986,18.914252,48.348824,19.098858,41.296638,18.593258
combined_spectra95-100.fits,LP_2442_gp3,3277693c2e6,combined_spectra95-100.fits,combined95-100.fits,flat75-79.fits,wavecal80-81.fits,"[""icm.2023A023.230614.fname.00095.a.fits"", ""ic...",True,"[""icm.2023A023.230614.fname.00095.a.fits"", ""ic...","[""icm.2023A023.230614.fname.00095.a.fits"", ""ic...",...,52.045244,19.383127,51.477567,19.770041,50.961467,19.455023,48.721037,19.525796,40.828294,18.735919


In [8]:
# save the collected information as CSV file under `exp_path`, the export
# directory this notebook creates, instead of a separate 'results/' folder
# relative to the working directory that is never created here
merged_summary_df.to_csv(
    os.path.join(exp_path, f'{obs_date}-summary.csv'), index=False)