## Photometry on MIRI images

In [None]:
%load_ext autoreload
%autoreload 2

import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
import subprocess
import miri_utils.photometry_tools as phot
import pickle as pkl

from astropy.io import fits
from astropy.wcs import FITSFixedWarning
from astropy.table import Table, join

warnings.simplefilter("ignore", category=FITSFixedWarning)


cutout_dir = "/Users/benjamincollins/University/master/Red_Cardinal/cutouts_phot/"
phot_dir = "/Users/benjamincollins/University/master/Red_Cardinal/photometry/"


# Section to obtain modified apertures

Let's inspect Amir's table:

In [None]:
# Older aperture-corrected catalogue. This assignment is immediately overridden
# by the next line, so it has no effect (presumably kept as a quick toggle).
table_path =  '/Users/benjamincollins/University/master/Red_Cardinal/catalogues/Flux_Aperture_PSFMatched_AperCorr_old.fits'
# Active input: the v6 MIRI photometry table.
table_path =  '/Users/benjamincollins/University/master/Red_Cardinal/photometry/phot_tables/Photometry_Table_MIRI_v6.fits'

table = Table.read(table_path)
#print(table[:5])
# Print the column summary, the column container and the first ten IDs.
table.info()
print(table.columns)
print(table['ID'][:10])

Now let's try and call the function:

In [None]:
cutout_dir = "/Users/benjamincollins/University/master/Red_Cardinal/cutouts_phot/"
phot_dir = "/Users/benjamincollins/University/master/Red_Cardinal/photometry/"
mask_folder = "/Users/benjamincollins/University/master/Red_Cardinal/photometry/apertures/masks/"

# Substring found in the fourth filename field -> (survey, observation).
# The tags are tested in insertion order, matching the original if/elif chain.
SURVEY_TAGS = {
    '003': ('primer', '003'),
    '004': ('primer', '004'),
    'cweb1': ('cweb', '1'),
    'cweb2': ('cweb', '2'),
    'cos3d1': ('cos3d', '1'),
    'cos3d2': ('cos3d', '2'),
}

# Get all FITS file paths
fits_files = glob.glob(os.path.join(cutout_dir, '*.fits'))

# Get the basenames of the FITS files
fits_fnames = [os.path.basename(f) for f in fits_files]

adjusted_apertures = []

for fname in fits_fnames:
    # Filenames look like '<id>_<filter>_cutout_<survey+obs>.fits' (see the DS9
    # cell below); the fields are addressed by position.
    fields = fname.split('_')
    if len(fields) < 4:
        print(f"Skipping {fname}: filename does not have the expected four fields")
        continue
    gal_id, band, survey_obs = fields[0], fields[1], fields[3]

    survey_and_obs = next(
        (value for tag, value in SURVEY_TAGS.items() if tag in survey_obs),
        None,
    )
    if survey_and_obs is None:
        print(f"Unknown survey and/or observation number for galaxy {gal_id}:\n")
        print(survey_obs)
        # Do not fall through: survey/obs would be undefined on the first
        # iteration or silently reused from the previous file otherwise.
        continue
    survey, obs = survey_and_obs

    # Call and collect results
    result = phot.adjust_aperture(gal_id, band, survey, obs, phot_dir, mask_folder=mask_folder, rescale=True)

    if result:
        adjusted_apertures.append(result)

# After loop: create a DataFrame
df_apertures = pd.DataFrame(adjusted_apertures)

# v5 for manually modified ellipse sizes
df_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/apertures/aperture_table_small.csv'

# (optional) Save to CSV or integrate into photometry table
#df_apertures.to_csv(df_path, index=False)

Now we can easily open any given FITS file with its corresponding ellipse region

In [None]:
# --- Launch DS9 with the MIRI cutout and the overplotted aperture ---
region_dir = "/Users/benjamincollins/University/master/Red_Cardinal/photometry/regions/"
cutout_dir = "/Users/benjamincollins/University/master/Red_Cardinal/cutouts_phot/"
phot_dir   = "/Users/benjamincollins/University/master/Red_Cardinal/photometry/"

# Named gal_id / band rather than id / filter so the Python builtins stay
# usable in every later cell of the notebook.
gal_id = '10245'
band = 'F770W'
survey_obs = 'primer004'
cutout_path = os.path.join(cutout_dir, f'{gal_id}_{band}_cutout_{survey_obs}.fits')
reg_path = os.path.join(region_dir, f'{gal_id}_{survey_obs}_aperture.reg')

# Report missing inputs here instead of letting DS9 open with an error dialog.
missing_inputs = [p for p in (cutout_path, reg_path) if not os.path.exists(p)]
if missing_inputs:
    print("Cannot launch DS9, missing file(s):")
    for path in missing_inputs:
        print(f"  {path}")
else:
    # Argument list (no shell) so paths containing spaces are passed through intact.
    subprocess.run(["ds9", cutout_path, "-regions", reg_path])


Let's check the table:

In [None]:
# Older aperture-corrected catalogue, read here for inspection only.
# NOTE(review): the first cell reads a file of the same name from the
# 'catalogues/' subfolder; confirm which location is current.
table_path =  '/Users/benjamincollins/University/master/Red_Cardinal/Flux_Aperture_PSFMatched_AperCorr_old.fits'

table = Table.read(table_path)
# Show the first five rows, the column summary and the shape of 'Image_Err'.
print(table[:5])
table.info()
print(table['Image_Err'].shape)

# Run this cell for updated photometry input files

In [None]:
# --- Parameters ---
cutouts_folder = "/Users/benjamincollins/University/master/Red_Cardinal/cutouts_phot/"
#aperture_table = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/aperture_table_v5.csv'
#fig_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/Plots_MIRI_phot_v5/'
#fig_path = None

# Get all possible F770W files
all_f770w_files = glob.glob(os.path.join(cutouts_folder, '*F770W*.fits'))

# Group the F770W files by the exact galaxy ID (first '_'-separated field of
# the basename). A prefix test would also assign '10245_...' to galaxy '1024'.
# Sorting makes the "first file wins" choices below reproducible.
files_by_galaxy = {}
for path in sorted(all_f770w_files):
    files_by_galaxy.setdefault(os.path.basename(path).split('_')[0], []).append(path)

galaxy_ids = set(files_by_galaxy)

#galaxy_ids = ['8465', '7922', '9871', '12202', '8843', '7904', '8338', '10021', '10245', '11136', '12340', '20397']

# One F770W file per galaxy
f770w_files = []

for galaxy_id in sorted(galaxy_ids):
    # All F770W files for this galaxy ID (empty if a manual ID has no cutout)
    matching_files = files_by_galaxy.get(galaxy_id, [])

    # Handle special case for galaxy 11853
    if galaxy_id == '11853':
        # Use the cweb2 file if available
        cweb2_files = [f for f in matching_files if 'cweb2' in os.path.basename(f).lower()]
        if cweb2_files:
            f770w_files.append(cweb2_files[0])
            continue  # Skip to the next galaxy

    # Prioritise PRIMER over COSMOS-Web. Only the basename is inspected so a
    # directory name can never trigger a match.
    primer_files = [f for f in matching_files if 'primer' in os.path.basename(f).lower()]
    cweb_files = [f for f in matching_files if 'cweb' in os.path.basename(f).lower()]

    if primer_files:
        f770w_files.append(primer_files[0])  # Prefer PRIMER file
    elif cweb_files:
        f770w_files.append(cweb_files[0])  # Use CWEB only if no PRIMER available
    else:
        # Previously these galaxies were dropped without any message.
        print(f"No PRIMER or COSMOS-Web F770W cutout for galaxy {galaxy_id}; skipping")


# Get all F1000W files
f1000w_files = glob.glob(os.path.join(cutouts_folder, '*F1000W*.fits'))

# Get all F1800W files
f1800w_files = glob.glob(os.path.join(cutouts_folder, '*F1800W*.fits'))

# Get all F2100W files
f2100w_files = glob.glob(os.path.join(cutouts_folder, '*F2100W*.fits'))


# Section to compare different aperture sizes with each other

In [None]:
######################################
## STEP 1:   PERFORM PHOTOMETRY     ##
######################################

output_folder = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/'
os.makedirs(output_folder, exist_ok=True)

aperture_table_small = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/apertures/aperture_table_small.csv'
aperture_table_big = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/apertures/aperture_table_big.csv'

# Run the uncorrected photometry for every band, first with the small aperture
# table and then with the big one (same call order as writing the calls out).
for aperture_csv, run_suffix in (
    (aperture_table_small, 'small_v3'),
    (aperture_table_big, 'big_v3'),
):
    for band_files in (f770w_files, f1000w_files, f1800w_files, f2100w_files):
        phot.perform_photometry(band_files, aperture_csv, output_folder, suffix=run_suffix, apply_aper_corr=False)

In [None]:
######################################
## STEP 2:   CREATE FITS TABLE      ##
######################################

results_dir = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/results/'
miri_bands = ('F770W', 'F1000W', 'F1800W', 'F2100W')

# --- Small apertures ---
fits_table_small = 'Flux_SmallAperture_NoCorr_MIRI_v3.fits'

# One per-band CSV, in band order
csv_paths = [os.path.join(results_dir, f'phot_table_{band_name}_small_v3.csv') for band_name in miri_bands]
f770w_fname, f1000w_fname, f1800w_fname, f2100w_fname = csv_paths

# Now create the combined FITS table
phot.create_fits_table_from_csv(csv_paths, output_file=fits_table_small)


# --- Big apertures ---
fits_table_big = 'Flux_BigAperture_NoCorr_MIRI_v3.fits'

csv_paths = [os.path.join(results_dir, f'phot_table_{band_name}_big_v3.csv') for band_name in miri_bands]
f770w_fname, f1000w_fname, f1800w_fname, f2100w_fname = csv_paths

# Now create the combined FITS table
phot.create_fits_table_from_csv(csv_paths, output_file=fits_table_big)

Now let's have a look at the tables and compare the effect of the aperture size!

In [None]:
# Load the two tables
table_small_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/phot_tables/Flux_SmallAperture_NoCorr_MIRI_v3.fits'
table_big_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/phot_tables/Flux_BigAperture_NoCorr_MIRI_v3.fits'

# NOTE(review): table_small and filters_small are not used again in this cell;
# the comparison below is given the two file paths, not the loaded table.
table_small = Table.read(table_small_path)
filters_small = table_small['Filters']

# Galaxy IDs per filter that are passed to the comparison as non-detections.
# NOTE(review): the F1800W list here lacks 19098, which the copies of this
# dict in the next two cells include; confirm which version is intended.
non_detections = {
    'F770W': [11137, 17793, 8843, 12175, 7696, 7185, 8465, 19098, 12443, 12202, 21547, 9517, 9901, 10415, 12213, 
              21451, 11853, 11086, 22606, 18769, 9809, 11481, 21472, 19681, 12513, 21218, 12133, 16615, 10600, 11247, 20720, 17534], 
    'F1000W': [17984, 12513, 12164, 12133, 11716, 16615, 16424, 12202, 11723, 11853, 13297, 18327, 12443, 17534], 
    'F1800W': [12164, 11716, 10565, 10054, 11723, 12175, 19024, 8465, 8338, 18769, 7102, 10400, 12513, 19681, 7904, 
               10339, 12133, 10600, 9517, 10415, 11247, 12213, 11451, 7934], 
    'F2100W': [17984, 12164, 11716, 16516, 11723, 11853, 12175, 16474, 12443, 12513, 12133, 16615, 16424, 12202, 
               12332, 17517, 12014, 11247, 13297, 12213, 17916, 17534]
    }

# The summary document name is relative; where it is written is decided by
# compare_aperture_statistics.
phot.compare_aperture_statistics(table_small_path, table_big_path, summary_doc_path="summary_aperture_comparison.md", non_detections=non_detections)

Load and display aperture statistics

In [None]:
# Load the pickled comparison data.
# NOTE: unpickling can execute arbitrary code; only load files you created yourself.
stats_file = '/Users/benjamincollins/University/Master/Red_Cardinal/photometry/apertures/aperture_comparisons/comparison_data.pkl'
with open(stats_file, 'rb') as f:
    stats = pkl.load(f)
    
# Galaxy IDs per filter that are passed to the plot as non-detections.
non_detections = {
    'F770W': [11137, 17793, 8843, 12175, 7696, 7185, 8465, 19098, 12443, 12202, 21547, 9517, 9901, 10415, 12213, 
              21451, 11853, 11086, 22606, 18769, 9809, 11481, 21472, 19681, 12513, 21218, 12133, 16615, 10600, 11247, 20720, 17534], 
    'F1000W': [17984, 12513, 12164, 12133, 11716, 16615, 16424, 12202, 11723, 11853, 13297, 18327, 12443, 17534], 
    'F1800W': [12164, 11716, 10565, 10054, 11723, 12175, 19024, 8465, 8338, 18769, 7102, 10400, 12513, 19681, 7904, 
               10339, 12133, 10600, 9517, 10415, 11247, 12213, 11451, 7934, 19098], 
    'F2100W': [17984, 12164, 11716, 16516, 11723, 11853, 12175, 16474, 12443, 12513, 12133, 16615, 16424, 12202, 
               12332, 17517, 12014, 11247, 13297, 12213, 17916, 17534]
    }

# NOTE(review): fig_path is assigned but not passed to plot_aperture_summary
# below; confirm whether the plot is meant to be saved there.
fig_path = '/Users/benjamincollins/University/Master/Red_Cardinal/photometry/apertures/aperture_comparisons/'
phot.plot_aperture_summary(stats, non_detections, scaling=True)

Create statistics plot for the appendix

In [None]:
# Load the pickled comparison data.
# NOTE: unpickling can execute arbitrary code; only load files you created yourself.
stats_file = '/Users/benjamincollins/University/Master/Red_Cardinal/photometry/apertures/aperture_comparisons/comparison_data.pkl'
with open(stats_file, 'rb') as f:
    stats_dict = pkl.load(f)
 
# Output image for the appendix figure.
appendix_fig = '/Users/benjamincollins/University/Master/Red_Cardinal/photometry/apertures/aperture_comparisons/appendix_figure_new.png'

# Galaxy IDs per filter that are passed to the appendix plot as non-detections.
non_detections = {
    # I include ID 11451 here as I had to manually remove the outlier. It is, however, a detection.
    'F770W': [11137, 17793, 8843, 12175, 7696, 7185, 8465, 19098, 12443, 12202, 21547, 9517, 9901, 10415, 12213, 
              21451, 11853, 11086, 22606, 18769, 9809, 11481, 21472, 19681, 12513, 21218, 12133, 16615, 10600, 11247, 20720, 17534, 11451], 
    'F1000W': [17984, 12513, 12164, 12133, 11716, 16615, 16424, 12202, 11723, 11853, 13297, 18327, 12443, 17534], 
    'F1800W': [12164, 11716, 10565, 10054, 11723, 12175, 19024, 8465, 8338, 18769, 7102, 10400, 12513, 19681, 7904, 
               10339, 12133, 10600, 9517, 10415, 11247, 12213, 11451, 7934, 19098], # also added 19098 here as it is plagued by detector artefacts 
    'F2100W': [17984, 12164, 11716, 16516, 11723, 11853, 12175, 16474, 12443, 12513, 12133, 16615, 16424, 12202, 
               12332, 17517, 12014, 11247, 13297, 12213, 17916, 17534]
    }

phot.plot_appendix_figure(data_comparison=stats_dict, non_detections=non_detections, fig_path=appendix_fig, scaling='log')

Now we have to deal with the outlier! ID 11451 is a detection, but its source is not centred on the image, therefore the aperture sizes yield inconsistent results.

Outlier in F770W with ratio 1.74 for galaxy ID 11420
Outlier in F770W with ratio 3.47 for galaxy ID 11451
Outlier in F770W with ratio 1.65 for galaxy ID 17669
Outlier in F770W with ratio 1.54 for galaxy ID 19024
Outlier in F770W with ratio 1.71 for galaxy ID 8338

Outlier in F1800W with ratio 1.53 for galaxy ID 11494
Outlier in F1800W with ratio 1.54 for galaxy ID 16874
Outlier in F1800W with ratio 1.62 for galaxy ID 19098
Outlier in F1800W with ratio 1.67 for galaxy ID 19563
Outlier in F1800W with ratio 1.52 for galaxy ID 9871

In [None]:

# Outlier galaxies per band, in the order they are displayed.
outliers_to_show = (
    ('F770W', (11451, 8338, 11420, 17669, 19024)),
    ('F1800W', (11494, 16874, 19098, 19563, 9871)),
)

for outlier_band, outlier_ids in outliers_to_show:
    for outlier_id in outlier_ids:
        phot.show_apertures(outlier_id, outlier_band)

    


In [None]:
from photutils.aperture import EllipticalAperture

objid = 13174
band = "F2100W"

aperture_table = "/Users/benjamincollins/University/Master/Red_Cardinal/photometry/apertures/aperture_table.csv"
df = pd.read_csv(aperture_table, index_col=False)

# squeeze() collapses a single matching row into a Series so fields can be
# read by column name.
# NOTE(review): with zero or several matching rows this stays a DataFrame and
# the lookups below return Series instead of scalars; confirm IDs are unique.
ap = df.loc[df['ID'] == objid].squeeze()
print(ap['Apr_A'])

vis_data = phot.load_vis(f'/Users/benjamincollins/University/master/Red_Cardinal/photometry/vis_data/{objid}_{band}.h5')
output_file = f'/Users/benjamincollins/University/master/Red_Cardinal/photometry/apertures/aperture_comparisons/{objid}_{band}.png'

# Extract data from the dictionary
image_data = vis_data['original_data']
background_plane = vis_data['background_plane']
background_subtracted = vis_data['background_subtracted']
segm_mask = vis_data['segmentation_mask']
mask_vis = vis_data['mask_vis']
aperture_params = vis_data['aperture_params']
sigma = vis_data['sigma']
region_name = vis_data['region_name']
galaxy_id = vis_data['galaxy_id']
# Stored as filter_name so the builtin filter() is not shadowed notebook-wide.
filter_name = vis_data['filter']

# Create aperture objects for plotting
x_center = aperture_params['x_center']
y_center = aperture_params['y_center']
a = aperture_params['a']
b = aperture_params['b']
theta = aperture_params['theta']

big_aperture = EllipticalAperture(
    positions=(x_center, y_center),
    a=a,
    b=b,
    theta=theta
)

# The small aperture has half the semi-axes of the big one.
# NOTE(review): its angle is taken from the CSV ('Apr_Theta') while the big
# aperture uses the angle stored with the visualisation data; confirm both
# are in the same units and convention.
small_aperture = EllipticalAperture(
    positions=(x_center, y_center),
    a=a/2,
    b=b/2,
    theta=ap['Apr_Theta']
)


# Single-panel figure showing both apertures on the background-subtracted image
fig, ax = plt.subplots(figsize=(4, 4))

# Clip the display range to the 5th-95th percentile, ignoring NaNs
vmin = np.nanpercentile(background_subtracted, 5)
vmax = np.nanpercentile(background_subtracted, 95)

ax.imshow(background_subtracted, origin='lower', cmap='gray', vmin=vmin, vmax=vmax)
small_aperture.plot(ax=ax, color='red', lw=2, label='Small Aperture')
big_aperture.plot(ax=ax, color='blue', lw=3, label='Large Aperture')
ax.legend(loc='upper right', fontsize=10)

# Tight layout and saving the figure
plt.tight_layout()
#plt.suptitle(f'Galaxy ID {galaxy_id} - {filter_name}', fontsize=14)
plt.subplots_adjust(top=0.85)  # Adjust to prevent overlap with annotation
plt.savefig(output_file, dpi=150)
plt.show()
plt.close(fig)


# Section of the notebook dealing with detection statistics

In [None]:
# Earlier table location, kept for reference:
#phot_table_path = '/Users/benjamincollins/University/master/Red_Cardinal/miri_photometry_v1/Photometry_Table_MIRI.fits'
phot_table_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/phot_tables/Photometry_Table_MIRI_v6.fits'
table = Table.read(phot_table_path)
flux_col = table['Flux']
err_col = table['Flux_Err']

bands = ['F770W', 'F1000W', 'F1800W', 'F2100W']
auto_flags = {band: [] for band in bands}

# IDs as plain strings, whether the column holds bytes or numbers
gal_ids = [raw_id.decode() if isinstance(raw_id, bytes) else str(raw_id) for raw_id in table['ID']]

sn_threshold = 3.0

# Flag every (galaxy, band) whose positive flux falls below the S/N threshold.
# The band position in `bands` is used as the index into each row's flux array.
for idx, band in enumerate(bands):

    print("Analysing band: ", band)
    # enumerate gives the row directly; looking the ID up with list.index()
    # was quadratic and always returned the first row of a duplicated ID.
    for row, gid in enumerate(gal_ids):

        # Fluxes (convert to µJy)
        flux_ujy = flux_col[row][idx] * 1e6
        flux_err_ujy = err_col[row][idx] * 1e6

        if gid == "11723":
            print(f"Galaxy ID: {gid}, Band: {band}, Flux: {flux_ujy:.2f} µJy, Flux Error: {flux_err_ujy:.2f} µJy, S/N: {flux_ujy/flux_err_ujy:.2f}")

        # Skip if any crucial value is invalid.
        # NOTE(review): non-positive fluxes are skipped here, not flagged, so a
        # negative measurement never ends up in auto_flags. The former
        # `or flux <= 0` test further down could therefore never fire and was
        # removed; confirm whether such sources should be flagged instead.
        if not (np.isfinite(flux_ujy) and (flux_ujy > 0) and np.isfinite(flux_err_ujy)):
            continue
        sn = flux_ujy / flux_err_ujy
        if sn < sn_threshold:
            print(f"Galaxy ID: {gid}, Band: {band}, S/N: {sn:.2f}, Flux: {flux_ujy:.2f} µJy, Flux Error: {flux_err_ujy:.2f} µJy")
            auto_flags[band].append(int(gid))

print(auto_flags)

In [None]:
# Load the pickled comparison data.
# NOTE: unpickling can execute arbitrary code; only load files you created yourself.
stats_file = '/Users/benjamincollins/University/Master/Red_Cardinal/photometry/apertures/aperture_comparisons/comparison_data.pkl'
with open(stats_file, 'rb') as f:
    data_comparison = pkl.load(f)

# auto_flags comes from the S/N cell above; what `threshold` is compared
# against is decided inside analyse_outliers.
outliers = phot.analyse_outliers(data_comparison, flags=auto_flags, threshold=2.0)
# `display` is injected by IPython/Jupyter, so this line only works in a notebook.
display(outliers.head())

Let's figure out where the huge discrepancy comes from:

In [None]:
manual_flags = {
        "F770W": [7696,7730,8465,8843,9517,9809,9901,9986,10415,10600,11086,11137,
            11247,11451,11481,11853,12133,12175,12202,12213,12443,12513,16424,16615,
            17534,17793,18769,19024,19098,19307,19681,20720,21218,21424,21451,
            21472,21477,21547,22606], 
        "F1000W": [11716,11723,11853,12133,12164,12202,12332,12443,12513,13297,16424,
                    16615,17534,17793,17984,18327,19307],
        "F1800W": [7102,7904,7922,7934,8338,8465,9517,10054,10128,10339,10400,10415,
                    10565,10592,10600,11142,11247,11420,11451,11716,11723,12014,12133,
                    12164,12175,12202,12213,12332,12513,16419,18332,18769,19024,19098,
                    19393,19563,19681,21451],
        "F2100W": [7102,11142,11247,11494,11716,11723,11853,12014,12133,12164,12175,
                    12202,12213,12332,12443,12513,13297,16419,16424,16474,16516,16615,
                    16874,17000,17517,17534,17842,17916,17984,18094]
    }

auto_flags = {
    'F770W': [7185, 7696, 9809, 9901, 10415, 11247, 11481, 12133, 12175, 12443, 12513, 16615, 17534, 17793, 20720, 21218, 21451, 22606], 
    'F1000W': [11716, 11723, 12202, 13297, 16424, 17534, 17984, 18327], 
    'F1800W': [7904, 9517, 10054, 10339, 10415, 10565, 10600, 11247, 11451, 12133, 18769], 
    'F2100W': [11247, 11716, 11723, 12133, 12202, 12332, 16516, 16615, 17517, 17534, 17916, 17984]
    }


# Per-band comparison of the two flag dictionaries. The dictionary keys are
# filter names, so the comparison has to happen on the ID lists of each band;
# filtering on the keys alone (as before) says nothing about the galaxies.
manual_only = {
    band: sorted(set(ids) - set(auto_flags.get(band, [])))
    for band, ids in manual_flags.items()
}
auto_only = {
    band: sorted(set(ids) - set(manual_flags.get(band, [])))
    for band, ids in auto_flags.items()
}
both = {
    band: sorted(set(ids) & set(auto_flags[band]))
    for band, ids in manual_flags.items()
    if band in auto_flags
}

# Galaxies flagged by exactly one of the two methods, per band.
# Bands without any disagreement are left out of the result.
discrepant_filters = {}
for band in set(manual_flags) & set(auto_flags):  # bands present in both dicts
    diff = set(manual_flags[band]) ^ set(auto_flags[band])  # symmetric difference
    if diff:
        discrepant_filters[band] = diff

from astropy.visualization import ZScaleInterval, ImageNormalize, AsinhStretch

bands = ['F770W', 'F1000W', 'F1800W', 'F2100W']
ncols = 4

# One figure per band with a thumbnail of every galaxy on which the manual
# and automatic flags disagree.
interval = ZScaleInterval()

for band in bands:
    # Sorted so the panel order is the same on every run
    ids = sorted(discrepant_filters.get(band, []))
    if not ids:
        # Nothing to show; avoid producing an empty figure
        print(f"No discrepant galaxies in {band}")
        continue

    nrows = int(np.ceil(len(ids) / ncols))
    fig = plt.figure(figsize=(10, 10))

    for i, objid in enumerate(ids):

        try:
            vis_data = phot.load_vis(f"/Users/benjamincollins/University/Master/Red_Cardinal/photometry/vis_data/{objid}_{band}.h5")
        except FileNotFoundError:
            print(f"⚠️ No visualisation data file found for {objid} in {band}")
            continue

        ax = fig.add_subplot(nrows, ncols, i + 1)

        # Best-effort plotting: a problem with one galaxy must not abort the
        # whole figure, so any failure is reported and the panel left empty.
        # NOTE(review): a masked copy of the image used to be built here but
        # was never displayed, and a missing mask entry prevented the panel
        # from being drawn at all. It has been removed.
        try:
            img = vis_data["background_subtracted"]

            # Normalisation: auto scale + asinh stretch
            vmin, vmax = interval.get_limits(img)
            norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=AsinhStretch())

            ax.imshow(img, origin="lower", cmap="inferno", norm=norm)
            ax.set_title(f"Galaxy {objid} - {band}")
        except Exception as e:
            print(f"⚠️ Could not process {objid}: {e}")

    fig.tight_layout()
    #plt.subplots_adjust(top=0.94, hspace=0.35, wspace=0.3)

    # Save with high DPI for appendix quality
    #plt.savefig(fig_path, dpi=200, bbox_inches='tight', pad_inches=0)
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures
    plt.close(fig)



Let's use a new routine to investigate these in-between cases:

In [None]:
bands = ["F770W", "F1000W", "F1800W", "F2100W"]

# band -> IDs whose recomputed S/N is below 3
discrepant = {}

for band in bands:
    nondetections = []
    results = []

    # Walk through the galaxies on which manual and automatic flags disagree
    for objid in discrepant_filters.get(band, []):

        vis_path = f"/Users/benjamincollins/University/Master/Red_Cardinal/photometry/vis_data/{objid}_{band}.h5"
        try:
            vis_data = phot.load_vis(vis_path)
        except FileNotFoundError:
            print(f"❌ No visualisation data file found at {vis_path}")
            continue

        info = phot.recompute_empirical_snr(vis_data=vis_data, n_random=200)
        snr = info['sn']

        if snr < 3.0:
            print(f"⚠️ Galaxy {objid} in {band} has recomputed S/N < 3.0: {snr}")
            nondetections.append(objid)
        else:
            print(f"✅ Galaxy {objid} in {band} has recomputed S/N = {snr}")

        results.append({
            'objid': objid,
            'empirical_snr': snr,
            'flux': info['flux'],
            'flux_err': info['flux_err'],
        })

    discrepant[band] = nondetections

    # Per-band listing of everything that was recomputed
    print(f"\n\nFilter {band}:")
    for entry in results:
        print(entry)

print(discrepant)
#print(snr_stats)



In [None]:
auto_flags = {
    'F770W': [7185, 7696, 9809, 9901, 10415, 11247, 11481, 12133, 12175, 12443, 12513, 16615, 17534, 17793, 20720, 21218, 21451, 22606, 11723], 
    'F1000W': [11716, 11723, 12202, 13297, 16424, 17534, 17984, 18327], 
    'F1800W': [7904, 9517, 10054, 10339, 10415, 10565, 10600, 11247, 11451, 12133, 18769], 
    'F2100W': [11247, 11716, 11723, 12133, 12202, 12332, 16516, 16615, 17517, 17534, 17916, 17984]
    }

discrepant = {
    'F770W': [11137, 8843, 11853, 11086, 7185, 18769, 8465, 19098, 21472, 19681, 10600, 12202, 21547, 9517, 12213], 
    'F1000W': [12513, 12164, 12133, 16615, 11853, 12443], 
    'F1800W': [12164, 12175, 8465, 8338, 10400, 12213, 7102, 11716, 11723, 19024, 12513, 19681, 7934], 
    'F2100W': [12164, 12175, 12443, 16424, 12213, 11853, 16474, 12513, 12014, 13297]
    }

bands = ["F770W", "F1000W", "F1800W", "F2100W"]

# Union of the recomputed and the automatically flagged non-detections per band
final_nondetections = {
    band: list(set(discrepant.get(band, [])) | set(auto_flags.get(band, [])))
    for band in bands
}

print(final_nondetections)

#fits_table_v5 = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/phot_tables/Flux_Aperture_PSFMatched_AperCorr_MIRI_v5.fits'
fits_table_v5 = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/phot_tables/Photometry_Table_MIRI_v6.fits'

stats_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/results/galaxy_stats_thesis.md'

# Heat map 1: coverage only (no non-detections passed)
fig_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/plots/heat_maps/miri_coverage_long_v3.png'
phot.plot_galaxy_filter_matrix(fits_table_v5, fig_path=fig_path, title='MIRI Coverage\n', cols=3)

# Heat map 2: same table, with the final non-detections passed in
fig_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/plots/heat_maps/miri_detections_long_v3.png'
phot.plot_galaxy_filter_matrix(fits_table_v5, fig_path=fig_path, title='MIRI Detections\n', nondetections=final_nondetections, cols=3)

# Written summary of the detection statistics
phot.write_detection_stats(fits_table_v5, stats_path=stats_path, nondetections=final_nondetections)

# Section to compare my aperture photometry to the photometry of the COSMOS-Web2025 catalogue

1) Perform the photometry again for my data using the updated and corrected photometry
2) Combine the tables into a single FITS output file

In [None]:
######################################
## STEP 1:   PERFORM PHOTOMETRY     ##
######################################

# Only F770W and F1800W are re-run here, both with the small aperture table
# and the function's default remaining arguments.
for band_files in (f770w_files, f1800w_files):
    phot.perform_photometry(band_files, aperture_table_small, output_folder, suffix='small_v2')



In [None]:
######################################
## STEP 2:   CREATE FITS TABLE      ##
######################################

fits_table_v5 = 'Photometry_Table_MIRI.fits'

results_dir = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/results/'

# One per-band CSV, in band order
csv_paths = [
    os.path.join(results_dir, f'phot_table_{band_name}_v5.csv')
    for band_name in ('F770W', 'F1000W', 'F1800W', 'F2100W')
]
f770w_fname, f1000w_fname, f1800w_fname, f2100w_fname = csv_paths

# Now create the combined FITS table
phot.create_fits_table_from_csv(csv_paths, output_file=fits_table_v5)

Cross-match galaxies between catalogues

In [None]:
from astropy.coordinates import SkyCoord
import astropy.units as u
from astropy.table import hstack

cweb_path = '/Users/benjamincollins/University/master/Red_Cardinal/COSMOS-Web_DR1/COSMOSWeb_reduced.fits'
#my_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/results/Flux_Aperture_PSFMatched_AperCorr_MIRI_v4.fits'
my_path = '/Users/benjamincollins/University/Master/Red_Cardinal/miri_photometry_v1/Photometry_Table_MIRI.fits'
cat_path = '/Users/benjamincollins/University/master/Red_Cardinal/cat_targets.fits'

# Largest on-sky separation accepted as a positional match
max_separation = 0.2 * u.arcsec

my_table = Table.read(my_path, hdu=1)
cosmos_table = Table.read(cweb_path, hdu=1)
my_cat = Table.read(cat_path, hdu=1)
fits.info(my_path)
fits.info(cweb_path)
#print(my_cat.columns)

# --- Step 1: attach coordinates to the photometry table via the target IDs ---

# Rename ID column to id to match other catalogues
my_table.rename_column('ID', 'id')

# Reduce catalogue
my_cat_small = my_cat['id', 'ra', 'dec']

# Force both ID columns to strings so the join compares like with like
my_table['id'] = my_table['id'].astype(str)
my_cat_small['id'] = my_cat_small['id'].astype(str)

# Match according to IDs
matched = join(my_table, my_cat_small, keys='id', join_type='inner')

print(matched.columns)

#matched.write('/Users/benjamincollins/University/master/Red_Cardinal/COSMOS-Web_DR1/Phot_Table_id_matched.fits', overwrite=True)

# --- Step 2: positional match against the COSMOS-Web catalogue ---

# Build coordinates
coords_my = SkyCoord(ra=matched['ra']*u.deg, dec=matched['dec']*u.deg)
coords_cosmos = SkyCoord(ra=cosmos_table['ra']*u.deg, dec=cosmos_table['dec']*u.deg)

# Nearest COSMOS-Web neighbour of every source, kept only within max_separation
idx, d2d, _ = coords_my.match_to_catalog_sky(coords_cosmos)
match_mask = d2d < max_separation

# Build matched table
my_matched = matched[match_mask]    # important to take the matched catalogue!
cosmos_matched = cosmos_table[idx[match_mask]]

# Combine tables: rename columns to avoid name collision
cosmos_matched.rename_columns(
    cosmos_matched.colnames,
    [name + "_cosmos" if name in my_matched.colnames else name for name in cosmos_matched.colnames]
)

# Merge horizontally
sky_matched = hstack([my_matched, cosmos_matched])

print(sky_matched.columns)
sky_matched.info()

sky_matched.write('/Users/benjamincollins/University/master/Red_Cardinal/COSMOS-Web_DR1/Phot_Table_sky_matched.fits', overwrite=True)


Define and call huge plotting function

Creates a 3x3 massive grid

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from astropy.table import Table
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

def _first_band_as_float(column, scale=1.0):
    """Return the first-band value of every row as a float array, masked entries as NaN."""
    values = np.ma.asarray(column)
    if values.ndim > 1:
        # Multi-band column: the first element of each row is the F770W value
        values = values[:, 0]
    return np.ma.filled(values.astype(float) * scale, np.nan)


def load_and_prepare_data(fits_file_path):
    """
    Load the FITS table and prepare the F770W data for comparison.

    Parameters
    ----------
    fits_file_path : str
        Path of a table readable by ``Table.read`` that contains the columns
        'Flux', 'Flux_Err', 'AB_Mag' (first element per row = F770W) and
        'flux_auto_f770w', 'flux_err_auto_f770w', 'mag_auto_f770w'.

    Returns
    -------
    dict
        'table' (the loaded table), the personal and public flux, flux-error
        and magnitude arrays, and the boolean masks 'valid_flux' and
        'valid_flux_err'. Personal fluxes and errors are multiplied by 1e6
        (conversion to µJy); masked entries are returned as NaN.
    """
    # Load the FITS table
    table = Table.read(fits_file_path)

    # Personal photometry: take the first (F770W) entry of each row. Going
    # through numpy.ma works for masked and for plain columns alike.
    flux_personal = _first_band_as_float(table['Flux'], scale=1e6)        # convert to µJy
    flux_err_personal = _first_band_as_float(table['Flux_Err'], scale=1e6)  # convert to µJy
    ab_mag_personal = _first_band_as_float(table['AB_Mag'])

    # Public catalogue data; masked entries become NaN so that they are
    # rejected by the validity masks below instead of leaking through as raw
    # underlying values.
    flux_public = _first_band_as_float(table['flux_auto_f770w'])
    flux_err_public = _first_band_as_float(table['flux_err_auto_f770w'])
    mag_public = _first_band_as_float(table['mag_auto_f770w'])

    # Fluxes usable on a log-log plot: present and positive in both catalogues
    valid_flux = (~np.isnan(flux_personal)) & (~np.isnan(flux_public)) & \
                 (flux_personal > 0) & (flux_public > 0)
    # Same requirement for the uncertainties. This mask used to be a copy of
    # the flux mask and never looked at the error columns it is applied to.
    valid_flux_err = (~np.isnan(flux_err_personal)) & (~np.isnan(flux_err_public)) & \
                     (flux_err_personal > 0) & (flux_err_public > 0)

    return {
        'table': table,
        'flux_personal': flux_personal,
        'flux_err_personal': flux_err_personal,
        'ab_mag_personal': ab_mag_personal,
        'flux_public': flux_public,
        'flux_err_public': flux_err_public,
        'mag_public': mag_public,
        'valid_flux': valid_flux,
        'valid_flux_err': valid_flux_err
    }

def calculate_statistics(x, y, valid_mask):
    """
    Calculate comparison statistics between two measurements of the same objects.

    Parameters
    ----------
    x, y : array-like
        Reference values and values compared against them (same length).
    valid_mask : array-like of bool
        Selects the entries of ``x`` and ``y`` that enter the statistics.

    Returns
    -------
    dict
        Empty if fewer than three entries are selected. Otherwise the Pearson
        correlation and its p-value, mean/median/std/rms of the residuals
        ``y - x``, median/mean/std of the fractional difference
        ``(y - x) / x`` and the number of selected objects. The fractional
        statistics only use entries with ``x > 0`` and are NaN if there are
        none.
    """
    valid_mask = np.asarray(valid_mask, dtype=bool)
    if np.sum(valid_mask) < 3:
        return {}

    x_valid = np.asarray(x, dtype=float)[valid_mask]
    y_valid = np.asarray(y, dtype=float)[valid_mask]

    # Linear correlation
    corr_coef, p_value = stats.pearsonr(x_valid, y_valid)

    # Calculate residuals and statistics
    residuals = y_valid - x_valid
    mean_residual = np.mean(residuals)
    median_residual = np.median(residuals)
    std_residual = np.std(residuals)
    rms_residual = np.sqrt(np.mean(residuals**2))

    # Fractional differences for positive values only: dividing by zero or a
    # negative reference would put inf/NaN or sign-flipped ratios into the
    # mean and standard deviation.
    positive = x_valid > 0
    if np.any(positive):
        frac_diff = residuals[positive] / x_valid[positive]
        median_frac_diff = np.median(frac_diff)
        mean_frac_diff = np.mean(frac_diff)
        std_frac_diff = np.std(frac_diff)
    else:
        median_frac_diff = mean_frac_diff = std_frac_diff = np.nan

    return {
        'correlation': corr_coef,
        'p_value': p_value,
        'median_residual': median_residual,
        'mean_residual': mean_residual,
        'std_residual': std_residual,
        'rms_residual': rms_residual,
        'median_frac_diff': median_frac_diff,
        'mean_frac_diff': mean_frac_diff,
        'std_frac_diff': std_frac_diff,
        'n_objects': len(x_valid)
    }

def create_comparison_plot(data):
    """
    Create a 3x3 diagnostic figure comparing two sets of F770W photometry.

    Panels, in reading order: (1) flux vs flux, (2) flux error vs flux error,
    (3) S/N vs S/N, (4) absolute flux residuals, (5) fractional flux
    differences, (6) flux vs flux with error bars, (7) flux histograms,
    (8) histogram of fractional differences, (9) text summary.

    Parameters
    ----------
    data : dict
        Mapping with the keys 'table', 'flux_personal', 'flux_err_personal',
        'flux_public', 'flux_err_public', 'valid_flux' and 'valid_flux_err'.
        The two ``valid_*`` entries are used as boolean index masks on the
        flux / flux-error arrays.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding all nine panels. A panel whose mask selects no
        objects is left empty.
    """
    fig = plt.figure(figsize=(15, 15))

    # Define color scheme
    colors = {'scatter': '#1f77b4', 'line': '#ff7f0e', 'hist': '#2ca02c'}
    stats_box = dict(boxstyle='round', facecolor='white', alpha=0.8)

    valid_flux = data['valid_flux']
    valid_flux_err = data['valid_flux_err']
    valid_both = valid_flux & valid_flux_err
    has_flux = np.sum(valid_flux) > 0

    # Flux selections shared by panels 1, 4, 5, 7, 8 and 9
    x_flux = data['flux_personal'][valid_flux]
    y_flux = data['flux_public'][valid_flux]
    frac_diff = (y_flux - x_flux) / x_flux

    def draw_one_to_one(ax, xs, ys):
        """Draw the dashed 1:1 line across the joint range of xs and ys."""
        lo = min(np.min(xs), np.min(ys))
        hi = max(np.max(xs), np.max(ys))
        ax.plot([lo, hi], [lo, hi], '--', color=colors['line'], lw=2)

    def annotate_correlation(ax, xs, ys):
        """Write 'r' and 'N' in the top-left corner if statistics exist."""
        result = calculate_statistics(xs, ys, np.ones(len(xs), dtype=bool))
        if result:
            ax.text(0.05, 0.95,
                    f'r = {result["correlation"]:.3f}\nN = {result["n_objects"]}',
                    transform=ax.transAxes, verticalalignment='top',
                    bbox=stats_box)

    # 1. Flux comparison (log-log scale)
    ax1 = fig.add_subplot(3, 3, 1)
    if has_flux:
        ax1.scatter(x_flux, y_flux, alpha=0.6, s=30, color=colors['scatter'])
        draw_one_to_one(ax1, x_flux, y_flux)

        ax1.set_xscale('log')
        ax1.set_yscale('log')
        ax1.set_xlabel('This Work Flux (µJy)')
        ax1.set_ylabel('COSMOS-Web DR1 Catalogue Flux (µJy)')
        ax1.set_title('Flux Comparison (F770W)')
        ax1.grid(True, alpha=0.3)

        annotate_correlation(ax1, x_flux, y_flux)

    # 2. Flux error comparison (log-log scale)
    ax2 = fig.add_subplot(3, 3, 2)
    if np.sum(valid_flux_err) > 0:
        x_err = data['flux_err_personal'][valid_flux_err]
        y_err = data['flux_err_public'][valid_flux_err]

        ax2.scatter(x_err, y_err, alpha=0.6, s=30, color=colors['scatter'])
        draw_one_to_one(ax2, x_err, y_err)

        ax2.set_xscale('log')
        ax2.set_yscale('log')
        ax2.set_xlabel('This Work Flux Error (µJy)')
        ax2.set_ylabel('COSMOS-Web DR1 Catalogue Flux Error (µJy)')
        ax2.set_title('Flux Error Comparison (F770W)')
        ax2.grid(True, alpha=0.3)

        annotate_correlation(ax2, x_err, y_err)

    # 3. Signal-to-noise ratio comparison
    ax3 = fig.add_subplot(3, 3, 3)
    if np.any(valid_both):
        snr_personal = data['flux_personal'][valid_both] / data['flux_err_personal'][valid_both]
        snr_public = data['flux_public'][valid_both] / data['flux_err_public'][valid_both]

        # The object with the highest catalogue S/N is dropped from this panel
        max_idx = np.argmax(snr_public)
        snr_personal = np.delete(snr_personal, max_idx)
        snr_public = np.delete(snr_public, max_idx)

        # With a single matched object nothing is left after the removal
        # above; skip drawing instead of calling np.min on an empty array.
        if snr_personal.size > 0:
            ax3.scatter(snr_personal, snr_public, alpha=0.6, s=30, color=colors['scatter'])
            draw_one_to_one(ax3, snr_personal, snr_public)

            ax3.set_xlabel('This Work S/N')
            ax3.set_ylabel('COSMOS-Web DR1 Catalogue S/N')
            ax3.set_title('Signal-to-Noise Ratio Comparison')
            ax3.grid(True, alpha=0.3)

            annotate_correlation(ax3, snr_personal, snr_public)

    # 4. Flux residuals vs flux
    ax4 = fig.add_subplot(3, 3, 4)
    if has_flux:
        ax4.scatter(x_flux, y_flux - x_flux, alpha=0.6, s=30, color=colors['scatter'])
        ax4.axhline(y=0, color=colors['line'], linestyle='--', lw=2)
        ax4.set_xscale('log')
        ax4.set_xlabel('This Work Flux (µJy)')
        ax4.set_ylabel('Flux Residuals (COSMOS-Web DR1 - This Work)')
        ax4.set_title('Flux Residuals vs Flux')
        ax4.grid(True, alpha=0.3)

    # 5. Fractional flux differences
    ax5 = fig.add_subplot(3, 3, 5)
    if has_flux:
        ax5.scatter(x_flux, frac_diff, alpha=0.6, s=30, color=colors['scatter'])
        ax5.axhline(y=0, color=colors['line'], linestyle='--', lw=2)
        ax5.set_xscale('log')
        ax5.set_xlabel('This Work Flux (µJy)')
        ax5.set_ylabel('Fractional Flux Difference')
        ax5.set_title('Fractional Flux Differences')
        ax5.grid(True, alpha=0.3)

    # 6. Error bar comparison
    ax6 = fig.add_subplot(3, 3, 6)
    # Test the combined mask directly: a bitwise AND of the two mask *counts*
    # can be zero (e.g. 4 & 3) even when objects are valid in both masks.
    if np.any(valid_both):
        xb_flux = data['flux_personal'][valid_both]
        yb_flux = data['flux_public'][valid_both]
        xb_err = data['flux_err_personal'][valid_both]
        yb_err = data['flux_err_public'][valid_both]

        ax6.errorbar(xb_flux, yb_flux,
                     xerr=xb_err, yerr=yb_err,
                     fmt='o', alpha=0.6, capsize=3, color=colors['scatter'])
        draw_one_to_one(ax6, xb_flux, yb_flux)

        ax6.loglog()
        ax6.set_xlabel('This Work Flux (µJy)')
        ax6.set_ylabel('COSMOS-Web DR1 Catalogue Flux (µJy)')
        ax6.set_title('Flux with Error Bars')
        ax6.grid(True, alpha=0.3)

    # 7. Flux histogram comparison
    ax7 = fig.add_subplot(3, 3, 7)
    if has_flux:
        # Common logarithmic bins spanning both flux sets
        bins = np.logspace(np.log10(min(np.min(x_flux), np.min(y_flux))),
                           np.log10(max(np.max(x_flux), np.max(y_flux))), 20)

        ax7.hist(x_flux, bins=bins, alpha=0.6, label='This Work', color=colors['scatter'])
        ax7.hist(y_flux, bins=bins, alpha=0.6, label='COSMOS-Web DR1', color=colors['line'])
        ax7.set_xscale('log')
        ax7.set_xlabel('Flux (µJy)')
        ax7.set_ylabel('Number of Objects')
        ax7.set_title('Flux Distribution Comparison')
        ax7.legend()
        ax7.grid(True, alpha=0.3)

    # 8. Residuals histogram
    ax8 = fig.add_subplot(3, 3, 8)
    if has_flux:
        median_frac = np.median(frac_diff)
        mean_frac = np.mean(frac_diff)

        ax8.hist(frac_diff, bins=30, alpha=0.7, color=colors['hist'])
        ax8.axvline(x=0, color=colors['line'], linestyle='--', lw=2)
        ax8.axvline(x=median_frac, color='red', linestyle='-', lw=2,
                    label=f'Median: {median_frac:.3f}')
        ax8.axvline(x=mean_frac, color='blue', linestyle='-', lw=2,
                    label=f'Mean: {mean_frac:.3f}')
        ax8.set_xlabel('Fractional Flux Difference')
        ax8.set_ylabel('Number of Objects')
        ax8.set_title('Fractional Difference Distribution')
        ax8.legend()
        ax8.grid(True, alpha=0.3)

    # 9. Summary statistics text
    ax9 = fig.add_subplot(3, 3, 9)
    ax9.axis('off')

    summary_text = "\n  PHOTOMETRY COMPARISON SUMMARY  \n\n"

    if has_flux:
        stats_flux = calculate_statistics(x_flux, y_flux,
                                          np.ones(len(x_flux), dtype=bool))
        if stats_flux:
            summary_text += f"  FLUX COMPARISON (N={stats_flux['n_objects']}):  \n"
            summary_text += f"    Correlation: {stats_flux['correlation']:.3f}  \n"
            summary_text += f"    Median fractional diff: {stats_flux['median_frac_diff']:.3f}  \n"
            summary_text += f"    Mean fractional diff: {stats_flux['mean_frac_diff']:.3f}  \n"
            summary_text += f"    Std fractional diff: {stats_flux['std_frac_diff']:.3f}  \n\n"

    summary_text += "  DATA COVERAGE:\n"
    summary_text += f"    Total objects: {len(data['table'])}  \n"
    summary_text += f"    Valid flux measurements: {np.sum(valid_flux)}  \n"

    ax9.text(0.05, 0.95, summary_text, transform=ax9.transAxes,
             verticalalignment='top', fontfamily='monospace', fontsize=14,
             bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))

    fig.tight_layout()

    return fig

# Main execution function
def analyse_photometry_comparison(fits_file_path, output_plot_path=None):
    """
    Run the photometry comparison end to end.

    Loads the table via ``load_and_prepare_data``, draws the figure with
    ``create_comparison_plot``, optionally saves it, shows it, and prints a
    text summary of the flux comparison.

    Parameters
    ----------
    fits_file_path : str
        Path handed to ``load_and_prepare_data``.
    output_plot_path : str, optional
        File to save the figure to; nothing is saved when falsy.

    Returns
    -------
    tuple
        ``(data, fig)``: the prepared data dictionary and the figure.
    """
    print("Loading and preparing data...")
    data = load_and_prepare_data(fits_file_path)

    print("Creating comparison plots...")
    fig = create_comparison_plot(data)

    if output_plot_path:
        print(f"Saving plot to {output_plot_path}")
        plt.savefig(output_plot_path, dpi=300, bbox_inches='tight')

    plt.show()

    # Header of the printed report
    rule = "=" * 60
    for header_line in ("\n" + rule, "PHOTOMETRY COMPARISON ANALYSIS SUMMARY", rule):
        print(header_line)

    flux_mask = data['valid_flux']
    n_valid = np.sum(flux_mask)

    # Statistics are only attempted when at least one flux pair is valid;
    # calculate_statistics itself returns {} for fewer than three pairs.
    stats_flux = {}
    if n_valid > 0:
        stats_flux = calculate_statistics(data['flux_personal'][flux_mask],
                                          data['flux_public'][flux_mask],
                                          np.ones(n_valid, dtype=bool))

    if stats_flux:
        report_lines = (
            f"\nFLUX COMPARISON ({stats_flux['n_objects']} objects):",
            f"  Pearson correlation coefficient: {stats_flux['correlation']:.4f}",
            f"  Mean fractional difference: {stats_flux['mean_frac_diff']:.4f}",
            f"  Standard deviation of fractional differences: {stats_flux['std_frac_diff']:.4f}",
            f"  RMS of absolute residuals: {stats_flux['rms_residual']:.4f} µJy",
        )
        for report_line in report_lines:
            print(report_line)

    print("\nDATA COVERAGE:")
    print(f"  Total objects in table: {len(data['table'])}")
    print(f"  Objects with valid flux measurements: {n_valid}")

    return data, fig


Create a cleaner 3x2 grid that is better suited to the thesis appendix.

In [None]:
def create_clean_appendix_plot(data):
    """
    Create a clean 3x2 appendix figure with the most essential comparisons.

    Panels, in reading order: (a) flux vs flux with error bars, (b) flux
    error vs flux error, (c) fractional flux difference vs flux, (d) flux
    histograms, (e) histogram of fractional differences, and a text summary
    in the last cell.

    Parameters
    ----------
    data : dict
        Mapping with the keys 'flux_personal', 'flux_err_personal',
        'flux_public', 'flux_err_public', 'valid_flux' and 'valid_flux_err'.
        The two ``valid_*`` entries are used as boolean index masks on the
        flux / flux-error arrays.

    Returns
    -------
    matplotlib.figure.Figure
        The figure holding all six cells. A panel whose mask selects no
        objects is left empty.
    """
    fig = plt.figure(figsize=(9, 10))

    # Define color scheme
    colors = {'scatter': '#1f77b4', 'line': '#ff7f0e', 'hist': '#2ca02c', 'error': '#d62728'}
    stats_box = dict(boxstyle='round', facecolor='white', alpha=0.9, edgecolor='gray')

    valid_flux = data['valid_flux']
    valid_flux_err = data['valid_flux_err']
    valid_both = valid_flux & valid_flux_err
    has_flux = np.sum(valid_flux) > 0
    has_err = np.sum(valid_flux_err) > 0

    # Flux selections shared by panels (c), (d), (e) and the summary
    x_flux = data['flux_personal'][valid_flux]
    y_flux = data['flux_public'][valid_flux]
    frac_diff = (y_flux - x_flux) / x_flux

    def draw_one_to_one(ax, xs, ys):
        """Draw the dashed, labelled 1:1 line across the joint data range."""
        lo = min(np.min(xs), np.min(ys))
        hi = max(np.max(xs), np.max(ys))
        ax.plot([lo, hi], [lo, hi], '--',
                color=colors['line'], lw=2, label='1:1 line')

    def annotate_correlation(ax, xs, ys):
        """Write 'r' and 'N' in the top-left corner if statistics exist."""
        result = calculate_statistics(xs, ys, np.ones(len(xs), dtype=bool))
        if result:
            ax.text(0.05, 0.95,
                    f'r = {result["correlation"]:.3f}\nN = {result["n_objects"]}',
                    transform=ax.transAxes, verticalalignment='top', fontsize=12,
                    bbox=stats_box)

    # (a) Flux comparison with error bars (row 1, left)
    ax1 = fig.add_subplot(3, 2, 1)
    # Test the combined mask directly: a bitwise AND of the two mask *counts*
    # can be zero (e.g. 4 & 3) even when objects are valid in both masks.
    if np.any(valid_both):
        xb_flux = data['flux_personal'][valid_both]
        yb_flux = data['flux_public'][valid_both]
        xb_err = data['flux_err_personal'][valid_both]
        yb_err = data['flux_err_public'][valid_both]

        ax1.errorbar(xb_flux, yb_flux,
                     xerr=xb_err, yerr=yb_err,
                     fmt='o', alpha=0.7, capsize=2, markersize=4,
                     color=colors['scatter'], ecolor=colors['scatter'])
        draw_one_to_one(ax1, xb_flux, yb_flux)

        ax1.loglog()
        ax1.set_xlabel('This Work F770W Flux (µJy)', fontsize=13)
        ax1.set_ylabel('COSMOS2025 F770W Flux (µJy)', fontsize=13)
        ax1.set_title('(a) Flux Comparison', fontsize=14, fontweight='bold')
        ax1.grid(True, alpha=0.3)
        ax1.legend(fontsize=11)

        annotate_correlation(ax1, xb_flux, yb_flux)

    # (b) Flux error comparison (row 1, right)
    ax2 = fig.add_subplot(3, 2, 2)
    if has_err:
        x_err = data['flux_err_personal'][valid_flux_err]
        y_err = data['flux_err_public'][valid_flux_err]

        ax2.scatter(x_err, y_err, alpha=0.7, s=40, color=colors['error'],
                    edgecolors='white', linewidth=0.5)
        draw_one_to_one(ax2, x_err, y_err)

        ax2.set_xscale('log')
        ax2.set_yscale('log')
        ax2.set_xlabel('This Work Flux Error (µJy)', fontsize=13)
        ax2.set_ylabel('COSMOS2025 Flux Error (µJy)', fontsize=13)
        ax2.set_title('(b) Flux Error Comparison', fontsize=14, fontweight='bold')
        ax2.grid(True, alpha=0.3)
        ax2.legend(fontsize=11)

        annotate_correlation(ax2, x_err, y_err)

    # (c) Fractional flux differences (row 2, left)
    ax3 = fig.add_subplot(3, 2, 3)
    if has_flux:
        ax3.scatter(x_flux, frac_diff, alpha=0.7, s=40, color=colors['scatter'],
                    edgecolors='white', linewidth=0.5)
        ax3.axhline(y=0, color=colors['line'], linestyle='--', lw=2)

        ax3.set_xscale('log')
        ax3.set_xlabel('This Work F770W Flux (µJy)', fontsize=13)
        ax3.set_ylabel('Fractional Flux Difference\n(COSMOS2025 - This Work) / This Work', fontsize=13)
        ax3.set_title('(c) Systematic Differences', fontsize=14, fontweight='bold')
        ax3.grid(True, alpha=0.3)

        # Scatter and RMS of the fractional differences, top-right corner
        ax3.text(0.95, 0.95,
                 f'σ = {np.std(frac_diff):.3f}\nRMS = {np.sqrt(np.mean(frac_diff**2)):.3f}',
                 transform=ax3.transAxes, verticalalignment='top',
                 horizontalalignment='right', fontsize=12,
                 bbox=stats_box)

    # (d) Flux histogram comparison (row 2, right)
    ax4 = fig.add_subplot(3, 2, 4)
    if has_flux:
        # Common logarithmic bins spanning both flux sets
        bins = np.logspace(np.log10(min(np.min(x_flux), np.min(y_flux))),
                           np.log10(max(np.max(x_flux), np.max(y_flux))), 20)

        ax4.hist(x_flux, bins=bins, alpha=0.7, label='This Work',
                 color=colors['scatter'], edgecolor='white', linewidth=0.5)
        ax4.hist(y_flux, bins=bins, alpha=0.7, label='COSMOS2025',
                 color=colors['line'], edgecolor='white', linewidth=0.5)
        ax4.set_xscale('log')
        ax4.set_xlabel('F770W Flux (µJy)', fontsize=13)
        ax4.set_ylabel('Number of Objects', fontsize=13)
        ax4.set_title('(d) Flux Distribution Comparison', fontsize=14, fontweight='bold')
        ax4.legend(fontsize=11)
        ax4.grid(True, alpha=0.3)

    # (e) Histogram of fractional differences (row 3, left)
    ax5 = fig.add_subplot(3, 2, 5)
    if has_flux:
        median_frac = np.median(frac_diff)
        mean_frac = np.mean(frac_diff)

        ax5.hist(frac_diff, bins=30, alpha=0.8, color=colors['hist'],
                 edgecolor='white', linewidth=0.5)
        # Fractional differences are dimensionless, so the legend entries
        # carry no flux unit.
        ax5.axvline(x=median_frac, color=colors['error'], linestyle='-', lw=2,
                    label=f'Median: {median_frac:.3f}')
        ax5.axvline(x=mean_frac, color='purple', linestyle='-', lw=2,
                    label=f'Mean: {mean_frac:.3f}')
        ax5.set_xlabel('Fractional Flux Difference', fontsize=13)
        ax5.set_ylabel('Number of Objects', fontsize=13)
        ax5.set_title('(e) Residuals Distribution', fontsize=14, fontweight='bold')
        ax5.legend(fontsize=11)
        ax5.grid(True, alpha=0.3)

    # Summary statistics (row 3, right)
    ax6 = fig.add_subplot(3, 2, 6)
    ax6.axis('off')

    summary_text = "PHOTOMETRY VALIDATION\nSUMMARY STATISTICS\n" + "="*25 + "\n\n"

    if has_flux:
        stats_flux = calculate_statistics(x_flux, y_flux,
                                          np.ones(len(x_flux), dtype=bool))
        if stats_flux:
            summary_text += "FLUX COMPARISON:\n"
            summary_text += f"  • Matched objects: {stats_flux['n_objects']}\n"
            summary_text += f"  • Pearson correlation: {stats_flux['correlation']:.4f}\n"
            summary_text += f"  • Mean frac. difference: {stats_flux['mean_frac_diff']:.4f}\n"
            summary_text += f"  • Std frac. difference: {stats_flux['std_frac_diff']:.4f}\n"
            summary_text += f"  • RMS residual: {stats_flux['rms_residual']:.2f} µJy\n\n"

    if has_err:
        stats_err = calculate_statistics(data['flux_err_personal'][valid_flux_err],
                                         data['flux_err_public'][valid_flux_err],
                                         np.ones(np.sum(valid_flux_err), dtype=bool))
        if stats_err:
            summary_text += "ERROR COMPARISON:\n"
            summary_text += f"  • Error correlation: {stats_err['correlation']:.4f}\n"
            # mean((y - x) / x) + 1 equals the mean of y / x
            summary_text += f"  • Mean error ratio: {stats_err['mean_frac_diff']+1:.4f}"

    ax6.text(0.05, 0.95, summary_text, transform=ax6.transAxes,
             verticalalignment='top', fontfamily='monospace', fontsize=11,
             bbox=dict(boxstyle='round,pad=0.5', facecolor='lightblue', alpha=0.1, edgecolor='gray'))

    fig.tight_layout()
    fig.subplots_adjust(hspace=0.35, wspace=0.35)

    return fig

# Updated main function for the clean appendix version
def analyse_photometry_comparison(fits_file_path, output_plot_path=None, plot_type='appendix'):
    """
    Load the matched photometry table and create one of the validation figures.

    Parameters
    ----------
    fits_file_path : str
        Path handed to ``load_and_prepare_data``.
    output_plot_path : str, optional
        Directory in which the figure is saved under a fixed file name
        ("phot_comp_appendix_v2.png" or "phot_comp_thesis.png"). When falsy,
        nothing is saved.
    plot_type : {'appendix', 'thesis'}
        'appendix' uses ``create_clean_appendix_plot``; 'thesis' uses
        ``create_reduced_comparison_plot``. Any other value creates no figure.

    Returns
    -------
    tuple
        ``(data, fig)``; ``fig`` is whatever the selected plotting function
        returned, or None when no figure was created.
    """
    print("Loading and preparing data...")
    data = load_and_prepare_data(fits_file_path)

    print("Creating clean appendix validation plot...")
    if plot_type == 'appendix':
        fig = create_clean_appendix_plot(data)
        plot_name = "phot_comp_appendix_v2.png"
    elif plot_type == 'thesis':
        fig = create_reduced_comparison_plot(data)
        plot_name = "phot_comp_thesis.png"
    else:
        fig = None
        plot_name = None
        print(f"Unknown plot_type {plot_type!r}; no figure created.")

    # Build the output path only when a directory was given, so the default
    # output_plot_path=None no longer reaches os.path.join (TypeError).
    if output_plot_path and fig is not None:
        save_path = os.path.join(output_plot_path, plot_name)
        print(f"Saving plot to {save_path}")
        # Save the returned figure explicitly rather than whichever figure
        # happens to be current in pyplot.
        fig.savefig(save_path, dpi=300, bbox_inches='tight', facecolor='white')
    elif output_plot_path and plot_name is not None:
        print("Plotting function returned no figure; nothing saved.")

    plt.show()

    return data, fig

Thesis-ready comparison plot

In [None]:
def create_reduced_comparison_plot(data):
    """
    Create the condensed thesis comparison plots (two 1x2 figures).

    Figure 1: flux vs flux with error bars (left) and fractional flux
    differences vs flux (right).
    Figure 2: flux error vs flux error (left) and flux histograms (right).

    Parameters
    ----------
    data : dict
        Mapping with the keys 'flux_personal', 'flux_err_personal',
        'flux_public', 'flux_err_public', 'valid_flux' and 'valid_flux_err'.
        The two ``valid_*`` entries are used as boolean index masks on the
        flux / flux-error arrays.

    Returns
    -------
    matplotlib.figure.Figure
        Figure 1 (the flux comparison). Figure 2 is also created and stays
        open in pyplot, but is not returned.
    """
    # Define color scheme
    colors = {'scatter': '#1f77b4', 'line': '#ff7f0e', 'hist': '#2ca02c', 'error': '#d62728'}

    valid_flux = data['valid_flux']
    valid_flux_err = data['valid_flux_err']
    valid_both = valid_flux & valid_flux_err
    has_flux = np.sum(valid_flux) > 0

    x_flux = data['flux_personal'][valid_flux]
    y_flux = data['flux_public'][valid_flux]

    def draw_one_to_one(ax, xs, ys):
        """Draw the dashed, labelled 1:1 line across the joint data range."""
        lo = min(np.min(xs), np.min(ys))
        hi = max(np.max(xs), np.max(ys))
        ax.plot([lo, hi], [lo, hi], '--', color=colors['line'], lw=2, label='1:1 line')

    # ---- Figure 1: flux comparison and fractional differences ----
    fig_flux = plt.figure(figsize=(10, 4))

    # Left: flux comparison with error bars (log-log scale)
    ax_flux = fig_flux.add_subplot(1, 2, 1)
    # Test the combined mask directly: a bitwise AND of the two mask *counts*
    # can be zero (e.g. 4 & 3) even when objects are valid in both masks.
    if np.any(valid_both):
        xb_flux = data['flux_personal'][valid_both]
        yb_flux = data['flux_public'][valid_both]
        xb_err = data['flux_err_personal'][valid_both]
        yb_err = data['flux_err_public'][valid_both]

        ax_flux.errorbar(xb_flux, yb_flux,
                         xerr=xb_err, yerr=yb_err,
                         fmt='o', alpha=0.6, capsize=3, color=colors['scatter'])
        draw_one_to_one(ax_flux, xb_flux, yb_flux)
        ax_flux.legend()
        ax_flux.loglog()
        ax_flux.set_xlabel('This Work Flux (µJy)')
        ax_flux.set_ylabel('COSMOS2025 Flux (µJy)')
        ax_flux.set_title('Flux Comparison')
        ax_flux.grid(True, alpha=0.3)

    # Right: fractional flux differences
    ax_frac = fig_flux.add_subplot(1, 2, 2)
    if has_flux:
        frac_diff = (y_flux - x_flux) / x_flux
        median_frac = np.median(frac_diff)

        ax_frac.scatter(x_flux, frac_diff, alpha=0.6, s=30, color=colors['scatter'])
        # Fractional differences are dimensionless, so no flux unit in the label
        ax_frac.axhline(y=median_frac, color=colors['error'], linestyle='-', lw=2,
                        label=f'Median: {median_frac:.3f}')
        ax_frac.set_xscale('log')
        ax_frac.set_xlabel('This Work Flux (µJy)')
        ax_frac.set_ylabel(r'$\Delta$ f')
        ax_frac.set_title('Fractional Flux Differences')
        ax_frac.legend()
        ax_frac.grid(True, alpha=0.3)

    # ---- Figure 2: flux errors and flux distributions ----
    fig_err = plt.figure(figsize=(10, 4))

    # Left: flux error comparison
    ax_err = fig_err.add_subplot(1, 2, 1)
    if np.sum(valid_flux_err) > 0:
        x_err = data['flux_err_personal'][valid_flux_err]
        y_err = data['flux_err_public'][valid_flux_err]

        ax_err.scatter(x_err, y_err, alpha=0.7, s=40, color=colors['error'],
                       edgecolors='white', linewidth=0.5)
        draw_one_to_one(ax_err, x_err, y_err)

        ax_err.set_xscale('log')
        ax_err.set_yscale('log')
        ax_err.set_xlabel('This Work Flux Error (µJy)')
        ax_err.set_ylabel('COSMOS2025 Flux Error (µJy)')
        ax_err.set_title('Flux Error Comparison')
        ax_err.grid(True, alpha=0.3)
        ax_err.legend(fontsize=11)

        # Add statistics
        stats_err = calculate_statistics(x_err, y_err, np.ones(len(x_err), dtype=bool))
        if stats_err:
            ax_err.text(0.05, 0.95,
                        f'r = {stats_err["correlation"]:.3f}\nN = {stats_err["n_objects"]}',
                        transform=ax_err.transAxes, verticalalignment='top', fontsize=12,
                        bbox=dict(boxstyle='round', facecolor='white', alpha=0.9, edgecolor='gray'))

    # Right: flux histogram comparison
    ax_hist = fig_err.add_subplot(1, 2, 2)
    if has_flux:
        # Common logarithmic bins spanning both flux sets
        bins = np.logspace(np.log10(min(np.min(x_flux), np.min(y_flux))),
                           np.log10(max(np.max(x_flux), np.max(y_flux))), 20)

        ax_hist.hist(x_flux, bins=bins, alpha=0.7, label='This Work',
                     color=colors['scatter'], edgecolor='white', linewidth=0.5)
        ax_hist.hist(y_flux, bins=bins, alpha=0.7, label='COSMOS2025',
                     color=colors['line'], edgecolor='white', linewidth=0.5)
        ax_hist.set_xscale('log')
        ax_hist.set_xlabel('F770W Flux (µJy)')
        ax_hist.set_ylabel('Number of Objects')
        ax_hist.set_title('Flux Distribution Comparison')
        ax_hist.legend(fontsize=11)
        ax_hist.grid(True, alpha=0.3)

    # Make the returned figure the current pyplot figure so that a caller
    # using plt.savefig() writes the same figure it receives.
    plt.figure(fig_flux.number)

    return fig_flux

Call the function

In [None]:
 # Replace with your FITS file path
# Sky-matched photometry table that is passed to the analysis below
fits_file_path = '/Users/benjamincollins/University/master/Red_Cardinal/COSMOS-Web_DR1/Phot_Table_sky_matched.fits'


# Optional: specify output path for the plot
# (a directory: analyse_photometry_comparison appends the file name itself)
output_plot_path = '/Users/benjamincollins/University/master/Red_Cardinal/COSMOS-Web_DR1/plots/'

# Run the analysis
data, fig = analyse_photometry_comparison(fits_file_path, output_plot_path, plot_type='appendix')

# Reuse the data loaded above to also draw the condensed thesis plots
#data = load_and_prepare_data(fits_file_path)
create_reduced_comparison_plot(data)

# Section to perform photometry with all 4 filters (F770W, F1000W, F1800W, F2100W)

Performing the photometry became much easier now!

In [None]:
aperture_table = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/apertures/aperture_table_v5.csv'
output_folder = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/'

# Same call for every filter, in the order F770W, F1000W, F1800W, F2100W;
# the per-filter file lists are defined earlier in the notebook.
for filter_files in (f770w_files, f1000w_files, f1800w_files, f2100w_files):
    phot.perform_photometry(filter_files, aperture_table, output_folder, suffix='v7')


Now combine the per-filter CSV files into one big FITS table.

In [None]:
# File name of the combined FITS table.
# NOTE(review): the variable is named `fits_table_v6` but holds a v7 file name,
# while the CSV inputs below are the v6 tables and the photometry cell uses
# suffix='v7' — confirm which version is intended.
# NOTE(review): this is a bare file name without a directory; presumably it is
# resolved by phot.create_fits_table_from_csv or the working directory — verify.
fits_table_v6 = 'Photometry_Table_MIRI_v7.fits'

# Directory holding the per-filter photometry CSV tables
results_dir = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/results/'

# One CSV table per MIRI filter
f770w_fname  = os.path.join(results_dir, 'phot_table_F770W_v6.csv')
f1000w_fname = os.path.join(results_dir, 'phot_table_F1000W_v6.csv')
f1800w_fname = os.path.join(results_dir, 'phot_table_F1800W_v6.csv')
f2100w_fname = os.path.join(results_dir, 'phot_table_F2100W_v6.csv')

csv_paths = [f770w_fname, f1000w_fname, f1800w_fname, f2100w_fname]

# Now create the combined FITS table
phot.create_fits_table_from_csv(csv_paths, output_file=fits_table_v6)

In [None]:
# Example: walk through the table and print the companion / artifact flags
fits_table_v5 = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/phot_tables/Flux_Aperture_PSFMatched_AperCorr_MIRI_v5.fits'
table = Table.read(fits_table_v5)

for row in table:
    # 'Filters' is a comma-separated string; 'Flag_Art' is indexed by the
    # position of each filter within that string.
    artifact_flags = row['Flag_Art']

    print(f"Galaxy {row['ID']}: Companion = {row['Flag_Com']}")
    for j, filt in enumerate(row['Filters'].split(',')):
        print(f"  {filt}: Artifact = {artifact_flags[j]}")
    print('\n')

Now let's see how the heatmap looks with only detections displayed

In [None]:
# Galaxy IDs listed per filter as non-detections.
# This dictionary is not used anywhere else in this cell.
# NOTE(review): ID 12443 is listed here under "F770W" and also in `det_f770w`
# below — confirm which list it belongs to.
nondetections = {
        "F770W": [7696,7730,8465,8843,9517,9809,9901,9986,10415,10600,11086,11137,
            11247,11451,11481,11853,12133,12175,12202,12213,12443,12513,16424,16615,
            17534,17793,18769,19024,19098,19307,19681,20720,21218,21424,21451,
            21472,21477,21547,22606],
        "F1000W": [11716,11723,11853,12133,12164,12202,12332,12443,12513,13297,16424,
                    16615,17534,17793,17984,18327,19307],
        "F1800W": [7102,7904,7922,7934,8338,8465,9517,10054,10128,10339,10400,10415,
                    10565,10592,10600,11142,11247,11420,11451,11716,11723,12014,12133,
                    12164,12175,12202,12213,12332,12513,16419,18332,18769,19024,19098,
                    19393,19563,19681,21451],
        "F2100W": [7102,11142,11247,11494,11716,11723,11853,12014,12133,12164,12175,
                    12202,12213,12332,12443,12513,13297,16419,16424,16474,16516,16615,
                    16874,17000,17517,17534,17842,17916,17984,18094]
    }

# Galaxy IDs listed per filter as detections
det_f770w = [7102,7136,7185,7549,7904,7922,7934,8013,8338,8469,8500,9180,9395,9519,
             9871,10021,10054,10128,10245,10314,10339,10400,10565,10592,11136,11142,
             11420,11494,11716,11723,12014,12020,12148,12164,12282,12332,12340,12443,
             12717,13103,16419,16474,16516,16874,17000,17517,17669,18332,18977,19393,
             19563,20238,20397,21165,21452,21541,22199]

det_f1000w = [7102,11136,11142,11494,12020,12282,12340,12717,13103,13174,16419,16474,
              16516,17000,17517,17669,17842,18252]

det_f1800w = [8500,9871,10021,10245,10314,11136,11494,12020,12282,12717,13103,16874,
              18977]

det_f2100w = [11136,12020,12282,12340,12717,13103,13174,17669,18139,18252]

# Filter name -> list of detected galaxy IDs; this is the `detections`
# argument of the (currently commented-out) plotting calls below.
detections = {
    'F770W': det_f770w,
    'F1000W': det_f1000w,
    'F1800W': det_f1800w,
    'F2100W': det_f2100w
}


# Number of detections per filter
print(len(det_f770w))
print(len(det_f1000w))
print(len(det_f1800w))
print(len(det_f2100w))

# The plotting calls below are disabled; they are kept for reference only.
#fits_table_v5 = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/phot_tables/Flux_Aperture_PSFMatched_AperCorr_MIRI_v5.fits'
#fig_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/miri_detections_v3.png'
#stats_path = '~/University/master/Red_Cardinal/photometry/miri_detections.txt'
#phot.galaxy_statistics(fits_table_v5, fig_path=fig_path, detections=detections, cols=2)

#fig_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/miri_coverage_v3.png'
#phot.galaxy_statistics(fits_table_v5, fig_path=fig_path, cols=2)
#phot.plot_galaxy_filter_matrix(fits_table_v5, fig_path, 'MIRI Detections', detections)

#fig_path = '~/University/master/Red_Cardinal/photometry/miri_coverage.png'
#phot.plot_galaxy_filter_matrix(fits_table_v5, fig_path, 'MIRI Coverage')

Test the newest function, `phot.create_mosaics`:

In [None]:
# Directories passed to phot.create_mosaics below
vis_dir = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/vis_data_small/'
mosaic_dir = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/mosaic_plots_v2/'
plane_sub_dir = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/plots/plane_sub/'

# NOTE(review): presumably vis_dir is the input and the other two are output
# folders — confirm against phot.create_mosaics.
phot.create_mosaics(vis_dir, mosaic_dir=mosaic_dir, plane_sub_dir=plane_sub_dir)

Code for mask visualisation

In [None]:
from photutils.aperture import EllipticalAperture, EllipticalAnnulus, aperture_photometry

# Two-panel figure for a single galaxy/filter: the background-subtracted
# cutout with the source aperture overlaid (left) and the region mask (right).
vis_path = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/vis_data/21424_F770W.h5'
out_path = '/Users/benjamincollins/University/master/Red_Cardinal/random_plots/21424_regions.png'

vis_data = phot.load_vis(vis_path)

# Pull out only the entries the two panels need. In particular the 'filter'
# entry is not bound to a variable, which would shadow the builtin `filter`.
background_subtracted = vis_data['background_subtracted']
mask_vis = vis_data['mask_vis']
aperture_params = vis_data['aperture_params']
region_name = vis_data['region_name']

# Elliptical source aperture drawn on top of the image panel
source_aperture = EllipticalAperture(
    positions=(aperture_params['x_center'], aperture_params['y_center']),
    a=aperture_params['a'],
    b=aperture_params['b'],
    theta=aperture_params['theta'],
)

# Create figure with two subplots in a horizontal row
fig, axes = plt.subplots(1, 2, figsize=(12, 4.3))

# Background-subtracted data, colour scale clipped to the 5th-95th percentile
# (NaN-aware) so outlier pixels do not wash out the stretch
vmin = np.nanpercentile(background_subtracted, 5)
vmax = np.nanpercentile(background_subtracted, 95)

im_data = axes[0].imshow(background_subtracted, origin='lower', cmap='magma', vmin=vmin, vmax=vmax)
plt.colorbar(im_data, ax=axes[0], label='Background-subtracted Flux [MJy/(sr pixel)]')
source_aperture.plot(ax=axes[0], color='blue', lw=4)

# Mask visualisation: four discrete colours, with vmin/vmax chosen so that the
# integer values 0..3 sit at the centre of their colour bins.
# plt.get_cmap replaces plt.cm.get_cmap, which was deprecated in Matplotlib 3.7
# and removed in 3.9.
cmap = plt.get_cmap('viridis', 4)
im_mask = axes[1].imshow(mask_vis, origin='lower', cmap=cmap, vmin=-0.5, vmax=3.5)
cbar = plt.colorbar(im_mask, ax=axes[1], ticks=[0, 1, 2, 3])
cbar.set_ticklabels(['Excluded\n', 'Used for fitting',
                     f'{region_name} region', 'Source'])

# Tight layout and saving the figure
plt.tight_layout()
plt.subplots_adjust(top=0.85)  # Leave headroom at the top of the figure
plt.savefig(out_path, dpi=150)
plt.close(fig)
        

    
    
    