# Notebook to overplot MIRI photometry on existing PROSPECTOR fits

In [None]:
%load_ext autoreload
%autoreload 2

import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import pickle as pkl
import pandas as pd

from astropy.table import Table
from prospector_utils.plotting import *
from prospector_utils.analysis import compute_residuals, get_galaxy_properties, get_extremes
import prospect
print(prospect.__file__)

# Section to overplot MIRI

Set the galaxy_ids array

In [None]:
# Location of the MIRI photometry catalogue (FITS table).
table_path = '/Users/benjamincollins/University/Master/Red_Cardinal/photometry/phot_tables/Photometry_Table_MIRI_v6.fits'

# One string ID per catalogue row; later cells convert back with int(...) where needed.
table = Table.read(table_path, format='fits')
galaxy_ids = np.asarray(list(map(str, table['ID'])))

# Optional: walk through the sample in reverse order.
#galaxy_ids = reversed(galaxy_ids)


Reconstruct and plot the prospector outputs with MIRI

In [None]:
# Output directories for the reconstructed fit figures (with / without dust emission).
# NOTE(review): `plot`, `plot_nodust` and `stats_nodust` are only referenced by the
# commented-out calls below; the active loop passes `stats_dir` alone, so figures
# presumably go to reconstruct()'s default plot directory — confirm this is intended.
# NOTE(review): these paths spell '/University/master/' in lower case while other
# cells use '/University/Master/'; this only works on a case-insensitive filesystem.
plot = '/Users/benjamincollins/University/master/Red_Cardinal/prospector/fits_v3/'
plot_nodust = '/Users/benjamincollins/University/master/Red_Cardinal/prospector/fits_nodust/'

# Directories holding the per-galaxy pickle files (with / without dust emission).
stats = '/Users/benjamincollins/University/master/Red_Cardinal/prospector/pickle_files/'
stats_nodust = '/Users/benjamincollins/University/master/Red_Cardinal/prospector/pickle_nodust/'

# Reconstruct every galaxy in the catalogue; IDs are stored as strings, hence int().
for gid in galaxy_ids:
    reconstruct(int(gid), stats_dir=stats)
    # Variant without dust emission:
    #reconstruct(int(gid), plot_dir=plot_nodust, stats_dir=stats_nodust, add_duste=False)

# One-off calls for individual galaxies, kept for reference:
#reconstruct(12513, plot_dir=plot_nodust, stats_dir=stats_nodust, add_duste=False)
#reconstruct(16424, plot_dir=plot_nodust, stats_dir=stats_nodust, add_duste=False)
#reconstruct(9871, plot_dir=plot_nodust, stats_dir=stats_nodust, add_duste=False)
#reconstruct(17916, plot_dir=plot, stats_dir=stats)
#reconstruct(12717, plot_dir=plot, stats_dir=stats, add_duste=True)
#reconstruct(12717, plot_dir=plot_nodust, stats_dir=stats_nodust, add_duste=False)


Load the stored pickle file for each galaxy and display (and save) its plot

In [None]:
# Re-read the catalogue so this cell can be run on its own.
table_path = '/Users/benjamincollins/University/Master/Red_Cardinal/photometry/phot_tables/Photometry_Table_MIRI_v6.fits'

table = Table.read(table_path, format='fits')
galaxy_ids = np.asarray([str(gid) for gid in table['ID']])

# Process the sample in reverse order. A reversed *view* of the array is used
# instead of reversed(): the latter returns a single-use iterator, which would
# leave `galaxy_ids` exhausted (and every later loop over it silently empty).
galaxy_ids = galaxy_ids[::-1]

# Display each stored fit and save the figure next to the other v3 fits.
for gid in galaxy_ids:
    load_and_display(gid, outfile=f'/Users/benjamincollins/University/Master/Red_Cardinal/prospector/fits_v3/{gid}.png')

# One-off calls for individual galaxies, kept for reference:
#load_and_display(12717)
#load_and_display(18769)
#load_and_display(8465)

Check the observations stored in each pickle file, compute the residuals for every galaxy, and save them to a CSV file:

In [None]:
# Re-read the catalogue so this cell can be run on its own.
table_path = '/Users/benjamincollins/University/Master/Red_Cardinal/photometry/phot_tables/Photometry_Table_MIRI_v6.fits'

table = Table.read(table_path, format='fits')
galaxy_ids = np.asarray([str(gid) for gid in table['ID']])

# Process the sample in reverse order. Slicing keeps `galaxy_ids` a reusable
# array; reversed() would turn it into a one-shot iterator that is empty after
# the loop below, breaking any cell that is re-run afterwards.
galaxy_ids = galaxy_ids[::-1]

# Subsets used while debugging, kept for reference:
#galaxy_ids = ['11136', '17842', '17916', '17984', '18139']

# galaxy_ids = ['8465'] # undetected galaxy -> no valid photometry -> skipped in the PROSPECTOR fits - Problem solved ✅

# Collect the residual rows of every galaxy into one flat list of records.
all_rows = []

for objid in galaxy_ids:
    print(f"Processing galaxy ID: {objid}")
    galaxy_rows = compute_residuals(objid, show_plot=True)
    # compute_residuals may return None for a galaxy; such galaxies contribute no rows.
    if galaxy_rows is not None:
        all_rows.extend(galaxy_rows)

df = pd.DataFrame(all_rows)

# Write the table, creating the analysis directory on first use.
analysis_dir = '/Users/benjamincollins/University/Master/Red_Cardinal/prospector/analysis'
os.makedirs(analysis_dir, exist_ok=True)
df.to_csv(os.path.join(analysis_dir, 'residuals_abs.csv'), index=False)

Now let's create the histograms

In [None]:
# Residuals table written by the previous cell (residuals_abs.csv).
csv_path = '/Users/benjamincollins/University/Master/Red_Cardinal/prospector/analysis/residuals_abs.csv'
# Directory passed to create_hist as its output location.
hist_dir = '/Users/benjamincollins/University/Master/Red_Cardinal/prospector/histograms_v2/'

# Build the histograms from the residuals CSV.
create_hist(csv_path, out_dir=hist_dir)

# Section to analyse the sample

Let's call our function to collect the properties of every galaxy and store them together in a single pickle file

In [None]:
# Read the photometry table
phot_table = '/Users/benjamincollins/University/master/Red_Cardinal/photometry/phot_tables/Photometry_Table_MIRI.fits'
table = Table.read(phot_table)

# Convert IDs to strings if they are in bytes
# (the loop variable is not called `id`, to avoid shadowing the builtin)
gal_ids = [raw_id.decode() if isinstance(raw_id, bytes) else str(raw_id) for raw_id in table['ID']]

# Galaxy IDs flagged as non-detections, per MIRI band; handed to get_galaxy_properties.
non_detections = {
    'F770W': [11137, 11723, 17793, 8843, 12175, 7696, 7185, 8465, 19098, 12443, 12202, 21547, 9517, 9901, 10415, 12213, 
              21451, 11853, 11086, 22606, 18769, 9809, 11481, 21472, 19681, 12513, 21218, 12133, 16615, 10600, 11247, 20720, 17534], 
    'F1000W': [17984, 12513, 12164, 12133, 11716, 16615, 16424, 12202, 11723, 11853, 13297, 18327, 12443, 17534], 
    'F1800W': [12164, 11716, 10565, 10054, 11723, 12175, 19024, 8465, 8338, 18769, 7102, 10400, 12513, 19681, 7904, 
               10339, 12133, 10600, 9517, 10415, 11247, 12213, 11451, 7934], 
    'F2100W': [17984, 12164, 11716, 16516, 11723, 11853, 12175, 16474, 12443, 12513, 12133, 16615, 16424, 12202, 
               12332, 17517, 12014, 11247, 13297, 12213, 17916, 17534]
    }

# Galaxies deliberately left out of the sample.
# TODO: record why these two IDs are excluded.
excluded_ids = {12513, 18977}

# Map galaxy ID (int) -> properties returned by get_galaxy_properties.
all_galaxies = {}

for gid in gal_ids:
    gid = int(gid)
    if gid in excluded_ids:
        continue
    galaxy_data = get_galaxy_properties(gid, non_detections=non_detections)
    # Skip galaxies for which nothing (None / empty result) was returned.
    if galaxy_data:
        all_galaxies[gid] = galaxy_data

pickle_file = '/Users/benjamincollins/University/Master/Red_Cardinal/prospector/sample_stats/sample_data.pkl'

# Create the output directory up front; otherwise a missing directory would only
# surface as a FileNotFoundError after the whole loop above has already run.
os.makedirs(os.path.dirname(pickle_file), exist_ok=True)

with open(pickle_file, "wb") as f:
    pkl.dump(all_galaxies, f)

print(f"✅ Successfully wrote sample summary statistics to {pickle_file}")

Now this is where the fun begins! Let's load our galaxy data and display them nicely!

In [None]:
# Load the per-galaxy summary dictionary written by the previous section.
# NOTE: unpickling can execute arbitrary code — only load files you created yourself.
pickle_file = '/Users/benjamincollins/University/Master/Red_Cardinal/prospector/sample_stats/sample_data.pkl'
with open(pickle_file, "rb") as f:
    all_galaxies = pkl.load(f)

# Sanity check: show the stored record of one galaxy, without failing if it is absent.
example_gid = 12717
if example_gid in all_galaxies:
    print(all_galaxies[example_gid])
else:
    print(f"Galaxy {example_gid} is not part of the loaded sample")

# Thank god I was smart enough to store all these data hehe - next step is to colourise by Nsigma!
fit_quality_stats = pd.read_csv('/Users/benjamincollins/University/Master/Red_Cardinal/prospector/analysis/fit_quality.csv')


def _mean_of_values(per_band):
    """Return the NaN-ignoring mean of a dict's values, or NaN if there are none."""
    values = np.asarray(list(per_band.values()), dtype=float)
    if values.size == 0 or np.all(np.isnan(values)):
        return np.nan
    return float(np.nanmean(values))


def _as_object_array(items):
    """Pack per-galaxy lists into a 1-D object array (one entry per galaxy).

    Filled element by element so that galaxies with different numbers of bands
    neither raise nor get broadcast into a 2-D array.
    """
    packed = np.empty(len(items), dtype=object)
    for i, item in enumerate(items):
        packed[i] = item
    return packed


# Convert the dictionary into per-galaxy lists for plotting
gids = []
zreds = []
dust = []
logmasses = []
masses = []
sfr100 = []
ndetections = []
detections = []
fluxes = []
errors = []
bands = []
nsigmas = []         # per-galaxy dict of nsigma values, keyed by band
mean_nsig = []       # mean nsigma across all available filters, one value per galaxy
frac_diffs = []
mean_frac_diff = []  # mean fractional difference across all available filters
chi2_red = []        # one value per galaxy

for gid, g in all_galaxies.items():
    gids.append(gid)
    zreds.append(g['zred'])
    dust.append(g['dust'])
    logmasses.append(g['logmass'])
    # Convert logmass -> Msun formed (or use a return fraction if you like)
    masses.append(10**g['logmass'])
    sfr100.append(g['sfr_last100'])
    detections.append(g['detections'])
    ndetections.append(sum(g['detections'].values()))  # counts True in detections
    fluxes.append(g['fluxes'])

    # Materialise the dict views as lists: a bare .values()/.keys() view stays
    # tied to the source dict and ends up as an opaque object inside a numpy array.
    errors.append(list(g['errors'].values()))
    bands.append(list(g['fluxes'].keys()))

    nsigmas.append(g['nsig'])
    mean_nsig.append(_mean_of_values(g['nsig']))

    frac_diffs.append(list(g['frac_diff'].values()))
    mean_frac_diff.append(_mean_of_values(g['frac_diff']))

    chi2_red.append(g['chi2_red'])

# Convert to numpy arrays
zreds = np.array(zreds)
dust = np.array(dust)
logmasses = np.array(logmasses)
masses = np.array(masses)
sfr100 = np.array(sfr100)
# Calculate sSFR
ssfr = sfr100/masses
detections = np.array(detections)
ndetections = np.array(ndetections)
errors = _as_object_array(errors)
bands = _as_object_array(bands)
mean_nsig = np.array(mean_nsig)
frac_diffs = _as_object_array(frac_diffs)
mean_frac_diff = np.array(mean_frac_diff)
chi2_red = np.array(chi2_red)


zred_ms = np.median(zreds)
print("Median redshift of the sample: ", zred_ms)

dust_median = np.median(dust)
print("Median dust attenuation of the sample: ", dust_median)

In [None]:
# Generate all plots with publication-quality styling

# Every figure of this section is written to the same directory.
sample_plot_dir = '/Users/benjamincollins/University/Master/Red_Cardinal/prospector/sample_plots/'

# Stellar mass vs. redshift, coloured by detections (absolute and relative gradients).
plot_mass_vs_redshift(zreds, logmasses, detections, gradient='absolute', color_scheme='viridis_r', save_path=sample_plot_dir)
plot_mass_vs_redshift(zreds, logmasses, detections, gradient='relative', color_scheme='YlOrBr', save_path=sample_plot_dir)

# Main sequence at the sample's median redshift, default relation and the 'Leja' one.
plot_main_sequence(masses, sfr100, zred_ms, detections, gradient='relative', color_scheme='YlOrBr', save_path=sample_plot_dir)
plot_main_sequence(masses, sfr100, zred_ms, detections, ms_type='Leja', gradient='relative', color_scheme='YlOrBr', save_path=sample_plot_dir)

miri_bands = ('f770w', 'f1000w', 'f1800w', 'f2100w')

# Main sequence coloured by the flux in each MIRI band ...
for band in miri_bands:
    plot_main_sequence(masses, sfr100, zred_ms, detections, data=fluxes, gradient=band, save_path=sample_plot_dir)

# ... and by N_sigma in each MIRI band.
for band in miri_bands:
    plot_main_sequence(masses, sfr100, zred_ms, detections, data=nsigmas, gradient=f'nsig_{band}', save_path=sample_plot_dir)

# Disabled variants of the mass-redshift plot, kept for reference:
#for band in miri_bands:
#    plot_mass_vs_redshift(zreds, logmasses, detections, data=fluxes, gradient=band, save_path=sample_plot_dir)
#for band in miri_bands:
#    plot_mass_vs_redshift(zreds, logmasses, detections, data=nsigmas, color_scheme='gnuplot2_r', gradient=f'nsig_{band}', save_path=sample_plot_dir)
#plot_mass_vs_redshift(zreds, logmasses, detections, gradient='absolute', color_scheme='viridis_r', save_path=sample_plot_dir)

Check items in the photometry table

In [None]:
from astropy.io import fits

phot_table = "/Users/benjamincollins/University/Master/Red_Cardinal/photometry/phot_tables/Photometry_Table_MIRI.fits"

# Open the file in a context manager so the handle is closed again afterwards,
# and use a dedicated name so the astropy `table` from earlier cells is not
# replaced by an HDUList. Extension 1 is the one the original cell inspected.
with fits.open(phot_table) as hdul:
    print(hdul[1].data)

Now let's focus on the F1800W and F2100W bands and produce the last set of plots for my thesis, contrasting $N_\sigma$ with the specific SFR (sSFR) and the dust attenuation

In [None]:
# Extract nsigma arrays for the bands you care about
nsig_F1800W = np.array([g['nsig']['F1800W'] if 'F1800W' in g['nsig'] else np.nan 
                        for g in all_galaxies.values()])
nsig_F2100W = np.array([g['nsig']['F2100W'] if 'F2100W' in g['nsig'] else np.nan 
                        for g in all_galaxies.values()])

# Convert to log sSFR (common in the literature, avoids huge ranges)
log_ssfr = np.log10(sfr100 / masses)

# Make the plots
plot_nsigma_vs_params(nsig_F1800W, "F1800W", log_ssfr, dust, save_path='/Users/benjamincollins/University/Master/Red_Cardinal/prospector/sample_plots/nsigma_f1800w.png')
plot_nsigma_vs_params(nsig_F2100W, "F2100W", log_ssfr, dust, save_path='/Users/benjamincollins/University/Master/Red_Cardinal/prospector/sample_plots/nsigma_f2100w.png')


Let's have a look at the outliers!

In [None]:

def _report_extremes(label, name, values, ids, use_abs=False):
    """Print the galaxies holding the largest and smallest entry of `values`.

    Parameters
    ----------
    label : str
        Quantity description used in the "Max ...:" / "Min ...:" lines.
    name : str
        Short name used in the message printed when nothing can be ranked.
    values : array-like of float
        One value per galaxy; NaNs are ignored.
    ids : sequence
        Galaxy IDs, aligned with `values`.
    use_abs : bool
        Rank by absolute value (the printed value keeps its sign).

    Returns
    -------
    (idx_max, idx_min) as ints, or (None, None) if there is no non-NaN value.
    """
    values = np.asarray(values, dtype=float)
    # nanargmax/nanargmin raise ValueError on an all-NaN (or empty) input, so guard
    # on "is there any non-NaN value" rather than on the truthiness of the data.
    if values.size == 0 or np.all(np.isnan(values)):
        print(f"All values in {name} are NaN!")
        return None, None

    ranked = np.abs(values) if use_abs else values
    idx_max = int(np.nanargmax(ranked))
    idx_min = int(np.nanargmin(ranked))
    print(f"Max {label}:", ids[idx_max], values[idx_max])
    print(f"Min {label}:", ids[idx_min], values[idx_min])
    return idx_max, idx_min


# --- F1800W extremes (ranked by |N_sigma|) ---
idx_max_1800, idx_min_1800 = _report_extremes("N_sigma in F1800W", "F1800W", nsig_F1800W, gids, use_abs=True)

# --- F2100W extremes (ranked by |N_sigma|) ---
idx_max_2100, idx_min_2100 = _report_extremes("N_sigma in F2100W", "F2100W", nsig_F2100W, gids, use_abs=True)

# --- Dust extremes ---
idx_max_dust, idx_min_dust = _report_extremes("A_V (dust)", "dust", dust, gids)


In [None]:
def _show_extremes(label, extremes):
    """Print the "lowest" and "highest" entries of one get_extremes result, then a blank line."""
    for end in ("Lowest", "Highest"):
        print(f"{end} {label}:", extremes[end.lower()])
    print()


# Three most extreme galaxies per quantity; N_sigma is ranked with abs=True.
extremes_f1800w = get_extremes(nsig_F1800W, gids, n=3, abs=True)
_show_extremes("N_sigma in F1800W", extremes_f1800w)

extremes_f2100w = get_extremes(nsig_F2100W, gids, n=3, abs=True)
_show_extremes("N_sigma in F2100W", extremes_f2100w)

extremes_dust = get_extremes(dust, gids, n=3)
_show_extremes("dust", extremes_dust)

extremes_ssfr = get_extremes(ssfr, gids, n=3)
_show_extremes("sSFR", extremes_ssfr)

def collect_all_extreme_ids(*extremes_dicts):
    """Collect the unique galaxy IDs found at one end of several extremes dicts.

    Each dict maps "lowest" / "highest" to a list of (galaxy_id, value) pairs.
    By default the "highest" entries are collected. A leading string argument is
    also accepted as the key to collect, e.g.
    ``collect_all_extreme_ids("lowest", extremes_dust)``, so that calls written
    for the keyed variant defined further down the notebook work with this
    definition too, whichever cell was run last.

    Returns
    -------
    list
        Unique IDs in first-seen order (deterministic, unlike ``list(set(...))``).
    """
    key = "highest"
    if extremes_dicts and isinstance(extremes_dicts[0], str):
        key, extremes_dicts = extremes_dicts[0], extremes_dicts[1:]

    ids = [gid for d in extremes_dicts for gid, _ in d[key]]
    # dict.fromkeys removes duplicates while preserving insertion order
    return list(dict.fromkeys(ids))



The galaxies at the high end of the $N_\sigma$ distribution in F1800W and F2100W:

In [None]:
# Directories holding the images that plot_extremes combines for each galaxy
base_paths = dict(
    miri="/Users/benjamincollins/University/Master/Red_Cardinal/photometry/vis_data",
    nircam="/Users/benjamincollins/University/Master/Red_Cardinal/NIRCam/F444W_cutouts",
    prospector="/Users/benjamincollins/University/Master/Red_Cardinal/prospector/fits_v3",
)

# Galaxies that are extreme in N_sigma in either of the two reddest bands
extreme_ids = collect_all_extreme_ids(extremes_f1800w, extremes_f2100w)

print("Total unique extreme galaxy IDs:", len(extreme_ids))
print(extreme_ids)

In [None]:

# Two figures per extreme galaxy: first with the fit overplotted, then without it
for gid in extreme_ids:
    for add_fit, suffix in ((True, ""), (False, "_nofit")):
        plot_extremes(
            gid,
            base_paths,
            add_fit=add_fit,
            save_path=f"/Users/benjamincollins/University/Master/Red_Cardinal/prospector/fits_ext/{gid}_high_nsigma{suffix}.png",
        )



In [None]:
# Re-create the no-fit figure for one individual galaxy
plot_extremes(
    11723,
    base_paths,
    add_fit=False,
    save_path="/Users/benjamincollins/University/Master/Red_Cardinal/prospector/fits_ext/11723_high_nsigma_nofit.png",
)


Now let's have a look at our dusty galaxies

In [None]:
def collect_all_extreme_ids(key, *extremes_dicts):
    """Collect the unique galaxy IDs stored under `key` in several extremes dicts.

    Parameters
    ----------
    key : str
        Which end to collect: "lowest" or "highest". For compatibility with the
        key-less variant defined earlier in the notebook, a non-string first
        argument is treated as one of the dicts and "highest" is used as the key.
    *extremes_dicts : dict
        Dicts mapping "lowest" / "highest" to lists of (galaxy_id, value) pairs.

    Returns
    -------
    list
        Unique IDs in first-seen order (deterministic, unlike ``list(set(...))``).
    """
    if not isinstance(key, str):
        extremes_dicts = (key,) + extremes_dicts
        key = "highest"

    ids = [gid for d in extremes_dicts for gid, _ in d[key]]
    # dict.fromkeys removes duplicates while preserving insertion order
    return list(dict.fromkeys(ids))

# Least dusty galaxies first, then the dustiest; for each galaxy one figure with
# the fit overplotted and one without it.
for key, tag in (("lowest", "low_dust"), ("highest", "high_dust")):
    extreme_ids = collect_all_extreme_ids(key, extremes_dust)
    for gid in extreme_ids:
        for add_fit, suffix in ((True, ""), (False, "_nofit")):
            plot_extremes(
                gid,
                base_paths,
                add_fit=add_fit,
                save_path=f"/Users/benjamincollins/University/Master/Red_Cardinal/prospector/fits_ext/{gid}_{tag}{suffix}.png",
            )


Now we check out the sSFR extremes!

In [None]:
def collect_all_extreme_ids(key, *extremes_dicts):
    """Collect the unique galaxy IDs stored under `key` in several extremes dicts.

    Parameters
    ----------
    key : str
        Which end to collect: "lowest" or "highest". For compatibility with the
        key-less variant defined earlier in the notebook, a non-string first
        argument is treated as one of the dicts and "highest" is used as the key.
    *extremes_dicts : dict
        Dicts mapping "lowest" / "highest" to lists of (galaxy_id, value) pairs.

    Returns
    -------
    list
        Unique IDs in first-seen order (deterministic, unlike ``list(set(...))``).
    """
    if not isinstance(key, str):
        extremes_dicts = (key,) + extremes_dicts
        key = "highest"

    ids = [gid for d in extremes_dicts for gid, _ in d[key]]
    # dict.fromkeys removes duplicates while preserving insertion order
    return list(dict.fromkeys(ids))

# Lowest-sSFR galaxies first, then the highest; for each galaxy one figure with
# the fit overplotted and one without it.
for key, tag in (("lowest", "low_ssfr"), ("highest", "high_ssfr")):
    extreme_ids = collect_all_extreme_ids(key, extremes_ssfr)
    for gid in extreme_ids:
        for add_fit, suffix in ((True, ""), (False, "_nofit")):
            plot_extremes(
                gid,
                base_paths,
                add_fit=add_fit,
                save_path=f"/Users/benjamincollins/University/Master/Red_Cardinal/prospector/fits_ext/{gid}_{tag}{suffix}.png",
            )
