In [None]:
### Imports

import numpy as np
from astropy.io import fits
from astropy.stats import sigma_clipped_stats
from scipy.ndimage import label, maximum_position, distance_transform_edt
import matplotlib.pyplot as plt
import os

## Helper Function

In [None]:
### Function to filter through potential sources and assess whether the candidate is a local maximum and has a sufficient radial profile

def is_star_candidate(image, x, y, X_full, Y_full, box_size=7, inner_radius=3, outer_radius=7, min_contrast=2.0):
    """Decide whether the pixel nearest to (x, y) passes two point-source tests.

    Test 1 (local maximum): the pixel must equal the NaN-ignoring maximum of
    the ``box_size`` window centred on it (window clipped to the image).

    Test 2 (radial contrast): the pixel must exceed
    ``median + min_contrast * std`` of the pixels whose distance ``r`` from it
    satisfies ``inner_radius <= r < outer_radius`` (NaNs ignored in both
    statistics).

    Parameters
    ----------
    image : 2-D array
        Image in which the candidate is evaluated.
    x, y : number
        Candidate column / row; each is rounded to the nearest integer pixel.
    X_full, Y_full : 2-D arrays
        Column and row coordinate grids with the same shape as ``image``
        (the caller in this file builds them with ``np.indices``).
    box_size : int
        Side of the local-maximum window; ``box_size // 2`` pixels are taken
        on each side of the candidate.
    inner_radius, outer_radius : number
        Annulus limits in pixels (inner inclusive, outer exclusive).
    min_contrast : float
        Number of annulus standard deviations the peak must exceed the
        annulus median by.

    Returns
    -------
    bool
        ``False`` if either test fails or the annulus holds fewer than two
        pixels; otherwise the result of the contrast comparison.
    """
    n_rows, n_cols = image.shape[0], image.shape[1]
    col = int(round(x))
    row = int(round(y))

    # --- Test 1: candidate must be the brightest pixel of its neighbourhood ---
    reach = box_size // 2
    neighbourhood = image[max(0, row - reach):min(n_rows, row + reach + 1),
                          max(0, col - reach):min(n_cols, col + reach + 1)]
    if np.nanmax(neighbourhood) != image[row, col]:
        return False

    # --- Test 2: candidate must stand out against the surrounding annulus ---
    # Work only inside the square that encloses the outer radius.
    window = (slice(max(0, row - outer_radius), min(n_rows, row + outer_radius + 1)),
              slice(max(0, col - outer_radius), min(n_cols, col + outer_radius + 1)))
    radius = np.sqrt((X_full[window] - col)**2 + (Y_full[window] - row)**2)
    in_annulus = (radius >= inner_radius) & (radius < outer_radius)
    if np.sum(in_annulus) < 2:
        return False

    ring_values = image[window][in_annulus]
    ring_median = np.nanmedian(ring_values)
    ring_std = np.nanstd(ring_values)

    return image[row, col] > ring_median + min_contrast * ring_std

## Main Source Subtraction Function

In [None]:
def _psf_stamp_slices(x_int, y_int, psf_shape, image_shape):
    """Return matching (image, PSF) slice pairs for a PSF centred on a pixel.

    The PSF array element ``(psf_shape[0] // 2, psf_shape[1] // 2)`` is placed
    on image pixel ``(y_int, x_int)``.  Where the PSF footprint would fall
    outside the image, both the image slice and the PSF slice are trimmed by
    the same amount on the same side, so the two cut-outs always have equal
    shape and stay registered to each other.
    """
    psf_h, psf_w = psf_shape[0], psf_shape[1]
    # Image coordinates of PSF element (0, 0); may be negative near an edge.
    y0 = y_int - psf_h // 2
    x0 = x_int - psf_w // 2
    ymin, ymax = max(0, y0), min(image_shape[0], y0 + psf_h)
    xmin, xmax = max(0, x0), min(image_shape[1], x0 + psf_w)
    image_slices = (slice(ymin, ymax), slice(xmin, xmax))
    psf_slices = (slice(ymin - y0, ymax - y0), slice(xmin - x0, xmax - x0))
    return image_slices, psf_slices


def psf_subtraction_cycle(detection_image, working_image, cycle_label, psf_data, output_dir,
                          mosaic_header, permanent_mask, mask_distance, min_mask_sep=3,
                          catalog_filename=None, threshold_sigma=3.0, mag_zeropoint=20.787):
    """Detect sources in one image and subtract a scaled PSF for each of them.

    Steps:
      1. Sigma-clipped statistics of ``detection_image`` define a detection
         threshold ``median + threshold_sigma * std``.
      2. Connected regions above the threshold are labelled and the brightest
         pixel of each region becomes a candidate.
      3. Candidates closer than ``min_mask_sep`` to a masked pixel (according
         to ``mask_distance``), candidates on NaN pixels, and candidates
         rejected by ``is_star_candidate`` are dropped.
      4. Remaining sources are processed from brightest to faintest: the PSF
         is least-squares scaled to ``working_image`` around the source and
         subtracted in place.
      5. The residual image is written to
         ``<output_dir>/final_psf_subtracted_cycle<cycle_label>.fits`` (NaNs
         replaced by 0) and, if requested, one line per source is appended to
         the catalog.

    Parameters
    ----------
    detection_image : 2-D array
        Image used for thresholding and candidate selection.
    working_image : 2-D array
        Image the PSFs are fitted to and subtracted from; modified in place.
    cycle_label : int
        Cycle number used in log messages, the output file name and the
        catalog (formatted with an integer format code).
    psf_data : 2-D array
        PSF model; its centre is taken to be element
        ``(shape[0] // 2, shape[1] // 2)``.
    output_dir : str
        Directory for the residual FITS file.
    mosaic_header : FITS header
        Header attached to the residual FITS file.
    permanent_mask : array
        Accepted for interface compatibility; not used inside this function.
    mask_distance : 2-D array
        Per-pixel distance to the nearest masked pixel.
    min_mask_sep : number
        Minimum allowed ``mask_distance`` for a candidate.
    catalog_filename : str or None
        Text catalog to append to; a header is written if the file does not
        exist yet.  ``None`` disables catalog output.
    threshold_sigma : float
        Detection threshold in units of the sigma-clipped standard deviation.
    mag_zeropoint : float
        Zero-point used as ``mag = -2.5 * log10(scale) + mag_zeropoint``;
        should be set for the mosaic in use.

    Returns
    -------
    (working_image, new_subtracted_sources)
        The (in-place updated) residual image and the list of ``(x, y)``
        positions subtracted in this cycle.
    """
    print(f"\n--- Starting Cycle {cycle_label} ---")

    # Calculate Background Statistics
    _, global_median, global_std = sigma_clipped_stats(detection_image, sigma=3.0)
    threshold = global_median + threshold_sigma * global_std
    print(f"Cycle {cycle_label}: Global median = {global_median:.3f}, std = {global_std:.3f}, threshold = {threshold:.3f}")

    # Identify Potential Sources and Positions
    bmask = detection_image > threshold
    labeled, num_regions = label(bmask)
    print(f"Cycle {cycle_label}: Number of connected regions above threshold: {num_regions}")

    if num_regions > 0:
        initial_positions = maximum_position(detection_image, labels=labeled,
                                              index=np.arange(1, num_regions + 1))
    else:
        # Nothing above threshold: skip the lookup instead of passing an empty index.
        initial_positions = []

    # Filter Potential Sources and Create Final Source List
    source_list = []
    Y_full, X_full = np.indices(detection_image.shape)
    for pos in initial_positions:
        y, x = pos

        if mask_distance[y, x] < min_mask_sep:
            continue

        brightness = detection_image[y, x]
        if np.isnan(brightness):
            continue

        if is_star_candidate(detection_image, x, y, X_full, Y_full, box_size=1, inner_radius=1, outer_radius=2, min_contrast=0.5):
            source_list.append((x, y, brightness))

    # Sort Final List by Decreasing Brightness
    source_list.sort(key=lambda s: s[2], reverse=True)

    print(f"Cycle {cycle_label}: {len(source_list)} candidate sources passed.")

    # Initialize List to Keep Track of Subtracted Sources
    new_subtracted_sources = []

    # Establish Header for Catalog - can be set by user
    if catalog_filename is not None and not os.path.exists(catalog_filename):
        with open(catalog_filename, "w") as f:
            f.write("# Cycle  SourceIndex   X(pix)   Y(pix)   PeakDN   ScalingFactor   ABmag\n")
            f.write("# -----------------------------------------------------------------------\n")

    # Catalog lines are collected here and written with a single open() below.
    catalog_rows = []

    # Iterate Over Each Source
    for i, (x, y, bright) in enumerate(source_list, start=1):
        x_int, y_int = int(round(x)), int(round(y))

        # Extract the image region and the *matching* part of the PSF model.
        # Trimming both with the same offsets keeps the PSF centre on the
        # source even when the footprint is clipped at the left/bottom edge.
        image_slices, psf_slices = _psf_stamp_slices(
            x_int, y_int, psf_data.shape, working_image.shape)
        region = working_image[image_slices]
        psf_stamp = psf_data[psf_slices]

        # Calculate PSF Scaling Factor (least squares over usable pixels only).
        # Pixels that are NaN in the image must be excluded from the
        # denominator too, otherwise the scale is biased low near masks.
        valid = np.isfinite(region) & np.isfinite(psf_stamp)
        psf_power = np.sum(psf_stamp[valid] ** 2)
        if psf_power > 0:
            s = np.sum(region[valid] * psf_stamp[valid]) / psf_power
        else:
            # No usable overlap: record the source but subtract nothing.
            s = 0.0

        # Compute AB Magnitude
        if bright > 0 and np.isfinite(s) and s > 0:
            peak_mag = -2.5 * np.log10(s) + mag_zeropoint
        else:
            # Dummy value when the brightness or the fitted scale is not positive
            peak_mag = 99.999

        # Queue Statistics for Catalog
        catalog_rows.append(
            f"{cycle_label:<5d} {i:<12d} {x:9.1f} {y:8.1f} {bright:9.2f} {s:14.3f} {peak_mag:9.3f}\n")

        # PSF Subtraction
        working_image[image_slices] -= s * psf_stamp

        # Save Subtracted Source to List
        new_subtracted_sources.append((x, y))

    # Save Statistics to Catalog
    if catalog_filename is not None and catalog_rows:
        with open(catalog_filename, "a") as f:
            f.writelines(catalog_rows)

    # Save Updated Residual Image After Each Iteration
    out_file = os.path.join(output_dir, f'final_psf_subtracted_cycle{cycle_label}.fits')
    final_display = np.nan_to_num(working_image, nan=0)
    fits.PrimaryHDU(final_display, header=mosaic_header).writeto(out_file, overwrite=True)
    print(f"Cycle {cycle_label}: Saved PSF-subtracted image as {out_file}")

    # Write Total Number of Sources Subtracted in this Cycle to Catalog
    if catalog_filename is not None:
        with open(catalog_filename, "a") as f:
            f.write(f"# Cycle {cycle_label}: {len(new_subtracted_sources)} sources subtracted\n")

    return working_image, new_subtracted_sources

## Main Iterative Workflow

In [None]:
# Paths to be Set by User
mosaic_file = 'mosaic_file'
psf_file = 'psf_file'
output_dir = 'output_directory'
os.makedirs(output_dir, exist_ok=True)

# Establish Catalog
# NOTE(review): the catalog is written as plain text even though the name ends in ".tex" - confirm the extension.
catalog_filename = os.path.join(output_dir, "source_catalog.tex")

# Start every run with a fresh catalog. Rows are appended during the run and the
# header is only written when the file is absent, so a catalog left over from a
# previous run would otherwise accumulate duplicate entries.
if os.path.exists(catalog_filename):
    os.remove(catalog_filename)

# Load Data
with fits.open(mosaic_file) as hdu:
    data = np.copy(hdu[0].data)
    mosaic_header = hdu[0].header

with fits.open(psf_file) as hdu_psf:
    psf_data = np.copy(hdu_psf[0].data)

# The workflow below assumes a 2-D image in the primary HDU; fail early with a clear message otherwise
if data.ndim != 2:
    raise ValueError(
        f"Expected a 2-D image in the primary HDU of {mosaic_file!r}, got shape {data.shape}")

# Masked pixels are flagged with NaN later on, which requires a floating-point array
if not np.issubdtype(data.dtype, np.floating):
    data = data.astype(float)

# Print Mosaic Dimensions
ny, nx = data.shape
print(f"Mosaic dimensions: {nx} x {ny} pixels")

# Set 'mosaic' Variable (same array object as 'data'; later masking edits both)
mosaic = data

### Routine to Mask Out Saturated Pixels
#
# Pixels above 'saturation_threshold' are grouped into connected regions. Each
# region is characterised by its brightest pixel and by the largest distance of
# any region pixel from that peak (the "effective radius"). A circular mask
# centred on the peak is then set to NaN: the small radius for regions whose
# effective radius is at most 1.5x the average, the large radius otherwise.

saturation_threshold = 60.0              # saturation threshold; to be adjusted by user
small_initial_mask_radius = 160          # small mask radius; to be adjusted by user
large_initial_mask_radius = 220          # large mask radius; to be adjusted by user

# Define Global Background
global_bg = np.nanmedian(mosaic)

# Identify Saturated Regions
saturated_pixels = mosaic > saturation_threshold
labeled_saturated, num_sat = label(saturated_pixels)
print(f"Number of saturated regions detected: {num_sat}")

# Collect the coordinates of all saturated pixels once and group them by region
# label, instead of scanning the full label image once per region. The stable
# sort keeps the row-major pixel order inside each region.
sat_y, sat_x = np.nonzero(labeled_saturated)
sat_labels = labeled_saturated[sat_y, sat_x]
sat_order = np.argsort(sat_labels, kind="stable")
sat_y, sat_x, sat_labels = sat_y[sat_order], sat_x[sat_order], sat_labels[sat_order]
# group_edges[k-1]:group_edges[k] is the slice of pixels belonging to label k
group_edges = np.searchsorted(sat_labels, np.arange(1, num_sat + 2))

# Iterate Through Saturated Regions and Collect Statistics
region_info = []
for region_label in range(1, num_sat+1):
    y_indices = sat_y[group_edges[region_label - 1]:group_edges[region_label]]
    x_indices = sat_x[group_edges[region_label - 1]:group_edges[region_label]]
    if len(y_indices) == 0:
        continue
    region_values = mosaic[y_indices, x_indices]
    peak_index = np.argmax(region_values)
    y_peak, x_peak = y_indices[peak_index], x_indices[peak_index]

    distances_region = np.sqrt((x_indices - x_peak)**2 + (y_indices - y_peak)**2)
    effective_radius = np.max(distances_region)

    x_min_box, x_max_box = np.min(x_indices), np.max(x_indices)
    y_min_box, y_max_box = np.min(y_indices), np.max(y_indices)
    width = x_max_box - x_min_box + 1
    height = y_max_box - y_min_box + 1
    aspect_ratio = width / height if height != 0 else 1.0
    peak_flux = np.nanmax(region_values)

    region_info.append((region_label, y_peak, x_peak, effective_radius, aspect_ratio, peak_flux))

# Calculate Average Effective Radius of Saturated Regions
if region_info:
    avg_effective_radius = np.mean([info[3] for info in region_info])
else:
    avg_effective_radius = 0
print(f"Average effective radius = {avg_effective_radius:.1f} pixels")

# Set Mask Size for Each Region Based on Effective Radius
n_mask_rows, n_mask_cols = mosaic.shape
for (region_label, y_peak, x_peak, effective_radius, aspect_ratio, peak_flux) in region_info:
    if effective_radius <= avg_effective_radius * 1.5:
        r_final = small_initial_mask_radius
    else:
        r_final = large_initial_mask_radius

    # Only pixels inside the square enclosing the disk can be masked, so the
    # distances are evaluated in that window rather than over the whole mosaic.
    reach = int(np.ceil(r_final))
    win_y0, win_y1 = max(0, y_peak - reach), min(n_mask_rows, y_peak + reach + 1)
    win_x0, win_x1 = max(0, x_peak - reach), min(n_mask_cols, x_peak + reach + 1)
    win_yy, win_xx = np.ogrid[win_y0:win_y1, win_x0:win_x1]
    disk = np.sqrt((win_xx - x_peak)**2 + (win_yy - y_peak)**2) <= r_final
    # Basic slicing returns a view, so this assignment edits 'mosaic' in place
    mosaic[win_y0:win_y1, win_x0:win_x1][disk] = np.nan

permanent_mask = np.isnan(mosaic)

# Sets Mask Separation Distance - Only Sources At Least the 'min_mask_sep' Distance from Masked Regions Will be Considered
mask_distance = distance_transform_edt(~permanent_mask)
min_mask_sep = 2

# Make Copies of Mosaic and Masked Image; Save Masked Image to Output Directory
original_mosaic = mosaic.copy()
masked_image = original_mosaic.copy()
masked_image[permanent_mask] = 0
masked_image_file = os.path.join(output_dir, "masked_image.fits")
fits.PrimaryHDU(masked_image, header=mosaic_header).writeto(masked_image_file, overwrite=True)
print(f"Masked image saved as {masked_image_file}")

### Iterative Cycle Set Up
#
# Each cycle detects sources on the current residual image (with previously
# handled positions blanked out), subtracts their PSFs, and then flags a few
# pixels around every new source so the same spot is not detected again.

ncycles = 4   # Number of cycles: set by user

cumulative_working = mosaic.copy()
cumulative_exclusion_mask = np.zeros_like(mosaic, dtype=bool)
all_new_sources = []

# Iterate Through All Cycles
for cycle in range(1, ncycles+1):
    print(f"\n========== Starting Iterative Cycle {cycle} ==========")

    # Detection image = current residual with excluded pixels set to NaN
    detection_image = cumulative_working.copy()
    detection_image[cumulative_exclusion_mask] = np.nan

    # Threshold above background: 5 sigma in the first cycle (set by user),
    # optionally lower (3 sigma) in all later cycles
    tsig = 5.0 if cycle == 1 else 3.0

    # PSF Subtraction Function
    cumulative_working, new_sources = psf_subtraction_cycle(
        detection_image, cumulative_working, cycle, psf_data, output_dir,
        mosaic_header, permanent_mask, mask_distance, min_mask_sep,
        catalog_filename, threshold_sigma=tsig)
    all_new_sources += new_sources

    # Update Exclusion Mask; Prevents Re-Subtraction
    box_half = 1 # set size for each source
    for (x, y) in new_sources:
        src_col = int(round(x))
        src_row = int(round(y))

        # Box of +/- box_half pixels around the source, clipped to the mosaic
        box_rows = slice(max(0, src_row - box_half), min(mosaic.shape[0], src_row + box_half + 1))
        box_cols = slice(max(0, src_col - box_half), min(mosaic.shape[1], src_col + box_half + 1))
        box = cumulative_working[box_rows, box_cols]

        box_flat = box.flatten()
        usable = np.flatnonzero(~np.isnan(box_flat))
        if usable.size == 0:
            # Nothing but NaNs (or an empty box): no pixel to exclude
            continue

        # Flat indices of the non-NaN residual pixels, brightest first
        brightest_first = usable[np.argsort(box_flat[usable])[::-1]]

        # Flag the (up to) three brightest residual pixels of the box
        top_rows, top_cols = np.unravel_index(brightest_first[:3], box.shape)
        cumulative_exclusion_mask[box_rows.start + top_rows, box_cols.start + top_cols] = True

# Recalculate Total Number of Subtracted Sources; Ensures No Duplicates
unique_sources = {(round(x, 1), round(y, 1)) for (x, y) in all_new_sources}
with open(catalog_filename, "a") as f:
    f.write(f"# Total unique sources: {len(unique_sources)}\n")

print(f"Catalog saved as {catalog_filename}")


The above Source Subtraction and Source Counts Code was developed by Emily McCallum as part of her Applied Mathematics Senior Thesis at Harvard College. Latest update: 27 Mar 2025