In [1]:
import json, os, torch, sys
# allows us to import from the custom configs directory w/o affecting deepdisc library imports
sys.path.insert(0, '/u/yse2/deepdisc/configs')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import matplotlib.gridspec as gridspec
import matplotlib.cm as cm
import matplotlib.colors as colors
from matplotlib.gridspec import GridSpec
import seaborn as sns
import FoFCatalogMatching
import pycocotools.mask as mask_util
from multiprocessing import Pool
from functools import partial
# for unrec blend
from astropy.coordinates import search_around_sky, SkyCoord
from astropy.visualization import make_lupton_rgb
from astropy.wcs import WCS
import astropy.units as u
from astropy.table import Table
import pycocotools.mask as mask_util
# detectron2 and deepdisc
from detectron2.config import LazyConfig
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.structures import Instances, Boxes
from deepdisc.astrodet.visualizer import Visualizer, ColorMode
from deepdisc.data_format.register_data import register_data_set
from custom.mappers import FileNameWCSMapper

In [2]:
# --- Input data: annotation file and per-test-set catalogs ---
root_dir = os.path.expanduser('~/lsst_data/')
folder = 'annotations_lvl5'
test_data_fn = f'{root_dir}{folder}/test_8k.json'
test_cats_dir = f'{root_dir}test_cats_lvl5/test_8k/'

# --- Run outputs: everything for a run lives under <root_run_dir><run_name>/ ---
root_run_dir = os.path.expanduser('~/lsst_runs/')
run_name = 'lsst5_30k_4h200_bs192_ep50'
run_dir = f'{root_run_dir}{run_name}/'

# One row per image in the test annotation file.
test_data = pd.read_json(test_data_fn)

print(f"Loaded test data from {test_data_fn} with {len(test_data)} images.")
print(f"Run name: {run_name} and run dir: {run_dir}")

Loaded test data from /u/yse2/lsst_data/annotations_lvl5/test_8k.json with 8571 images.
Run name: lsst5_30k_4h200_bs192_ep50 and run dir: /u/yse2/lsst_runs/lsst5_30k_4h200_bs192_ep50/


In [3]:
cfg_file = os.path.expanduser("~/deepdisc/configs/solo/swin_lsst_job.py")
cfg = LazyConfig.load(cfg_file) # using a Swin Transformer
# Copy every entry of the optional MISC section to the top level of the config.
for key in cfg.get("MISC", dict()).keys():
    cfg[key] = cfg.MISC[key]

cfg.DATASETS.TEST = "test"
cfg.dataloader.augs = None # no augs for test set since we want preds on OG images
cfg.dataloader.test.mapper = FileNameWCSMapper # setting test DataLoader's mapper so that filename gets added to each sample
print(f"Config loaded successfully!")
print(f"Registering test dataset from: {test_data_fn}")

# Drop any previous registration of this name so the cell can be re-run.
# The two catalogs are checked independently: a bare `try/except: pass` around
# both removals would skip the metadata removal whenever the dataset itself was
# not registered, and would also hide unrelated errors.
if cfg.DATASETS.TEST in DatasetCatalog.list():
    DatasetCatalog.remove(cfg.DATASETS.TEST)
if cfg.DATASETS.TEST in MetadataCatalog.list():
    MetadataCatalog.remove(cfg.DATASETS.TEST)

custom_colors = [
    (0, 255, 0),    # green for galaxies
    (0, 0, 255),    # blue for stars
]
astrotest_metadata = register_data_set(
    cfg.DATASETS.TEST, test_data_fn, thing_classes=cfg.metadata.classes, thing_colors=custom_colors
)

# Thresholds that identify which saved prediction file is loaded later on
# (they are embedded in the prediction file name).
test_score_thresh = 0.25
nms_thresh = 0.5
print(f"Dataset registered successfully!")
print(f"Test Score Threshold: {test_score_thresh}")
print(f"NMS Threshold: {nms_thresh}")

Config loaded successfully!
Registering training dataset from: /u/yse2/lsst_data/annotations_lvl5/test_8k.json
Dataset registered successfully!
Test Score Threshold: 0.25
NMS Threshold: 0.5


In [4]:
# Offsets (in magnitudes) that are added to a magnitude limit when selecting the truth catalog.
buffers = [1, 2]
# Named magnitude limits; the key selects which limit the analysis uses.
mag_limits = dict(
    power_law=26.07,
    gold=25.3,
    nominal=26.42,
)

In [5]:
# Magnitude limit used to select the truth catalog: the 'gold' limit plus the first buffer.
truth_mag_limit = mag_limits['gold'] + buffers[0]
# The truth catalog file name embeds that limit formatted with two decimals.
truth_fn = f'{test_cats_dir}test_truth_cat_maglim_{truth_mag_limit:.2f}.parquet'
# Alternative source (disabled): the full truth catalog stored under the run directory.
# truth_fn = f'/u/yse2/lsst_runs/{run_name}/test_cats/full_test_truth_cat.parquet'
print(f'Loading LSST truth catalog from: {truth_fn}')
lsst_truth_cat = pd.read_parquet(truth_fn)

Loading LSST truth catalog from: /u/yse2/lsst_data/test_cats_lvl5/test_8k/test_truth_cat_maglim_26.30.parquet


In [6]:
# The prediction file name encodes the score and NMS thresholds set earlier.
pred_fn = f'{run_dir}preds/pred_with_mag_s{test_score_thresh}_n{nms_thresh}.json'
with open(pred_fn) as pred_file:
    dd_det = json.load(pred_file)
# Tabular view of the raw JSON predictions.
dd_det_cat = pd.DataFrame(dd_det)

In [7]:
# Detection catalog stored next to the truth catalog in test_cats_dir.
# NOTE(review): presumably the LSST pipeline detections for the same test images — confirm.
lsst_det_cat = pd.read_json(f'{test_cats_dir}test_det_cat.json')

In [12]:
# skip here to re-load saved results (but make sure to set truth_mag_limit correctly)
# Every saved table lives in run_dir and is keyed by linking length (arcsec, as a string).
# NOTE: these file names embed truth_mag_limit unformatted (e.g. "26.3"), unlike the
# truth catalog file name, which uses two decimals.
analysis = {}
counts = {}
all_matches = {}
for ll in ['0.5', '1.0']:
    suffix = f'{ll}_{truth_mag_limit}.parquet'
    # All four tables are read from run_dir so they follow root_run_dir / run_name
    # together instead of one of them being rebuilt from the home directory.
    analysis[ll] = pd.read_parquet(f'{run_dir}analysis{suffix}')
    counts[ll] = pd.read_parquet(f'{run_dir}grp_class{suffix}')
    all_matches[ll] = {
        'dd': pd.read_parquet(f'{run_dir}obj_matches_dd{suffix}'),
        'lsst': pd.read_parquet(f'{run_dir}obj_matches_lsst{suffix}'),
    }

# Object-Level Detection Completeness
Now that we have all object-level match records, we can calculate detection completeness
metrics. Object-level completeness (recall) is the fraction of individual truth objects
that were successfully matched to a detection.

In [13]:
def calculate_object_completeness(object_matches, prefix='dd'):
    """
    Calculate object-level detection completeness (recall).

    A truth object counts as detected when at least one of its match records
    has ``matched`` set, so the result does not depend on the row order of
    ``object_matches``.

    Args:
        object_matches: DataFrame with object-level match records. Must have
            the columns 'truth_row_idx' (-1 marks a spurious detection) and
            'matched'.
        prefix: 'dd' or 'lsst'. Accepted for a uniform call signature; not
            used in the calculation.

    Returns:
        dict: 'total_truth', 'matched_truth', 'unmatched_truth' (ints) and
        'completeness' (float, 0.0 when there are no truth objects).
    """
    # Filter to truth objects only (exclude spurious detections where truth_row_idx == -1)
    truth_rows = object_matches[object_matches['truth_row_idx'] != -1]
    # Each truth object should appear exactly once. If it appears more than once,
    # sorting matched records first makes drop_duplicates keep a matched record
    # whenever one exists, instead of whichever row happened to come first.
    truth_objects_unique = (
        truth_rows
        .sort_values('matched', ascending=False)
        .drop_duplicates(subset='truth_row_idx')
    )

    # Count matched vs total
    total_truth = len(truth_objects_unique)
    matched_truth = int(truth_objects_unique['matched'].sum())
    unmatched_truth = total_truth - matched_truth

    completeness = matched_truth / total_truth if total_truth > 0 else 0.0

    return {
        'total_truth': total_truth,
        'matched_truth': matched_truth,
        'unmatched_truth': unmatched_truth,
        'completeness': completeness
    }

In [14]:
print("OBJECT-LEVEL DETECTION COMPLETENESS")
completeness_results = {}

# (match-table key, section heading) for each pipeline, in report order.
completeness_sections = (
    ('dd', "--- DeepDISC Completeness ---"),
    ('lsst', "--- LSST Pipeline Completeness ---"),
)

for ll in ['0.5', '1.0']:
    print(f"\n{'='*60}")
    print(f"Linking Length {ll}\"")
    print(f"{'='*60}\n")

    completeness_results[ll] = {}

    # Same four-line summary for each pipeline.
    for prefix, heading in completeness_sections:
        print(heading)
        stats = calculate_object_completeness(all_matches[ll][prefix], prefix)
        completeness_results[ll][prefix] = stats

        print(f"Total truth objects:     {stats['total_truth']:,}")
        print(f"Matched truth objects:   {stats['matched_truth']:,}")
        print(f"Unmatched truth objects: {stats['unmatched_truth']:,}")
        print(f"Completeness (Recall):   {stats['completeness']:.2%}\n")

    # Comparison: positive means DeepDISC recovered a larger fraction of truth objects.
    dd_stats = completeness_results[ll]['dd']
    lsst_stats = completeness_results[ll]['lsst']
    diff = dd_stats['completeness'] - lsst_stats['completeness']
    print(f"--- Comparison ---")
    print(f"DeepDISC advantage: {diff:+.2%} ({diff*100:+.1f} percentage points)\n")

print(f"{'='*80}\n")

OBJECT-LEVEL DETECTION COMPLETENESS

Linking Length 0.5"

--- DeepDISC Completeness ---
Total truth objects:     224,192
Matched truth objects:   182,507
Unmatched truth objects: 41,685
Completeness (Recall):   81.41%

--- LSST Pipeline Completeness ---
Total truth objects:     224,192
Matched truth objects:   185,678
Unmatched truth objects: 38,514
Completeness (Recall):   82.82%

--- Comparison ---
DeepDISC advantage: -1.41% (-1.4 percentage points)


Linking Length 1.0"

--- DeepDISC Completeness ---
Total truth objects:     224,192
Matched truth objects:   197,391
Unmatched truth objects: 26,801
Completeness (Recall):   88.05%

--- LSST Pipeline Completeness ---
Total truth objects:     224,192
Matched truth objects:   187,125
Unmatched truth objects: 37,067
Completeness (Recall):   83.47%

--- Comparison ---
DeepDISC advantage: +4.58% (+4.6 percentage points)




# Completeness by Magnitude
Calculate completeness separately in bins of truth $i$-band magnitude (`mag_i`) to understand how detection performance varies with object magnitude.

In [16]:
def calculate_comp_by_mag(object_matches, truth_catalog, prefix='dd', mag_bins=(14, 22, 23, 24, 25, 26, 27, 28)):
    """
    Calculate completeness stratified by magnitude bins.

    A truth object counts as detected when at least one of its match records
    has ``matched`` set. Truth objects whose ``mag_i`` is missing or lies
    outside the outermost bin edges fall in no bin and are left out of the
    result.

    Args:
        object_matches: DataFrame with object-level match records. Must have
            the columns 'truth_row_idx' (-1 marks a spurious detection) and
            'matched'.
        truth_catalog: Truth catalog with a 'mag_i' column; 'truth_row_idx'
            values are looked up in its index.
        prefix: 'dd' or 'lsst'. Accepted for a uniform call signature; not
            used in the calculation.
        mag_bins: Bin edges for magnitude stratification (any sequence).

    Returns:
        DataFrame with one row per non-empty bin and the columns 'mag_bin',
        'n_total', 'n_matched', 'completeness'. The columns are present even
        when no bin is populated.
    """
    # Work on a private list so a tuple or list of edges is handled the same way.
    mag_bins = list(mag_bins)
    bin_labels = [f"{lo:.0f}-{hi:.0f}" for lo, hi in zip(mag_bins[:-1], mag_bins[1:])]

    # Filter to truth objects only
    truth_rows = object_matches[object_matches['truth_row_idx'] != -1]
    # One row per truth object. Matched records are sorted first so that a truth
    # object with several records is kept as matched whenever any record is matched.
    truth_matches_unique = (
        truth_rows
        .sort_values('matched', ascending=False)
        .drop_duplicates(subset='truth_row_idx')
    )
    # Join with truth catalog to get magnitudes
    truth_with_mags = truth_matches_unique.merge(
        truth_catalog[['mag_i']],
        left_on='truth_row_idx',
        right_index=True,
        how='left'
    )
    # Create magnitude bins (right-closed; the first bin also includes its lower edge)
    truth_with_mags['mag_bin'] = pd.cut(
        truth_with_mags['mag_i'],
        bins=mag_bins,
        labels=bin_labels,
        include_lowest=True
    )
    # Calculate completeness per bin, skipping bins with no truth objects
    completeness_by_bin = []
    for mag_bin in bin_labels:
        matched_in_bin = truth_with_mags.loc[truth_with_mags['mag_bin'] == mag_bin, 'matched']
        n_total = len(matched_in_bin)
        if n_total == 0:
            continue
        n_matched = int(matched_in_bin.sum())
        completeness_by_bin.append({
            'mag_bin': mag_bin,
            'n_total': n_total,
            'n_matched': n_matched,
            'completeness': n_matched / n_total
        })

    return pd.DataFrame(
        completeness_by_bin,
        columns=['mag_bin', 'n_total', 'n_matched', 'completeness']
    )

In [18]:
print(f"\n{'='*80}")
print("COMPLETENESS BY MAGNITUDE")
print(f"{'='*80}\n")
mag_comp = {}

# (match-table key, section heading) for each pipeline, in report order.
mag_sections = (
    ('dd', "--- DeepDISC Completeness by Magnitude ---"),
    ('lsst', "\n--- LSST Pipeline Completeness by Magnitude ---"),
)

for ll in ['0.5', '1.0']:
    print(f"\n{'='*60}")
    print(f"Linking Length {ll}\"")
    print(f"{'='*60}\n")

    mag_comp[ll] = {}
    # Per-bin completeness table for each pipeline, printed one bin per line.
    for prefix, heading in mag_sections:
        print(heading)
        by_mag = calculate_comp_by_mag(all_matches[ll][prefix], lsst_truth_cat, prefix)
        mag_comp[ll][prefix] = by_mag

        for _, row in by_mag.iterrows():
            print(f"mag_i {row['mag_bin']}: {row['completeness']:.2%} ({row['n_total']:,} objects)")

    # Keep the per-pipeline tables available under their usual names.
    dd_mag = mag_comp[ll]['dd']
    lsst_mag = mag_comp[ll]['lsst']


COMPLETENESS BY MAGNITUDE


Linking Length 0.5"

--- DeepDISC Completeness by Magnitude ---
mag_i 14-22: 61.86% (10,125 objects)
mag_i 22-23: 75.27% (10,548 objects)
mag_i 23-24: 80.86% (20,843 objects)
mag_i 24-25: 84.91% (44,782 objects)
mag_i 25-26: 86.45% (92,364 objects)
mag_i 26-27: 73.86% (45,463 objects)

--- LSST Pipeline Completeness by Magnitude ---
mag_i 14-22: 97.06% (10,125 objects)
mag_i 22-23: 96.10% (10,548 objects)
mag_i 23-24: 94.03% (20,843 objects)
mag_i 24-25: 90.11% (44,782 objects)
mag_i 25-26: 81.93% (92,364 objects)
mag_i 26-27: 66.10% (45,463 objects)

Linking Length 1.0"

--- DeepDISC Completeness by Magnitude ---
mag_i 14-22: 72.64% (10,125 objects)
mag_i 22-23: 86.20% (10,548 objects)
mag_i 23-24: 91.35% (20,843 objects)
mag_i 24-25: 93.87% (44,782 objects)
mag_i 25-26: 91.70% (92,364 objects)
mag_i 26-27: 77.33% (45,463 objects)

--- LSST Pipeline Completeness by Magnitude ---
mag_i 14-22: 97.20% (10,125 objects)
mag_i 22-23: 96.16% (10,548 objects)
mag_

# Completeness by Scenario
Calculate completeness separately for isolated systems (groups with one truth object) and blended systems (groups with two or more truth objects), then break the results down by each group's classification outcome.

In [19]:
def _scenario_stats(truth_rows):
    """Return total/matched counts and completeness for a set of per-truth-object rows."""
    n_total = len(truth_rows)
    n_matched = int(truth_rows['matched'].sum())
    return {
        'n_total': n_total,
        'n_matched': n_matched,
        # Same convention as the overall completeness: 0.0 when there is nothing to count.
        'completeness': n_matched / n_total if n_total > 0 else 0.0
    }


def calculate_comp_by_scenario(object_matches, group_classifications, prefix='dd'):
    """
    Calculate completeness separately for isolated vs blended systems.

    A truth object counts as detected when at least one of its match records
    has ``matched`` set. The per-outcome entries are computed over all truth
    objects whose group has that outcome; they are not restricted to groups
    with two or more truth objects.

    Args:
        object_matches: DataFrame with object-level match records. Must have
            the columns 'truth_row_idx' (-1 marks a spurious detection),
            'matched' and 'group_id'.
        group_classifications: DataFrame with group-level classifications.
            Must have the columns '<prefix>_final' and 'n_truth'; 'group_id'
            values are looked up in its index.
        prefix: 'dd' or 'lsst'; selects the '<prefix>_final' outcome column.

    Returns:
        dict: Maps a scenario name to {'n_total', 'n_matched', 'completeness'}.
        'overall' is always present; 'isolated', 'blended' and each outcome
        name appear only when at least one truth object falls in them.
    """
    final_col = f'{prefix}_final'
    # Filter to truth objects only
    truth_rows = object_matches[object_matches['truth_row_idx'] != -1]
    # One row per truth object. Matched records are sorted first so that a truth
    # object with several records is kept as matched whenever any record is matched.
    truth_matches_unique = (
        truth_rows
        .sort_values('matched', ascending=False)
        .drop_duplicates(subset='truth_row_idx')
    )
    # Join with group classifications
    matches_with_groups = truth_matches_unique.merge(
        group_classifications[[final_col, 'n_truth']],
        left_on='group_id',
        right_index=True,
        how='left'
    )

    # Overall (reported even when there are no truth objects)
    results = {'overall': _scenario_stats(matches_with_groups)}

    # Isolated (1 truth object in the group) and blended (2+ truth objects)
    scenario_masks = [
        ('isolated', matches_with_groups['n_truth'] == 1),
        ('blended', matches_with_groups['n_truth'] >= 2),
    ]
    # Break down by the group's classification outcome
    blend_outcomes = ['resolved_blend', 'partial_deblend', 'unrec_blend',
                      'shredded', 'unrec_blend_spurious']
    scenario_masks.extend(
        (outcome, matches_with_groups[final_col] == outcome)
        for outcome in blend_outcomes
    )

    for scenario, mask in scenario_masks:
        if mask.any():
            results[scenario] = _scenario_stats(matches_with_groups[mask])

    return results

In [21]:
print("COMPLETENESS BY SCENARIO")
scenario_comp = {}

# (match-table key, section heading) for each pipeline, in report order.
scenario_sections = (
    ('dd', "--- DeepDISC Completeness by Scenario ---"),
    ('lsst', "\n--- LSST Pipeline Completeness by Scenario ---"),
)

for ll, counts_df in counts.items():
    print(f"Linking Length {ll}\"")
    scenario_comp[ll] = {}
    # Same group classifications for both pipelines; only the outcome column differs.
    for prefix, heading in scenario_sections:
        print(heading)
        by_scenario = calculate_comp_by_scenario(all_matches[ll][prefix], counts_df, prefix)
        scenario_comp[ll][prefix] = by_scenario

        for scenario, stats in by_scenario.items():
            print(f"{scenario:20s}: {stats['completeness']:6.2%} ({stats['n_total']:,} truth objects)")

    # Keep the per-pipeline results available under their usual names.
    dd_scenario = scenario_comp[ll]['dd']
    lsst_scenario = scenario_comp[ll]['lsst']
    print()
print(f"{'='*80}\n")

COMPLETENESS BY SCENARIO
Linking Length 0.5"
--- DeepDISC Completeness by Scenario ---
overall             : 81.41% (224,192 truth objects)
isolated            : 83.50% (212,002 truth objects)
blended             : 45.04% (12,190 truth objects)
resolved_blend      : 100.00% (332 truth objects)
partial_deblend     : 65.22% (46 truth objects)
unrec_blend         : 49.39% (10,334 truth objects)
shredded            : 100.00% (1,831 truth objects)
unrec_blend_spurious: 50.00% (12 truth objects)

--- LSST Pipeline Completeness by Scenario ---
overall             : 82.82% (224,192 truth objects)
isolated            : 84.86% (212,002 truth objects)
blended             : 47.37% (12,190 truth objects)
resolved_blend      : 100.00% (4 truth objects)
unrec_blend         : 49.31% (11,703 truth objects)
shredded            : 100.00% (34 truth objects)

Linking Length 1.0"
--- DeepDISC Completeness by Scenario ---
overall             : 88.05% (224,192 truth objects)
isolated            : 92.15% (192,