In [3]:
import os, sys
sys.path.insert(0, '/global/homes/j/jwp/DC2/gcr-catalogs')
import GCRCatalogs
print(GCRCatalogs.__file__)
from astropy.coordinates import SkyCoord, matching
import astropy.units as u
%load_ext autoreload
%autoreload 2

/global/homes/j/jwp/DC2/gcr-catalogs/GCRCatalogs/__init__.py


In [4]:
import numpy as np
import pandas as pd

### Matching Source and Object catalogs back to truth

Truth matching will only be applied to the objects. This means the sources that did not get matched to objects will not be matched to the truth.

The "truth" is the union of two sets: the galaxies in `proto-dc2_v3.0` (on which Run 1.2i was based), joined to their entries in `dc2_truth_run1.2_static`, and the stars in `dc2_truth_run1.2_static`. The stars are taken from the truth catalog because they are not in the extragalactic catalog.

In [6]:
# Load protoDC2 and pull only the columns we need.
# NOTE: no filters are passed (filters=[]), so the requested columns are
# fetched for the whole catalog, not for a restricted area.
extragal_gcr = GCRCatalogs.load_catalog('proto-dc2_v3.0_test')  # _test skips the md5 check
# 'cosmoDC2_v1.0_image' takes ~14 sec

# id / position / redshift columns
extragal_quantities = ['galaxy_id', 'ra_true', 'dec_true', 'redshift_true']
# bulge columns
extragal_quantities += [
    'size_bulge_true',
    'size_minor_bulge_true',
    'sersic_bulge',
    'ellipticity_1_bulge_true',
    'ellipticity_2_bulge_true',
    'ellipticity_bulge_true',
]
# disk columns
extragal_quantities += [
    'size_disk_true',
    'size_minor_disk_true',
    'sersic_disk',
    'ellipticity_1_disk_true',
    'ellipticity_2_disk_true',
    'ellipticity_disk_true',
]
extragal_quantities += ['bulge_to_total_ratio_i']
# mag_true_*_lsst columns, one per band
extragal_quantities += [
    'mag_true_u_lsst',
    'mag_true_g_lsst',
    'mag_true_r_lsst',
    'mag_true_i_lsst',
    'mag_true_z_lsst',
    'mag_true_Y_lsst',
]
extragal_quantities += ['halo_mass']

galaxies_dict = extragal_gcr.get_quantities(extragal_quantities, filters=[])
# One DataFrame column per fetched quantity, default integer index
galaxies = pd.DataFrame(galaxies_dict)
print(galaxies.shape)

(9211556, 24)


In [7]:
# Look up the corners of tract 4850; they define the RA/Dec box used to select truth rows.
# get_tract_corners comes from the local skymap_utils module; per the recorded output it
# also prints the corners as (RA, Dec) in degrees.
from skymap_utils import get_tract_corners
corners = get_tract_corners(tract_id=4850)

tract 4850 has corners (56.410, -29.783), (54.618, -29.783), (54.631, -28.228), (56.397, -28.228) (RA, Dec deg) and 7 x 7 patches


In [8]:
# Axis-aligned bounding box of the tract corners: column-wise min and max over the corner list
(min_ra, min_dec), (max_ra, max_dec) = np.min(corners, axis=0), np.max(corners, axis=0)
# Padding added on every side of the box when selecting truth rows
buffer = 0.1 # deg

In [9]:
# Load the static truth catalog, keeping only rows inside the tract bounding box
# padded by `buffer` on each side (the cut is passed as native filters).
truth_gcr = GCRCatalogs.load_catalog('dc2_truth_run1.2_static')
truth_quantities = ['object_id', 'sprinkled', 'star', 'agn', 'ra', 'dec', 'redshift']
truth_quantities.extend('ugrizy')  # one single-letter column per band
#print(sorted(truth_gcr.list_all_quantities(include_native=True)))
truth_filters = [
    'ra > %f' %(min_ra - buffer),
    'ra < %f' %(max_ra + buffer),
    'dec > %f' %(min_dec - buffer),
    'dec < %f' %(max_dec + buffer),
]
truth_dict = truth_gcr.get_quantities(truth_quantities, native_filters=truth_filters)
# One DataFrame column per fetched quantity, default integer index
truth_df = pd.DataFrame(truth_dict)

### Join extragalactic catalog with truth catalog
In the case of protoDC2, this step is necessary to rotate the protoDC2 field onto the DC2 field. Only galaxies will survive the join.

In [18]:
# Inner join: keep only rows whose protoDC2 galaxy_id equals a truth object_id
truth_extended = pd.merge(
    galaxies,
    truth_df,
    how='inner',
    left_on='galaxy_id',
    right_on='object_id',
)

In [55]:
# Preview the first rows of the joined galaxy table
truth_extended.head()

Unnamed: 0,halo_mass,sersic_bulge,ellipticity_bulge_true,mag_true_u_lsst,ellipticity_disk_true,size_bulge_true,ellipticity_2_bulge_true,mag_true_z_lsst,galaxy_id,size_minor_disk_true,...,y,sprinkled,ra,u,object_id,g,star,redshift,r,agn
0,350952000000.0,4.0,0.347326,13.402844,0.046173,9.898567,0.052789,10.86164,7,6.225793,...,10.498124,False,54.599659,12.220207,7,11.366055,False,0.007052,10.996866,True
1,116444900000.0,4.0,0.298637,19.124431,0.010436,2.022574,0.256484,17.642028,26,2.108287,...,17.56222,False,54.913981,19.15146,26,18.259135,False,0.012973,17.925473,True
2,19407490000.0,4.0,,19.726181,0.105133,0.0,,17.58602,52,1.940603,...,17.627993,False,54.966244,19.625544,52,18.495268,False,0.013717,17.978307,False
3,30728520000.0,4.0,0.193061,22.939206,0.13734,1.098874,-0.148197,21.373796,60,0.458977,...,21.272098,False,55.077733,22.931867,60,21.894835,False,0.020161,21.543838,False
4,8133354000000.0,4.0,0.210889,20.949763,0.128405,0.923869,-0.035678,19.838191,80,0.393055,...,19.757425,False,54.657801,21.029238,80,20.13263,False,0.023965,19.917222,True


In [19]:
# A few star==True truth rows survive the join with protoDC2; the cause is not understood yet.
# NOTE(review): possibly star object_id values that coincide with a galaxy_id — confirm.
# Drop them so truth_extended holds non-star rows only; print the shape before and after.
print(truth_extended.shape)
truth_extended = truth_extended.loc[truth_extended['star'] == False]
print(truth_extended.shape)

(1155438, 37)


(1155435, 37)

### Save a subset of the truth catalog with just stars

In [16]:
# Independent copy of the truth rows flagged as stars
truth_point = truth_df.loc[truth_df['star'] == True].copy()
print(truth_point.shape)

(34467, 13)


In [30]:
# Stack the galaxy rows (truth_extended) on top of the star rows (truth_point) into a
# single truth table with a fresh 0..N-1 index. Columns that exist only in
# truth_extended are filled with NaN for the star rows.
truth_combined = pd.concat([truth_extended, truth_point], axis=0, sort=False, ignore_index=True)
# Emit the combined shape so that re-running the cell reproduces its recorded output.
print(truth_combined.shape)

(1189902, 37)

In [33]:
# Preview the last rows of the combined table (these come from truth_point, appended last)
truth_combined.tail()

Unnamed: 0,ellipticity_2_bulge_true,ellipticity_1_disk_true,mag_true_u_lsst,ellipticity_bulge_true,size_minor_disk_true,dec_true,size_bulge_true,ellipticity_1_bulge_true,galaxy_id,mag_true_i_lsst,...,sprinkled,ra,u,y,z,object_id,redshift,dec,agn,star
1189897,,,,,,,,,,,...,False,54.863315,15.269224,13.895497,13.89298,459361771,0.0,-29.874452,False,True
1189898,,,,,,,,,,,...,False,54.86116,21.349933,20.213625,20.201078,1568792530,0.0,-29.874606,False,True
1189899,,,,,,,,,,,...,False,54.871137,21.242134,20.014843,20.006028,1568792417,0.0,-29.873105,False,True
1189900,,,,,,,,,,,...,False,55.620697,18.581434,17.477655,17.470917,834836,0.0,-29.592843,False,True
1189901,,,,,,,,,,,...,False,54.537082,15.445417,14.435807,14.427729,834744,0.0,-29.005442,False,True


In [43]:
# Write the combined truth table to disk; index=False leaves the row index out of the file
truth_combined.to_csv('truth_combined.csv', index=False)

In [44]:
# List the column names of the combined truth table
truth_combined.columns.values

array(['ellipticity_2_bulge_true', 'ellipticity_1_disk_true',
       'mag_true_u_lsst', 'ellipticity_bulge_true',
       'size_minor_disk_true', 'dec_true', 'size_bulge_true',
       'ellipticity_1_bulge_true', 'galaxy_id', 'mag_true_i_lsst',
       'redshift_true', 'mag_true_Y_lsst', 'ellipticity_2_disk_true',
       'halo_mass', 'ellipticity_disk_true', 'sersic_disk',
       'mag_true_r_lsst', 'size_disk_true', 'mag_true_z_lsst',
       'size_minor_bulge_true', 'ra_true', 'bulge_to_total_ratio_i',
       'mag_true_g_lsst', 'sersic_bulge', 'i', 'g', 'r', 'sprinkled',
       'ra', 'u', 'y', 'z', 'object_id', 'redshift', 'dec', 'agn', 'star'],
      dtype=object)

### Build sky coordinates for the combined truth catalog (`truth_extended` + `truth_point`) and for the object catalog, in preparation for matching

In [34]:
# Sky positions of every row of the combined truth table (RA/Dec given in degrees)
truth_skyCoord = SkyCoord(ra=truth_combined['ra'].values,
                          dec=truth_combined['dec'].values,
                          unit=(u.deg, u.deg))

# Read the object catalog from disk and build its sky positions the same way
obj = pd.read_csv('obj_opsim.csv')
obj_skyCoord = SkyCoord(ra=obj['ra'].values,
                        dec=obj['dec'].values,
                        unit=(u.deg, u.deg))

In [38]:
# Nearest-neighbour match on the sky of every object against the combined truth table,
# using astropy as in:
# https://github.com/LSSTDESC/DC2-production/blob/881125e14ff0cd5c4ea35b7725273369167cf98a/scripts/merge_source_cat.py#L292

# Separation threshold for accepting a match; defined here, applied in a later cell
matching_radius = 1.0 * u.arcsec
# idx: position in truth_skyCoord of each object's nearest neighbour
# sep2d: on-sky separation to that neighbour; the third return value is not used
idx, sep2d, _ = matching.match_coordinates_sky(matchcoord=obj_skyCoord,
                                               catalogcoord=truth_skyCoord)

In [39]:
def assign_truth_id(idx, sep2d, truth, matching_radius):
    """Return the truth ``object_id`` matched to each object, or -1 where the match failed.

    Parameters
    ----------
    idx : array of int
        For each object, the positional (0-based) row of its nearest neighbour in `truth`.
    sep2d : array-like
        For each object, the separation to that neighbour. Must be comparable
        with `matching_radius` (same length as `idx`).
    truth : pandas.DataFrame
        Truth table with an 'object_id' column. It is not modified.
    matching_radius : scalar or quantity
        Largest separation accepted as a match; a separation strictly greater
        than this counts as a failed match.

    Returns
    -------
    numpy.ndarray of int
        One entry per object: the matched 'object_id', or -1 for a failed match.
    """
    is_fail = sep2d > matching_radius
    # Select the one column first and gather by position. Going through
    # truth.iloc[idx] would copy a len(idx)-row slice of every column in the
    # table just to read 'object_id' from it.
    matched_truth = truth['object_id'].values[idx].astype(int)
    # The gathered array is a fresh copy, so writing the sentinel leaves `truth` untouched.
    matched_truth[is_fail] = -1
    return matched_truth

# Truth object_id for each object (-1 where the nearest neighbour is farther than matching_radius)
truth_ids = assign_truth_id(
    idx=idx,
    sep2d=sep2d,
    truth=truth_combined,
    matching_radius=matching_radius,
)

In [41]:
# Store the matched truth id as a new column on the object table (this modifies obj in place)
obj['truth_id'] = truth_ids

In [49]:
# This many objects had a truth match.
# Unmatched objects carry the sentinel -1, so test against the sentinel itself:
# a `> 0` test would also exclude a valid object_id of 0.
print(truth_ids[truth_ids != -1].shape)

(85049,)

In [80]:
# Attach the truth columns to every matched object. The inner join keeps only objects whose
# truth_id equals some object_id; columns present in both tables get _obs / _truth suffixes.
# NOTE(review): if object_id is not unique in truth_combined, objects are duplicated here — confirm.
obj_master = pd.merge(
    obj,
    truth_combined,
    how='inner',
    left_on='truth_id',
    right_on='object_id',
    suffixes=('_obs', '_truth'),
)

In [81]:
# Write the object+truth table to disk without the row index.
# `index` is a boolean flag: pass False explicitly (as for truth_combined.csv)
# instead of relying on None being treated as false.
obj_master.to_csv('obj_master.csv', index=False)

In [82]:
# Alphabetical list of every column in the merged object+truth table
sorted(obj_master.columns.values)

['Ixx',
 'IxxPSF',
 'IxxPSF_g',
 'IxxPSF_i',
 'IxxPSF_r',
 'IxxPSF_u',
 'IxxPSF_y',
 'IxxPSF_z',
 'Ixx_g',
 'Ixx_i',
 'Ixx_r',
 'Ixx_u',
 'Ixx_y',
 'Ixx_z',
 'Ixy',
 'IxyPSF',
 'IxyPSF_g',
 'IxyPSF_i',
 'IxyPSF_r',
 'IxyPSF_u',
 'IxyPSF_y',
 'IxyPSF_z',
 'Ixy_g',
 'Ixy_i',
 'Ixy_r',
 'Ixy_u',
 'Ixy_y',
 'Ixy_z',
 'Iyy',
 'IyyPSF',
 'IyyPSF_g',
 'IyyPSF_i',
 'IyyPSF_r',
 'IyyPSF_u',
 'IyyPSF_y',
 'IyyPSF_z',
 'Iyy_g',
 'Iyy_i',
 'Iyy_r',
 'Iyy_u',
 'Iyy_y',
 'Iyy_z',
 'PSF_sigma2',
 'agn',
 'airmass',
 'bulge_to_total_ratio_i',
 'cModelFluxErr_g',
 'cModelFluxErr_i',
 'cModelFluxErr_r',
 'cModelFluxErr_u',
 'cModelFluxErr_y',
 'cModelFluxErr_z',
 'cModelFlux_flag_g',
 'cModelFlux_flag_i',
 'cModelFlux_flag_r',
 'cModelFlux_flag_u',
 'cModelFlux_flag_y',
 'cModelFlux_flag_z',
 'cModelFlux_g',
 'cModelFlux_i',
 'cModelFlux_r',
 'cModelFlux_u',
 'cModelFlux_y',
 'cModelFlux_z',
 'dec_obs',
 'dec_true',
 'dec_truth',
 'dist2Moon',
 'ellipticity_1_bulge_true',
 'ellipticity_1_disk_true',
 'e

### Match to Source