## Matching protoDC2, Source, and Object catalogs

In [3]:
import os, sys
# Put this directory at the front of the module search path so that the
# GCRCatalogs import below looks there before any other location.
sys.path.insert(0, '/global/homes/j/jwp/DC2/gcr-catalogs')
import GCRCatalogs

In [4]:
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

### Determine sky area

We'll limit the sky area by tract and patch, rather than by RA and Dec.

In [5]:
import sqlite3
from contextlib import closing

# Read the tract/patch <-> visit/detector mapping table into a DataFrame.
tracts_mapping_path = '/global/cscratch1/sd/desc/DC2/data/Run1.2i/rerun/281118/tracts_mapping.sqlite3'
# closing() guarantees the connection is released even if one of the queries fails
# (sqlite3's own context manager only handles transactions, it does not close).
with closing(sqlite3.connect(tracts_mapping_path)) as conn:
    # Check the table name
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    table_rows = cursor.fetchall()
    if not table_rows:
        # An empty database would otherwise surface as an opaque IndexError below
        raise ValueError("No tables found in %s" % tracts_mapping_path)
    table_name = table_rows[0][0]
    print("Table name: ", table_name)
    # Turn table into Pandas df; the identifier is double-quoted (with embedded
    # quotes doubled) so that unusual table names are still valid SQL.
    quoted_table_name = '"%s"' % table_name.replace('"', '""')
    overlaps = pd.read_sql(sql="SELECT * from %s" % quoted_table_name, con=conn)

Table name:  overlaps


In [6]:
# Show the column names of the overlaps table
overlaps.columns

Index(['id', 'tract', 'patch', 'visit', 'detector', 'filter', 'layer'], dtype='object')

In [7]:
# Preview the first rows of the overlaps table
overlaps.head()

Unnamed: 0,id,tract,patch,visit,detector,filter,layer
0,1,5063,"(0, 4)",230,53,r,
1,2,5063,"(1, 4)",230,53,r,
2,3,5063,"(2, 4)",230,53,r,
3,4,5063,"(0, 5)",230,53,r,
4,5,5063,"(1, 5)",230,53,r,


In [8]:
#overlaps[overlaps['tract'].isin([4849, 4850, 4848])].shape
# Distinct tract ids present in the overlaps table
overlaps['tract'].unique()

array([5063, 5064, 5062, 4849, 4850, 4848, 4851, 4852, 5066, 4429, 4636,
       4637, 4430, 5065, 4640, 4639, 4432, 4433, 4431, 4638])

In [9]:
# Tracts that define the sky area of interest
relevant_tracts = [4850]

# Boolean row mask over the overlaps table, computed once and reused for both columns
in_relevant_tracts = overlaps['tract'].isin(relevant_tracts)

# Distinct visits and detectors that overlap the selected tracts
relevant_visits = overlaps.loc[in_relevant_tracts, 'visit'].unique()
relevant_detectors = overlaps.loc[in_relevant_tracts, 'detector'].unique()

print(relevant_visits.shape, relevant_detectors.shape)

(1235,) (189,)


In [None]:
#overlaps[overlaps['tract'] == 4850]['id']

### Fetch desired quantities from Object catalog

In [10]:
# Load the catalog registered under the name 'dc2_object_run1.2i_tract4850'
obj_gcr = GCRCatalogs.load_catalog('dc2_object_run1.2i_tract4850')

In [15]:

# Print every quantity name (native ones included) that contains 'Ixx'
for col in obj_gcr.list_all_quantities(include_native=True):
    if 'Ixx' not in col:
        continue
    print(col)
    #print(obj_gcr.get_quantity_info(col))


Ixx_y
IxxPSF_g
Ixx_r
IxxPSF_u
Ixx_z
IxxPSF
IxxPSF_r
Ixx
Ixx_u
Ixx_i
Ixx_g
IxxPSF_z
IxxPSF_i
IxxPSF_y


In [16]:
# Band-independent columns requested from the Object catalog
obj_quantities = [
    # HSM source moments
    'ext_shapeHSM_HsmSourceMoments_xx',
    'ext_shapeHSM_HsmSourceMoments_xy',
    'ext_shapeHSM_HsmSourceMoments_yy',
    # HSM PSF moments
    'ext_shapeHSM_HsmPsfMoments_xx',
    'ext_shapeHSM_HsmPsfMoments_xy',
    'ext_shapeHSM_HsmPsfMoments_yy',
    # HSM flags
    'ext_shapeHSM_HsmSourceMoments_flag',
    'ext_shapeHSM_HsmPsfMoments_flag',
    # Positions and second moments
    'x', 'xErr', 'y', 'yErr', 'xy_flag', 'ra', 'dec',
    'Ixx', 'Ixy', 'Iyy',
    'IxxPSF', 'IxyPSF', 'IyyPSF',
    # Classification and identifiers
    'extendedness', 'objectId', 'parentObjectId', 'tract', 'patch',
]

# Column stems that take the band as a trailing label (stem + band)
obj_quantities_prefix = [
    'cModelFlux_', 'psFlux_', 'mag_',
    'cModelFluxErr_', 'psFluxErr_', 'magerr_', 'cModelFlux_flag_', 'psFlux_flag_',
    'Ixx_', 'Ixy_', 'Iyy_',
    'IxxPSF_', 'IxyPSF_', 'IyyPSF_',
]

# Column stems that take the band as a leading label (band + stem)
obj_quantities_suffix = [
    '_ext_photometryKron_KronFlux_instFlux',
    '_ext_photometryKron_KronFlux_flag',
    '_ext_photometryKron_KronFlux_instFluxErr',
    '_ext_photometryKron_KronFlux_radius',
    '_ext_photometryKron_KronFlux_psf_radius',
    '_base_CircularApertureFlux_70_0_instFlux',
    '_base_CircularApertureFlux_70_0_instFluxErr',
    '_coord_ra',
    '_coord_dec',
]

# Expand every stem over the six bands; the stem varies slowest, the band fastest
obj_quantities.extend(
    stem + band for stem in obj_quantities_prefix for band in 'ugrizy'
)
obj_quantities.extend(
    band + stem for stem in obj_quantities_suffix for band in 'ugrizy'
)

In [17]:
#obj_filters = [(filter_on_tract, 'tract'),]
# Empty filter list: every row returned by the catalog reader is kept.
obj_filters = [] # filter not necessary when using the tract4850 reader

In [19]:
# Fetch the requested Object-catalog columns, tabulate them, and save a CSV copy
obj_dict = obj_gcr.get_quantities(obj_quantities, filters=obj_filters)
obj_df = pd.DataFrame(data=obj_dict)
obj_df.to_csv(path_or_buf='obj_tract4850.csv', index=None)

# Distinct object ids found in the fetched table
relevant_object_ids = obj_df.loc[:, 'objectId'].unique()

### Fetch desired quantities from Source catalog

In [14]:
# Load the catalog registered under the name 'dc2_source_run1.2i'
src_gcr = GCRCatalogs.load_catalog('dc2_source_run1.2i')

In [30]:
# Print every quantity name (native ones included) that contains 'visit'
for col in src_gcr.list_all_quantities(include_native=True):
    if 'visit' not in col:
        continue
    print(col)
    #print(src_gcr.get_quantity_info(col))

visit


In [21]:
# Columns requested from the Source catalog, grouped by theme
src_quantities = [
    # Fluxes and magnitude
    'apFlux', 'psFlux', 'mag',
    'apFlux_flag', 'psFlux_flag', 'mag_err',
    'apFluxErr', 'psFluxErr', 'fluxmag0',
    # Second moments of the source and of the PSF
    'Ixx', 'Ixy', 'Iyy',
    'IxxPSF', 'IxyPSF', 'IyyPSF',
    # Positions
    'ra', 'dec', 'x', 'y',
    'xErr', 'yErr', 'xy_flag',
    # Sky background
    'sky', 'skyErr', 'sky_flag',
    # Classification (base... not native)
    'base_ClassificationExtendedness_value', 'extendedness',
    # Identifiers and observation metadata
    'sourceId', 'objectId', 'parentObjectId', 'visit', 'detector', 'filter',
]
# TODO: separate native quantities

In [22]:
def filter_on_detector(detectors):
    """Return a boolean mask flagging entries found in ``relevant_detectors``.

    Parameters
    ----------
    detectors : array-like
        Detector values to test.

    Returns
    -------
    numpy.ndarray of bool
        Same shape as ``detectors``; True where the value is in
        ``relevant_detectors``.
    """
    # Vectorized membership test: avoids one Python-level array scan per element
    return np.isin(detectors, relevant_detectors)

def filter_on_visit(visits):
    """Return a boolean mask flagging entries found in ``relevant_visits``.

    Parameters
    ----------
    visits : array-like
        Visit values to test.

    Returns
    -------
    numpy.ndarray of bool
        Same shape as ``visits``; True where the value is in ``relevant_visits``.
    """
    return np.isin(visits, relevant_visits)

def filter_on_object(object_ids):
    """Return a boolean mask flagging entries found in ``relevant_object_ids``.

    Parameters
    ----------
    object_ids : array-like
        Object id values to test.

    Returns
    -------
    numpy.ndarray of bool
        Same shape as ``object_ids``; True where the value is in
        ``relevant_object_ids``.
    """
    return np.isin(object_ids, relevant_object_ids)

# (callable, quantity name) pairs passed as `filters` to get_quantities
src_filters = [(filter_on_detector, 'detector'), (filter_on_visit, 'visit'), (filter_on_object, 'objectId')]

In [23]:
# takes 4 hr
# Fetch the filtered Source-catalog columns, tabulate them, and save a CSV copy
src_dict = src_gcr.get_quantities(src_quantities, filters=src_filters)
src_df = pd.DataFrame(data=src_dict)
src_df.to_csv(path_or_buf='src_tract4850_obs.csv', index=None)

In [None]:
# Display the list of requested Source-catalog columns
src_quantities