# Join SFD map with a point source catalog

We use LSDB to perform the join.

In [1]:
import os
from pathlib import Path

import lsdb
import numpy as np
import pandas as pd
from hipscat.pixel_math.hipscat_id import HIPSCAT_ID_COLUMN, hipscat_id_to_healpix
from lsdb.core.crossmatch.abstract_crossmatch_algorithm import AbstractCrossmatchAlgorithm

from paths import *

### Load data lazily

Hardcoded path to the test catalog from LSDB - sorry for that!

In [2]:
# Location of the point-source catalog. The default is the LSDB test catalog in the
# author's local checkout; set the SMALL_SKY_ORDER1_PATH environment variable to use
# a copy stored elsewhere.
# NOTE: later cells reference columns suffixed with `_small_sky_order1`, so the
# override should point to a copy of the same catalog.
STARS_PATH = Path(
    os.environ.get(
        'SMALL_SKY_ORDER1_PATH',
        '/Users/hombit/projects/lincc-frameworks/lsdb/tests/data/small_sky_order1',
    )
)
# SFD map catalog read with lsdb.read_hipscat below.
# OUTPUT_DIR is presumably provided by `from paths import *` - verify against paths.py.
SFD_PATH = OUTPUT_DIR / 'sfd'

In [3]:
# Open the point-source catalog stored in HiPSCat format at STARS_PATH.
# The bare name on the last line displays the catalog summary (column dtypes and
# partition count) rather than the data itself.
stars = lsdb.read_hipscat(STARS_PATH)
stars

Unnamed: 0_level_0,id,ra,dec,ra_error,dec_error,Norder,Dir,Npix
npartitions=4,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
,int64,float64,float64,int64,int64,int32,int32,int32
,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...


In [4]:
# Open the SFD map stored as a HiPSCat catalog at SFD_PATH.
# The bare name on the last line displays the catalog summary (column dtypes and
# partition count) rather than the data itself.
sfd = lsdb.read_hipscat(SFD_PATH)
sfd

Unnamed: 0_level_0,_hipscat_index,pixel_Norder,pixel_Npix,ebv,Norder,Dir,Npix
npartitions=3072,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
,uint64,uint8,uint32,float32,int32,int32,int32
,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...
,...,...,...,...,...,...,...


### We are using LSDB's cross-matching interface for joining

In [5]:
class JoinWithContinuousMap(AbstractCrossmatchAlgorithm):
    """Join every left-catalog row with the map pixel that contains it.

    The right catalog is treated as a map whose rows are pixels, each identified by
    the value in its ``HIPSCAT_ID_COLUMN`` column. A left row is matched to the last
    pixel whose index value is less than or equal to the row's own index.

    Assumptions (not verified here - confirm against the map producer):
    the right ``HIPSCAT_ID_COLUMN`` values are sorted in ascending order, as
    ``np.searchsorted`` requires, and each value is the smallest index covered by
    its pixel.
    """

    # Name of the distance column added to the output; it is always filled with 0.0.
    DISTANCE_COLUMN_NAME = '_DIST'

    def crossmatch(self) -> pd.DataFrame:
        """Match each row of ``self.left`` with one row of ``self.right``.

        Returns
        -------
        pd.DataFrame
            One row per left row, containing the suffixed left and right columns
            plus ``DISTANCE_COLUMN_NAME`` set to 0.0, indexed by the left index.

        Raises
        ------
        AssertionError
            If some left index is smaller than every right pixel index, i.e. no
            pixel starts at or before it.
        """
        # Initial implementation, to be re-written with linear search.
        # side='right' returns the number of pixels whose index is <= the source
        # index, so subtracting one selects the last pixel that starts at or before
        # the source. The default side='left' would select the *previous* pixel for a
        # source sitting exactly on a pixel's first index.
        idx = np.searchsorted(self.right[HIPSCAT_ID_COLUMN], self.left.index, side='right') - 1

        # np.searchsorted output must be between 0 and N,
        # so we are checking -1 case only
        assert np.all(idx >= 0), 'some left rows precede the first pixel of the right partition'

        # The return values are discarded, so these calls appear to rename in place.
        self._rename_columns_with_suffix(self.left, self.suffixes[0])
        self._rename_columns_with_suffix(self.right, self.suffixes[1])

        # Move the left index into a regular column so both parts share a positional
        # index and can be concatenated side by side.
        left_join_part = self.left.reset_index()
        right_join_part = self.right.iloc[idx].reset_index(drop=True)

        out = pd.concat([left_join_part, right_join_part], axis=1)
        out[self.DISTANCE_COLUMN_NAME] = 0.0

        # Restore the left index; this relies on it being named HIPSCAT_ID_COLUMN.
        return out.set_index(HIPSCAT_ID_COLUMN)

In [6]:
# Build the join with the custom algorithm first, then evaluate it with .compute().
joined_lazy = stars.crossmatch(sfd, algorithm=JoinWithContinuousMap)
result = joined_lazy.compute()
result

Unnamed: 0_level_0,id_small_sky_order1,ra_small_sky_order1,dec_small_sky_order1,ra_error_small_sky_order1,dec_error_small_sky_order1,Norder_small_sky_order1,Dir_small_sky_order1,Npix_small_sky_order1,_hipscat_index_sfd,pixel_Norder_sfd,pixel_Npix_sfd,ebv_sfd,Norder_sfd,Dir_sfd,Npix_sfd,_DIST
_hipscat_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
12749688880727326720,707,308.5,-69.5,0,0,1,0,44,12749688878638563328,14,2968518268,0.052311,4,0,2830,0.0
12751184493818150912,792,320.5,-69.5,0,0,1,0,44,12751184493625212928,14,2968866493,0.036569,4,0,2831,0.0
12753202806647685120,723,315.5,-68.5,0,0,1,0,44,12753202806131785728,14,2969336418,0.043449,4,0,2831,0.0
12753202806647685121,811,315.5,-68.5,0,0,1,0,44,12753202806131785728,14,2969336418,0.043449,4,0,2831,0.0
12770681119980912640,826,335.5,-69.5,0,0,1,0,44,12770681119708282880,14,2973405905,0.026551,4,0,2835,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13591216801265483776,791,312.5,-28.5,0,0,1,0,47,13591216797494804480,14,3164451755,0.085061,4,0,3017,0.0
13596001812279721984,824,305.5,-28.5,0,0,1,0,47,13596001811969343488,14,3165565853,0.060051,4,0,3018,0.0
13598131468743213056,702,310.5,-27.5,0,0,1,0,47,13598131467208097792,14,3166061702,0.071358,4,0,3019,0.0
13601023174257934336,767,314.5,-29.5,0,0,1,0,47,13601023174199214080,14,3166734980,0.083808,4,0,3020,0.0


### Validation

First, we check that both HiPSCat indexes (the star's and the matched SFD pixel's) are consistent with the SFD pixel's (order, index) pair.

In [7]:
# Both the matched pixel's own index and the star's index must convert to the
# matched pixel number at the pixel's order.
sfd_pixel_order = result['pixel_Norder_sfd']
sfd_pixel_number = result['pixel_Npix_sfd']
for hipscat_ids in (result['_hipscat_index_sfd'], result.index):
    np.testing.assert_array_equal(
        hipscat_id_to_healpix(hipscat_ids, sfd_pixel_order),
        sfd_pixel_number,
    )

Check that the SFD map values are close enough to the ones from the `dustmaps` module.
The relative difference must be well below 16%.

In [8]:
# Validate
# astropy and dustmaps are used only by this check.
from astropy.coordinates import SkyCoord
from dustmaps.sfd import SFDQuery

# Tolerance taken from the accompanying text: the relative difference between the
# joined map values and the dustmaps values must stay below 16%.
MAX_RELATIVE_DIFF = 0.16

sfd_query = SFDQuery(INPUT_DIR)
coord = SkyCoord(ra=result['ra_small_sky_order1'], dec=result['dec_small_sky_order1'], unit='deg')
dustmaps_sfd_values = sfd_query(coord)

# Relative difference, normalized by the larger of the two values for each row.
diff = (
    np.abs(result['ebv_sfd'] - dustmaps_sfd_values)
    / np.where(result['ebv_sfd'] > dustmaps_sfd_values, result['ebv_sfd'], dustmaps_sfd_values)
)
max_diff = np.max(diff)

# Enforce the tolerance instead of relying on a visual check of the displayed number.
assert max_diff < MAX_RELATIVE_DIFF, (
    f'max relative difference {max_diff} is not below {MAX_RELATIVE_DIFF}'
)
max_diff

0.0072447546