In [71]:
import pandas as pd
import healpy as hp
import numpy as np



In [67]:
# Catalog partition to load. The path is assigned in this cell, before its
# first use, so the read works when the notebook is run top-to-bottom on a
# fresh kernel (previously `file_name` was undefined at this point).
file_name = "/data3/epyc/data3/hipscat/catalogs/ztf_dr14_explode/Norder1/Npix33/source_hop.parquet"

# Read the whole parquet file into a DataFrame using the pyarrow engine.
data = pd.read_parquet(file_name, engine='pyarrow')


In [66]:
# HEALPix order used for the pixel portion of the hipscat ID.
HIPSCAT_ID_HEALPIX_ORDER = 19


def compute_hipscat_id(ra_values, dec_values):
    """Compute the hipscat ID field.

    This index is defined as a 64-bit integer which has two parts:
        - healpix pixel (at order 19)
        - incrementing counter (within same healpix, for uniqueness)

    |------------------------------------------|-------------------|
    |<-----    healpixel at order 19    ------>|<--   counter   -->|

    This provides us with an increasing index, that will not overlap
    between spatially partitioned data files.

    Rows that land in the same healpix pixel are numbered 0, 1, 2, ... in the
    order in which they appear in the input (the sort used is stable).

    Args:
        ra_values: sized array-like of right ascension values; passed to
            ``hp.ang2pix`` as the longitude argument with ``lonlat=True``.
        dec_values: sized array-like of declination values; passed to
            ``hp.ang2pix`` as the latitude argument. Must have the same
            length as ``ra_values``.

    Returns:
        ``np.uint64`` array with one ID per input position, in input order.

    Raises:
        ValueError: if ``ra_values`` and ``dec_values`` differ in length.
    """
    if len(ra_values) != len(dec_values):
        raise ValueError("ra and dec arrays should have the same length")

    ## Construct the bit-shifted healpix segment
    value_count = len(ra_values)
    mapped_pixels = hp.ang2pix(
        2**HIPSCAT_ID_HEALPIX_ORDER, ra_values, dec_values, nest=True, lonlat=True
    )
    ## The pixel is moved into the high bits; the low
    ## 64 - (4 + 2 * order) bits (22 at order 19) are left for the counter.
    ## NOTE: nothing checks that the per-pixel counter fits in those low bits.
    shifted_pixels = mapped_pixels.astype(np.uint64) << (
        64 - (4 + 2 * HIPSCAT_ID_HEALPIX_ORDER)
    )

    ## We sort to put pixels next to each other that will need to be counted.
    ## This simplifies the counter logic, as we can subtract the index where
    ## we first see the pixel value from the current index to get the offset counter.
    ## A stable sort keeps rows of the same pixel in their input order, so the
    ## counter assignment is deterministic.
    sort_index = np.argsort(shifted_pixels, kind="stable")
    shifted_pixels = shifted_pixels[sort_index]
    ## np.unique always returns (values, first-occurrence indexes, inverses),
    ## independent of the order in which the keyword flags are written.
    _, unique_indexes, unique_inverses = np.unique(
        shifted_pixels, return_index=True, return_inverse=True
    )

    ## Construct the counter: position in the sorted array minus the position
    ## at which this row's pixel value first appears.
    unique_indexes = unique_indexes.astype(np.uint64)
    boring_number_index = np.arange(value_count, dtype=np.uint64)
    offset_counter = boring_number_index - unique_indexes[unique_inverses]
    shifted_pixels = shifted_pixels + offset_counter

    ## Map back to the original, unsorted, values
    unsort_index = np.argsort(sort_index)
    return shifted_pixels[unsort_index]

In [73]:
# Input partition and the name of the index column to create.
file_name = "/data3/epyc/data3/hipscat/catalogs/ztf_dr14_explode/Norder1/Npix33/source_hop.parquet"
new_column = "_hipscat_id"

# Load the partition into memory.
data = pd.read_parquet(file_name, engine="pyarrow")

# Compute one id per row from the ra/dec columns, attach it as a column,
# promote it to the index and order the rows by it.
hipscat_ids = compute_hipscat_id(data["ra"].values, data["dec"].values)
data = (
    data.assign(**{new_column: hipscat_ids})
    .set_index(new_column)
    .sort_index()
)

# Quick look at the first five rows of the re-indexed frame.
print(data.head(5))

# Write the re-indexed frame out as a new parquet file.
data.to_parquet("/data3/epyc/data3/hipscat/catalogs/ztf_dr14_explode/Norder1/Npix33/source_hipscat_id.parquet")

                             ps1_objid         ra        dec  catflags  \
_hipscat_id                                                              
9745676850465603584  71180673097035613  67.309784 -30.678856         0   
9745677245711646720  71190672942652378  67.294304 -30.673319         0   
9745677245711646721  71190672942652378  67.294304 -30.673319         0   
9745677245711646722  71190672942652378  67.294304 -30.673319         1   
9745677245711646723  71190672942652378  67.294304 -30.673319         0   

                     fieldID        mag   maggerr          mjd  rcID band  
_hipscat_id                                                                
9745676850465603584     1248  20.618536  0.186703  59205.25167     4    r  
9745677245711646720     1248  15.936892  0.019306  59176.31437     4    r  
9745677245711646721     1248  15.899614  0.019212  59233.18631     4    r  
9745677245711646722     1248  15.885571  0.019178  59205.25121     4    r  
9745677245711646723     1

In [35]:
# Scratch check: binary representation of 12 * 4**20 shifted left by 19 bits
# (the value is 63 binary digits long).
# NOTE(review): presumably exploring how many bits a pixel count at order 20
# would occupy once shifted — confirm the intent.
bin((12 * 4 ** 20) << 19)


'0b110000000000000000000000000000000000000000000000000000000000000'