**Andy Tzanidakis**

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import astropy.units as u
%matplotlib inline
%config InlineBackend.figure_format = "retina"
from matplotlib import rcParams
rcParams['savefig.dpi'] = 550
rcParams['font.size'] = 20
plt.rc('font', family='serif')

import lsdb
from lsdb import lsdb_client
client = lsdb_client(dask_on_ray=True, num_workers=8)


2023-11-27 08:54:56,029	INFO worker.py:1507 -- Calling ray.init() again after it has already been called.


In [2]:
# HiPSCat catalogs: Gaia and ZTF DR14
gaia = lsdb.read_hipscat("/data3/epyc/projects3/ivoa_demo/gaia/catalog")
ztf = lsdb.read_hipscat("/data3/epyc/data3/hipscat/catalogs/ztf_axs/ztf_dr14")

# Loading the ZTF source table takes about a minute, since it creates a HEALPix alignment on load
ztf_sources = lsdb.read_hipscat("/data3/epyc/data3/hipscat/catalogs/ztf_axs/ztf_source")

## Task 1

- Cone search Gaia + ZTF FG dwarf sources with classprob_dsc_combmod_star_gaia > 0.5

- Condition sources such that: 
    1. `mag[catflags]` has been applied (i.e., measurements with bad `catflags` are removed)
    2. Compute biweight location
    3. Query/cut sources based on biweight location 

In [5]:
%%time
# Build the Task 1 sample: Gaia sources inside a cone centred on (ra=31, dec=3),
# crossmatched against the ZTF DR14 catalog, then filtered by the query string.
# Cuts applied in the query:
#   - nobs_g_ztf_dr14 > 100 and nobs_r_ztf_dr14 > 100
#   - parallax_gaia > 0 and parallax_over_error_gaia > 5
#   - 5380 < teff_gspphot_gaia < 7220 and 4.5 < logg_gspphot_gaia < 4.72
#     (the "FG dwarf" selection named in the task description)
#   - classprob_dsc_combmod_star_gaia > 0.5
# NOTE(review): the units of `radius` are not stated in this notebook; confirm
# against the lsdb version in use.
# The trailing .compute() materializes the result, which is later handed to
# lsdb.from_dataframe.
_sample = gaia.cone_search(
    ra=31,
    dec=3,
    radius=1,
).crossmatch(
    ztf
).query(
    "nobs_g_ztf_dr14 > 100 and nobs_r_ztf_dr14 > 100 and \
    parallax_gaia > 0 and parallax_over_error_gaia > 5 and \
    teff_gspphot_gaia > 5380 and teff_gspphot_gaia < 7220 and logg_gspphot_gaia > 4.5 and logg_gspphot_gaia < 4.72 and classprob_dsc_combmod_star_gaia > 0.5"
).compute()

[36m(dask:cone_filter-aed2e003-59ef-4e4d-a7ee-8de1e4d56cc0 pid=76399)[0m   data_frame["_CONE_SEP"] = df_separations


CPU times: user 566 ms, sys: 94.4 ms, total: 660 ms
Wall time: 9.53 s


In [22]:
# Wrap the computed sample as an lsdb catalog again (positions taken from the
# Gaia ra/dec columns) so that it can be joined against the ZTF source catalog.
# NOTE(review): the order/threshold values are kept as the author chose them;
# see the lsdb.from_dataframe documentation for their exact meaning.
_sample_hips = lsdb.from_dataframe(
    _sample, 
    lowest_order=5, 
    highest_order=8, 
    set_hipscat_index=False, 
    ra_column="ra_gaia", 
    dec_column="dec_gaia", 
    threshold=1_000_000
)

# Attach the rows of ztf_sources to each sample object by matching the PS1
# object id, then materialize the joined table with .compute().
_sample_sources = _sample_hips.join(
    ztf_sources, left_on="ps1_objid_ztf_dr14", right_on="ps1_objid"
).compute()

In [23]:
def _band_light_curve(one_object, band_name, bad_catflag=-32768):
    """Return (mjd, mag, magerr) for a single band of one object's rows.

    Rows whose ``catflags`` equals ``bad_catflag`` are dropped; this is the
    cut the notebook describes as removing spurious measurements.
    """
    band_rows = one_object.query(f'band == "{band_name}"')
    keep = band_rows["catflags"] != bad_catflag
    return band_rows["mjd"][keep], band_rows["mag"][keep], band_rows["magerr"][keep]


def fetch_lc(hips_id, table=None, band='r'):
    """Fetch the light curve of one object for a given band.

    Parameters
    ----------
    hips_id : int
        Value of ``_hipscat_index`` identifying the object in ``table``.
    table : pandas.DataFrame, optional
        Source table indexed by ``_hipscat_index`` with ``band``, ``catflags``,
        ``mjd``, ``mag`` and ``magerr`` columns. When omitted, the notebook
        global ``_sample_sources`` is looked up at call time, so re-running
        the join cell is picked up without re-defining this function.
    band : str
        One of ``'g'``, ``'r'``, ``'i'`` or ``'all'`` (default ``'r'``).

    Returns
    -------
    tuple
        ``(mjd, mag, magerr)`` for a single band, or the nine-element tuple
        ``(g_mjd, g_mag, g_magerr, r_mjd, r_mag, r_magerr, i_mjd, i_mag,
        i_magerr)`` when ``band='all'``. The catflags cut is applied to every
        band, including ``'i'`` and ``'all'``.

    Raises
    ------
    ValueError
        If ``band`` is not one of the supported values.
    """
    if band not in ('g', 'r', 'i', 'all'):
        # Fail loudly instead of silently returning None for an unknown band.
        raise ValueError(f"band must be 'g', 'r', 'i' or 'all', got {band!r}")

    if table is None:
        # Resolved here rather than in the signature: a default argument would
        # capture whatever object existed when the def cell was executed.
        table = _sample_sources

    one_object = table.query(f"_hipscat_index == {hips_id}")

    if band == 'all':
        g_curve = _band_light_curve(one_object, 'g')
        r_curve = _band_light_curve(one_object, 'r')
        i_curve = _band_light_curve(one_object, 'i')
        return (*g_curve, *r_curve, *i_curve)

    return _band_light_curve(one_object, band)

In [24]:
%%time

# Hard-coded _hipscat_index of one example object (not drawn at random at run time)
rnd = 5029289713001824256
lc_r = fetch_lc(rnd, band='r')

CPU times: user 57.2 ms, sys: 9.31 ms, total: 66.5 ms
Wall time: 63.7 ms


In [28]:
# Querying each light curve one at a time is too slow: at ~63 ms per object
# (the wall time measured above), 1,000,000 objects would take about 17.5 hours.
(63*u.ms * (1_000_000)).to(u.hr)

<Quantity 17.5 h>

In [None]:
# let's try writing a custom function

def custom_function(df):
    """Summarize the r-band scatter of a light-curve table as a one-row frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the ``band``, ``catflags``, ``mag`` and ``ps1_objid``
        columns. Presumably holds the rows of a single object, since only the
        first ``ps1_objid`` value is kept -- confirm against the caller.

    Returns
    -------
    pandas.DataFrame
        A single row with ``ps1_objid`` (first value in ``df``) and
        ``sigma_cat``, the ``np.std`` of the r-band magnitudes that survive
        the catflags cut.
    """
    df_rband = df.query('band == "r"')

    # remove spurious measurements
    good = df_rband['catflags'] != -32768
    mag_good = df_rband["mag"][good]

    return pd.DataFrame(
        {
            # Read the first id directly instead of converting the whole
            # column to a Python list just to take its first element.
            "ps1_objid": [df["ps1_objid"].iloc[0]],
            "sigma_cat": [np.std(mag_good)],
        }
    )