In [1]:
import numpy as np
import pandas as pd
import pyarrow.parquet as pq

from fink_science.fast_transient_rate.processor import fast_transient_rate, fast_transient_module
from fink_science.fast_transient_rate import rate_module_output_schema

## Local usage

In [2]:
# Load the locally stored alert sample into a pandas DataFrame.
# NOTE(review): this is an absolute, user-specific path, so the cell only runs
# on a machine where that file exists.
local_path_data = "/spark_mongo_tmp/roman.le-montagner/Doctorat/Alert_analysis/local_alert.parquet"
local_alerts = pd.read_parquet(local_path_data)

In [5]:
# List the columns available in the local alert sample.
local_alerts.columns

Index(['objectId', 'candid', 'fink_class', 'magpsf', 'sigmapsf', 'diffmaglim',
       'fid', 'jd', 'jdstarthist', 'distnr', 'distpsnr1', 'drb', 'nalerthist',
       'DR3Name', 'gcvs', 'vsx', 'cmagpsf', 'csigmapsf', 'cjd', 'cfid',
       'cdiffmaglim'],
      dtype='object')

Call `fast_transient_rate` on the local data.

* Input size: len(local_alerts) = 2,347,634 alerts
* Local performance (`%timeit`): `1min 36s ± 842 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)`

~ 24,455 alerts/second

In [3]:
# Compute the rate columns for every local alert; the result is kept in its own
# frame, separate from the input.
# NOTE(review): 100 is passed positionally with no name — presumably the number
# of samples drawn when estimating the rate uncertainty; confirm against the
# fast_transient_rate signature.
ft_res = fast_transient_rate(local_alerts, 100)



The `fast_transient_rate` function preserves the ordering of the input DataFrame, so `pandas.concat` can be used to append the magnitude-rate results to the input alerts.

In [29]:
# Append the rate columns to the alert frame (column-wise concat, rows matched
# on the index).
# Rate columns left over from a previous run of this cell are dropped first, so
# re-running the cell does not accumulate duplicate columns. On the first run
# the drop is a no-op because none of those columns exist yet.
local_alerts = pd.concat(
    [local_alerts.drop(columns=ft_res.columns, errors="ignore"), ft_res],
    axis=1,
)

In [31]:
# Inspect the alerts together with the newly appended rate columns.
local_alerts

Unnamed: 0,objectId,candid,fink_class,magpsf,sigmapsf,diffmaglim,fid,jd,jdstarthist,distnr,...,cfid,cdiffmaglim,jd_first_real_det,jdstarthist_dt,mag_rate,sigma_rate,lower_rate,upper_rate,delta_time,from_upper
0,ZTF17aacnsuw,2226098711115015020,PulsV*,17.482327,0.049767,19.322489,1,2.459981e+06,2.458074e+06,0.247496,...,"[1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1]","[19.05500030517578, 19.2278995513916, 18.88159...",2.459952e+06,1877.943160,-0.090689,0.036926,-0.156031,-0.055870,28.969607,1.0
1,ZTF18abtymdf,2226099660915015004,Unknown,19.315453,0.187820,19.228329,1,2.459981e+06,2.458362e+06,0.809979,...,"[1, 2, 2, 2, 2, 1, 2, 1]","[19.176599502563477, 19.363800048828125, 19.66...",2.459952e+06,1589.712072,-0.028602,0.035168,-0.097171,0.005483,28.968657,1.0
2,ZTF17aaagdna,2226100135615015004,RRLyr,17.630871,0.067974,19.490292,1,2.459981e+06,2.458319e+06,0.360943,...,"[1, 2, 2, 2, 1, 2, 1]","[19.346200942993164, 18.888099670410156, 19.38...",2.459952e+06,1632.690266,-0.003940,0.003577,-0.009100,0.001446,28.968183,0.0
3,ZTF23aabuyvy,2226102484515015009,Unknown,18.845779,0.125232,19.428043,1,2.459981e+06,2.459981e+06,12.347309,...,"[1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1]","[19.340200424194336, 19.26889991760254, 18.684...",2.459952e+06,-28.967222,-0.051712,0.033515,-0.117434,-0.018123,28.967222,1.0
4,ZTF17aaawdac,2226102961515015000,EB*,18.678907,0.144207,19.349253,1,2.459981e+06,2.458102e+06,0.139220,...,"[1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1]","[19.084400177001953, 18.869199752807617, 19.86...",2.459952e+06,1850.056968,-0.018005,0.009406,-0.030134,-0.004088,28.906563,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2347629,ZTF18abccnkb,2267541261015015010,EB*,17.599257,0.079423,18.545355,2,2.460022e+06,2.458260e+06,0.522566,...,"[2, 2, 2, 2, 2, 2, 2, 2, 2, 2]","[19.41990089416504, 19.304399490356445, 19.166...",2.460021e+06,1761.067245,-3.077182,1.245463,-5.744243,-1.897572,1.009028,1.0
2347630,ZTF18abebuuf,2267541261115010003,EB*,18.324001,0.164355,18.541004,2,2.460022e+06,2.458248e+06,0.270476,...,"[2, 2, 2, 2, 2, 2, 2, 2, 2, 2]","[19.39550018310547, 19.30699920654297, 19.1739...",2.460021e+06,1773.059155,-2.059002,0.935663,-4.047659,-1.144861,1.009028,1.0
2347631,ZTF18abslrxe,2227200611415015007,Unknown,17.417374,0.061571,19.054756,2,2.459982e+06,2.458347e+06,0.067986,...,"[1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, ...","[18.87980079650879, 18.923099517822266, 18.988...",2.459952e+06,1604.717269,-0.087011,0.036688,-0.160453,-0.053043,29.932940,1.0
2347632,ZTF23aabxese,2227200612415015010,Unknown,18.742260,0.122616,19.203909,2,2.459982e+06,2.459982e+06,9.626899,...,"[1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 2]","[19.02039909362793, 19.203500747680664, 20.323...",2.459952e+06,-29.993889,-0.050947,0.033083,-0.107176,-0.016043,29.922245,1.0


## Spark usage

In [38]:
# Read the February and March 2023 science alert partitions (parquet) into a
# Spark DataFrame, with basePath set to the root of the science archive.
df = (
    spark.read
    .option("basePath", "/user/julien.peloton/archive/science/")
    .format("parquet")
    .load(
        [
            "/user/julien.peloton/archive/science/year=2023/month=02",
            "/user/julien.peloton/archive/science/year=2023/month=03",
        ]
    )
)

In [39]:
# Apply the fast transient rate module to the Spark DataFrame.
# NOTE(review): `df` is rebound to the result, so re-running this cell passes the
# already-processed frame back into fast_transient_module.
# NOTE(review): 100 is the same unnamed positional value passed to
# fast_transient_rate in the local run; confirm its meaning against the
# fast_transient_module signature.
df = fast_transient_module(df, 100)

In [44]:
# Columns pulled back from Spark: alert identifiers, the photometry fields
# nested under `candidate`, the cross-match columns, and every column named in
# the rate module's output schema.
requested_cols = [
    "objectId",
    "candid",
    "candidate.magpsf",
    "candidate.sigmapsf",
    "candidate.diffmaglim",
    "candidate.fid",
    "candidate.jd",
    "candidate.jdstarthist",
    "candidate.distnr",
    "candidate.distpsnr1",
    "candidate.drb",
    "nalerthist",
    "DR3Name",
    "gcvs",
    "vsx",
    *rate_module_output_schema.keys(),
]

In [45]:
# Select the requested columns and convert the result to a pandas DataFrame.
# NOTE(review): this reuses the name `local_alerts`, replacing the frame built in
# the local-usage section.
local_alerts = df.select(requested_cols).toPandas()

Call the fast transient rate module (`fast_transient_module`) on the distributed data.

* Input size: len(local_alerts) = 2,347,634 alerts
* cluster configuration: 56 cores, 4 cores per executor, 8GB of RAM per executor
* Cluster performance (job duration reported by the Mesos cluster manager): `15 seconds`

~ 2,795 alerts/second/core

In [46]:
# Inspect the alerts and rate columns computed through Spark.
local_alerts

Unnamed: 0,objectId,candid,magpsf,sigmapsf,diffmaglim,fid,jd,jdstarthist,distnr,distpsnr1,...,gcvs,vsx,jd_first_real_det,jdstarthist_dt,mag_rate,sigma_rate,lower_rate,upper_rate,delta_time,from_upper
0,ZTF17aacnsuw,2226098711115015020,17.482327,0.049767,19.322489,1,2.459981e+06,2.458074e+06,0.247496,0.189533,...,Unknown,HADS,2.459952e+06,1877.943160,-0.084391,0.031064,-0.145034,-0.055715,28.969607,True
1,ZTF18abtymdf,2226099660915015004,19.315453,0.187820,19.228329,1,2.459981e+06,2.458362e+06,0.809979,0.920700,...,Unknown,Unknown,2.459952e+06,1589.712072,-0.027999,0.032916,-0.087669,0.004651,28.968657,True
2,ZTF17aaagdna,2226100135615015004,17.630871,0.067974,19.490292,1,2.459981e+06,2.458319e+06,0.360943,0.285493,...,Unknown,RRAB,2.459952e+06,1632.690266,-0.003419,0.003058,-0.008531,0.001353,28.968183,False
3,ZTF23aabuyvy,2226102484515015009,18.845779,0.125232,19.428043,1,2.459981e+06,2.459981e+06,12.347309,0.077212,...,Unknown,Unknown,2.459952e+06,-28.967222,-0.061084,0.038918,-0.131920,-0.018341,28.967222,True
4,ZTF17aaawdac,2226102961515015000,18.678907,0.144207,19.349253,1,2.459981e+06,2.458102e+06,0.139220,0.207298,...,Unknown,EW,2.459952e+06,1850.056968,-0.017503,0.009791,-0.033926,-0.002278,28.906563,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2347629,ZTF18abccnkb,2267541261015015010,17.599257,0.079423,18.545355,2,2.460022e+06,2.458260e+06,0.522566,0.543567,...,Unknown,EW,2.460021e+06,1761.067245,-3.152817,1.298543,-5.435456,-1.843358,1.009028,True
2347630,ZTF18abebuuf,2267541261115010003,18.324001,0.164355,18.541004,2,2.460022e+06,2.458248e+06,0.270476,0.219090,...,Unknown,EW,2.460021e+06,1773.059155,-2.076063,0.990303,-4.133712,-1.075004,1.009028,True
2347631,ZTF18abslrxe,2227200611415015007,17.417374,0.061571,19.054756,2,2.459982e+06,2.458347e+06,0.067986,0.095887,...,Unknown,RS:,2.459952e+06,1604.717269,-0.095699,0.047241,-0.189128,-0.055139,29.932940,True
2347632,ZTF23aabxese,2227200612415015010,18.742260,0.122616,19.203909,2,2.459982e+06,2.459982e+06,9.626899,9.324074,...,Unknown,Unknown,2.459952e+06,-29.993889,-0.051124,0.039948,-0.115566,-0.016076,29.922245,True


The `mag_rate` and `sigma_rate` values fluctuate between the local and the cluster runs, probably due to float casting and library versions.