In [9]:
import sqlalchemy as sa
import pandas as pd

import json
import logging
import requests

from sqlalchemy import inspect
from sqlalchemy.sql import text

In [10]:
def get_database_engine(environment: str):
    if environment == 'production':
        url = 'https://raw.githubusercontent.com/alercebroker/usecases/master/alercereaduser_v4.json'
        params = requests.get(url).json()['params']
        engine = sa.create_engine(
            f"postgresql+psycopg2://{params['user']}:{params['password']}@{params['host']}/{params['dbname']}"
        )
    
    elif environment == 'staging':
        with open('alerceread_db_staging.json', 'r', encoding='utf-8') as f:
            params = json.load(f)
        engine = sa.create_engine(
            f"postgresql+psycopg2://{params['user']}:{params['password']}@{params['host']}:{params['port']}/{params['dbname']}"
        )
    else:
        raise ValueError(f'Environment "{environment}" not defined')
        
    inspector = inspect(engine)
    tables = inspector.get_table_names()
    print('Available Tables:\n', tables)
    return engine

In [11]:
ENVIRONMENT = "production"
engine = get_database_engine(environment=ENVIRONMENT)
engine.begin()

Available Tables:
 ['outlier', 'feature_version', 'gaia_ztf', 'alembic_version', 'ss_ztf', 'allwise', 'non_detection', 'step', 'taxonomy', 'feature', 'pipeline', 'lc_classifier_top', 'lc_classifier_periodic', 'lc_classifier_transient', 'magstat', 'lc_classifier', 'stamp_classifier', 'dataquality', 'object', 'lc_classifier_stochastic', 'lc_classifier_beta', 'forced_photometry', 'lc_classifier_bhrf_forced_phot_childs', 'reference', 'ps1_ztf', 'xmatch', 'detection', 'probability']


<contextlib._GeneratorContextManager at 0x7fa660a62c20>

In [19]:
import glob

unique_oids = [oid.split('/')[-1].split('.')[0] for oid in glob.glob('db_production/data/*') if oid.find('detections_log') == -1]
oids = [f"'{oid}'" for oid in unique_oids]
len(unique_oids)

99

In [13]:
ao_list = []
for oid_used in unique_oids[:1]:
    ao_list.append(pd.read_pickle(f'db_production/data/{oid_used}.pkl'))

In [14]:
len(ao_list)

1

In [15]:
from lc_classifier.features.preprocess.ztf import (
    ZTFLightcurvePreprocessor,
)
from lc_classifier.features.composites.ztf import ZTFFeatureExtractor

lightcurve_preprocessor = ZTFLightcurvePreprocessor(drop_bogus=False)

for i in range(len(ao_list)):
    oid = ao_list[i].detections['oid'].values[0]
    lightcurve_preprocessor.preprocess_single_object(ao_list[i])
    feature_extractor = ZTFFeatureExtractor()
    feature_extractor.compute_features_single_object(ao_list[i])
    ao_list[i].features['oid'] = oid

In [16]:
ao_list[0].features

Unnamed: 0,name,value,fid,sid,version,oid
0,g-r_mean,0.102719,"g,r",ZTF,1.0.0,ZTF17aacoobq
1,g-r_max,0.124357,"g,r",ZTF,1.0.0,ZTF17aacoobq
2,g-r_mean_corr,0.435117,"g,r",ZTF,1.0.0,ZTF17aacoobq
3,g-r_max_corr,0.236002,"g,r",ZTF,1.0.0,ZTF17aacoobq
0,W1-W2,-0.092000,,ZTF,1.0.0,ZTF17aacoobq
...,...,...,...,...,...,...
3,mean_chinr,0.479045,"g,r",ZTF,1.0.0,ZTF17aacoobq
0,Timespan,2311.687217,,ZTF,1.0.0,ZTF17aacoobq
0,Coordinate_x,0.518721,,ZTF,1.0.0,ZTF17aacoobq
1,Coordinate_y,0.854414,,ZTF,1.0.0,ZTF17aacoobq


In [17]:
import numpy as np

mapping_bands = {'g': 1, 'r': 2, 'g,r': 12, None: 0, np.nan: 0}

mapping_feat = {
'Power_rate_1_2': 'Power_rate_1/2', 
'Power_rate_1_3': 'Power_rate_1/3', 
'Power_rate_1_4': 'Power_rate_1/4',
}

In [18]:
feat_local = ao_list[0].features.sort_values('name').reset_index(drop=True)
feat_local.fid.replace(mapping_bands, inplace=True)
feat_local.fid = feat_local.fid.astype(int)
feat_local.name.replace(mapping_feat, inplace=True)
feat_local

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat_local.fid.replace(mapping_bands, inplace=True)
  feat_local.fid.replace(mapping_bands, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat_local.name.replace(mapping_feat, inplace=True)


Unnamed: 0,name,value,fid,sid,version,oid
0,Amplitude,0.788303,1,ZTF,1.1.0,ZTF17aacoobq
1,Amplitude,0.601075,2,ZTF,1.1.0,ZTF17aacoobq
2,AndersonDarling,1.000000,1,ZTF,1.1.0,ZTF17aacoobq
3,AndersonDarling,1.000000,2,ZTF,1.1.0,ZTF17aacoobq
4,Autocor_length,1.000000,1,ZTF,1.1.0,ZTF17aacoobq
...,...,...,...,...,...,...
204,ulens_t0,1761.067674,1,ZTF,1.0.2,ZTF17aacoobq
205,ulens_tE,0.217278,2,ZTF,1.0.2,ZTF17aacoobq
206,ulens_tE,3.581479,1,ZTF,1.0.2,ZTF17aacoobq
207,ulens_u0,0.453833,1,ZTF,1.0.2,ZTF17aacoobq


In [25]:
# GET FEATURES
query_features = f"""
                SELECT * FROM feature as f 
                WHERE f.oid in ({','.join([oids[0]])}) and f.version = '27.5.0';
                """

feat_prod = pd.read_sql_query(query_features, con=engine)
feat_prod

Unnamed: 0,oid,name,value,fid,version
0,ZTF17aacoobq,Amplitude,0.788303,1,27.5.0
1,ZTF17aacoobq,Amplitude,0.601075,2,27.5.0
2,ZTF17aacoobq,AndersonDarling,1.000000,1,27.5.0
3,ZTF17aacoobq,AndersonDarling,1.000000,2,27.5.0
4,ZTF17aacoobq,Autocor_length,1.000000,1,27.5.0
...,...,...,...,...,...
204,ZTF17aacoobq,ulens_u0,0.429272,1,27.5.0
205,ZTF17aacoobq,ulens_u0,0.429853,2,27.5.0
206,ZTF17aacoobq,W1-W2,-0.092000,0,27.5.0
207,ZTF17aacoobq,W2-W3,3.035001,0,27.5.0


In [21]:
features_comparison = feat_local.merge(feat_prod, on=['oid', 'name', 'fid'], suffixes=('_local', '_prod'))
features_comparison

Unnamed: 0,name,value_local,fid,sid,version_local,oid,value_prod,version_prod
0,Amplitude,0.788303,1,ZTF,1.1.0,ZTF17aacoobq,0.788303,27.5.0
1,Amplitude,0.601075,2,ZTF,1.1.0,ZTF17aacoobq,0.601075,27.5.0
2,AndersonDarling,1.000000,1,ZTF,1.1.0,ZTF17aacoobq,1.000000,27.5.0
3,AndersonDarling,1.000000,2,ZTF,1.1.0,ZTF17aacoobq,1.000000,27.5.0
4,Autocor_length,1.000000,1,ZTF,1.1.0,ZTF17aacoobq,1.000000,27.5.0
...,...,...,...,...,...,...,...,...
204,ulens_t0,1761.067674,1,ZTF,1.0.2,ZTF17aacoobq,1761.095625,27.5.0
205,ulens_tE,0.217278,2,ZTF,1.0.2,ZTF17aacoobq,2.588941,27.5.0
206,ulens_tE,3.581479,1,ZTF,1.0.2,ZTF17aacoobq,4.040406,27.5.0
207,ulens_u0,0.453833,1,ZTF,1.0.2,ZTF17aacoobq,0.429272,27.5.0


In [22]:
features_comparison['value_diff'] = abs(features_comparison['value_local'] - features_comparison['value_prod'])
feat_diff_sorted = features_comparison.sort_values('value_diff', ascending=False)[['name', 'value_local', 'value_prod', 'value_diff', 'fid']]

feat_equal = feat_diff_sorted[feat_diff_sorted['value_diff'] == 0]
feat_diff = feat_diff_sorted[feat_diff_sorted['value_diff'] != 0]
feat_nan = feat_diff_sorted[feat_diff_sorted['value_diff'].isna()]

print(f'Num. of different features: {feat_diff.shape[0]}')
print(f'Num. of features without difference: {feat_equal.shape[0]}')
print(f'Num. of features with NaN: {feat_nan.shape[0]}')

display(feat_diff_sorted.style.apply(lambda row: ['' for col in row], axis=1))

Num. of different features: 159
Num. of features without difference: 50
Num. of features with NaN: 10


Unnamed: 0,name,value_local,value_prod,value_diff,fid
62,MHPS_high_30,338209.15625,168083.0625,170126.09375,1
66,MHPS_low_365,134189.65625,66749.625,67440.03125,1
64,MHPS_low,120776.523438,59454.589844,61321.933594,1
60,MHPS_high,67391.914062,33416.984375,33974.929688,1
158,fleet_t0,9999.999546,-28.686793,10028.686339,1
116,SPM_chi,512.930162,2731.010736,2218.080574,2
115,SPM_chi,502.599298,1322.508374,819.909076,1
120,SPM_t0,1763.001454,2114.119805,351.118351,1
119,SPM_t0,2113.184786,1883.116148,230.068639,2
171,max_brightness_after_band,545.058,743.409625,198.351625,2


In [36]:
def preprocess_features_from_db(features: pd.DataFrame) -> pd.DataFrame:
    """features come from db in column format"""
    def fid_map(v):
        if v == 1:
            return "_1"  #'g'
        if v == 2:
            return "_2"  #'r'
        if v == 0:
            return ""
        
        if v == 12:
            return "_12"

    def normalize_name(v):
        return str(v).replace("-", "_").replace('/', '_')

    features_ = features.copy()
    features_["fid_name"] = features_["fid"].apply(lambda x: fid_map(x))
    features_["name"] = features_["name"] + features_["fid_name"]
    features_["name"] = features_["name"].apply(lambda x: normalize_name(x))
    features_ = features_[["oid", "name", "value"]]
    features_ = features_.pivot_table(
        index="oid", columns="name", values="value", aggfunc="first", dropna=False
    )

    return features_

In [31]:
import glob

oids = [oid.split('/')[-1].split('.')[0] for oid in glob.glob('db_production/data/*') if oid.find('detections_log') == -1]
#oids = [oids[0]]
oids = [f"'{oid}'" for oid in oids]
oids

["'ZTF17aacoobq'",
 "'ZTF18actbfbf'",
 "'ZTF17aabvxoe'",
 "'ZTF20aageyrd'",
 "'ZTF18acclctr'",
 "'ZTF18admpdys'",
 "'ZTF18aaadunf'",
 "'ZTF17aaagyhh'",
 "'ZTF22abiztag'",
 "'ZTF19acavpcq'",
 "'ZTF17aabwfmq'",
 "'ZTF18acrtzpo'",
 "'ZTF20aahmwdc'",
 "'ZTF18admfewi'",
 "'ZTF19aanqgpx'",
 "'ZTF17aabuyoa'",
 "'ZTF21abyrtsj'",
 "'ZTF23abscwjd'",
 "'ZTF19aalwfly'",
 "'ZTF18acsjity'",
 "'ZTF19aainmws'",
 "'ZTF18abmokgr'",
 "'ZTF18abuyqnw'",
 "'ZTF19aagmvlc'",
 "'ZTF18achckfp'",
 "'ZTF18aaadyyw'",
 "'ZTF18acbvsvv'",
 "'ZTF21acmwott'",
 "'ZTF21aakcnuk'",
 "'ZTF20acmgbds'",
 "'ZTF18abtsjog'",
 "'ZTF18aaadsbl'",
 "'ZTF17aabwfqr'",
 "'ZTF18acrfnjh'",
 "'ZTF18aczcffq'",
 "'ZTF18acbvrpy'",
 "'ZTF21acmpuqn'",
 "'ZTF21achyhzf'",
 "'ZTF20aarbzad'",
 "'ZTF19aalvzlx'",
 "'ZTF19acmgybn'",
 "'ZTF18aaadttd'",
 "'ZTF18acrumfa'",
 "'ZTF18acyuqxl'",
 "'ZTF18abxhofs'",
 "'ZTF18abxhoet'",
 "'ZTF20aafqudh'",
 "'ZTF20aafdvjk'",
 "'ZTF18acgmezv'",
 "'ZTF21aadokov'",
 "'ZTF19aagmuhn'",
 "'ZTF20aaekjfa'",
 "'ZTF22abpu

In [32]:
len(oids)

100

## Query to BBDD

### Detections and Forced Photometry

In [21]:
query_detections = f"""
    SELECT * FROM detection
    WHERE oid IN ({','.join(oids)}) and rb >= 0.55;
"""

detections = pd.read_sql_query(query_detections, con=engine)
display(detections.head())
print(detections.shape)
print(detections.columns)

Unnamed: 0,oid,candid,mjd,fid,pid,diffmaglim,isdiffpos,nid,ra,dec,...,sigmagapbig,rfid,magpsf_corr,sigmapsf_corr,sigmapsf_corr_ext,corrected,dubious,parent_candid,has_stamp,step_id_corr
0,ZTF17aaagyhh,703270634415015008,58457.270637,2,703270634415,19.5753,1,703,84.40088,-15.417544,...,0.0179,,14.363715,100.0,0.00694,True,False,7.122954e+17,False,bulk_1.0.0
1,ZTF17aaagyhh,712295394415015113,58466.295394,1,712295394415,18.910126,1,712,84.400824,-15.417513,...,0.0404,306120144.0,14.838718,0.010558,0.014,True,False,7.332102e+17,False,bulk_1.0.0
2,ZTF17aaagyhh,717271824415010095,58471.271829,2,717271824415,19.152456,-1,717,84.400784,-15.417624,...,0.0291,306120244.0,14.740381,100.0,0.01156,True,False,7.332102e+17,False,bulk_1.0.0
3,ZTF17aaagyhh,727289224415010001,58481.289224,1,727289224415,19.964779,-1,727,84.400705,-15.417494,...,0.0202,306120144.0,15.394738,100.0,0.013211,True,False,7.332102e+17,False,bulk_1.0.0
4,ZTF17aaagyhh,737194024415015002,58491.194028,1,737194024415,19.8563,1,737,84.400881,-15.417556,...,0.0211,,14.870885,0.011813,0.01514,True,False,7.522501e+17,False,bulk_1.0.0


(27023, 30)
Index(['oid', 'candid', 'mjd', 'fid', 'pid', 'diffmaglim', 'isdiffpos', 'nid',
       'ra', 'dec', 'magpsf', 'sigmapsf', 'magap', 'sigmagap', 'distnr', 'rb',
       'rbversion', 'drb', 'drbversion', 'magapbig', 'sigmagapbig', 'rfid',
       'magpsf_corr', 'sigmapsf_corr', 'sigmapsf_corr_ext', 'corrected',
       'dubious', 'parent_candid', 'has_stamp', 'step_id_corr'],
      dtype='object')


In [22]:
# FORCED PHOTOMETRY
query_forced_photometry = f"""
                    SELECT * FROM forced_photometry
                    WHERE oid in ({','.join(oids)}) and procstatus in ('0', '57');
                    """

forced_photometry = pd.read_sql_query(query_forced_photometry, con=engine)
display(forced_photometry.head())
print(forced_photometry.shape)
print(forced_photometry.columns)

Unnamed: 0,pid,oid,mjd,fid,ra,dec,e_ra,e_dec,mag,e_mag,...,diffmaglim,programid,procstatus,distnr,ranr,decnr,magnr,sigmagnr,chinr,sharpnr
0,2530217185015,ZTF18abmrxgg,60284.217188,1,52.75594,64.497422,,,22.48044,1.158725,...,20.364799,1,0,0.305711,52.755898,64.497505,17.917,0.018,0.791,-0.009
1,2509436186215,ZTF18abmrxgg,60263.436181,2,52.75594,64.497422,,,18.584002,0.035797,...,20.282499,1,0,0.154826,52.755886,64.497459,16.978001,0.025,1.16,-0.089
2,2507431666215,ZTF18abmrxgg,60261.431667,1,52.75594,64.497422,,,20.256533,0.14325,...,20.315701,1,0,0.18271,52.755898,64.497467,18.011,0.028,1.37,-0.1
3,2507267266215,ZTF18abmrxgg,60261.267268,2,52.75594,64.497422,,,18.928444,0.050106,...,20.156799,1,0,0.154826,52.755886,64.497459,16.978001,0.025,1.16,-0.089
4,2505406726215,ZTF18abmrxgg,60259.406725,1,52.75594,64.497422,,,20.005064,0.136611,...,20.0893,1,0,0.18271,52.755898,64.497467,18.011,0.028,1.37,-0.1


(11212, 42)
Index(['pid', 'oid', 'mjd', 'fid', 'ra', 'dec', 'e_ra', 'e_dec', 'mag',
       'e_mag', 'mag_corr', 'e_mag_corr', 'e_mag_corr_ext', 'isdiffpos',
       'corrected', 'dubious', 'parent_candid', 'has_stamp', 'field', 'rcid',
       'rfid', 'sciinpseeing', 'scibckgnd', 'scisigpix', 'magzpsci',
       'magzpsciunc', 'magzpscirms', 'clrcoeff', 'clrcounc', 'exptime',
       'adpctdif1', 'adpctdif2', 'diffmaglim', 'programid', 'procstatus',
       'distnr', 'ranr', 'decnr', 'magnr', 'sigmagnr', 'chinr', 'sharpnr'],
      dtype='object')


### To compute Features locally

* Queries (Reference, Xmatch, AllWise, PanStarr)

In [23]:
def patch_wise(oid_list):
    columns = ["oid", "w1mpro", "w2mpro", "w3mpro", "w4mpro"]
    df = pd.DataFrame({"oid": oid_list})  # Fill only the oid column
    for col in columns[1:]:  # Add empty columns for the rest
        df[col] = None
    return df

In [24]:
# REFERENCE
query_reference = f"""
                    SELECT oid, rfid, sharpnr, chinr FROM reference
                    WHERE oid in ({','.join(oids)}) and chinr >= 0;
                    """
reference = pd.read_sql_query(query_reference, con=engine)
reference = reference.drop_duplicates("rfid")

# XMATCH
query_xmatch = f"""
                SELECT oid, oid_catalog, dist FROM xmatch
                WHERE oid in ({','.join(oids)}) and catid='allwise';
                """
xmatch = pd.read_sql_query(query_xmatch, con=engine)
xmatch = xmatch.sort_values("dist").drop_duplicates("oid")
oid_catalog = [f"'{oid}'" for oid in xmatch["oid_catalog"].values]

#########################################################################################
# ALL WISE
query_wise = f"""
            SELECT oid_catalog, w1mpro, w2mpro, w3mpro, w4mpro FROM allwise
            WHERE oid_catalog in ({','.join(oid_catalog)});
            """
try:
    wise = pd.read_sql_query(query_wise, con=engine).set_index("oid_catalog")
    wise = pd.merge(xmatch, wise, on="oid_catalog", how="outer")
    wise = wise[["oid", "w1mpro", "w2mpro", "w3mpro", "w4mpro"]].set_index("oid")
except:
    wise = patch_wise(oids)
    logging.warning("wise patched")

# PANSTARR
query_ps = f"""
            SELECT oid, sgscore1, sgmag1, srmag1, simag1, szmag1, distpsnr1 FROM ps1_ztf
            WHERE oid in ({','.join(oids)});
            """
ps = pd.read_sql_query(query_ps, con=engine)
ps = ps.drop_duplicates("oid").set_index("oid")
#########################################################################################

xmatch = pd.concat([wise, ps], axis=1).reset_index()

display(reference)
display(xmatch)

Unnamed: 0,oid,rfid,sharpnr,chinr
0,ZTF17aaagyhh,306120144,-0.023,0.717
1,ZTF17aaagyhh,306120244,-0.012,0.367
2,ZTF17aaahinf,304120249,-0.020,0.984
3,ZTF17aaahinf,355120111,-0.004,0.254
4,ZTF17aaahinf,304120149,-0.025,0.552
...,...,...,...,...
226,ZTF22abybtmt,262120201,-0.038,1.005
227,ZTF23aabness,257120246,-0.019,0.696
228,ZTF23aabness,257120146,-0.009,0.790
229,ZTF23abscwjd,306120258,-0.042,1.074


Unnamed: 0,oid,w1mpro,w2mpro,w3mpro,w4mpro,sgscore1,sgmag1,srmag1,simag1,szmag1,distpsnr1
0,ZTF18abtshil,,,,,0.974292,15.8005,14.3887,13.4560,12.9280,0.213200
1,ZTF19aanqgpx,,,,,0.995833,15.2764,14.8642,14.6973,14.6495,0.166230
2,ZTF17aabuyoa,,,,,0.996542,18.6765,17.0628,16.8932,16.5093,0.656968
3,ZTF20aafqudh,,,,,0.997500,18.5454,18.1714,18.0943,18.0039,0.494706
4,ZTF18acsjity,,,,,0.998750,16.1603,15.5508,15.3305,15.2188,0.290732
...,...,...,...,...,...,...,...,...,...,...,...
93,ZTF18abxhoet,,,,,0.987292,18.6909,18.1570,17.5135,17.3824,0.454897
94,ZTF19aagmuhn,,,,,0.881250,18.3621,18.0250,17.8388,17.8035,0.503479
95,ZTF19aalwfly,,,,,0.293458,22.0266,21.0857,21.0815,20.7441,0.133504
96,ZTF20aageyrd,,,,,0.980500,19.3093,18.7500,18.5655,18.7476,0.190350


* Create Astro Object

In [25]:
import numpy as np
from lc_classifier.utils import (
    all_features_from_astro_objects,
    create_astro_object,
    EmptyLightcurveException,
)

def patch_xmatch_by_oid(oid: str):
    data = {
        "oid": [oid],
        "w1mpro": [np.nan],
        "w2mpro": [np.nan],
        "w3mpro": [np.nan],
        "w4mpro": [np.nan],
        "sgscore1": [np.nan],
        "sgmag1": [np.nan],
        "srmag1": [np.nan],
        "simag1": [np.nan],
        "szmag1": [np.nan],
        "distpsnr1": [np.nan],
    }
    return pd.DataFrame(data)

def dataframes_to_astro_object_list(
    detections,
    forced_photometry,
    xmatch,
    reference,
    features=None,
    data_origin="database",
    verbose=True,
):
    oids = detections["oid"].unique()
    detections.set_index("oid", inplace=True)
    detections.sort_index(inplace=True)
    forced_photometry_columns = forced_photometry.columns
    forced_photometry.set_index("oid", inplace=True)
    forced_photometry.sort_index(inplace=True)
    xmatch.set_index("oid", inplace=True)
    reference_columns = reference.columns
    reference.set_index("oid", inplace=True)
    astro_objects_list = []
    for oid in oids:
        try:
            xmatch_oid = xmatch.loc[[oid]].reset_index()
        except:
            logging.warning("xmatch metadata patched!")
            xmatch_oid = patch_xmatch_by_oid(oid=oid).reset_index()

        assert len(xmatch_oid) == 1
        xmatch_oid = xmatch_oid.iloc[0]
        if oid in forced_photometry.index:
            forced_photometry_oid = forced_photometry.loc[[oid]].reset_index()
        else:
            forced_photometry_oid = pd.DataFrame(columns=forced_photometry_columns)

        if oid in reference.index:
            reference_oid = reference.loc[[oid]].reset_index()
        else:
            reference_oid = pd.DataFrame(columns=reference_columns)
        try:
            ao = create_astro_object(
                data_origin=data_origin,
                detections=detections.loc[[oid]].reset_index(),
                forced_photometry=forced_photometry_oid,
                xmatch=xmatch_oid,
                reference=reference_oid,
                non_detections=None,
            )
            if features is not None:
                """add features from db"""
                try:
                    ao.features = features.loc[features.oid == oid][
                        ["name", "value", "fid", "version"]
                    ]
                except:
                    ao.features = None
        except EmptyLightcurveException:
            continue

        astro_objects_list.append(ao)
    return astro_objects_list

In [26]:
ao_list = dataframes_to_astro_object_list(
    detections=detections,
    forced_photometry=forced_photometry,
    xmatch=xmatch,
    reference=reference,
    features=None,
)



In [7]:
# GET FEATURES
query_features = f"""
                SELECT * FROM feature as f 
                WHERE f.oid in ({','.join(oids)}) and f.version = '27.5.0';
                """

features = pd.read_sql_query(query_features, con=engine)
features = preprocess_features_from_db(features)
features

name,Amplitude_1,Amplitude_2,AndersonDarling_1,AndersonDarling_2,Autocor_length_1,Autocor_length_2,Beyond1Std_1,Beyond1Std_2,Con_1,Con_2,...,ulens_fs_1,ulens_fs_2,ulens_mag0_1,ulens_mag0_2,ulens_t0_1,ulens_t0_2,ulens_tE_1,ulens_tE_2,ulens_u0_1,ulens_u0_2
oid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ZTF17aaagyhh,0.316409,0.309808,1.000000,1.000000,1.0,1.0,0.327044,0.365517,0.0,0.0,...,1.000000,1.000000,15.083394,14.594233,1495.181488,800.321947,13.715539,4.090437,9.357354e-01,9.509671e-01
ZTF17aaahinf,0.600024,0.432720,1.000000,1.000000,1.0,1.0,0.487500,0.388350,0.0,0.0,...,0.531400,1.000000,16.578478,16.394721,289.085667,694.848322,0.136763,0.999667,1.167508e-01,8.780373e-10
ZTF17aaarvek,0.230988,0.218993,1.000000,1.000000,1.0,1.0,0.405034,0.454321,0.0,0.0,...,0.277149,0.999995,16.731932,15.670571,1983.684952,1233.766632,0.002665,18.471085,3.196195e-02,2.678936e-10
ZTF17aabuyoa,0.619639,0.441053,1.000000,1.000000,1.0,1.0,0.425837,0.435567,0.0,0.0,...,0.000253,0.517568,18.165204,17.277786,2198.928737,2050.102694,6.864093,0.995069,2.843445e-04,6.724024e-02
ZTF17aabvxoe,0.234099,0.221737,1.000000,1.000000,1.0,1.0,0.352941,0.419355,0.0,0.0,...,0.083431,1.000000,14.960351,14.419074,225.741039,831.207850,211.600441,3.039176,7.386241e-06,8.577218e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZTF22abpujoh,0.089690,0.120265,0.381993,0.939787,1.0,1.0,0.309524,0.298507,0.0,0.0,...,0.999999,0.060807,18.697553,18.530954,610.813649,877.096151,66.751735,0.002594,3.133226e-04,5.336261e-01
ZTF22abybtmt,,0.064967,,0.723670,,1.0,,0.235294,,0.0,...,1.000000,1.000000,19.058006,17.620424,760.800505,816.011074,0.482324,2.119831,2.884365e-07,5.549644e-07
ZTF23aabness,,,,,,,,,,,...,1.000000,1.000000,14.296145,14.027384,655.449678,698.418726,59.631588,61.964096,1.480142e+00,1.062567e+00
ZTF23abscwjd,,0.028358,,0.381189,,1.0,,0.250000,,0.0,...,1.000000,0.008408,18.217107,17.189221,58.433809,69.199411,10.211299,7.810661,2.000637e+00,8.179657e-02


In [8]:
from alerce_classifiers.base.dto import InputDTO
from alerce_classifiers.base._types import *

input_dto = InputDTO(
        Detections(pd.DataFrame()),
        NonDetections(pd.DataFrame()),
        Features(pd.DataFrame(features)),
        Xmatch(pd.DataFrame()),
        Stamps(pd.DataFrame()),
    )

In [9]:
from alerce_classifiers.squidward.model import (
    SquidwardFeaturesClassifier,
    SquidwardMapper,
)

SQUIDWARD_MODEL = "https://alerce-models.s3.amazonaws.com/squidward/1.1.5/hierarchical_random_forest_model.pkl"

model = SquidwardFeaturesClassifier(
    model_path=SQUIDWARD_MODEL,
    mapper=SquidwardMapper(),
)

predictions = model.predict(input_dto)
df_local_hbrf = predictions.probabilities
df_local_hbrf = df_local_hbrf.reset_index()
df_local_hbrf = df_local_hbrf.melt(id_vars=['oid'], var_name='class_name', value_name='probability')
df_local_hbrf['ranking'] = df_local_hbrf.groupby('oid')['probability'].rank(method="first", ascending=False).astype(int)
df_local_hbrf

Unnamed: 0,oid,class_name,probability,ranking
0,ZTF17aaagyhh,AGN,0.000614,13
1,ZTF17aaahinf,AGN,0.001686,17
2,ZTF17aaarvek,AGN,0.000916,14
3,ZTF17aabuyoa,AGN,0.002008,15
4,ZTF17aabvxoe,AGN,0.000375,18
...,...,...,...,...
2173,ZTF22abpujoh,YSO,0.061014,6
2174,ZTF22abybtmt,YSO,0.196118,1
2175,ZTF23aabness,YSO,0.034943,9
2176,ZTF23abscwjd,YSO,0.114478,3


In [10]:
query_to_prod_hbrf = f"""
            SELECT * FROM probability as p
            WHERE oid  in ({','.join(oids)})
                    and p.classifier_name = 'LC_classifier_BHRF_forced_phot(beta)' 
                    and classifier_version = '1.1.5';
        """

df_prod_hbrf = pd.read_sql_query(query_to_prod_hbrf, con=engine) #[df_local_hbrf.columns]
df_prod_hbrf

Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF17aaagyhh,LC_classifier_BHRF_forced_phot(beta),1.1.5,AGN,0.000928,13
1,ZTF17aaagyhh,LC_classifier_BHRF_forced_phot(beta),1.1.5,Blazar,0.001624,11
2,ZTF17aaagyhh,LC_classifier_BHRF_forced_phot(beta),1.1.5,CEP,0.016920,8
3,ZTF17aaagyhh,LC_classifier_BHRF_forced_phot(beta),1.1.5,CV/Nova,0.042572,6
4,ZTF17aaagyhh,LC_classifier_BHRF_forced_phot(beta),1.1.5,DSCT,0.050760,4
...,...,...,...,...,...,...
2063,ZTF23abscwjd,LC_classifier_BHRF_forced_phot(beta),1.1.5,SNII,0.004960,18
2064,ZTF23abscwjd,LC_classifier_BHRF_forced_phot(beta),1.1.5,SNIIb,0.000992,22
2065,ZTF23abscwjd,LC_classifier_BHRF_forced_phot(beta),1.1.5,SNIIn,0.004464,20
2066,ZTF23abscwjd,LC_classifier_BHRF_forced_phot(beta),1.1.5,TDE,0.029388,10


In [11]:
df_hbrf = df_local_hbrf.merge(df_prod_hbrf, on=['oid', 'class_name'], suffixes=('_local', '_prod'))
df_hbrf

Unnamed: 0,oid,class_name,probability_local,ranking_local,classifier_name,classifier_version,probability_prod,ranking_prod
0,ZTF17aaagyhh,AGN,0.000614,13,LC_classifier_BHRF_forced_phot(beta),1.1.5,0.000928,13
1,ZTF17aaahinf,AGN,0.001686,17,LC_classifier_BHRF_forced_phot(beta),1.1.5,0.002480,17
2,ZTF17aaarvek,AGN,0.000916,14,LC_classifier_BHRF_forced_phot(beta),1.1.5,0.001200,13
3,ZTF17aabuyoa,AGN,0.002008,15,LC_classifier_BHRF_forced_phot(beta),1.1.5,0.001440,16
4,ZTF17aabvxoe,AGN,0.000375,18,LC_classifier_BHRF_forced_phot(beta),1.1.5,0.000896,18
...,...,...,...,...,...,...,...,...
2063,ZTF22abillrz,YSO,0.007273,4,LC_classifier_BHRF_forced_phot(beta),1.1.5,0.003728,10
2064,ZTF22abiztag,YSO,0.017725,7,LC_classifier_BHRF_forced_phot(beta),1.1.5,0.005232,13
2065,ZTF22abpujoh,YSO,0.061014,6,LC_classifier_BHRF_forced_phot(beta),1.1.5,0.093564,3
2066,ZTF22abybtmt,YSO,0.196118,1,LC_classifier_BHRF_forced_phot(beta),1.1.5,0.205792,1


In [12]:
df_hbrf.columns

Index(['oid', 'class_name', 'probability_local', 'ranking_local',
       'classifier_name', 'classifier_version', 'probability_prod',
       'ranking_prod'],
      dtype='object')