# Compare SQLs

# Dataset

In [4]:
import pandas as pd
# load csv
db_train = pd.read_csv('llm/data/txt2sql_alerce_train_v4_0.csv')
db_test = pd.read_csv('llm/data/txt2sql_alerce_test_v4_0.csv')

In [5]:
import os
import sqlalchemy as sa
import requests
import time
# Create a connection to the ALeRCE database
def create_conn_alerce(access_time: int = 2):
  """
  Create a connection to the ALeRCE database.
  
  Args:
    access_time: Integer representing the time limit for the connection.
    2 for default access, 10 for extended access.
    
  Returns:
    SQLAlchemy engine object
  
  Raises:
    ValueError: If URL fetch fails or credentials are invalid
  """
  # Common URL for both access levels
  url = "https://raw.githubusercontent.com/alercebroker/usecases/master/alercereaduser_v4.json"
  
  # For security, get extended access credentials from environment variables
  user_extended = os.environ.get('ALERCE_USER_EXTENDED', '')
  pass_extended = os.environ.get('ALERCE_PASS_EXTENDED', '')
  
  n_tries = 3
  params = None
  
  # Fetch parameters from URL with retry logic
  for n_try in range(n_tries):
    try:
      response = requests.get(url)
      if response.status_code != 200:
        if n_try < n_tries - 1:
          time.sleep(2 ** n_try)  # exponential backoff
          continue
        else:
          raise ValueError(f"Failed to fetch URL: {url}, Status Code: {response.status_code}")
      
      params = response.json().get('params')
      if not params:
        raise ValueError("Missing 'params' in the JSON response")
      break
        
    except requests.RequestException as e:
      if n_try < n_tries - 1:
        time.sleep(2 ** n_try)
        continue
      else:
        raise ValueError(f"Network error when fetching {url}: {str(e)}")
    except ValueError as e:
      if "JSON" in str(e):
        raise ValueError("Invalid JSON response from URL")
      else:
        raise e
  
  # Create connection string based on access level
  if access_time == 2:
    conn_string = f"postgresql+psycopg2://{params['user']}:{params['password']}@{params['host']}/{params['dbname']}"
  elif access_time == 10:
    if not user_extended or not pass_extended:
      raise ValueError("Extended access credentials not found in environment variables")
    conn_string = f"postgresql+psycopg2://{user_extended}:{pass_extended}@{params['host']}/{params['dbname']}"
  else:
    raise ValueError(f'Access time {access_time} not supported')
  
  # Create and return engine with connection pooling disabled
  engine = sa.create_engine(conn_string, poolclass=sa.pool.NullPool)
  return engine


def run_sql_alerce(
    sql: str, 
    access_time: int = 2, 
    n_tries: int = 3, 
    query_time: bool = False
):
  ''' Execute the SQL query at the ALeRCE database and return the result
    Args:
        sql: SQL query to execute
        access_time: Integer representing the time limit for the connection. 
        2 for default access, 10 for extended access.
        n_tries: Number of tries to execute the query
        query_time: Boolean indicating whether to track query execution time
    Returns:
    query: pandas dataframe with the result of the query
    error: error message if the query could not be executed
    execution_time: time taken to execute the query    
  '''

  try:
    engine = create_conn_alerce(access_time=access_time)
  except ValueError as e:
    return None, e
    
  query = None
  error = None
  execution_time = None
  
  try:
    for n_try in range(n_tries):
      try:
        with engine.begin() as conn:
          start_time = time.time()
          query = pd.read_sql_query(sa.text(sql), conn)
          if query_time:
            execution_time = time.time() - start_time
          error = None
          break
      except Exception as e:
        error = e
        if n_try < n_tries - 1:
          time.sleep(2 ** n_try)  # exponential backoff
        else:
          # Last attempt failed, keep the error
          pass
  
  finally:
    # Always dispose of the engine to close connections
    engine.dispose()

  if query_time:
    return query, error, execution_time
  else:
    return query, error




## Running test of train and test queries


In [60]:
gold_columns_train = []
# iter db_train 
n = 5
for indx, row in db_train.iterrows():
    
    query = row['gold_query']
    tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
    if error:
        print(f'Error in query {row.req_id}: {error}')
        gold_columns_train.append({"req_id": row.req_id, "cols": [], "shape": []})
    else:
        print(f'Query {row.req_id} executed successfully, shape: {tables_i.shape}')
        gold_columns_train.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})

KeyboardInterrupt: 

In [None]:
gold_columns_test = []
# iter db_test 
n = 5
for indx, row in db_test.iterrows():
    
    query = row['gold_query']
    tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
    if error:
        print(f'Error in query {row.req_id}: {error}')
        gold_columns_test.append({"req_id": row.req_id, "cols": [], "shape": []})
    else:
        print(f'Query {row.req_id} executed successfully, shape: {tables_i.shape}')
        gold_columns_test.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})

Query 8 executed successfully, shape: (186, 28)
Query 27 executed successfully, shape: (5520, 11)
Error in query 17: (psycopg2.errors.QueryCanceled) canceling statement due to statement timeout

[SQL: 
SELECT
  *
FROM
  (
SELECT *
FROM (
SELECT
  feature.oid, feature.value, feature.name, feature.version, feature.fid
FROM
  (
SELECT
    object.oid, object.meanra, object.meandec
FROM
    object INNER JOIN
    probability
    ON object.oid = probability.oid
WHERE
    probability.classifier_name='lc_classifier'
    AND probability.class_name = 'AGN'
    AND probability.ranking = 1
) as obj_oids
    INNER JOIN
    feature ON feature.oid = obj_oids.oid
WHERE
  feature.name = 'ExcessVar'
  AND feature.fid = 1
  AND feature.value < -0.001
ORDER BY feature.value ASC
) as sq1
UNION
SELECT *
FROM (
SELECT
  feature.oid, feature.value, feature.name, feature.version, feature.fid
FROM
  (
SELECT
    object.oid, object.meanra, object.meandec
FROM
    object INNER JOIN
    probability
    ON object.oi

## Columns

In [None]:
gold_columns_train

[{'req_id': 13,
  'cols': ['oid', 'class_name', 'lastmjd', 'firstmjd'],
  'shape': (4471, 4)},
 {'req_id': 10, 'cols': ['oid', 'sn_prob', 'cv_prob'], 'shape': (184, 3)},
 {'req_id': 15,
  'cols': ['oid',
   'probability',
   'candid',
   'fid',
   'mjd',
   'magstat_fid',
   'dmdt_first'],
  'shape': (42, 7)},
 {'req_id': 4,
  'cols': ['oid',
   'fid',
   'dmdt_first',
   'candid',
   'f_id',
   'magpsf',
   'sigmapsf_corr',
   'sigmapsf_corr_ext'],
  'shape': (1537, 8)},
 {'req_id': 25,
  'cols': ['oid',
   'ndethist',
   'ncovhist',
   'mjdstarthist',
   'mjdendhist',
   'corrected',
   'stellar',
   'ndet',
   'g_r_max',
   'g_r_max_corr',
   'g_r_mean',
   'g_r_mean_corr',
   'meanra',
   'meandec',
   'sigmara',
   'sigmadec',
   'deltajd',
   'firstmjd',
   'lastmjd',
   'step_id_corr',
   'diffpos',
   'reference_change',
   'oid',
   'classifier_name',
   'class_name',
   'probability',
   'ranking',
   'source_id',
   'ra',
   'dec'],
  'shape': (4, 30)},
 {'req_id': 7, 'cols'

In [None]:
gold_columns_test


[{'req_id': 8,
  'cols': ['oid',
   'ndethist',
   'ncovhist',
   'mjdstarthist',
   'mjdendhist',
   'corrected',
   'stellar',
   'ndet',
   'g_r_max',
   'g_r_max_corr',
   'g_r_mean',
   'g_r_mean_corr',
   'meanra',
   'meandec',
   'sigmara',
   'sigmadec',
   'deltajd',
   'firstmjd',
   'lastmjd',
   'step_id_corr',
   'diffpos',
   'reference_change',
   'oid',
   'classifier_name',
   'classifier_version',
   'class_name',
   'probability',
   'ranking'],
  'shape': (186, 28)},
 {'req_id': 27,
  'cols': ['oid',
   'meanra',
   'meandec',
   'ndet',
   'firstmjd',
   'deltajd',
   'g_r_max',
   'classifier_name',
   'class_name',
   'ranking',
   'probability'],
  'shape': (5520, 11)},
 {'req_id': 17, 'cols': [], 'shape': []},
 {'req_id': 37,
  'cols': ['oid',
   'classifier_name',
   'classifier_version',
   'class_name',
   'probability',
   'ranking'],
  'shape': (200, 6)},
 {'req_id': 34,
  'cols': ['oid', 'name', 'value', 'fid', 'version'],
  'shape': (354, 5)},
 {'req_id

## Test single query

In [None]:
query = '''
SELECT
    *
FROM
    magstat
WHERE
    oid IN
(
SELECT
    probability.oid
FROM
    probability INNER JOIN
    xmatch
    ON probability.oid = xmatch.oid
    INNER JOIN
    allwise
    ON xmatch.oid_catalog = allwise.oid_catalog
WHERE
    w1mpro > 12
    AND w1mpro < 13
    AND classifier_name = 'lc_classifier'
    AND class_name = 'AGN'
    AND probability > 0.6
LIMIT 50
)
'''
query_columns = []
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (96, 28)


Unnamed: 0,oid,fid,stellar,corrected,ndet,ndubious,dmdt_first,dm_first,sigmadm_first,dt_first,...,magmedian_corr,magmax_corr,magmin_corr,magsigma_corr,maglast_corr,magfirst_corr,firstmjd,lastmjd,step_id_corr,saturation_rate
0,ZTF19aaslszp,1,False,True,14,0,,,,,...,18.339777,18.424961,18.263086,0.045689,18.313805,18.424961,60120.430,60651.098,26.0.0,0.0
1,ZTF19aaslszp,2,False,True,38,0,,,,,...,17.458006,17.704653,17.340534,0.096294,17.387550,17.511440,58627.406,60651.120,26.0.0,0.0
2,ZTF21aacbvyj,1,False,True,28,0,0.254962,0.990300,-18.960983,5.106713,...,18.204933,18.271690,18.124393,0.042302,18.218922,18.228945,59198.297,59968.140,1.1.6,0.0
3,ZTF21aacbvyj,2,False,True,12,0,-1.233094,-2.546882,-20.072512,1.958646,...,17.879313,17.957926,17.088337,0.231817,17.903180,17.088337,59222.190,59906.297,1.1.6,0.0
4,ZTF18aaityfe,1,False,True,21,0,,,,,...,17.407772,17.418003,17.397541,0.010231,17.418003,,58480.543,60446.215,24.4.1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,ZTF23abllsfp,1,False,True,12,0,,,,,...,17.475672,17.487580,17.444767,0.010975,17.476606,,60231.453,60294.348,ALeRCE ZTF,0.0
92,ZTF23abllsfp,2,False,True,2,0,,,,,...,16.660275,16.660275,16.660275,0.000000,16.660275,,60235.453,60292.402,ALeRCE ZTF,0.0
93,ZTF24aaqnytx,1,False,True,15,1,,,,,...,17.926128,17.974104,17.906946,0.018508,17.937592,17.965010,60439.344,60467.330,24.5.1,0.0
94,ZTF19acbuwdb,1,False,True,34,2,,,,,...,17.277626,17.629467,17.136470,0.114029,17.136470,17.192860,58850.426,60699.363,27.4.0,0.0


In [None]:

query = '''
-- Find at most 50 ZTF objects that are AGNs with specific WISE W1 magnitude
WITH agn_objects AS (
    SELECT p.oid
    FROM probability p
    WHERE p.class_name = 'AGN'
      AND p.classifier_name = 'lc_classifier'
      AND p.probability > 0.6
      AND p.ranking = 1
)
SELECT m.*
FROM magstat m
JOIN agn_objects a ON m.oid = a.oid
JOIN object o ON m.oid = o.oid
WHERE EXISTS (
    SELECT 1
    FROM allwise w
    WHERE q3c_join(o.meanra, o.meandec, w.ra, w.dec, 0.000277778) -- 1 arcsec in degrees
      AND w.w1mpro BETWEEN 12 AND 13
)
LIMIT 50
'''
query_columns = []
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

KeyboardInterrupt: 

In [None]:
query =  '''
-- Find ZTF objects that are AGNs with specific WISE W1 magnitude range
WITH agn_objects AS (
    SELECT p.oid
    FROM probability p
    WHERE p.classifier_name = 'lc_classifier'
      AND p.class_name = 'AGN'
      AND p.probability > 0.6
      AND p.ranking = 1
),
wise_objects AS (
    SELECT x.oid
    FROM agn_objects a
    JOIN xmatch x ON a.oid = x.oid
    JOIN allwise w ON x.oid_catalog = w.oid_catalog
    WHERE w.w1mpro BETWEEN 12 AND 13
    LIMIT 50
)
SELECT m.*
FROM magstat m
JOIN wise_objects w ON m.oid = w.oid;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (101, 28)


Unnamed: 0,oid,fid,stellar,corrected,ndet,ndubious,dmdt_first,dm_first,sigmadm_first,dt_first,...,magmedian_corr,magmax_corr,magmin_corr,magsigma_corr,maglast_corr,magfirst_corr,firstmjd,lastmjd,step_id_corr,saturation_rate
0,ZTF17aaaxudr,1,False,True,21,0,,,,,...,18.490250,18.545198,18.098269,0.159082,18.111622,18.166320,58507.184,60706.320,27.3.0,0.0
1,ZTF17aaaxudr,2,False,True,36,0,,,,,...,17.360033,17.395657,17.143759,0.041741,17.329798,17.143759,59119.477,60374.266,27.3.0,0.0
2,ZTF18acussse,1,False,True,10,0,,,,,...,17.274618,17.491066,17.244513,0.097584,17.255821,17.333017,58585.387,60695.523,27.0.0,0.0
3,ZTF18acussse,2,False,True,8,0,,,,,...,16.670755,16.688547,16.657848,0.011579,16.659653,16.679638,59359.234,60016.460,27.0.0,0.0
4,ZTF20aakdzwh,1,False,True,143,1,,,,,...,19.190640,19.653301,18.520113,0.262789,19.105764,18.662980,58849.133,60735.137,27.4.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,ZTF19aafjuyh,2,False,True,33,0,,,,,...,17.396248,17.457926,17.355825,0.021408,17.372660,17.383950,59165.480,60082.170,1.2.0,0.0
97,ZTF19aawvjxk,1,False,True,33,2,-0.068224,-0.41253,-20.301409,2.996597,...,17.913612,18.237703,17.819384,0.069931,17.906635,17.944168,58637.227,59909.492,1.1.6,0.0
98,ZTF19aawvjxk,2,False,True,11,0,-0.024437,-0.28940,-20.186365,2.920336,...,17.725573,17.753117,17.545736,0.060360,17.695545,17.753117,58637.200,59902.516,1.1.6,0.0
99,ZTF18aatojqv,1,False,True,33,0,,,,,...,18.075810,18.091986,18.038828,0.018482,18.074806,,59401.380,60470.414,24.5.1,0.0


In [None]:
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,fid,stellar,corrected,ndet,ndubious,dmdt_first,dm_first,sigmadm_first,dt_first,...,magmedian_corr,magmax_corr,magmin_corr,magsigma_corr,maglast_corr,magfirst_corr,firstmjd,lastmjd,step_id_corr,saturation_rate


In [None]:
# check repeated rows
tables_i[tables_i.duplicated(keep=False)]

Unnamed: 0,oid,fid,stellar,corrected,ndet,ndubious,dmdt_first,dm_first,sigmadm_first,dt_first,...,magmedian_corr,magmax_corr,magmin_corr,magsigma_corr,maglast_corr,magfirst_corr,firstmjd,lastmjd,step_id_corr,saturation_rate
23,ZTF18adaryet,1,False,True,111,0,,,,,...,17.690979,18.12243,17.491518,0.163959,17.683527,18.071615,58480.266,60706.25,27.4.0,0.0
24,ZTF18adaryet,2,False,True,101,3,,,,,...,16.80501,17.094336,16.73757,0.082908,16.79567,17.028706,58480.3,60728.184,27.4.0,0.0
25,ZTF18adaryet,3,False,True,2,0,,,,,...,16.398884,16.417692,16.380075,0.018808,16.380075,16.417692,60576.477,60577.516,27.4.0,0.0
59,ZTF22abugimg,1,False,True,143,0,,,,,...,,,,,,,58494.5,60116.19,ALeRCE ZTF,0.0
60,ZTF22abugimg,2,False,True,245,0,,,,,...,17.070656,17.099583,17.04949,0.015174,17.074087,,58487.527,60369.4,ALeRCE ZTF,0.0
71,ZTF22abugimg,1,False,True,143,0,,,,,...,,,,,,,58494.5,60116.19,ALeRCE ZTF,0.0
72,ZTF22abugimg,2,False,True,245,0,,,,,...,17.070656,17.099583,17.04949,0.015174,17.074087,,58487.527,60369.4,ALeRCE ZTF,0.0
92,ZTF18adaryet,1,False,True,111,0,,,,,...,17.690979,18.12243,17.491518,0.163959,17.683527,18.071615,58480.266,60706.25,27.4.0,0.0
93,ZTF18adaryet,2,False,True,101,3,,,,,...,16.80501,17.094336,16.73757,0.082908,16.79567,17.028706,58480.3,60728.184,27.4.0,0.0
94,ZTF18adaryet,3,False,True,2,0,,,,,...,16.398884,16.417692,16.380075,0.018808,16.380075,16.417692,60576.477,60577.516,27.4.0,0.0


In [None]:
tables_i.columns

Index(['oid', 'fid', 'stellar', 'corrected', 'ndet', 'ndubious', 'dmdt_first',
       'dm_first', 'sigmadm_first', 'dt_first', 'magmean', 'magmedian',
       'magmax', 'magmin', 'magsigma', 'maglast', 'magfirst', 'magmean_corr',
       'magmedian_corr', 'magmax_corr', 'magmin_corr', 'magsigma_corr',
       'maglast_corr', 'magfirst_corr', 'firstmjd', 'lastmjd', 'step_id_corr',
       'saturation_rate'],
      dtype='object')

In [None]:
print(set(sorted(tables_gold['firstmjd'])))
print(set(sorted(tables_i['firstmjd'])))

{58369.145, 58374.438, 59401.38, 58380.47, 58895.426, 60437.15, 60439.344, 58904.484, 59951.234, 58423.484, 58423.543, 58423.242, 58428.29, 58431.492, 59967.426, 59969.383, 58434.492, 59468.473, 59996.457, 58464.5, 58469.527, 58472.105, 58480.266, 58480.3, 58480.543, 58482.477, 58996.188, 58997.395, 58998.45, 58999.273, 58487.527, 58494.5, 59009.227, 59522.344, 59014.477, 58507.184, 58510.273, 58513.402, 58513.457, 60576.457, 60576.477, 60604.207, 60604.25, 58575.21, 60120.43, 58585.387, 59118.49, 59119.477, 58627.406, 58637.2, 58637.227, 59150.52, 59672.324, 60184.457, 59163.48, 59165.48, 58661.2, 59178.547, 59182.523, 59198.297, 60231.453, 60232.4, 59721.344, 60235.453, 59222.19, 58718.44, 59232.562, 58733.406, 59257.4, 58748.527, 60293.418, 59783.42, 59278.312, 60315.547, 59813.42, 59311.44, 58302.19, 58313.37, 58316.35, 58830.176, 58830.234, 58830.562, 58833.19, 58833.242, 58833.38, 59359.234, 58849.133, 58850.426, 58852.504, 58854.566, 59371.215, 60403.484, 58360.168, 58361.45, 58

In [None]:
print(set(sorted(tables_gold['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF18acussse', 'ZTF21aaitotq', 'ZTF19aagnapn', 'ZTF22abugimg', 'ZTF21aacbvyj', 'ZTF19aaeycah', 'ZTF20aaccuah', 'ZTF23abpqica', 'ZTF23aaavvpg', 'ZTF18abshgbq', 'ZTF20actrcji', 'ZTF23abllsfp', 'ZTF21aclcrho', 'ZTF18aclsckj', 'ZTF20aaezfjd', 'ZTF18aamfxav', 'ZTF23aaaauav', 'ZTF18acbwaow', 'ZTF18adooxog', 'ZTF18acbxqwy', 'ZTF20acxkffg', 'ZTF18acyxlzm', 'ZTF21aaksefr', 'ZTF18admzvfs', 'ZTF19abujsyc', 'ZTF18aaityfe', 'ZTF19aaptufk', 'ZTF24aaifxdw', 'ZTF19aaslszp', 'ZTF20aakdzwh', 'ZTF19aawvjxk', 'ZTF20abefbzi', 'ZTF24aaqnytx', 'ZTF20aadbnsx', 'ZTF18aaqrcws', 'ZTF18aatojqv', 'ZTF19acljmbq', 'ZTF18adaryet', 'ZTF19aafjuyh', 'ZTF18abdmdgx', 'ZTF20abhagbz', 'ZTF24aadamqj', 'ZTF19acbuwdb', 'ZTF17aaaxudr', 'ZTF20abcabkt', 'ZTF19aayocpp', 'ZTF18abycxrb', 'ZTF18aadzncm'}
{'ZTF18acussse', 'ZTF21aaitotq', 'ZTF19aagnapn', 'ZTF22abugimg', 'ZTF21aacbvyj', 'ZTF19aaeycah', 'ZTF20aaccuah', 'ZTF23abpqica', 'ZTF23aaavvpg', 'ZTF18abshgbq', 'ZTF20actrcji', 'ZTF23abllsfp', 'ZTF21aclcrho', 'ZTF18aclsckj', 'ZTF20

In [None]:
# Simon's version
query =  '''
-- Find at most 50 ZTF objects that are AGNs with specific WISE W1 magnitude
WITH agn_objects AS (
    SELECT p.oid
    FROM probability p
    WHERE p.class_name = 'AGN'
      AND p.classifier_name = 'lc_classifier'
      AND p.probability > 0.6
      AND p.ranking = 1
)
SELECT m.*
FROM magstat m
JOIN agn_objects a ON m.oid = a.oid
JOIN object o ON m.oid = o.oid
WHERE EXISTS (
    SELECT 1
    FROM allwise w
    WHERE q3c_join(o.meanra, o.meandec, w.ra, w.dec, 0.000277778) -- 1 arcsec in degrees
      AND w.w1mpro BETWEEN 12 AND 13
)
LIMIT 50
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Error in query: (psycopg2.errors.QueryCanceled) canceling statement due to statement timeout

[SQL: 
-- Find at most 50 ZTF objects that are AGNs with specific WISE W1 magnitude
WITH agn_objects AS (
    SELECT p.oid
    FROM probability p
    WHERE p.class_name = 'AGN'
      AND p.classifier_name = 'lc_classifier'
      AND p.probability > 0.6
      AND p.ranking = 1
)
SELECT m.*
FROM magstat m
JOIN agn_objects a ON m.oid = a.oid
JOIN object o ON m.oid = o.oid
WHERE EXISTS (
    SELECT 1
    FROM allwise w
    WHERE q3c_join(o.meanra, o.meandec, w.ra, w.dec, 0.000277778) -- 1 arcsec in degrees
      AND w.w1mpro BETWEEN 12 AND 13
)
LIMIT 50
]
(Background on this error at: https://sqlalche.me/e/20/e3q8)


In [None]:
print(set(tables_gold['oid']))
print(set(tables_i['oid']))

{'ZTF19aaoohqa', 'ZTF19abnrcwv', 'ZTF19aargois', 'ZTF18aaynqox', 'ZTF20aajdllt', 'ZTF18abasovn', 'ZTF22aabwnvh', 'ZTF18absitkp', 'ZTF18abmrcsn', 'ZTF19abdydbj', 'ZTF18abvztqd', 'ZTF19abdyprc', 'ZTF18aazwigp', 'ZTF18acwbhzi', 'ZTF17aaagqxl', 'ZTF18abivpiy', 'ZTF19aaonuql', 'ZTF18adaqdfq', 'ZTF19ackxixe', 'ZTF19aarxvzz', 'ZTF20abzcyob', 'ZTF19abdydlu', 'ZTF18ablpmbs', 'ZTF19acytwir', 'ZTF18aaydigk', 'ZTF20ablxsbu', 'ZTF18abadjvy', 'ZTF18aciepmk', 'ZTF19aaatlrf', 'ZTF21aaaqswa', 'ZTF18abkmmts', 'ZTF19abdfahf', 'ZTF18abpogdx', 'ZTF18achfgss', 'ZTF22aacjcsw', 'ZTF18ablqndm', 'ZTF18acuwvpr', 'ZTF19abdpqai', 'ZTF20acqwnlk', 'ZTF17aacpvbf', 'ZTF19adbmgjt', 'ZTF18aaypmtr', 'ZTF18adapbif', 'ZTF19aaxoztn', 'ZTF18adamzmn', 'ZTF18abommtg', 'ZTF21aaguuxw', 'ZTF18aayuugb', 'ZTF18absoocn', 'ZTF18abspqgp', 'ZTF18acfrjme', 'ZTF20aaiiqej', 'ZTF19aayrbaq', 'ZTF20abgdwcs', 'ZTF19aaoznlz', 'ZTF17aabwftj', 'ZTF19aaonwpp', 'ZTF18aayujpj', 'ZTF18admorlm', 'ZTF18abvgrec', 'ZTF18aayvacc', 'ZTF19abbxfxd', 'ZTF18a

In [None]:
# Simon's version
query =  '''
-- Find at most 100 ZTF objects with a multiband period < 5 days in the specified feature version,
-- and return all columns from the 'probability' table for those objects,
-- including only rows for the light curve classifier ('lc_classifier') with ranking 1 or 2.

SELECT *
FROM probability
WHERE oid IN (
    SELECT oid
    FROM feature
    WHERE name = 'multiband_period'
      AND value < 5
      AND version = 'lc_classifier_1.2.1-P'
    LIMIT 100
)
AND classifier_name = 'lc_classifier'
AND ranking IN (1, 2)

'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (0, 6)


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking


query 14


For the next list of oids: ['ZTF23aavzgjg' 'ZTF23aaynzyk' 'ZTF23aavqxos' 'ZTF23aaknyni'
 'ZTF23aavsdtc' 'ZTF18aandkua' 'ZTF23aaxfewt' 'ZTF23aavshwi'
 'ZTF22aawasao' 'ZTF23aaxgvnt'], return the unique object identifier, candidate identifier, filter identifier, modified julian date, magnitud, magnitud error, whether the object has stamps, deep learning real bogus score, the star galaxy score of the nearest object, and the distance to the nearest source in panstarrs for objects that have a deep learning real bogus score greater than 0.5 and that either have a star galaxy score less than 0.5 or a distance to the nearest panstarrs source smaller than 1 arcsec.

In [187]:

query = '''
SELECT
    det.oid, det.candid,
    det.drb, det.fid,
    det.mjd, det.magpsf, det.sigmapsf,
    det.has_stamp,
    ps1.sgscore1, ps1.distpsnr1
FROM
    (SELECT *
    FROM detection
    WHERE oid in ('ZTF23aavzgjg','ZTF23aaynzyk','ZTF23aavqxos','ZTF23aaknyni','ZTF23aavsdtc','ZTF18aandkua','ZTF23aaxfewt','ZTF23aavshwi','ZTF22aawasao','ZTF23aaxgvnt')
    ) as det
    INNER JOIN
    (SELECT *
    FROM ps1_ztf
    WHERE oid in ('ZTF23aavzgjg','ZTF23aaynzyk','ZTF23aavqxos','ZTF23aaknyni','ZTF23aavsdtc','ZTF18aandkua','ZTF23aaxfewt','ZTF23aavshwi','ZTF22aawasao','ZTF23aaxgvnt')
    ) as ps1
    ON det.oid=ps1.oid
WHERE
     (ps1.sgscore1 < 0.5 OR ps1.distpsnr1 < 1)
    AND det.drb > 0.5
'''
query_columns = []
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (30, 10)


Unnamed: 0,oid,candid,drb,fid,mjd,magpsf,sigmapsf,has_stamp,sgscore1,distpsnr1
0,ZTF18aandkua,2434183903015015056,0.994159,1,60188.183901,17.907194,0.180156,True,0.075167,0.326697
1,ZTF18aandkua,2756269351915010002,0.993011,2,60510.269352,19.379133,0.161114,True,0.075167,0.326697
2,ZTF18aandkua,2772265531915010009,0.850113,2,60526.265532,19.641638,0.183068,True,0.075167,0.326697
3,ZTF18aandkua,2979404491915010015,0.696538,1,60733.404491,18.887863,0.175351,True,0.075167,0.326697
4,ZTF18aandkua,2434183903015015056,0.994159,1,60188.183901,17.907194,0.180156,True,0.075167,1.234589
5,ZTF18aandkua,2756269351915010002,0.993011,2,60510.269352,19.379133,0.161114,True,0.075167,1.234589
6,ZTF18aandkua,2772265531915010009,0.850113,2,60526.265532,19.641638,0.183068,True,0.075167,1.234589
7,ZTF18aandkua,2979404491915010015,0.696538,1,60733.404491,18.887863,0.175351,True,0.075167,1.234589
8,ZTF22aawasao,2754319833415015025,0.755912,2,60508.319838,18.594233,0.14378,True,0.380149,0.673811
9,ZTF22aawasao,2754319833415015025,0.755912,2,60508.319838,18.594233,0.14378,True,0.380149,0.904783


In [188]:

query = '''
-- Return requested columns for given oids with specified conditions

SELECT d.oid,
       d.candid,
       d.fid,
       d.mjd,
       d.magpsf,
       d.sigmapsf,
       d.has_stamp,
       d.drb,
       p.sgscore1,
       p.distpsnr1
FROM detection d
LEFT JOIN ps1_ztf p ON d.oid = p.oid
AND d.candid = p.candid
WHERE d.oid IN ('ZTF23aavzgjg',
                'ZTF23aaynzyk',
                'ZTF23aavqxos',
                'ZTF23aaknyni',
                'ZTF23aavsdtc',
                'ZTF18aandkua',
                'ZTF23aaxfewt',
                'ZTF23aavshwi',
                'ZTF22aawasao',
                'ZTF23aaxgvnt')
  AND d.drb > 0.5
  AND ((p.sgscore1 < 0.5)
       OR (p.distpsnr1 < 1)) ;
'''
query_columns = []
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i


Query executed successfully, shape: (7, 10)


Unnamed: 0,oid,candid,fid,mjd,magpsf,sigmapsf,has_stamp,drb,sgscore1,distpsnr1
0,ZTF23aavqxos,2403456661315015121,2,60157.456667,19.240726,0.181016,True,0.949127,0.024583,0.183817
1,ZTF23aavsdtc,2404379221215015002,1,60158.379225,19.22012,0.216384,True,0.879817,0.209,0.343322
2,ZTF23aavshwi,2404407764015015005,2,60158.407766,18.1681,0.067061,True,1.0,0.009583,2.024725
3,ZTF23aaxfewt,2415290061315015127,2,60169.290069,18.205078,0.063636,True,0.999312,0.134304,0.635949
4,ZTF18aandkua,2434183903015015056,1,60188.183901,17.907194,0.180156,True,0.994159,0.075167,0.326697
5,ZTF22aawasao,2754319833415015025,2,60508.319838,18.594233,0.14378,True,0.755912,0.380149,0.904783
6,ZTF18aandkua,2979404491915010015,1,60733.404491,18.887863,0.175351,True,0.696538,0.075167,1.234589


In [189]:
print(set(sorted(tables_gold['oid'])) == set(sorted(tables_i['oid'])))
print(set(sorted(tables_gold['candid'])) == set(sorted(tables_i['candid'])))

True
False


In [190]:
print(set(sorted(tables_gold['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF23aaxfewt', 'ZTF23aavshwi', 'ZTF23aavqxos', 'ZTF22aawasao', 'ZTF18aandkua', 'ZTF23aavsdtc'}
{'ZTF23aaxfewt', 'ZTF23aavshwi', 'ZTF23aavqxos', 'ZTF22aawasao', 'ZTF18aandkua', 'ZTF23aavsdtc'}


In [191]:
print(set(sorted(tables_gold['candid'])))
print(set(sorted(tables_i['candid'])))

{2448496784015015039, 2434183903015015056, 2403456661315015121, 2756269351915010002, 2415290061315015127, 2772265531915010009, 2404379221215015002, 3003155891315015002, 2404407764015015005, 2407423334015015007, 2414445984015015007, 2407462734015015009, 2419451634015015008, 2432427924015015008, 2430476444015015012, 2446471444015015013, 2419475694015015014, 2440490024015015015, 2424486604015015016, 2428469354015015016, 2414423484015015018, 2416422074015015019, 2430436074015015016, 2979404491915010015, 2754319833415015025}
{2979404491915010015, 2434183903015015056, 2403456661315015121, 2754319833415015025, 2415290061315015127, 2404379221215015002, 2404407764015015005}


In [195]:
print(set(sorted(tables_gold.columns)))
print(set(sorted(tables_i.columns)))

{'fid', 'sgscore1', 'distpsnr1', 'mjd', 'has_stamp', 'magpsf', 'drb', 'candid', 'oid', 'sigmapsf'}
{'fid', 'sgscore1', 'distpsnr1', 'mjd', 'has_stamp', 'magpsf', 'drb', 'candid', 'oid', 'sigmapsf'}


In [194]:
 # Get the identifiers from each dataframe
gold_identifiers = set(tables_gold['candid'].astype(str).values)
pred_identifiers = set(tables_i['candid'].astype(str).values)

# Calculate precision, recall, and F1 score
true_positives = len(gold_identifiers.intersection(pred_identifiers))
false_positives = len(pred_identifiers - gold_identifiers)
false_negatives = len(gold_identifiers - pred_identifiers)

precision = true_positives / (true_positives + false_positives) if true_positives + false_positives > 0 else 0
recall = true_positives / (true_positives + false_negatives) if true_positives + false_negatives > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
print(f'Precision: {precision}, Recall: {recall}, F1 Score: {f1_score}')

Precision: 1.0, Recall: 0.28, F1 Score: 0.43750000000000006


In [185]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,candid,drb,fid,mjd,magpsf,sigmapsf,has_stamp,sgscore1,distpsnr1


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,candid,fid,mjd,magpsf,sigmapsf,has_stamp,drb,sgscore1,distpsnr1


In [163]:
# Simon's version
query =  '''
-- Return requested columns for the given list of oids, with the specified filters
SELECT
    d.oid,                -- unique object identifier
    d.candid,             -- candidate identifier
    d.fid,                -- filter identifier
    d.mjd,                -- modified julian date
    d.magpsf,             -- magnitude
    d.sigmapsf,           -- magnitude error
    d.has_stamp,          -- whether the object has stamps
    d.drb,                -- deep learning real bogus score
    p.sgscore1,           -- star galaxy score of the nearest object
    p.distpsnr1           -- distance to the nearest source in panstarrs
FROM detection d
JOIN ps1_ztf p
  ON d.oid = p.oid AND d.candid = p.candid
WHERE d.oid IN (
    'ZTF23aavzgjg', 'ZTF23aaynzyk', 'ZTF23aavqxos', 'ZTF23aaknyni',
    'ZTF23aavsdtc', 'ZTF18aandkua', 'ZTF23aaxfewt', 'ZTF23aavshwi',
    'ZTF22aawasao', 'ZTF23aaxgvnt'
)
AND d.drb > 0.5
AND (
    p.sgscore1 < 0.5
    OR p.distpsnr1 < 1
)
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (7, 10)


Unnamed: 0,oid,candid,fid,mjd,magpsf,sigmapsf,has_stamp,drb,sgscore1,distpsnr1
0,ZTF23aavqxos,2403456661315015121,2,60157.456667,19.240726,0.181016,True,0.949127,0.024583,0.183817
1,ZTF23aavsdtc,2404379221215015002,1,60158.379225,19.22012,0.216384,True,0.879817,0.209,0.343322
2,ZTF23aavshwi,2404407764015015005,2,60158.407766,18.1681,0.067061,True,1.0,0.009583,2.024725
3,ZTF23aaxfewt,2415290061315015127,2,60169.290069,18.205078,0.063636,True,0.999312,0.134304,0.635949
4,ZTF18aandkua,2434183903015015056,1,60188.183901,17.907194,0.180156,True,0.994159,0.075167,0.326697
5,ZTF22aawasao,2754319833415015025,2,60508.319838,18.594233,0.14378,True,0.755912,0.380149,0.904783
6,ZTF18aandkua,2979404491915010015,1,60733.404491,18.887863,0.175351,True,0.696538,0.075167,1.234589


In [None]:
# Simon's version
query =  '''
-- Return requested columns for the given list of oids, with the specified filters
SELECT
    d.oid,                -- unique object identifier
    d.candid,             -- candidate identifier
    d.fid,                -- filter identifier
    d.mjd,                -- modified julian date
    d.magpsf,             -- magnitude
    d.sigmapsf,           -- magnitude error
    d.has_stamp,          -- whether the object has stamps
    d.drb,                -- deep learning real bogus score
    p.sgscore1,           -- star galaxy score of the nearest object
    p.distpsnr1           -- distance to the nearest source in panstarrs
FROM detection d
WHERE d.oid IN (
    'ZTF23aavzgjg',
    'ZTF23aaynzyk',
    'ZTF23aavqxos',
    'ZTF23aaknyni',
    'ZTF23aavsdtc',
    'ZTF18aandkua',
    'ZTF23aaxfewt',
    'ZTF23aavshwi',
    'ZTF22aawasao',
    'ZTF23aaxgvnt'
)
AND d.drb > 0.5
AND EXISTS (
    SELECT 1
    FROM ps1_ztf p
    WHERE p.oid = d.oid
      AND p.candid = d.candid
      AND (
            p.sgscore1 < 0.5
         OR p.distpsnr1 < 1
      )
)
-- Join to ps1_ztf in a subquery to retrieve the required columns
-- (as per instructions, avoid explicit JOINs)
ORDER BY d.oid, d.candid
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Error in query: (psycopg2.errors.UndefinedTable) missing FROM-clause entry for table "p"
LINE 12:     p.sgscore1,           -- star galaxy score of the neares...
             ^

[SQL: 
-- Return requested columns for the given list of oids, with the specified filters
SELECT
    d.oid,                -- unique object identifier
    d.candid,             -- candidate identifier
    d.fid,                -- filter identifier
    d.mjd,                -- modified julian date
    d.magpsf,             -- magnitude
    d.sigmapsf,           -- magnitude error
    d.has_stamp,          -- whether the object has stamps
    d.drb,                -- deep learning real bogus score
    p.sgscore1,           -- star galaxy score of the nearest object
    p.distpsnr1           -- distance to the nearest source in panstarrs
FROM detection d
WHERE d.oid IN (
    'ZTF23aavzgjg',
    'ZTF23aaynzyk',
    'ZTF23aavqxos',
    'ZTF23aaknyni',
    'ZTF23aavsdtc',
    'ZTF18aandkua',
    'ZTF23aaxfewt',
    '

In [None]:
'''-- Find at most 100 ZTF objects with a multiband period < 5 days in feature version 'lc_classifier_1.2.1-P'
-- Return all columns from 'probability' for these objects, only for the light curve classifier and ranking 1 or 2

SELECT *
FROM probability
WHERE oid IN (
    SELECT oid
    FROM feature
    WHERE name = 'multiband_period'
      AND value < 5
      AND version = 'lc_classifier_1.2.1-P'
)
AND classifier_name = 'lc_classifier'
AND ranking IN (1, 2)
LIMIT 100'''


In [None]:
# query 96
query = '''-- Final Query to retrieve all detections for ZTF objects within 2 arcseconds of Solar System identifiers '2003FP134' and '2009UK56'
WITH solar_system_objects AS ( -- Define the RA/Dec coordinates for the Solar System identifiers 
SELECT DISTINCT ssnamenr, oid, candid, ssdistnr, ssmagnr FROM ss_ztf WHERE ssnamenr IN ('2003FP134', '2009UK56')),
filtered_objects AS ( -- Perform spatial filtering using q3c_radial_query to find objects within 2 arcseconds 
SELECT sso.*, d.* FROM solar_system_objects sso INNER JOIN detection d ON sso.candid = d.candid 
WHERE q3c_radial_query(d.ra, d.dec, sso.ssdistnr, 0.0005556) -- 2 arcseconds in degrees
) -- Select required columns and sort the results
SELECT  sso.*,  d.mjd AS detection_date,  d.fid AS filter_identifier,  d.isdiffpos,  d.ra,  d.dec,  d.magpsf AS difference_magnitude,  d.sigmapsf AS magnitude_uncertainty 
FROM filtered_objects sso
ORDER BY sso.ssnamenr, d.mjd'''

query = '''-- Final Query to retrieve all detections for ZTF objects within 2 arcseconds of Solar System identifiers '2003FP134' and '2009UK56'
WITH catalog AS (    -- Define the Solar System objects of interest with their RA/Dec coordinates    
SELECT '2003FP134' AS source_id, ra_value1 AS ra, dec_value1 AS dec
UNION ALL    SELECT '2009UK56' AS source_id, ra_value2 AS ra, dec_value2 AS dec)
SELECT     ss_ztf.*, -- All columns from the ss_ztf table    
detection.mjd AS detection_date, -- Detection date
detection.fid AS filter_identifier, -- Filter identifier
detection.isdiffpos, -- Isdiffpos flag
detection.ra, -- RA coordinate
detection.dec, -- Dec coordinate
detection.magpsf AS difference_magnitude, -- Difference magnitude    
detection.sigmapsf AS magnitude_uncertainty -- Magnitude uncertainty
FROM     ss_ztf
INNER JOIN     catalog    ON q3c_radial_query(ss_ztf.ra, ss_ztf.dec, catalog.ra, catalog.dec, 0.0005556) -- Spatial filtering within 2 arcseconds
INNER JOIN     detection    ON ss_ztf.oid = detection.oid -- Join with detection table on oid
WHERE     ss_ztf.ssnamenr IN ('2003FP134', '2009UK56') -- Filter for specific Solar System objects
ORDER BY     ss_ztf.ssnamenr ASC, -- Sort by Solar System object name    
detection.mjd ASC -- Sort by detection date'''

query = '''-- [FINAL QUERY HERE]
SELECT ss_ztf.*, detection.mjd AS detection_date, detection.fid AS filter_identifier, detection.isdiffpos, detection.ra, detection.dec, 
detection.magpsf AS difference_magnitude, detection.sigmapsf AS magnitude_uncertainty
FROM ss_ztf
INNER JOIN detection ON ss_ztf.oid = detection.oid
WHERE ss_ztf.ssnamenr IN ('2003FP134', '2009UK56')
AND q3c_radial_query(detection.ra, detection.dec, ss_ztf.ra, ss_ztf.dec, 0.0005556)
ORDER BY ss_ztf.ssnamenr ASC, detection.mjd ASC'''


n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 7)


Unnamed: 0,oid,candid,neargaia,neargaiabright,maggaia,maggaiabright,unique1
0,ZTF18acxlskz,1917201552615010028,0.74125,-999.0,15.321053,-999.0,False


In [None]:
db_test[db_test.req_id == 90].request.values[0]

"Find at most 100 ZTF objects that have a multiband period lower than 5 days in the 'lc_classifier_1.2.1-P' feature version. Return all columns from the 'probability' table for such objects, including only data for the light curve classifier, with rankings either 1 or 2"

In [None]:
print(db_test[db_test.req_id == 90].gold_query.values[0])

SELECT
    *
FROM
    probability
WHERE
    classifier_name = 'lc_classifier'
    AND ranking <= 2
    AND oid in
(
SELECT
    oid
FROM
    feature
WHERE
    name = 'Multiband_period'
    AND version = 'lc_classifier_1.2.1-P'
    AND value < 5
LIMIT 100
)


In [None]:
db_test[db_test.req_id == 90].request.values[0]


"Find at most 100 ZTF objects that have a multiband period lower than 5 days in the 'lc_classifier_1.2.1-P' feature version. Return all columns from the 'probability' table for such objects, including only data for the light curve classifier, with rankings either 1 or 2"

In [None]:
# query 90
query = '''SELECT DISTINCT ON (oid)
    *
FROM
    probability
WHERE
    classifier_name = 'lc_classifier'
    AND ranking <= 2
    AND oid in
(
SELECT
    oid
FROM
    feature
WHERE
    name = 'Multiband_period'
    AND version = 'lc_classifier_1.2.1-P'
    AND value < 5
LIMIT 100
)'''
# # run 1 simon 
# # mutiband_period with lower case
# # limit 100 at the end, not inside the subquery
# query='''
# -- Final Query to retrieve at most 100 ZTF objects with a multiband period lower than 5 days
# -- in the 'lc_classifier_1.2.1-P' feature version, returning all columns from the 'probability' table
# -- for the light curve classifier with rankings 1 or 2.
# WITH filtered_features AS (    -- Subquery 1: Filter objects based on multiband period
#     SELECT oid    FROM feature    WHERE name = 'multiband_period'      AND value < 5
#     AND version = 'lc_classifier_1.2.1-P'),
#     filtered_probabilities AS (    -- Subquery 2: Filter probabilities for the light curve classifier
#     SELECT *    FROM probability    WHERE classifier_name = 'lc_classifier'
#     AND ranking IN (1, 2)) -- Combine the filters using an INNER JOIN
#     SELECT p.*
#     FROM filtered_probabilities p
#     INNER JOIN filtered_features f
#     ON p.oid = f.oid-- Limit the results to at most 100 objects
#     LIMIT 100
# '''
# # run 2 simon
# # mutiband_period with upper case
# # limit 100 at the end, not inside the subquery
# query='''
# -- Final Query to retrieve at most 100 ZTF objects with a multiband period lower than 5 days
# -- in the 'lc_classifier_1.2.1-P' feature version, returning all columns from the 'probability' table
# -- for the light curve classifier with rankings 1 or 2.
# WITH filtered_features AS (    -- Subquery 1: Filter objects based on multiband period
#     SELECT oid    FROM feature
#         WHERE name = 'Multiband_period'
#               AND value < 5
#               AND version = 'lc_classifier_1.2.1-P'),
#               filtered_probabilities AS (    -- Subquery 2: Filter probabilities for the light curve classifier
#                 SELECT *    FROM probability    
#                 WHERE classifier_name = 'lc_classifier'      
#                 AND ranking IN (1, 2))-- Combine the filters using an INNER JOIN
#                 SELECT p.*
#                 FROM filtered_probabilities p
#                 INNER JOIN filtered_features f
#                 ON p.oid = f.oid -- Limit the results to at most 100 objects
#                 LIMIT 100
# '''
# # run 3 simon
# query='''
# -- Final Query to retrieve at most 100 ZTF objects with a multiband period lower than 5 days
# -- in the 'lc_classifier_1.2.1-P' feature version, returning all columns from the 'probability' table
# -- for the light curve classifier with rankings of 1 or 2.
# WITH filtered_features AS (    -- Subquery 1: Filter the 'feature' table for objects with multiband_period < 5
#     SELECT oid    FROM feature    
#     WHERE name = 'Multiband_period'      
#     AND value < 5      
#     AND version = 'lc_classifier_1.2.1-P'),
#     filtered_probabilities AS (    -- Subquery 2: Filter the 'probability' table for the light curve classifier with rankings 1 or 2
#         SELECT *    FROM probability    
#         WHERE classifier_name = 'lc_classifier'      
#         AND (ranking = 1 OR ranking = 2))-- Final Query: Join the filtered results and limit to 100 rows
#         SELECT p.*
#         FROM filtered_probabilities p
#         INNER JOIN filtered_features f
#         ON p.oid = f.oid
#         LIMIT 100
# '''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (100, 6)


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF17aaaessu,lc_classifier,lc_classifier_1.1.13,CEP,0.225984,2
1,ZTF17aaagqxl,lc_classifier,hierarchical_rf_1.1.0,E,0.317112,1
2,ZTF17aabwftj,lc_classifier,lc_classifier_1.1.13,Periodic-Other,0.262268,1
3,ZTF17aacpvbf,lc_classifier,hierarchical_rf_1.1.0,E,0.494016,1
4,ZTF18aaydigk,lc_classifier,hierarchical_rf_1.1.0,YSO,0.494768,1
...,...,...,...,...,...,...
95,ZTF21aaaqswa,lc_classifier,hierarchical_rf_1.1.0,AGN,0.132756,2
96,ZTF21aaguuxw,lc_classifier,hierarchical_rf_1.1.0,Periodic-Other,0.280500,1
97,ZTF21abcdsuv,lc_classifier,hierarchical_rf_1.1.0,E,0.426688,1
98,ZTF22aabwnvh,lc_classifier,hierarchical_rf_1.1.0,QSO,0.678160,1


In [None]:
# query 90
query = '''SELECT 
    *
FROM
    probability
WHERE
    classifier_name = 'lc_classifier'
    AND ranking <= 2
    AND oid in
(
SELECT
    oid
FROM
    feature
WHERE
    name = 'Multiband_period'
    AND version = 'lc_classifier_1.2.1-P'
    AND value < 5
LIMIT 100
)'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (346, 6)


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF19abdyprc,lc_classifier,lc_classifier_1.1.13,CEP,0.161280,2
1,ZTF19abdyprc,lc_classifier,hierarchical_rf_1.1.0,DSCT,0.283360,1
2,ZTF19abdyprc,lc_classifier,lc_classifier_1.1.13,RRL,0.190720,1
3,ZTF19abdyprc,lc_classifier,hierarchical_rf_1.1.0,RRL,0.227040,2
4,ZTF18abadjvy,lc_classifier,hierarchical_rf_1.1.0,E,0.093744,2
...,...,...,...,...,...,...
341,ZTF18abasovn,lc_classifier,hierarchical_rf_1.1.0,E,0.424664,1
342,ZTF18abasovn,lc_classifier,lc_classifier_1.1.13,E,0.309624,1
343,ZTF18abasovn,lc_classifier,hierarchical_rf_1.1.0,RRL,0.188956,2
344,ZTF22aabwnvh,lc_classifier,hierarchical_rf_1.1.0,Blazar,0.205800,2


In [None]:
sorted(tables_i.oid.unique())

['ZTF17aaaessu',
 'ZTF17aaagqxl',
 'ZTF17aabwftj',
 'ZTF17aacpvbf',
 'ZTF18aaydigk',
 'ZTF18aayhppd',
 'ZTF18aaynqnn',
 'ZTF18aaynqox',
 'ZTF18aaypmtr',
 'ZTF18aayujpj',
 'ZTF18aayutyl',
 'ZTF18aayuugb',
 'ZTF18aayvacc',
 'ZTF18aayvbrz',
 'ZTF18aazwigp',
 'ZTF18abadjvy',
 'ZTF18abaqmri',
 'ZTF18abasovn',
 'ZTF18abcxguj',
 'ZTF18abgsptz',
 'ZTF18abilxvt',
 'ZTF18abivpiy',
 'ZTF18abkjhnt',
 'ZTF18abkmmts',
 'ZTF18ablpmbs',
 'ZTF18ablqndm',
 'ZTF18ablvyyw',
 'ZTF18abmjosl',
 'ZTF18abmrcsn',
 'ZTF18abommtg',
 'ZTF18aboskew',
 'ZTF18abpogdx',
 'ZTF18abrnfpd',
 'ZTF18abrrinj',
 'ZTF18abscvmd',
 'ZTF18absitkp',
 'ZTF18absoocn',
 'ZTF18abspqgp',
 'ZTF18abvgrec',
 'ZTF18abvztqd',
 'ZTF18acajzhc',
 'ZTF18acavigy',
 'ZTF18acdasee',
 'ZTF18acfrjme',
 'ZTF18achfgss',
 'ZTF18achixag',
 'ZTF18aciepmk',
 'ZTF18acnambs',
 'ZTF18acrugci',
 'ZTF18acsbrqc',
 'ZTF18actytgu',
 'ZTF18acuwvpr',
 'ZTF18acwbhzi',
 'ZTF18adamiis',
 'ZTF18adamzmn',
 'ZTF18adaoqcm',
 'ZTF18adapbif',
 'ZTF18adapnnv',
 'ZTF18adaqdfq

In [None]:
sorted(tables_i.oid.unique())

['ZTF17aaaessu',
 'ZTF17aaagqxl',
 'ZTF17aabwftj',
 'ZTF17aacpvbf',
 'ZTF18aaydigk',
 'ZTF18aayhppd',
 'ZTF18aaynqnn',
 'ZTF18aaynqox',
 'ZTF18aaypmtr',
 'ZTF18aayujpj',
 'ZTF18aayutyl',
 'ZTF18aayuugb',
 'ZTF18aayvacc',
 'ZTF18aayvbrz',
 'ZTF18aazwigp',
 'ZTF18abadjvy',
 'ZTF18abaqmri',
 'ZTF18abasovn',
 'ZTF18abcxguj',
 'ZTF18abgsptz',
 'ZTF18abilxvt',
 'ZTF18abivpiy',
 'ZTF18abkjhnt',
 'ZTF18abkmmts',
 'ZTF18ablpmbs',
 'ZTF18ablqndm',
 'ZTF18ablvyyw',
 'ZTF18abmjosl',
 'ZTF18abmrcsn',
 'ZTF18abommtg',
 'ZTF18aboskew',
 'ZTF18abpogdx',
 'ZTF18abrnfpd',
 'ZTF18abrrinj',
 'ZTF18abscvmd',
 'ZTF18absitkp',
 'ZTF18absoocn',
 'ZTF18abspqgp',
 'ZTF18abvgrec',
 'ZTF18abvztqd',
 'ZTF18acajzhc',
 'ZTF18acavigy',
 'ZTF18acdasee',
 'ZTF18acfrjme',
 'ZTF18achfgss',
 'ZTF18achixag',
 'ZTF18aciepmk',
 'ZTF18acnambs',
 'ZTF18acrugci',
 'ZTF18acsbrqc',
 'ZTF18actytgu',
 'ZTF18acuwvpr',
 'ZTF18acwbhzi',
 'ZTF18adamiis',
 'ZTF18adamzmn',
 'ZTF18adaoqcm',
 'ZTF18adapbif',
 'ZTF18adapnnv',
 'ZTF18adaqdfq

In [None]:
# query 84
query = '''
SELECT
    oid, xmatch.oid_catalog, dist,
    w1mpro, w2mpro, w3mpro, w4mpro
FROM
    xmatch INNER JOIN
    allwise
    ON xmatch.oid_catalog = allwise.oid_catalog
WHERE
    xmatch.oid = 'ZTF19aascdol'
'''
# # run 2 simon 
query = '''
-- Retrieve information about the ZTF object 'ZTF19aascdol' and its ALLWISE match(es)
SELECT     object.oid AS ztf_identifier, -- ZTF object identifier    
allwise.oid_catalog AS allwise_identifier, -- ALLWISE catalog identifier    
q3c_dist(object.meanra, object.meandec, allwise.ra, allwise.dec) AS distance, -- Angular distance between counterparts    
allwise.w1mpro, -- WISE W1 magnitude    
allwise.w2mpro, -- WISE W2 magnitude    
allwise.w3mpro, -- WISE W3 magnitude    
allwise.w4mpro  -- WISE W4 magnitude
FROM     object
INNER JOIN     allwise
ON     q3c_join(object.meanra, object.meandec, allwise.ra, allwise.dec, 0.00138889) -- Match within 5 arcseconds (0.00138889 degrees)
WHERE     object.oid = 'ZTF19aascdol' -- Filter for the specific ZTF object
ORDER BY     distance ASC -- Sort by angular distance to prioritize the closest match
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 7)


Unnamed: 0,ztf_identifier,allwise_identifier,distance,w1mpro,w2mpro,w3mpro,w4mpro
0,ZTF19aascdol,J154334.93+152539.5,3.8e-05,15.286,14.354,11.747,8.611


Query 27

In [None]:
# query 27
query = '''
SELECT
    object.oid, object.meanra, object.meandec, object.ndet,
    object.firstMJD, object.deltajd, object.g_r_max,
    probability.classifier_name, probability.class_name,
    probability.ranking, probability.probability
FROM
    object INNER JOIN probability
    ON object.oid = probability.oid
WHERE
    probability.classifier_name = 'lc_classifier'
    AND object.ndet >= 100
    AND object.oid IN
(
SELECT
    oid
FROM
    probability
WHERE
    classifier_name = 'lc_classifier'
    AND class_name IN ('CEP')
    AND ranking = 1
    AND probability > 0.76
)
'''
n = 3
gold_table, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {gold_table.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": gold_table.columns.values.tolist(), "shape": gold_table.shape})
gold_table

Query executed successfully, shape: (5565, 11)


Unnamed: 0,oid,meanra,meandec,ndet,firstmjd,deltajd,g_r_max,classifier_name,class_name,ranking,probability
0,ZTF18abcjwgb,342.022389,61.750446,136,58289.422477,2394.697431,,lc_classifier,AGN,14,0.000104
1,ZTF18abcjwgb,342.022389,61.750446,136,58289.422477,2394.697431,,lc_classifier,AGN,15,0.000976
2,ZTF18abcjwgb,342.022389,61.750446,136,58289.422477,2394.697431,,lc_classifier,Blazar,13,0.000312
3,ZTF18abcjwgb,342.022389,61.750446,136,58289.422477,2394.697431,,lc_classifier,Blazar,13,0.003416
4,ZTF18abcjwgb,342.022389,61.750446,136,58289.422477,2394.697431,,lc_classifier,CEP,1,0.791208
...,...,...,...,...,...,...,...,...,...,...,...
5560,ZTF18achaojc,333.447494,55.410730,133,58432.181667,2106.183576,,lc_classifier,SNIbc,12,0.000452
5561,ZTF18achaojc,333.447494,55.410730,133,58432.181667,2106.183576,,lc_classifier,SNII,10,0.017820
5562,ZTF18achaojc,333.447494,55.410730,133,58432.181667,2106.183576,,lc_classifier,SNII,12,0.000452
5563,ZTF18achaojc,333.447494,55.410730,133,58432.181667,2106.183576,,lc_classifier,YSO,2,0.092564


In [None]:
# query 27
query = '''
/* Objects with ndet >= 100 whose most likely lc_classifier class is CEP with probability > 0.76 */ WITH eligible AS
  (SELECT pr.oid,
          pr.classifier_version
   FROM probability pr
   WHERE pr.classifier_name = 'lc_classifier'
     AND pr.class_name = 'CEP'
     AND pr.ranking = 1
     AND pr.probability > 0.76
     AND pr.oid IN
       (SELECT o.oid
        FROM OBJECT o
        WHERE o.ndet >= 100))
SELECT p.oid, /* Object-level columns retrieved via scalar subqueries to avoid explicit JOINs */
  (SELECT o.meanra
   FROM OBJECT o
   WHERE o.oid = p.oid) AS meanra,

  (SELECT o.meandec
   FROM OBJECT o
   WHERE o.oid = p.oid) AS meandec,

  (SELECT o.ndet
   FROM OBJECT o
   WHERE o.oid = p.oid) AS ndet,

  (SELECT o.firstmjd
   FROM OBJECT o
   WHERE o.oid = p.oid) AS firstmjd,

  (SELECT o.deltajd
   FROM OBJECT o
   WHERE o.oid = p.oid) AS deltajd,

  (SELECT o.g_r_max
   FROM OBJECT o
   WHERE o.oid = p.oid) AS g_r_max, /* Probability-level columns for each lc_classifier class and eligible version */  p.classifier_name,
                                                                                                                       p.class_name,
                                                                                                                       p.ranking,
                                                                                                                       p.probability
FROM probability p
WHERE p.classifier_name = 'lc_classifier'
  AND EXISTS
    (SELECT 1
     FROM eligible e
     WHERE e.oid = p.oid
       AND e.classifier_version = p.classifier_version)
ORDER BY p.oid,
         p.ranking;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (2850, 11)


Unnamed: 0,oid,meanra,meandec,ndet,firstmjd,deltajd,g_r_max,classifier_name,class_name,ranking,probability
0,ZTF17aaadxzc,78.863275,37.372453,556,58334.446481,2594.983889,,lc_classifier,CEP,1,0.82368
1,ZTF17aaadxzc,78.863275,37.372453,556,58334.446481,2594.983889,,lc_classifier,Periodic-Other,2,0.06144
2,ZTF17aaadxzc,78.863275,37.372453,556,58334.446481,2594.983889,,lc_classifier,YSO,3,0.03648
3,ZTF17aaadxzc,78.863275,37.372453,556,58334.446481,2594.983889,,lc_classifier,E,4,0.03072
4,ZTF17aaadxzc,78.863275,37.372453,556,58334.446481,2594.983889,,lc_classifier,RRL,5,0.02112
...,...,...,...,...,...,...,...,...,...,...,...
2845,ZTF21aaatwek,109.247183,-15.307077,111,59194.398553,1787.018183,,lc_classifier,QSO,10,0.00024
2846,ZTF21aaatwek,109.247183,-15.307077,111,59194.398553,1787.018183,,lc_classifier,SNIbc,11,0.00000
2847,ZTF21aaatwek,109.247183,-15.307077,111,59194.398553,1787.018183,,lc_classifier,SNIa,11,0.00000
2848,ZTF21aaatwek,109.247183,-15.307077,111,59194.398553,1787.018183,,lc_classifier,SNII,11,0.00000


In [None]:
set(sorted(gold_table['oid'])) == set(sorted(tables_i['oid']))

True

In [None]:
# check duplicated rows
gold_table[gold_table.duplicated(keep=False)]

Unnamed: 0,oid,meanra,meandec,ndet,firstmjd,deltajd,g_r_max,classifier_name,class_name,ranking,probability
706,ZTF18abmnjay,335.060209,60.770549,260,58313.446644,2353.720475,,lc_classifier,QSO,15,0.0
707,ZTF18abmnjay,335.060209,60.770549,260,58313.446644,2353.720475,,lc_classifier,QSO,15,0.0
1696,ZTF18aaxjixb,301.018689,39.104179,488,58266.469942,2681.783009,,lc_classifier,QSO,15,0.0
1697,ZTF18aaxjixb,301.018689,39.104179,488,58266.469942,2681.783009,,lc_classifier,QSO,15,0.0
1895,ZTF19aauyssj,272.444532,-18.377319,185,58617.394907,2333.725139,,lc_classifier,SLSN,12,0.0
1896,ZTF19aauyssj,272.444532,-18.377319,185,58617.394907,2333.725139,,lc_classifier,SLSN,12,0.0
2911,ZTF18abjrfdd,29.628958,57.819833,222,58325.449745,2589.001354,,lc_classifier,QSO,15,0.0
2912,ZTF18abjrfdd,29.628958,57.819833,222,58325.449745,2589.001354,,lc_classifier,QSO,15,0.0
2971,ZTF18abiljam,17.045782,62.269888,727,58319.409803,2655.810417,,lc_classifier,QSO,15,0.0
2972,ZTF18abiljam,17.045782,62.269888,727,58319.409803,2655.810417,,lc_classifier,QSO,15,0.0


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,meanra,meandec,ndet,firstmjd,deltajd,g_r_max,classifier_name,class_name,ranking,probability


query 37

In [None]:
# query 37
query = '''

SELECT
    *
FROM
  probability
WHERE
    classifier_name = 'lc_classifier'
    AND class_name IN ('SNIa')
    AND ranking=1
ORDER BY probability DESC
LIMIT 200

'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (200, 6)


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF20acobvxk,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.722000,1
1,ZTF22aalpfln,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.722000,1
2,ZTF21abuyhau,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.720000,1
3,ZTF21abywdxt,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.716564,1
4,ZTF22abqdmwt,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.714568,1
...,...,...,...,...,...,...
195,ZTF21aaufthj,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.636000,1
196,ZTF21aarmkuj,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.636000,1
197,ZTF20abgfvav,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.636000,1
198,ZTF20abewogd,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.636000,1


In [None]:
# query 37
query = '''
-- Top 200 unique objects classified as SNIa by the light curve classifier (ranking=1),
-- ordered by probability in descending order. DISTINCT ON ensures one row per oid
WITH snias AS
  (SELECT DISTINCT ON (oid) *
   FROM probability
   WHERE classifier_name = 'lc_classifier'
     AND class_name = 'SNIa'
     AND ranking = 1
   ORDER BY oid,
            probability DESC)
SELECT *
FROM snias
ORDER BY probability DESC
LIMIT 200;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (200, 6)


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF20acobvxk,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.722000,1
1,ZTF22aalpfln,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.722000,1
2,ZTF21abuyhau,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.720000,1
3,ZTF21abywdxt,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.716564,1
4,ZTF22abqdmwt,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.714568,1
...,...,...,...,...,...,...
195,ZTF20abgfvav,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.636000,1
196,ZTF18abmmdif,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.636000,1
197,ZTF21aarmkuj,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.636000,1
198,ZTF23aaznifc,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.636000,1


In [None]:
set(sorted(tables_gold['oid'])) == set(sorted(tables_i['oid']))

False

In [None]:
print(set(sorted(tables_gold['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF22abhxptt', 'ZTF23abjaule', 'ZTF22abjeeqt', 'ZTF22aaknvcz', 'ZTF22abvebdc', 'ZTF23aapmokr', 'ZTF23abgspys', 'ZTF19aavnwxf', 'ZTF20acwxhka', 'ZTF21achhwdz', 'ZTF23abhvzbm', 'ZTF20abqcvtk', 'ZTF23aakawpn', 'ZTF23abclpxs', 'ZTF20aairvsi', 'ZTF18abfchzq', 'ZTF18aazjztm', 'ZTF23abcegjv', 'ZTF22abivrnq', 'ZTF19abitbcj', 'ZTF23aakcszy', 'ZTF21aaqvmix', 'ZTF20abndzyc', 'ZTF22aboisvs', 'ZTF19abakjao', 'ZTF21aarcldd', 'ZTF20acrdemq', 'ZTF20acynaba', 'ZTF19abahvdh', 'ZTF20abisvtr', 'ZTF21aasctwo', 'ZTF21abwurky', 'ZTF20abupbub', 'ZTF20acwxbyr', 'ZTF22aalpfln', 'ZTF20acwmpsj', 'ZTF23abfroaw', 'ZTF20acgzjje', 'ZTF22abyuxlq', 'ZTF21abufayv', 'ZTF21abcgxwy', 'ZTF23abjhvzr', 'ZTF20abgfvav', 'ZTF18abdfwur', 'ZTF19achejoc', 'ZTF22aambnsg', 'ZTF22abnemvx', 'ZTF23abijnsm', 'ZTF22aaonqcc', 'ZTF20abjcuep', 'ZTF21aaqzkhx', 'ZTF21aaujuqq', 'ZTF21abbzcxe', 'ZTF22abcuctb', 'ZTF23abhtsna', 'ZTF19aazzpje', 'ZTF20acpumut', 'ZTF20actpatx', 'ZTF20aamgdkb', 'ZTF22abiyeae', 'ZTF21abrhggj', 'ZTF20acnzcmp', 'ZTF23a

In [None]:
'ZTF21aaufvpg'
'ZTF23aaltnze'

'ZTF23aaltnze'

In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking


query 30

In [8]:
# query 30
query = '''
select oid, class_name, probability from probability where classifier_name = 'lc_classifier' and ranking = 1;
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (4415545, 3)


Unnamed: 0,oid,class_name,probability
0,ZTF18abaqeom,YSO,0.412920
1,ZTF18abfnzcn,Periodic-Other,0.249480
2,ZTF19abxxedn,AGN,0.751824
3,ZTF19aaxtxon,LPV,0.496992
4,ZTF18abeqxaw,Periodic-Other,0.227136
...,...,...,...
4415540,ZTF18abcxkxk,Periodic-Other,0.361176
4415541,ZTF18abnztbs,LPV,0.397760
4415542,ZTF18admflab,E,0.588440
4415543,ZTF18abcjubt,RRL,0.526640


In [7]:
# query 
query = '''
SELECT probability.oid,
       probability.class_name,
       probability.probability
FROM probability
WHERE probability.classifier_name = 'lc_classifier'
  AND probability.ranking = 1;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (4415499, 3)


Unnamed: 0,oid,class_name,probability
0,ZTF18abctuqk,LPV,0.433440
1,ZTF18aaoyghm,E,0.569908
2,ZTF18abnbmyd,LPV,0.419440
3,ZTF18abnxksq,E,0.257480
4,ZTF19abrgkvo,YSO,0.245604
...,...,...,...
4415494,ZTF18abdkebi,Periodic-Other,0.237168
4415495,ZTF20abahjuf,YSO,0.354496
4415496,ZTF18abrhwht,Periodic-Other,0.594392
4415497,ZTF18abchmtr,LPV,0.705600


In [5]:
set(sorted(tables_gold['oid'])) == set(sorted(tables_i['oid']))

TypeError: 'NoneType' object is not subscriptable

In [None]:
# print(set(sorted(tables_gold['oid'])))
# print(set(sorted(tables_i['oid'])))

In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,class_name,probability
10269,ZTF21abxetsc,QSO,0.621504
11206,ZTF18ablwbkr,Periodic-Other,0.505080
12737,ZTF22aaakakr,YSO,0.445408
34310,ZTF20abaommt,AGN,0.602792
37501,ZTF18acjxmtb,Periodic-Other,0.409696
...,...,...,...
4364249,ZTF18abtnoqt,Periodic-Other,0.371680
4379931,ZTF19aanxqeq,QSO,0.730000
4392056,ZTF19adcxzzu,QSO,0.736000
4394873,ZTF21aazcdyd,QSO,0.589248


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,class_name,probability
9395,ZTF21abxetsc,QSO,0.621504
11203,ZTF18ablwbkr,Periodic-Other,0.505080
14738,ZTF22aaakakr,YSO,0.445408
34969,ZTF20abaommt,AGN,0.602792
37500,ZTF18acjxmtb,Periodic-Other,0.409696
...,...,...,...
4364260,ZTF18abtnoqt,Periodic-Other,0.371680
4379925,ZTF19aanxqeq,QSO,0.730000
4392081,ZTF19adcxzzu,QSO,0.736000
4394874,ZTF21aazcdyd,QSO,0.589248


query 22

In [None]:
# query 30
query = '''
SELECT
    *
FROM
    object INNER JOIN (
    SELECT
        *
    FROM
        probability
    WHERE
        classifier_name='lc_classifier'
        AND ranking=1
    ) AS probability_oid
    ON object.oid=probability_oid.oid
WHERE
    probability_oid.classifier_name='lc_classifier'
    AND object.firstmjd BETWEEN 58239.0 AND 58270.0
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (104921, 28)


Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,lastmjd,step_id_corr,diffpos,reference_change,oid.1,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF20abnlspq,65,2036,58300.391308,60353.549838,True,False,766,,,...,60408.447222,23.12.25,,,ZTF20abnlspq,lc_classifier,lc_classifier_1.1.13,LPV,0.517188,1
1,ZTF18aanvtig,1357,5498,58204.484039,60980.174178,True,False,694,,,...,60980.174178,27.5.6,False,False,ZTF18aanvtig,lc_classifier,lc_classifier_1.1.13,Periodic-Other,0.200192,1
2,ZTF18aavlkyp,252,1015,58247.453750,60901.186528,True,False,188,,,...,60901.186528,27.5.6,False,False,ZTF18aavlkyp,lc_classifier,lc_classifier_1.1.13,LPV,0.627640,1
3,ZTF18aawvbel,704,2194,58218.510556,60980.098414,True,False,469,,,...,60980.098414,27.5.6,False,True,ZTF18aawvbel,lc_classifier,lc_classifier_1.1.13,LPV,0.807128,1
4,ZTF18aavznxg,677,4007,58209.516817,60980.098414,True,False,369,,,...,60980.098414,27.5.6,False,False,ZTF18aavznxg,lc_classifier,lc_classifier_1.1.13,Periodic-Other,0.247736,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104916,ZTF18aapowib,1810,3452,58205.518935,60976.115937,True,False,818,,,...,60976.115937,27.5.6,False,False,ZTF18aapowib,lc_classifier,lc_classifier_1.1.13,RRL,0.493056,1
104917,ZTF18aaxarxa,394,2037,58209.516817,60561.236968,True,False,131,,,...,60561.236968,24.5.2a6,False,True,ZTF18aaxarxa,lc_classifier,lc_classifier_1.1.13,LPV,0.625368,1
104918,ZTF18aaxawlu,1089,4205,58254.440567,60980.171806,True,False,699,,,...,60980.171806,27.5.6,False,True,ZTF18aaxawlu,lc_classifier,lc_classifier_1.1.13,Periodic-Other,0.524896,1
104919,ZTF18aaavvya,2640,4000,58156.492095,60973.107512,True,False,768,,,...,60973.107512,27.5.6,False,False,ZTF18aaavvya,lc_classifier,lc_classifier_1.1.13,RRL,0.461964,1


In [None]:
# query 
query = '''
-- Objects first detected between 2018-06-01 (MJD 58239.0) and 2018-07-01 (MJD 58270.0), inclusive,
-- that have a Light Curve classifier ('lc_classifier') probability entry with top rank (ranking = 1).
-- We return all columns from the probability table for these objects.

SELECT *
FROM probability
WHERE classifier_name = 'lc_classifier'
  AND ranking = 1
  AND oid IN
    (SELECT oid
     FROM OBJECT
     WHERE firstmjd BETWEEN 58239.0 AND 58270.0);
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (104921, 6)


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF20abnlspq,lc_classifier,lc_classifier_1.1.13,LPV,0.517188,1
1,ZTF18aavlkyp,lc_classifier,lc_classifier_1.1.13,LPV,0.627640,1
2,ZTF18aawvbel,lc_classifier,lc_classifier_1.1.13,LPV,0.807128,1
3,ZTF18aavznxg,lc_classifier,lc_classifier_1.1.13,Periodic-Other,0.247736,1
4,ZTF18aanvtig,lc_classifier,lc_classifier_1.1.13,Periodic-Other,0.200192,1
...,...,...,...,...,...,...
104916,ZTF18aawbjqv,lc_classifier,lc_classifier_1.1.13,LPV,0.782400,1
104917,ZTF22aabchym,lc_classifier,lc_classifier_1.1.13,LPV,0.420336,1
104918,ZTF18aaavvya,lc_classifier,lc_classifier_1.1.13,RRL,0.461964,1
104919,ZTF18aaxarxa,lc_classifier,lc_classifier_1.1.13,LPV,0.625368,1


In [None]:
set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid']))

True

In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF18aavqeof', 'ZTF18aaiowhx', 'ZTF18aajsvxp', 'ZTF18aakzryb', 'ZTF18aaxluai', 'ZTF18aakjxiu', 'ZTF18aakjzif', 'ZTF18aavefdj', 'ZTF18aaxakvu', 'ZTF18aaxalai', 'ZTF18abddqrn', 'ZTF18aawmdgg', 'ZTF18aawlvvx', 'ZTF18aantvcp', 'ZTF18aaatyec', 'ZTF18aajssxv', 'ZTF18aawbrfa', 'ZTF18aavzojb', 'ZTF18aagwoom', 'ZTF18aavqmpx', 'ZTF18aavlldf', 'ZTF18aaxdnyc', 'ZTF18aaoyupy', 'ZTF18aavefvo', 'ZTF18aabylss', 'ZTF18aaxdpoy', 'ZTF18aawsumu', 'ZTF18aabemyu', 'ZTF18aasnink', 'ZTF17aabwpae', 'ZTF18aasnmre', 'ZTF18aawalpv', 'ZTF18aawkhls', 'ZTF18aaueasa', 'ZTF18aasjsjw', 'ZTF18aagsgga', 'ZTF18aaxdcyv', 'ZTF18aavzjmg', 'ZTF18aavqmta', 'ZTF18aahtzgp', 'ZTF18aawccmf', 'ZTF18aapvlbc', 'ZTF18aawvaov', 'ZTF18aanvjqj', 'ZTF18aawcneb', 'ZTF18aaxllxz', 'ZTF18aauymhd', 'ZTF18aavlinr', 'ZTF18aaudzfx', 'ZTF18aaqfibb', 'ZTF18aavqngw', 'ZTF18aawsvip', 'ZTF18aakgvtl', 'ZTF18aavlgkx', 'ZTF17aabtupx', 'ZTF18aasnqmd', 'ZTF18aawbmra', 'ZTF18aaxjhns', 'ZTF18aanveam', 'ZTF17aaclnpe', 'ZTF18aagthmj', 'ZTF18aasnngm', 'ZTF18a

In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,lastmjd,step_id_corr,diffpos,reference_change,oid.1,classifier_name,classifier_version,class_name,probability,ranking


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking


query 75

In [None]:
# query gold
query = '''
SELECT
    *
FROM
    ss_ztf
WHERE
    ssnamenr != 'null'
LIMIT 100
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (100, 5)


Unnamed: 0,oid,candid,ssdistnr,ssmagnr,ssnamenr
0,ZTF21aaslbdt,1554482515915015054,0.0,19.3,14915
1,ZTF18absunga,611270055815015029,0.0,18.3,9880
2,ZTF25abvjjii,3197329683215015008,0.0,19.7,100942
3,ZTF18absupre,611272755915015033,0.0,15.7,1662
4,ZTF18absutny,611272753715015047,0.0,18.3,48375
...,...,...,...,...,...
95,ZTF18abtdwyx,611311630515015042,0.0,20.0,2002NG83
96,ZTF25abvjlnm,3197332113115015020,0.0,19.1,151429
97,ZTF18abtdxwk,611310264415015021,0.0,18.2,108564
98,ZTF24abecxqz,2798432250315015002,0.0,19.8,276991


In [None]:
# query 
query = '''
-- Get up to 100 rows with a known (non-null) nearest Solar System object name
-- Note: ssnamenr may store the literal string 'null' for unknowns, so we exclude that too

SELECT *
FROM ss_ztf
WHERE ssnamenr IS NOT NULL
  AND lower(ssnamenr) <> 'null'
LIMIT 100;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (100, 5)


Unnamed: 0,oid,candid,ssdistnr,ssmagnr,ssnamenr
0,ZTF21aaslbdt,1554482515915015054,0.0,19.3,14915
1,ZTF18absunga,611270055815015029,0.0,18.3,9880
2,ZTF25abvjjii,3197329683215015008,0.0,19.7,100942
3,ZTF18absupre,611272755915015033,0.0,15.7,1662
4,ZTF18absutny,611272753715015047,0.0,18.3,48375
...,...,...,...,...,...
95,ZTF18abtdwyx,611311630515015042,0.0,20.0,2002NG83
96,ZTF25abvjlnm,3197332113115015020,0.0,19.1,151429
97,ZTF18abtdxwk,611310264415015021,0.0,18.2,108564
98,ZTF24abecxqz,2798432250315015002,0.0,19.8,276991


In [None]:
set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid']))

True

In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF18abtglud', 'ZTF18abtmftz', 'ZTF18abtmnhr', 'ZTF24abecxuj', 'ZTF24abedwzd', 'ZTF18abtmvfd', 'ZTF25abvjjqm', 'ZTF18abtgbdu', 'ZTF18abtgojq', 'ZTF18absuxzq', 'ZTF24abecycj', 'ZTF18abtotyq', 'ZTF18absvbrh', 'ZTF18abtmqek', 'ZTF21aaslata', 'ZTF24abdsdpp', 'ZTF21aaslaso', 'ZTF18abtnehw', 'ZTF21aaslahb', 'ZTF18ablmiyh', 'ZTF18abtdwyx', 'ZTF18abtplva', 'ZTF25abvjwfe', 'ZTF18abtqlla', 'ZTF18abtncfx', 'ZTF21aaskzzk', 'ZTF25abvjuau', 'ZTF24abedhun', 'ZTF18abtgmiq', 'ZTF18abdtkxd', 'ZTF18abtjqte', 'ZTF18abtmswv', 'ZTF18abtnuaf', 'ZTF21aaslbdt', 'ZTF18abtlnzo', 'ZTF18abthmgl', 'ZTF24abedxou', 'ZTF18abtnnic', 'ZTF18abtmlko', 'ZTF24abedxol', 'ZTF18abtjnju', 'ZTF18abtnege', 'ZTF24abedxlr', 'ZTF25abvjlqg', 'ZTF18abthvom', 'ZTF18abtljqr', 'ZTF21aaskzeu', 'ZTF24abdsbxg', 'ZTF21aaskzfj', 'ZTF24abecxzo', 'ZTF25abvjwjg', 'ZTF18absunga', 'ZTF18absupre', 'ZTF18abtqjvn', 'ZTF21aaskzbe', 'ZTF21aaskzko', 'ZTF21aaslcnw', 'ZTF21aaskzet', 'ZTF18abtphhj', 'ZTF18abtqkwy', 'ZTF18abszfkr', 'ZTF18abtjpue', 'ZTF18a

In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,candid,ssdistnr,ssmagnr,ssnamenr


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,candid,ssdistnr,ssmagnr,ssnamenr


query 43

In [None]:
# query gold
query = '''
SELECT
    oid, candid, xpos, ypos, scorr
FROM
    dataquality
WHERE
    oid IN ('ZTF20acwrybw','ZTF22abugigx')
    AND fid = 2
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (14, 5)


Unnamed: 0,oid,candid,xpos,ypos,scorr
0,ZTF20acwrybw,1433166885615015067,172.229,563.697,9.62815
1,ZTF20acwrybw,1440139345615015033,173.3743,564.3097,5.196096
2,ZTF20acwrybw,1789128565615015030,214.138,575.0902,6.518314
3,ZTF20acwrybw,1944491225615015075,150.4586,529.5159,8.528032
4,ZTF20acwrybw,1956473945615015064,139.7179,545.4087,5.655714
5,ZTF20acwrybw,2154166925615015018,208.1079,593.9471,6.181045
6,ZTF20acwrybw,2159150415615015068,222.3189,579.3848,5.737109
7,ZTF20acwrybw,2212139135615015081,178.9274,618.2436,5.307838
8,ZTF20acwrybw,2311500890615010051,1432.1919,2603.5596,5.402905
9,ZTF20acwrybw,2455283545615015060,213.3926,570.4003,6.419781


In [None]:
# query 
query = '''
-- r-band epochs (fid=2) in dataquality for the specified ZTF objects
WITH requested_oids(oid) AS (
                             VALUES ('ZTF20acwrybw'), ('ZTF22abugigx')),
     target_oids AS
  (-- prioritize obtaining OIDs via a subquery from the object table
 SELECT o.oid
   FROM OBJECT o
   WHERE o.oid IN
       (SELECT oid
        FROM requested_oids))
SELECT dq.oid, -- ZTF identifier
 dq.candid, -- candidate identifier
 dq.xpos, -- x-image position [pixels]
 dq.ypos, -- y-image position [pixels]
 dq.scorr -- peak-pixel S/N
FROM dataquality dq
WHERE dq.oid IN
    (SELECT oid
     FROM target_oids)
  AND dq.fid = 2
ORDER BY dq.oid,
         dq.candid;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (14, 5)


Unnamed: 0,oid,candid,xpos,ypos,scorr
0,ZTF20acwrybw,1433166885615015067,172.229,563.697,9.62815
1,ZTF20acwrybw,1440139345615015033,173.3743,564.3097,5.196096
2,ZTF20acwrybw,1789128565615015030,214.138,575.0902,6.518314
3,ZTF20acwrybw,1944491225615015075,150.4586,529.5159,8.528032
4,ZTF20acwrybw,1956473945615015064,139.7179,545.4087,5.655714
5,ZTF20acwrybw,2154166925615015018,208.1079,593.9471,6.181045
6,ZTF20acwrybw,2159150415615015068,222.3189,579.3848,5.737109
7,ZTF20acwrybw,2212139135615015081,178.9274,618.2436,5.307838
8,ZTF20acwrybw,2311500890615010051,1432.1919,2603.5596,5.402905
9,ZTF20acwrybw,2455283545615015060,213.3926,570.4003,6.419781


In [None]:
set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid']))

True

In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF22abugigx', 'ZTF20acwrybw'}
{'ZTF22abugigx', 'ZTF20acwrybw'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,candid,xpos,ypos,scorr


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,candid,xpos,ypos,scorr


query 89

In [None]:
# query gold
query = '''
SELECT
    *
FROM
    dataquality
WHERE
    oid in
(
SELECT
    oid
FROM
    probability
WHERE
    classifier_name = 'stamp_classifier'
    AND class_name = 'SN'
    AND probability > 0.7
LIMIT 50
)
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (179, 32)


Unnamed: 0,oid,candid,fid,xpos,ypos,chipsf,sky,fwhm,classtar,mindtoedge,...,magzpsciunc,magzpscirms,nmatches,clrcoeff,clrcounc,zpclrcov,zpmed,clrmed,clrrms,exptime
0,ZTF25aakinjs,3003513781415015001,2,2307.9307,158.8417,1.267480,0.640848,1.115423,0.952,158.8417,...,0.000001,0.023513,2030,0.071709,0.000004,-0.000002,26.104,0.494,0.200315,30.0
1,ZTF25aajsiix,3004405555915015013,1,145.0131,2782.4612,1.104322,-0.598952,2.800000,0.848,145.0131,...,0.000014,0.032131,429,-0.032522,0.000023,-0.000016,26.311,0.649,0.309331,30.0
2,ZTF25aajymav,3004419243915015014,1,1622.9146,2906.6592,1.586670,0.241518,3.990000,0.920,173.8408,...,0.000010,0.033807,500,-0.055777,0.000017,-0.000012,26.278,0.602,0.314324,30.0
3,ZTF25aakimoj,3003511892515015112,2,1174.3499,1914.1132,1.807065,-2.122332,2.070000,0.970,1166.3868,...,0.000003,0.017872,1944,0.079705,0.000005,-0.000004,26.380,0.759,0.140049,30.0
4,ZTF25aakgumm,3003422250015015006,1,2974.3380,1111.8564,1.059774,0.064759,2.080000,0.976,98.1620,...,0.000002,0.025660,1748,-0.045019,0.000004,-0.000003,26.161,0.532,0.262334,30.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174,ZTF22abnfaik,3003260962815015013,2,879.7968,3026.3057,1.641895,-0.105915,1.229870,0.851,54.1944,...,0.000210,0.098500,261,0.139074,0.000277,-0.000224,25.875,0.836,0.325356,30.0
175,ZTF25aakijbg,3003509234315015001,2,2767.4492,238.0564,1.302257,0.382990,1.890000,0.957,238.0564,...,0.000002,0.031387,1722,0.087491,0.000004,-0.000002,26.447,0.531,0.267966,30.0
176,ZTF25aakijbg,3005499604315015001,1,2771.1118,206.2150,1.435758,0.149859,2.900000,0.923,206.2150,...,0.000002,0.032225,1724,-0.078601,0.000005,-0.000003,26.335,0.530,0.267424,30.0
177,ZTF21acdvucy,3003405755915015024,1,891.9036,2682.0880,1.222102,0.114554,4.090000,0.882,398.4122,...,0.000004,0.031431,1140,-0.055049,0.000008,-0.000005,26.289,0.526,0.289702,30.0


In [None]:
# query 
query = '''
-- Return all dataquality rows for up to 50 objects classified as Supernova (SN) by the stamp classifier with probability > 0.7
WITH target_oids AS
  (SELECT DISTINCT oid
   FROM probability
   WHERE classifier_name = 'stamp_classifier'
     AND class_name = 'SN'
     AND probability > 0.7
   LIMIT 50)
SELECT *
FROM dataquality
WHERE oid IN
    (SELECT oid
     FROM target_oids);
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (6532, 32)


Unnamed: 0,oid,candid,fid,xpos,ypos,chipsf,sky,fwhm,classtar,mindtoedge,...,magzpsciunc,magzpscirms,nmatches,clrcoeff,clrcounc,zpclrcov,zpmed,clrmed,clrrms,exptime
0,ZTF17aaaazwi,733193623915015022,1,619.4112,2680.1460,8.900924,-0.238488,2.87,0.984,400.3541,...,0.000014,0.036747,1196.0,-0.057056,0.000019,-0.000016,26.227,0.776,0.230453,30.0
1,ZTF17aaaazwi,733238423915015027,2,643.1961,2690.6110,36.841510,0.250795,1.96,0.986,389.8890,...,0.000003,0.027897,1238.0,0.088927,0.000004,-0.000003,26.364,0.789,0.247867,30.0
2,ZTF17aaaazwi,734237773915015019,1,649.7446,2676.9995,12.378316,0.256539,1.98,0.984,403.5005,...,0.000011,0.033187,1225.0,-0.046897,0.000013,-0.000011,26.101,0.782,0.243566,30.0
3,ZTF17aaaazwi,736206863915015025,2,634.2486,2670.9470,10.828661,0.002817,2.17,0.983,409.5531,...,0.000004,0.028774,1231.0,0.089544,0.000004,-0.000004,26.357,0.788,0.248194,30.0
4,ZTF17aaaazwi,737187523915015013,1,637.0925,2667.7952,3.761357,-0.493442,3.03,0.974,412.7048,...,0.000017,0.038256,1167.0,-0.045960,0.000023,-0.000019,26.223,0.773,0.223142,30.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6527,ZTF18aaaapyn,2937227162315015016,2,1242.6093,1797.5860,5.594045,0.616612,2.29,0.997,1242.6093,...,0.000001,0.018123,2094.0,0.084729,0.000003,-0.000002,26.446,0.534,0.151693,30.0
6528,ZTF18aaaapyn,2937247222315015013,1,1242.6656,1789.3666,2.228070,-0.817201,3.51,0.984,1242.6656,...,0.000003,0.022401,2084.0,-0.079220,0.000007,-0.000004,26.370,0.533,0.152470,30.0
6529,ZTF18aaaapyn,2939201152315015023,1,1236.0463,1821.2249,2.149205,0.415131,2.05,0.983,1236.0463,...,0.000004,0.025400,2088.0,-0.081242,0.000010,-0.000006,26.349,0.533,0.152470,30.0
6530,ZTF18aaaapyn,3017176442315015005,1,1244.4071,1801.2042,3.958217,0.763982,2.01,0.983,1244.4071,...,0.000002,0.019744,2082.0,-0.061601,0.000006,-0.000004,26.297,0.533,0.153565,30.0


In [None]:
set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid']))

False

In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF25aakimpf', 'ZTF25aakimgq', 'ZTF18adkreya', 'ZTF25aajsaap', 'ZTF22abnfaik', 'ZTF25aajywmz', 'ZTF25aakiony', 'ZTF25aakhpdo', 'ZTF25aakdwio', 'ZTF25aakhmwn', 'ZTF24aaonjmk', 'ZTF25aakefgn', 'ZTF25aajymav', 'ZTF25aakgnzd', 'ZTF25aakijbg', 'ZTF25aakikro', 'ZTF25aajvvzm', 'ZTF25aakhvsj', 'ZTF25aakiljc', 'ZTF25aakgpiq', 'ZTF25aakiqcw', 'ZTF25aakgqrv', 'ZTF25aakgmoq', 'ZTF25aakgoku', 'ZTF25aakinjs', 'ZTF25aakifod', 'ZTF23aaxxmuj', 'ZTF25aadgegf', 'ZTF25aakiovi', 'ZTF25aakhanf', 'ZTF25aakgqip', 'ZTF25aakimoj', 'ZTF21acdvucy', 'ZTF25aakinvs', 'ZTF25aakimhm', 'ZTF25aajgmfp', 'ZTF25aakhmrz', 'ZTF25aakghvs', 'ZTF25aajsigm', 'ZTF25aaknwwy', 'ZTF25aakhars', 'ZTF25aakihep', 'ZTF25aakeqyr', 'ZTF25aakgumm', 'ZTF25aaknwqj', 'ZTF25aakiesl', 'ZTF25aakcynb', 'ZTF19abpcioc', 'ZTF25aakmhgu', 'ZTF25aajsiix'}
{'ZTF17aaatpct', 'ZTF17aaaecja', 'ZTF18aaaaorq', 'ZTF17aaaebyo', 'ZTF17aaawcmi', 'ZTF18aaaapyn', 'ZTF17aaaheci', 'ZTF17aaarvch', 'ZTF17aaaazwi', 'ZTF17aabpjlu', 'ZTF17aadgpat', 'ZTF17aaaqldq', 'ZTF17

In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,candid,fid,xpos,ypos,chipsf,sky,fwhm,classtar,mindtoedge,...,magzpsciunc,magzpscirms,nmatches,clrcoeff,clrcounc,zpclrcov,zpmed,clrmed,clrrms,exptime


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,candid,fid,xpos,ypos,chipsf,sky,fwhm,classtar,mindtoedge,...,magzpsciunc,magzpscirms,nmatches,clrcoeff,clrcounc,zpclrcov,zpmed,clrmed,clrrms,exptime


query 72

In [None]:
# query gold
query = '''
SELECT
    *
FROM
    reference
WHERE
    oid = 'ZTF20actnuls'
    AND rfid = '712120223'
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 15)


Unnamed: 0,rfid,oid,candid,fid,rcid,field,magnr,sigmagnr,chinr,sharpnr,ranr,decnr,mjdstartref,mjdendref,nframesref
0,712120223,ZTF20actnuls,1423498792315015010,2,23,712,18.255,0.102,9.673,0.411,159.524295,39.17621,58154.300891,58184.315474,15


In [None]:
# query 
query = '''
-- Return all columns from the 'reference' table for the specified ZTF object and reference identifier

SELECT *
FROM reference
WHERE oid IN
    (-- Obtain the object's OID via a nested query (avoiding JOINs)
 SELECT oid 
     FROM OBJECT 
     WHERE oid = 'ZTF20actnuls')
  AND rfid = 712120223;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 15)


Unnamed: 0,rfid,oid,candid,fid,rcid,field,magnr,sigmagnr,chinr,sharpnr,ranr,decnr,mjdstartref,mjdendref,nframesref
0,712120223,ZTF20actnuls,1423498792315015010,2,23,712,18.255,0.102,9.673,0.411,159.524295,39.17621,58154.300891,58184.315474,15


In [None]:
set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid']))

True

In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF20actnuls'}
{'ZTF20actnuls'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,rfid,oid,candid,fid,rcid,field,magnr,sigmagnr,chinr,sharpnr,ranr,decnr,mjdstartref,mjdendref,nframesref


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,rfid,oid,candid,fid,rcid,field,magnr,sigmagnr,chinr,sharpnr,ranr,decnr,mjdstartref,mjdendref,nframesref


query 99

In [7]:
print(db_test[db_test.req_id == 99].request.values[0])

Find at most 10 ZTF objects that have a first detection date later than mjd=60310 days, as well as a probability of 1 of being an asteroid in the stamp classifier, ordered by the first detection date. Return all columns from the 'forced_photometry' table for such objects


In [6]:
# query gold
query = '''
SELECT
    *
FROM
    forced_photometry
WHERE
    oid in
(
SELECT
    object.oid
FROM
    object INNER JOIN
    probability
    ON object.oid = probability.oid
WHERE
    firstmjd > 60310
    AND classifier_name = 'stamp_classifier'
    AND class_name = 'asteroid'
    AND probability = 1
ORDER BY firstmjd
LIMIT 10
)

'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (69, 42)


Unnamed: 0,pid,oid,mjd,fid,ra,dec,e_ra,e_dec,mag,e_mag,...,diffmaglim,programid,procstatus,distnr,ranr,decnr,magnr,sigmagnr,chinr,sharpnr
0,2536220341615,ZTF24aaaajnz,60290.220347,1,356.638975,47.487980,,,24.769798,7.675689,...,20.420700,1,56,-99999.000000,-99999.000000,-99999.000000,-99999.000,-99999.000,-99999.000,-99999.000
1,2536180323015,ZTF24aaaajnz,60290.180324,2,356.638975,47.487980,,,100.000000,100.000000,...,20.528500,1,61,-99999.000000,-99999.000000,-99999.000000,-99999.000,-99999.000,-99999.000,-99999.000
2,2534197701615,ZTF24aaaajnz,60288.197708,2,356.638975,47.487980,,,21.659468,0.900523,...,19.729099,1,56,-99999.000000,-99999.000000,-99999.000000,-99999.000,-99999.000,-99999.000,-99999.000
3,2534136721615,ZTF24aaaajnz,60288.136724,1,356.638975,47.487980,,,24.218550,5.723178,...,20.183001,1,0,-99999.000000,-99999.000000,-99999.000000,-99999.000,-99999.000,-99999.000,-99999.000
4,2530219763015,ZTF24aaaajnz,60284.219768,1,356.638975,47.487980,,,100.000000,100.000000,...,20.335699,1,61,-99999.000000,-99999.000000,-99999.000000,-99999.000,-99999.000,-99999.000,-99999.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,2530138582415,ZTF24aaaajsp,60284.138588,2,340.820096,46.927049,,,23.181665,1.554161,...,20.576700,1,0,4.621940,340.821625,46.927788,17.041,0.017,0.740,-0.052
65,2534136252415,ZTF24aaaajsp,60288.136250,1,340.820096,46.927049,,,24.333900,6.619290,...,20.122700,1,0,4.609645,340.821625,46.927795,17.605,0.021,0.903,-0.069
66,2534221122415,ZTF24aaaajsp,60288.221123,2,340.820096,46.927049,,,23.318583,5.318606,...,19.507900,1,0,4.621940,340.821625,46.927788,17.041,0.017,0.740,-0.052
67,2536143512415,ZTF24aaaajsp,60290.143519,2,340.820096,46.927049,,,22.721621,1.108646,...,20.472601,1,0,4.621940,340.821625,46.927788,17.041,0.017,0.740,-0.052


In [16]:
# query 
query = '''
-- Find at most 10 ZTF objects with first detection date > 60310,
-- and probability=1 for class 'asteroid' in the 'stamp_classifier',
-- then return all columns from 'forced_photometry' for those objects,
-- ordered by first detection date

SELECT *
FROM forced_photometry
WHERE oid IN
    (SELECT oid
     FROM OBJECT
     WHERE firstmjd > 60310
       AND oid IN
         (SELECT oid
          FROM probability
          WHERE class_name = 'asteroid'
            AND classifier_name = 'stamp_classifier'
            AND probability = 1)
     ORDER BY firstmjd
     LIMIT 10) ;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (69, 42)


Unnamed: 0,pid,oid,mjd,fid,ra,dec,e_ra,e_dec,mag,e_mag,...,diffmaglim,programid,procstatus,distnr,ranr,decnr,magnr,sigmagnr,chinr,sharpnr
0,2536220341615,ZTF24aaaajnz,60290.220347,1,356.638975,47.487980,,,24.769798,7.675689,...,20.420700,1,56,-99999.000000,-99999.000000,-99999.000000,-99999.000,-99999.000,-99999.000,-99999.000
1,2536180323015,ZTF24aaaajnz,60290.180324,2,356.638975,47.487980,,,100.000000,100.000000,...,20.528500,1,61,-99999.000000,-99999.000000,-99999.000000,-99999.000,-99999.000,-99999.000,-99999.000
2,2534197701615,ZTF24aaaajnz,60288.197708,2,356.638975,47.487980,,,21.659468,0.900523,...,19.729099,1,56,-99999.000000,-99999.000000,-99999.000000,-99999.000,-99999.000,-99999.000,-99999.000
3,2534136721615,ZTF24aaaajnz,60288.136724,1,356.638975,47.487980,,,24.218550,5.723178,...,20.183001,1,0,-99999.000000,-99999.000000,-99999.000000,-99999.000,-99999.000,-99999.000,-99999.000
4,2530219763015,ZTF24aaaajnz,60284.219768,1,356.638975,47.487980,,,100.000000,100.000000,...,20.335699,1,61,-99999.000000,-99999.000000,-99999.000000,-99999.000,-99999.000,-99999.000,-99999.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,2530138582415,ZTF24aaaajsp,60284.138588,2,340.820096,46.927049,,,23.181665,1.554161,...,20.576700,1,0,4.621940,340.821625,46.927788,17.041,0.017,0.740,-0.052
65,2534136252415,ZTF24aaaajsp,60288.136250,1,340.820096,46.927049,,,24.333900,6.619290,...,20.122700,1,0,4.609645,340.821625,46.927795,17.605,0.021,0.903,-0.069
66,2534221122415,ZTF24aaaajsp,60288.221123,2,340.820096,46.927049,,,23.318583,5.318606,...,19.507900,1,0,4.621940,340.821625,46.927788,17.041,0.017,0.740,-0.052
67,2536143512415,ZTF24aaaajsp,60290.143519,2,340.820096,46.927049,,,22.721621,1.108646,...,20.472601,1,0,4.621940,340.821625,46.927788,17.041,0.017,0.740,-0.052


In [None]:
# query 
query = '''
SELECT fp.*
FROM OBJECT o
INNER JOIN probability p ON o.oid = p.oid
INNER JOIN forced_photometry fp ON o.oid = fp.oid
WHERE o.firstmjd > 60310
  AND p.class_name = 'asteroid'
  AND p.classifier_name = 'stamp_classifier'
  AND p.probability = 1
  AND p.ranking = 1
ORDER BY o.firstmjd ASC
LIMIT 10;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i


Query executed successfully, shape: (10, 42)


Unnamed: 0,pid,oid,mjd,fid,ra,dec,e_ra,e_dec,mag,e_mag,...,diffmaglim,programid,procstatus,distnr,ranr,decnr,magnr,sigmagnr,chinr,sharpnr
0,2537176313815,ZTF24aaaajcx,60291.176319,1,325.590518,77.130726,,,23.913935,5.575612,...,19.9352,1,0,3.010236,325.59024,77.131561,23.011999,0.276,0.55,-0.297
1,2537120003815,ZTF24aaaajcx,60291.12,2,325.590518,77.130726,,,22.52446,1.32901,...,20.0959,1,0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0
2,2535178323815,ZTF24aaaajcx,60289.178322,1,325.590518,77.130726,,,22.38731,1.364911,...,19.8801,1,0,3.010236,325.59024,77.131561,23.011999,0.276,0.55,-0.297
3,2533133663815,ZTF24aaaajcx,60287.133669,1,325.590518,77.130726,,,21.872555,0.786584,...,19.916599,1,0,3.010236,325.59024,77.131561,23.011999,0.276,0.55,-0.297
4,2533123443815,ZTF24aaaajcx,60287.123449,2,325.590518,77.130726,,,21.412384,0.511367,...,19.9074,1,0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0
5,2536220341615,ZTF24aaaajnz,60290.220347,1,356.638975,47.48798,,,24.769798,7.675689,...,20.4207,1,56,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0
6,2536180323015,ZTF24aaaajnz,60290.180324,2,356.638975,47.48798,,,100.0,100.0,...,20.5285,1,61,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0
7,2534197701615,ZTF24aaaajnz,60288.197708,2,356.638975,47.48798,,,21.659468,0.900523,...,19.729099,1,56,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0
8,2534136721615,ZTF24aaaajnz,60288.136724,1,356.638975,47.48798,,,24.21855,5.723178,...,20.183001,1,0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0
9,2530219763015,ZTF24aaaajnz,60284.219768,1,356.638975,47.48798,,,100.0,100.0,...,20.335699,1,61,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0,-99999.0


In [None]:
set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['pid'])) == set(sorted(tables_i['pid']))

True

In [17]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF24aaaajzf', 'ZTF24aaaajcx', 'ZTF24aaaajqo', 'ZTF24aaaakhc', 'ZTF24aaaajws', 'ZTF24aaaajjf', 'ZTF24aaaajor', 'ZTF24aaaajnz', 'ZTF24aaaajuw', 'ZTF24aaaajsp'}
{'ZTF24aaaajzf', 'ZTF24aaaajcx', 'ZTF24aaaajqo', 'ZTF24aaaakhc', 'ZTF24aaaajws', 'ZTF24aaaajjf', 'ZTF24aaaajor', 'ZTF24aaaajnz', 'ZTF24aaaajuw', 'ZTF24aaaajsp'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,pid,oid,mjd,fid,ra,dec,e_ra,e_dec,mag,e_mag,...,diffmaglim,programid,procstatus,distnr,ranr,decnr,magnr,sigmagnr,chinr,sharpnr


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,pid,oid,mjd,fid,ra,dec,e_ra,e_dec,mag,e_mag,...,diffmaglim,programid,procstatus,distnr,ranr,decnr,magnr,sigmagnr,chinr,sharpnr


In [11]:
# query gold
query = '''
SELECT
    *
FROM
    object
LIMIT 1
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 22)


Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,meanra,meandec,sigmara,sigmadec,deltajd,firstmjd,lastmjd,step_id_corr,diffpos,reference_change
0,ZTF24aakxrgp,1,703,60424.262257,60424.262257,False,False,1,,,...,195.374787,-3.900703,0.085197,0.085,0.0,60424.262257,60424.262257,24.4.1,,


In [12]:
tables_gold.columns

Index(['oid', 'ndethist', 'ncovhist', 'mjdstarthist', 'mjdendhist',
       'corrected', 'stellar', 'ndet', 'g_r_max', 'g_r_max_corr', 'g_r_mean',
       'g_r_mean_corr', 'meanra', 'meandec', 'sigmara', 'sigmadec', 'deltajd',
       'firstmjd', 'lastmjd', 'step_id_corr', 'diffpos', 'reference_change'],
      dtype='object')

query 40

In [None]:
# query gold
query = '''
SELECT
    oid_catalog, ra, dec, w1mpro, w2mpro,
    w1sigmpro, w2sigmpro, j_m_2mass, j_msig_2mass
FROM
    allwise
WHERE
    q3c_radial_query(ra, dec, 23.6488, 30.7314, 0.000277778)
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 9)


Unnamed: 0,oid_catalog,ra,dec,w1mpro,w2mpro,w1sigmpro,w2sigmpro,j_m_2mass,j_msig_2mass
0,J013435.71+304352.7,23.648805,30.731309,13.842,13.735,0.03,0.038,15.552,0.051


In [None]:
# query 
query = '''
-- Cone search of 1 arcsec (1/3600 deg) around RA=23.6488 deg, Dec=30.7314 deg

SELECT oid_catalog, -- ALLWISE identifier
 ra, dec, w1mpro, -- WISE W1 magnitude
 w1sigmpro, -- WISE W1 magnitude uncertainty
 w2mpro, -- WISE W2 magnitude
 w2sigmpro, -- WISE W2 magnitude uncertainty
 j_m_2mass, -- 2MASS J magnitude
 j_msig_2mass -- 2MASS J magnitude uncertainty
FROM allwise
WHERE q3c_radial_query(ra, dec, 23.6488, 30.7314, 1.0/3600.0)
ORDER BY q3c_dist(ra, dec, 23.6488, 30.7314) ASC;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 9)


Unnamed: 0,oid_catalog,ra,dec,w1mpro,w1sigmpro,w2mpro,w2sigmpro,j_m_2mass,j_msig_2mass
0,J013435.71+304352.7,23.648805,30.731309,13.842,0.03,13.735,0.038,15.552,0.051


In [None]:
set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid_catalog'])) == set(sorted(tables_i['oid_catalog']))

True

In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid_catalog'])))
print(set(sorted(tables_i['oid_catalog'])))

{'J013435.71+304352.7'}
{'J013435.71+304352.7'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid_catalog,ra,dec,w1mpro,w2mpro,w1sigmpro,w2sigmpro,j_m_2mass,j_msig_2mass


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid_catalog,ra,dec,w1mpro,w1sigmpro,w2mpro,w2sigmpro,j_m_2mass,j_msig_2mass


query 80

In [None]:
# query gold
query = '''
SELECT
    *
FROM
    xmatch
WHERE
    oid IN ('ZTF22abcmmkz','ZTF18abbufof')
    AND dist < 0.5
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 6)


Unnamed: 0,oid,catid,oid_catalog,dist,class_catalog,period
0,ZTF18abbufof,allwise,3398p348_ac51-035739,0.264541,,


In [None]:
# query 
query = '''
-- Find all cross-matches for the specified ZTF objects with distance < 0.5 arcsec

SELECT *
FROM xmatch
WHERE oid IN
    (SELECT oid
     FROM OBJECT
     WHERE oid IN ('ZTF22abcmmkz',
                   'ZTF18abbufof'))
  AND dist < 0.5;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 6)


Unnamed: 0,oid,catid,oid_catalog,dist,class_catalog,period
0,ZTF18abbufof,allwise,3398p348_ac51-035739,0.264541,,


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid_catalog'])) == set(sorted(tables_i['oid_catalog'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid_catalog'])) == set(sorted(tables_i['oid_catalog'])))

True
True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid_catalog'])))
print(set(sorted(tables_i['oid_catalog'])))

{'3398p348_ac51-035739'}
{'3398p348_ac51-035739'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,catid,oid_catalog,dist,class_catalog,period


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,catid,oid_catalog,dist,class_catalog,period


query 45

In [6]:
db_test[db_test['req_id'] == 45].request.values[0]

"For objects with ZTF identifiers 'ZTF23abjikaf' and 'ZTF20achlced', find how many rows in the 'dataquality' table have a minimum reduced chi-square value of 5"

In [3]:
# query gold
query = '''
SELECT
    COUNT(chipsf)
FROM
    dataquality
WHERE
    oid IN ('ZTF23abjikaf','ZTF20achlced')
    AND chipsf >= 5
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 1)


Unnamed: 0,count
0,81


In [4]:
# query 
query = '''
-- Count the number of rows in dataquality for the specified ZTF objects with minimum chipsf = 5

SELECT COUNT(*)
FROM dataquality
WHERE oid IN ('ZTF23abjikaf',
              'ZTF20achlced')
  AND chipsf = 5;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 1)


Unnamed: 0,count
0,0


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['count'])) == set(sorted(tables_i['rows_with_chipsf_ge_5'])))

True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['count'])))
print(set(sorted(tables_i['rows_with_chipsf_ge_5'])))

{75, 6}
{75, 6}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,count,oid


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,rows_with_chipsf_ge_5


query 63

In [None]:
# query gold
query = '''
SELECT
    *
FROM
    probability
WHERE
    oid IN ('ZTF21aaobkmg','ZTF21aaomuka')
    AND classifier_name = 'lc_classifier'
    AND ranking <= 2
ORDER BY ranking
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (4, 6)


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF21aaobkmg,lc_classifier,hierarchical_rf_1.1.0,SNII,0.584,1
1,ZTF21aaomuka,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.46,1
2,ZTF21aaobkmg,lc_classifier,hierarchical_rf_1.1.0,SLSN,0.256,2
3,ZTF21aaomuka,lc_classifier,hierarchical_rf_1.1.0,SNIbc,0.298,2


In [None]:
# query 
query = '''
-- Return all probability rows for the specified OIDs, restricted to the Light Curve classifier and rankings 1 or 2

SELECT *
FROM probability
WHERE oid IN
    (-- Obtain OIDs via subquery as recommended
 SELECT oid
     FROM OBJECT
     WHERE oid IN ('ZTF21aaobkmg',
                   'ZTF21aaomuka'))
  AND classifier_name = 'lc_classifier'
  AND ranking IN (1,
                  2)
ORDER BY ranking;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (4, 6)


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF21aaobkmg,lc_classifier,hierarchical_rf_1.1.0,SNII,0.584,1
1,ZTF21aaomuka,lc_classifier,hierarchical_rf_1.1.0,SNIa,0.46,1
2,ZTF21aaobkmg,lc_classifier,hierarchical_rf_1.1.0,SLSN,0.256,2
3,ZTF21aaomuka,lc_classifier,hierarchical_rf_1.1.0,SNIbc,0.298,2


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid'])))

True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF21aaomuka', 'ZTF21aaobkmg'}
{'ZTF21aaomuka', 'ZTF21aaobkmg'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,count,oid


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,rows_with_chipsf_ge_5


query 69

In [None]:
# query gold
query = '''
SELECT
    *
FROM
    ps1_ztf
WHERE
    oid IN ('ZTF21abstkhj','ZTF22aacqjeb','ZTF21aamwpdf')
    AND sgscore1 < 0.2
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (2, 27)


Unnamed: 0,oid,candid,objectidps1,sgmag1,srmag1,simag1,szmag1,sgscore1,distpsnr1,objectidps2,...,sgmag3,srmag3,simag3,szmag3,sgscore3,distpsnr3,nmtchps,unique1,unique2,unique3
0,ZTF21abstkhj,1684349673915015107,157473481124645500,16.9345,16.0276,15.9801,15.6985,0.163065,0.650434,157473481124194460,...,-999.0,19.2237,18.4609,17.9329,0.256286,5.943302,12,True,True,True
1,ZTF22aacqjeb,1886297531515015012,83461337558428560,18.4601,17.9604,17.7242,17.3151,0.04625,2.060955,83461337536007620,...,20.905,19.974,18.3732,17.6284,0.988167,8.479204,10,True,True,True


In [None]:
# query 
query = '''
-- Return all ps1_ztf rows for the specified ZTF objects where the nearest PS1 source (sgscore1) < 0.2

SELECT *
FROM ps1_ztf
WHERE oid IN
    (SELECT oid
     FROM (
           VALUES ('ZTF21abstkhj'), ('ZTF22aacqjeb'), ('ZTF21aamwpdf')) AS oids(oid))
  AND sgscore1 < 0.2;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (2, 27)


Unnamed: 0,oid,candid,objectidps1,sgmag1,srmag1,simag1,szmag1,sgscore1,distpsnr1,objectidps2,...,sgmag3,srmag3,simag3,szmag3,sgscore3,distpsnr3,nmtchps,unique1,unique2,unique3
0,ZTF21abstkhj,1684349673915015107,157473481124645500,16.9345,16.0276,15.9801,15.6985,0.163065,0.650434,157473481124194460,...,-999.0,19.2237,18.4609,17.9329,0.256286,5.943302,12,True,True,True
1,ZTF22aacqjeb,1886297531515015012,83461337558428560,18.4601,17.9604,17.7242,17.3151,0.04625,2.060955,83461337536007620,...,20.905,19.974,18.3732,17.6284,0.988167,8.479204,10,True,True,True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['candid'])) == set(sorted(tables_i['candid'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['objectidps1'])) == set(sorted(tables_i['objectidps1'])))

True
True
True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF22aacqjeb', 'ZTF21abstkhj'}
{'ZTF22aacqjeb', 'ZTF21abstkhj'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,candid,objectidps1,sgmag1,srmag1,simag1,szmag1,sgscore1,distpsnr1,objectidps2,...,sgmag3,srmag3,simag3,szmag3,sgscore3,distpsnr3,nmtchps,unique1,unique2,unique3


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,candid,objectidps1,sgmag1,srmag1,simag1,szmag1,sgscore1,distpsnr1,objectidps2,...,sgmag3,srmag3,simag3,szmag3,sgscore3,distpsnr3,nmtchps,unique1,unique2,unique3


query 104

In [None]:
# query gold
query = '''
SELECT
    sq1.oid,
    ss_ztf.candid, ss_ztf.ssdistnr, ss_ztf.ssmagnr,
    ss_ztf.ssnamenr, dataquality.fid,
    dataquality.fwhm, dataquality.nmatches,
    dataquality.exptime
FROM
(
SELECT
    *
FROM
    probability
WHERE
    classifier_name = 'stamp_classifier'
    AND class_name = 'asteroid'
    AND probability > 0.9
    AND classifier_version = 'stamp_classifier_1.0.4'
LIMIT 30
) as sq1
INNER JOIN
ss_ztf
ON sq1.oid = ss_ztf.oid
INNER JOIN
dataquality
ON ss_ztf.oid = dataquality.oid
AND ss_ztf.candid = dataquality.candid
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (30, 9)


Unnamed: 0,oid,candid,ssdistnr,ssmagnr,ssnamenr,fid,fwhm,nmatches,exptime
0,ZTF25aaknwvs,3004435051015015012,0.0,19.5,92271,1,3.17,430,30.0
1,ZTF25aaknwwn,3004436554415015010,0.0,17.7,13111,1,2.28,541,30.0
2,ZTF25aaknwxj,3004437016315015002,0.0,18.9,126725,1,2.98,681,30.0
3,ZTF25aaknwwx,3004436074415015017,0.0,19.8,86067,1,2.66,660,30.0
4,ZTF25aaknxai,3004437012415015014,0.0,20.2,259147,1,2.51,679,30.0
5,ZTF25aaknwxq,3004437014515015005,0.0,19.1,112052,1,3.02,642,30.0
6,ZTF25aaknwxx,3004437013115015006,0.0,18.8,25337,1,2.61,739,30.0
7,ZTF25aaknwxw,3004437013715015011,0.0,19.1,247434,1,3.2,690,30.0
8,ZTF25aaknwyb,3004436550215015004,0.0,19.9,121201,1,2.63,720,30.0
9,ZTF25aaknwyg,3004436551415015007,0.0,17.4,7488,1,3.95,477,30.0


In [None]:
# query 
query = '''
-- Find ZTF objects classified as asteroids with high probability
WITH asteroid_objects AS
  (SELECT p.oid
   FROM probability p
   WHERE p.class_name = 'asteroid'
     AND p.classifier_name = 'stamp_classifier'
     AND p.classifier_version = 'stamp_classifier_1.0.4'
     AND p.probability > 0.9
   LIMIT 30)
SELECT o.oid AS ztf_identifier,
       s.candid AS candidate_id,
       s.ssdistnr AS distance_to_ss_object,
       s.ssmagnr AS mpc_archive_magnitude,
       s.ssnamenr AS solar_system_object_name
FROM asteroid_objects ao
JOIN OBJECT o ON ao.oid = o.oid
JOIN ss_ztf s ON o.oid = s.oid;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (30, 5)


Unnamed: 0,ztf_identifier,candidate_id,distance_to_ss_object,mpc_archive_magnitude,solar_system_object_name
0,ZTF25aaknwvs,3004435051015015012,0.0,19.5,92271
1,ZTF25aaknwwn,3004436554415015010,0.0,17.7,13111
2,ZTF25aaknwxj,3004437016315015002,0.0,18.9,126725
3,ZTF25aaknwwx,3004436074415015017,0.0,19.8,86067
4,ZTF25aaknxai,3004437012415015014,0.0,20.2,259147
5,ZTF25aaknwxq,3004437014515015005,0.0,19.1,112052
6,ZTF25aaknwxx,3004437013115015006,0.0,18.8,25337
7,ZTF25aaknwxw,3004437013715015011,0.0,19.1,247434
8,ZTF25aaknwyb,3004436550215015004,0.0,19.9,121201
9,ZTF25aaknwyg,3004436551415015007,0.0,17.4,7488


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['ztf_identifier'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['candid'])) == set(sorted(tables_i['candidate_id'])))

True
True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['ztf_identifier'])))

{'ZTF25aaknxcl', 'ZTF25aaknwxq', 'ZTF25aaknxci', 'ZTF25aaknxda', 'ZTF25aaknxai', 'ZTF25aaknwvs', 'ZTF25aaknxgw', 'ZTF25aaknwyb', 'ZTF25aaknxgu', 'ZTF25aaknxem', 'ZTF25aaknxau', 'ZTF25aaknwxw', 'ZTF25aaknwxj', 'ZTF25aaknxgy', 'ZTF25aaknwzc', 'ZTF25aaknxeo', 'ZTF25aaknwxx', 'ZTF25aaknwwx', 'ZTF25aaknxdv', 'ZTF25aaknwzs', 'ZTF25aaknwzf', 'ZTF25aaknxdz', 'ZTF25aaknxhh', 'ZTF25aaknwwn', 'ZTF25aaknwzb', 'ZTF25aaknwym', 'ZTF25aaknwyg', 'ZTF25aaknwyh', 'ZTF25aaknxhc', 'ZTF25aaknxdb'}
{'ZTF25aaknxcl', 'ZTF25aaknwxq', 'ZTF25aaknxci', 'ZTF25aaknxda', 'ZTF25aaknxai', 'ZTF25aaknwvs', 'ZTF25aaknxgw', 'ZTF25aaknwyb', 'ZTF25aaknxgu', 'ZTF25aaknxem', 'ZTF25aaknxau', 'ZTF25aaknwxw', 'ZTF25aaknwxj', 'ZTF25aaknxgy', 'ZTF25aaknwzc', 'ZTF25aaknxeo', 'ZTF25aaknwxx', 'ZTF25aaknwwx', 'ZTF25aaknxdv', 'ZTF25aaknwzs', 'ZTF25aaknwzf', 'ZTF25aaknxdz', 'ZTF25aaknxhh', 'ZTF25aaknwwn', 'ZTF25aaknwzb', 'ZTF25aaknwym', 'ZTF25aaknwyg', 'ZTF25aaknwyh', 'ZTF25aaknxhc', 'ZTF25aaknxdb'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,candid,ssdistnr,ssmagnr,ssnamenr,fid,fwhm,nmatches,exptime


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,ztf_identifier,candidate_id,distance_to_ss_object,mpc_archive_magnitude,solar_system_object_name


query 71

In [None]:
# query gold
query = '''
SELECT
    rfid, oid, candid, magnr, sigmagnr
FROM
    reference
WHERE
    oid IN ('ZTF20aczxeud','ZTF21aappfdu','ZTF22aacqjeb')
    AND fid = 1
ORDER BY oid
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (3, 5)


Unnamed: 0,rfid,oid,candid,magnr,sigmagnr
0,371120103,ZTF20aczxeud,1601274780315015114,16.132,0.048
1,313120115,ZTF21aappfdu,1539274811515015018,18.253,0.065
2,313120115,ZTF22aacqjeb,1886297531515015012,18.253,0.065


In [None]:
# query 
query = '''
-- Query to find reference table information in g-band (fid=1) for specific ZTF objects
-- Returns reference details including magnitude information for nearest reference object

SELECT r.oid,
       r.rfid,
       r.candid,
       r.magnr,
       r.sigmagnr
FROM reference r
WHERE r.oid IN ('ZTF20aczxeud',
                'ZTF21aappfdu',
                'ZTF22aacqjeb')
  AND r.fid = 1 -- Filter ID 1 corresponds to g-band
ORDER BY r.oid;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (3, 5)


Unnamed: 0,oid,rfid,candid,magnr,sigmagnr
0,ZTF20aczxeud,371120103,1601274780315015114,16.132,0.048
1,ZTF21aappfdu,313120115,1539274811515015018,18.253,0.065
2,ZTF22aacqjeb,313120115,1886297531515015012,18.253,0.065


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['candid'])) == set(sorted(tables_i['candid'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['rfid'])) == set(sorted(tables_i['rfid'])))

True
True
True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF21aappfdu', 'ZTF22aacqjeb', 'ZTF20aczxeud'}
{'ZTF21aappfdu', 'ZTF22aacqjeb', 'ZTF20aczxeud'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,rfid,oid,candid,magnr,sigmagnr


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,rfid,candid,magnr,sigmagnr


query 42

In [None]:
# query gold
query = '''
SELECT
    oid, candid, fid, chipsf, fwhm, classtar,
    nbad, magzpsci, magzpsciunc, exptime
FROM
    dataquality
WHERE
    oid = 'ZTF21aapnixl'
ORDER BY chipsf

'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (6, 10)


Unnamed: 0,oid,candid,fid,chipsf,fwhm,classtar,nbad,magzpsci,magzpsciunc,exptime
0,ZTF21aapnixl,1538534064015015007,2,0.993519,1.248978,0.876,0,26.197325,2e-06,30.0
1,ZTF21aapnixl,1536538794015015004,2,1.009326,2.54,0.992,0,26.321283,2e-06,30.0
2,ZTF21aapnixl,1537537024015015004,2,1.24343,1.435979,0.931,0,26.299805,3e-06,30.0
3,ZTF21aapnixl,1536541194015015010,2,1.246692,2.34,0.993,0,26.338717,3e-06,30.0
4,ZTF21aapnixl,1537534614015015007,2,1.422183,2.53,0.763,0,26.294783,3e-06,30.0
5,ZTF21aapnixl,1543529474015015016,2,1.682237,2.54,0.979,0,26.292053,3e-06,30.0


In [None]:
# query 
query = '''
SELECT oid,
       candid,
       fid,
       chipsf,
       fwhm,
       classtar,
       nbad,
       magzpsci,
       magzpsciunc,
       exptime
FROM dataquality
WHERE oid = 'ZTF21aapnixl'
ORDER BY chipsf;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (6, 10)


Unnamed: 0,oid,candid,fid,chipsf,fwhm,classtar,nbad,magzpsci,magzpsciunc,exptime
0,ZTF21aapnixl,1538534064015015007,2,0.993519,1.248978,0.876,0,26.197325,2e-06,30.0
1,ZTF21aapnixl,1536538794015015004,2,1.009326,2.54,0.992,0,26.321283,2e-06,30.0
2,ZTF21aapnixl,1537537024015015004,2,1.24343,1.435979,0.931,0,26.299805,3e-06,30.0
3,ZTF21aapnixl,1536541194015015010,2,1.246692,2.34,0.993,0,26.338717,3e-06,30.0
4,ZTF21aapnixl,1537534614015015007,2,1.422183,2.53,0.763,0,26.294783,3e-06,30.0
5,ZTF21aapnixl,1543529474015015016,2,1.682237,2.54,0.979,0,26.292053,3e-06,30.0


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['candid'])) == set(sorted(tables_i['candid'])))

True
True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF21aapnixl'}
{'ZTF21aapnixl'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,candid,fid,chipsf,fwhm,classtar,nbad,magzpsci,magzpsciunc,exptime


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,candid,fid,chipsf,fwhm,classtar,nbad,magzpsci,magzpsciunc,exptime


query 83

In [None]:
# query gold
query = '''
SELECT
    *
FROM
    allwise
WHERE
    oid_catalog in
(
SELECT
    oid_catalog
FROM
    xmatch
WHERE
    oid = 'ZTF21aazqwxv'
)
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 17)


Unnamed: 0,oid_catalog,ra,dec,w1mpro,w2mpro,w3mpro,w4mpro,w1sigmpro,w2sigmpro,w3sigmpro,w4sigmpro,j_m_2mass,h_m_2mass,k_m_2mass,j_msig_2mass,h_msig_2mass,k_msig_2mass
0,J131116.64+045854.7,197.819368,4.981863,13.319,13.129,9.839,7.761,0.033,0.037,0.056,0.247,15.331,14.491,14.027,0.109,0.1,0.1


In [None]:
# query 
query = '''
-- Get all columns in the 'allwise' table for the ZTF object 'ZTF21aazqwxv'
WITH oid_subquery AS
  (SELECT oid
   FROM OBJECT
   WHERE oid = 'ZTF21aazqwxv')
SELECT a.*
FROM allwise a
JOIN xmatch x ON a.oid_catalog = x.oid_catalog
JOIN oid_subquery o ON x.oid = o.oid;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 17)


Unnamed: 0,oid_catalog,ra,dec,w1mpro,w2mpro,w3mpro,w4mpro,w1sigmpro,w2sigmpro,w3sigmpro,w4sigmpro,j_m_2mass,h_m_2mass,k_m_2mass,j_msig_2mass,h_msig_2mass,k_msig_2mass
0,J131116.64+045854.7,197.819368,4.981863,13.319,13.129,9.839,7.761,0.033,0.037,0.056,0.247,15.331,14.491,14.027,0.109,0.1,0.1


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid_catalog'])) == set(sorted(tables_i['oid_catalog'])))

True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid_catalog'])))
print(set(sorted(tables_i['oid_catalog'])))

{'J131116.64+045854.7'}
{'J131116.64+045854.7'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid_catalog,ra,dec,w1mpro,w2mpro,w3mpro,w4mpro,w1sigmpro,w2sigmpro,w3sigmpro,w4sigmpro,j_m_2mass,h_m_2mass,k_m_2mass,j_msig_2mass,h_msig_2mass,k_msig_2mass


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid_catalog,ra,dec,w1mpro,w2mpro,w3mpro,w4mpro,w1sigmpro,w2sigmpro,w3sigmpro,w4sigmpro,j_m_2mass,h_m_2mass,k_m_2mass,j_msig_2mass,h_msig_2mass,k_msig_2mass


query 84

In [None]:
# query gold
query = '''
SELECT
    oid, xmatch.oid_catalog, dist,
    w1mpro, w2mpro, w3mpro, w4mpro
FROM
    xmatch INNER JOIN
    allwise
    ON xmatch.oid_catalog = allwise.oid_catalog
WHERE
    xmatch.oid = 'ZTF19aascdol'
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 7)


Unnamed: 0,oid,oid_catalog,dist,w1mpro,w2mpro,w3mpro,w4mpro
0,ZTF19aascdol,J154334.93+152539.5,4.3e-05,15.286,14.354,11.747,8.611


In [None]:
# query 
query = '''
-- Query to retrieve ALLWISE match information for ZTF object 'ZTF19aascdol'

SELECT x.oid AS ztf_identifier,
       x.oid_catalog AS allwise_identifier,
       x.dist AS distance_arcsec,
       a.w1mpro AS w1_magnitude,
       a.w2mpro AS w2_magnitude,
       a.w3mpro AS w3_magnitude,
       a.w4mpro AS w4_magnitude
FROM xmatch x
JOIN allwise a ON x.oid_catalog = a.oid_catalog
WHERE x.oid = 'ZTF19aascdol'
  AND x.catid = 'ALLWISE';
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (0, 7)


Unnamed: 0,ztf_identifier,allwise_identifier,distance_arcsec,w1_magnitude,w2_magnitude,w3_magnitude,w4_magnitude


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['ztf_identifier'])))

False


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['ztf_identifier'])))

{'ZTF19aascdol'}
set()


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,oid_catalog,dist,w1mpro,w2mpro,w3mpro,w4mpro


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,ztf_identifier,allwise_identifier,distance_arcsec,w1_magnitude,w2_magnitude,w3_magnitude,w4_magnitude


query 96

For Solar System identifiers '2003FP134' and '2009UK56', get all detections for all ZTF objects that lie within 2 arcsec from any of them. Return the following columns, sort by MPC name and detection date: all columns from the 'ss_ztf' table; and detection date, filter identifier, isdiffpos flag, RA dec coordinates, and difference magnitude (and its uncertainty)

In [None]:
print(db_test[db_test['req_id']==96].request.values[0])

For Solar System identifiers '2003FP134' and '2009UK56', get all detections for all ZTF objects that lie within 2 arcsec from any of them. Return the following columns, sort by MPC name and detection date: all columns from the 'ss_ztf' table; and detection date, filter identifier, isdiffpos flag, RA dec coordinates, and difference magnitude (and its uncertainty)


In [None]:
# query gold
query = '''
SELECT
    ss_ztf.*, detection.mjd, detection.fid,
    detection.isdiffpos, detection.ra, detection.dec,
    detection.magpsf, detection.sigmapsf
FROM
    ss_ztf INNER JOIN
    detection
    ON ss_ztf.oid = detection.oid
    AND ss_ztf.candid = detection.candid
WHERE
    ssnamenr IN ('2003FP134','2009UK56')
    AND ssdistnr < 2
ORDER BY ssnamenr, mjd
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (27, 12)


Unnamed: 0,oid,candid,ssdistnr,ssmagnr,ssnamenr,mjd,fid,isdiffpos,ra,dec,magpsf,sigmapsf
0,ZTF21abbsmyp,1595376262715015044,0.0,19.5,2003FP134,59349.376262,1,1,256.824875,-25.974222,20.192154,0.17828
1,ZTF21abcqekm,1608313551615015025,0.0,19.0,2003FP134,59362.313553,2,1,254.248201,-24.971674,18.976067,0.17055
2,ZTF21abculin,1609376562915015021,0.0,19.0,2003FP134,59363.376562,2,1,254.000931,-24.876152,19.098774,0.178708
3,ZTF21abdihfx,1612381041715015014,0.0,18.9,2003FP134,59366.381042,2,1,253.289819,-24.597101,19.05293,0.09188
4,ZTF21abdulcm,1614399211715015002,0.0,18.8,2003FP134,59368.399213,2,1,252.804054,-24.40301,18.866552,0.111349
5,ZTF21abeywpa,1619365033915015076,0.0,19.1,2003FP134,59373.365035,2,1,251.609834,-23.908624,19.000805,0.124175
6,ZTF21abfkygj,1621376883815015021,0.0,19.2,2003FP134,59375.376887,2,1,251.135458,-23.70378,18.983727,0.160271
7,ZTF21abgavxk,1624308764315015004,0.0,19.3,2003FP134,59378.308762,2,1,250.466414,-23.403652,19.122366,0.093317
8,ZTF21abgxhjg,1628315454015015046,0.0,19.5,2003FP134,59382.315451,2,1,249.608787,-22.995193,19.243404,0.105018
9,ZTF21abhfyvv,1630314794115015013,0.0,19.5,2003FP134,59384.314792,2,1,249.212822,-22.794441,19.301159,0.215448


In [None]:
# query 
query = '''
WITH ss_objects AS
  (SELECT *
   FROM ss_ztf
   WHERE ssnamenr IN ('2003FP134',
                      '2009UK56'))
SELECT ss.oid,
       ss.candid,
       ss.ssdistnr,
       ss.ssmagnr,
       ss.ssnamenr,
       d.mjd AS detection_date,
       d.fid AS filter_id,
       d.isdiffpos,
       d.ra,
       d.dec,
       d.magpsf AS diff_magnitude,
       d.sigmapsf AS diff_magnitude_uncertainty
FROM ss_objects ss
JOIN detection d ON ss.oid = d.oid
ORDER BY ss.ssnamenr,
         d.mjd;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (40, 12)


Unnamed: 0,oid,candid,ssdistnr,ssmagnr,ssnamenr,detection_date,filter_id,isdiffpos,ra,dec,diff_magnitude,diff_magnitude_uncertainty
0,ZTF18abngpmh,1554480621215015055,25.0,20.6,2003FP134,59308.480625,2,1,255.638166,-27.069111,16.471214,0.108457
1,ZTF18abngpmh,1554480621215015055,25.0,20.6,2003FP134,59317.438785,2,1,255.638119,-27.069181,16.409678,0.12985
2,ZTF21abbsmyp,1595376262715015044,0.0,19.5,2003FP134,59349.376262,1,1,256.824875,-25.974222,20.192154,0.17828
3,ZTF18abngpmh,1554480621215015055,25.0,20.6,2003FP134,59353.418819,2,1,255.638077,-27.069166,16.358088,0.071829
4,ZTF21abcqekm,1608313551615015025,0.0,19.0,2003FP134,59362.313553,2,1,254.248201,-24.971674,18.976067,0.17055
5,ZTF21abculin,1609376562915015021,0.0,19.0,2003FP134,59363.376562,2,1,254.000931,-24.876152,19.098774,0.178708
6,ZTF21abdihfx,1612381041715015014,0.0,18.9,2003FP134,59366.381042,2,1,253.289819,-24.597101,19.05293,0.09188
7,ZTF21abdulcm,1614399211715015002,0.0,18.8,2003FP134,59368.399213,2,1,252.804054,-24.40301,18.866552,0.111349
8,ZTF21abeywpa,1619365033915015076,0.0,19.1,2003FP134,59373.365035,2,1,251.609834,-23.908624,19.000805,0.124175
9,ZTF21abfkygj,1621376883815015021,0.0,19.2,2003FP134,59375.376887,2,1,251.135458,-23.70378,18.983727,0.160271


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['candid'])) == set(sorted(tables_i['candid'])))

False
False


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid_catalog'])))
print(set(sorted(tables_i['oid_catalog'])))

{'J131116.64+045854.7'}
{'J131116.64+045854.7'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid_catalog,ra,dec,w1mpro,w2mpro,w3mpro,w4mpro,w1sigmpro,w2sigmpro,w3sigmpro,w4sigmpro,j_m_2mass,h_m_2mass,k_m_2mass,j_msig_2mass,h_msig_2mass,k_msig_2mass


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid_catalog,ra,dec,w1mpro,w2mpro,w3mpro,w4mpro,w1sigmpro,w2sigmpro,w3sigmpro,w4sigmpro,j_m_2mass,h_m_2mass,k_m_2mass,j_msig_2mass,h_msig_2mass,k_msig_2mass


query 67

In [None]:
# query gold
query = '''
SELECT
    *
FROM
    gaia_ztf
WHERE
    oid IN ('ZTF18acxlskz','ZTF22aanppbi','ZTF22abunrft')
    AND neargaia < 1.5
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 7)


Unnamed: 0,oid,candid,neargaia,neargaiabright,maggaia,maggaiabright,unique1
0,ZTF18acxlskz,1917201552615010028,0.74125,-999.0,15.321053,-999.0,False


In [None]:
# query 
query = '''
WITH target_objects AS (
                        VALUES ('ZTF18acxlskz'), ('ZTF22aanppbi'), ('ZTF22abunrft'))
SELECT g.*
FROM gaia_ztf g
JOIN target_objects t ON g.oid = t.column1
WHERE g.neargaia <= 1.5;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 7)


Unnamed: 0,oid,candid,neargaia,neargaiabright,maggaia,maggaiabright,unique1
0,ZTF18acxlskz,1917201552615010028,0.74125,-999.0,15.321053,-999.0,False


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['candid'])) == set(sorted(tables_i['candid'])))

True
True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF18acxlskz'}
{'ZTF18acxlskz'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid_catalog,ra,dec,w1mpro,w2mpro,w3mpro,w4mpro,w1sigmpro,w2sigmpro,w3sigmpro,w4sigmpro,j_m_2mass,h_m_2mass,k_m_2mass,j_msig_2mass,h_msig_2mass,k_msig_2mass


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid_catalog,ra,dec,w1mpro,w2mpro,w3mpro,w4mpro,w1sigmpro,w2sigmpro,w3sigmpro,w4sigmpro,j_m_2mass,h_m_2mass,k_m_2mass,j_msig_2mass,h_msig_2mass,k_msig_2mass


query 55

In [None]:
# query gold
query = '''
SELECT
    *
FROM
    object
WHERE
    oid = 'ZTF20aatxryt'
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 22)


Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,meanra,meandec,sigmara,sigmadec,deltajd,firstmjd,lastmjd,step_id_corr,diffpos,reference_change
0,ZTF20aatxryt,177,738,58915.212188,59291.286308,True,False,92,0.633835,0.611086,...,167.946571,29.385115,5e-05,4.8e-05,376.07412,58915.212188,59291.286308,correction_0.0.1,True,False


In [None]:
# query 
query = '''
SELECT *
FROM OBJECT
WHERE oid = 'ZTF20aatxryt';
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 22)


Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,meanra,meandec,sigmara,sigmadec,deltajd,firstmjd,lastmjd,step_id_corr,diffpos,reference_change
0,ZTF20aatxryt,177,738,58915.212188,59291.286308,True,False,92,0.633835,0.611086,...,167.946571,29.385115,5e-05,4.8e-05,376.07412,58915.212188,59291.286308,correction_0.0.1,True,False


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['oid'])))

True


In [None]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['oid'])))

{'ZTF20aatxryt'}
{'ZTF20aatxryt'}


In [None]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,meanra,meandec,sigmara,sigmadec,deltajd,firstmjd,lastmjd,step_id_corr,diffpos,reference_change


In [None]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,meanra,meandec,sigmara,sigmadec,deltajd,firstmjd,lastmjd,step_id_corr,diffpos,reference_change


query 47

In [61]:
# query gold
query = '''
SELECT
    oid, candid, fid, mjd, isdiffpos, ra, dec, magpsf, sigmapsf, rb
FROM
    detection
WHERE
    oid IN ('ZTF23aaawbsc','ZTF21abxlmuw')
    AND rb > 0.7
ORDER BY oid, mjd
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (162, 10)


Unnamed: 0,oid,candid,fid,mjd,isdiffpos,ra,dec,magpsf,sigmapsf,rb
0,ZTF21abxlmuw,1705387805715015007,2,59459.387801,1,21.323490,22.390380,20.659400,0.232067,0.712857
1,ZTF21abxlmuw,1709365085715015008,1,59463.365081,1,21.323480,22.390360,20.626290,0.211267,0.822857
2,ZTF21abxlmuw,1711400435715015009,1,59465.400440,1,21.323425,22.390396,20.667300,0.234949,0.785714
3,ZTF21abxlmuw,1734359965715015007,1,59488.359965,1,21.323386,22.390342,20.724100,0.274779,0.797143
4,ZTF21abxlmuw,2055474085715015001,2,59809.474086,1,21.323460,22.390334,19.865335,0.192379,0.715714
...,...,...,...,...,...,...,...,...,...,...
157,ZTF23aaawbsc,2411222961115015002,2,60165.222963,1,265.214006,66.206344,19.306845,0.090955,0.787143
158,ZTF23aaawbsc,2415300841115015002,2,60169.300845,1,265.213977,66.206306,19.475810,0.106966,0.860000
159,ZTF23aaawbsc,2417329831115015004,1,60171.329838,1,265.213962,66.206365,20.027710,0.166629,0.808571
160,ZTF23aaawbsc,2419217801115015003,1,60173.217801,1,265.214055,66.206338,20.245500,0.207126,0.714286


In [62]:
# query 
query = '''
-- Query to find detections with rb > 0.7 for specified objects

SELECT d.oid AS ztf_id,
       d.candid,
       d.fid,
       d.mjd AS detection_date,
       d.isdiffpos,
       d.ra,
       d.dec,
       d.magpsf AS diff_magnitude,
       d.sigmapsf AS diff_magnitude_uncertainty,
       d.rb AS real_bogus_score
FROM detection d
WHERE d.oid IN ('ZTF23aaawbsc',
                'ZTF21abxlmuw')
  AND d.rb > 0.7
ORDER BY d.oid,
         d.mjd;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (162, 10)


Unnamed: 0,ztf_id,candid,fid,detection_date,isdiffpos,ra,dec,diff_magnitude,diff_magnitude_uncertainty,real_bogus_score
0,ZTF21abxlmuw,1705387805715015007,2,59459.387801,1,21.323490,22.390380,20.659400,0.232067,0.712857
1,ZTF21abxlmuw,1709365085715015008,1,59463.365081,1,21.323480,22.390360,20.626290,0.211267,0.822857
2,ZTF21abxlmuw,1711400435715015009,1,59465.400440,1,21.323425,22.390396,20.667300,0.234949,0.785714
3,ZTF21abxlmuw,1734359965715015007,1,59488.359965,1,21.323386,22.390342,20.724100,0.274779,0.797143
4,ZTF21abxlmuw,2055474085715015001,2,59809.474086,1,21.323460,22.390334,19.865335,0.192379,0.715714
...,...,...,...,...,...,...,...,...,...,...
157,ZTF23aaawbsc,2411222961115015002,2,60165.222963,1,265.214006,66.206344,19.306845,0.090955,0.787143
158,ZTF23aaawbsc,2415300841115015002,2,60169.300845,1,265.213977,66.206306,19.475810,0.106966,0.860000
159,ZTF23aaawbsc,2417329831115015004,1,60171.329838,1,265.213962,66.206365,20.027710,0.166629,0.808571
160,ZTF23aaawbsc,2419217801115015003,1,60173.217801,1,265.214055,66.206338,20.245500,0.207126,0.714286


In [63]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i['ztf_id'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['candid'])) == set(sorted(tables_i['candid'])))

True
True


In [64]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i['ztf_id'])))

{'ZTF21abxlmuw', 'ZTF23aaawbsc'}
{'ZTF21abxlmuw', 'ZTF23aaawbsc'}


In [65]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,candid,fid,mjd,isdiffpos,ra,dec,magpsf,sigmapsf,rb


In [66]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,ztf_id,candid,fid,detection_date,isdiffpos,ra,dec,diff_magnitude,diff_magnitude_uncertainty,real_bogus_score


query 79

In [68]:
# query gold
query = '''
SELECT
    catid, oid_catalog, dist
FROM
    xmatch
WHERE
    oid = 'ZTF20achscch'
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 3)


Unnamed: 0,catid,oid_catalog,dist
0,allwise,3413m137_ac51-045753,0.290982


In [69]:
# query 
query = '''
-- Query to retrieve catalog information for ZTF object 'ZTF20achscch'

SELECT catid AS CATALOG_NAME,
       oid_catalog AS object_id_in_catalog,
       dist AS distance
FROM xmatch
WHERE oid = 'ZTF20achscch';
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 3)


Unnamed: 0,catalog_name,object_id_in_catalog,distance
0,allwise,3413m137_ac51-045753,0.290982


In [70]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['catid'])) == set(sorted(tables_i['catalog_name'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid_catalog'])) == set(sorted(tables_i['object_id_in_catalog'])))

True
True


In [72]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid_catalog'])))
print(set(sorted(tables_i['object_id_in_catalog'])))

{'3413m137_ac51-045753'}
{'3413m137_ac51-045753'}


In [73]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,catid,oid_catalog,dist


In [74]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,catalog_name,object_id_in_catalog,distance


query 94

In [85]:
# query gold
query = '''
SELECT
    *
FROM
    object INNER JOIN
    reference
    ON object.oid = reference.oid
WHERE
    reference_change
LIMIT 50
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (50, 37)


Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,field,magnr,sigmagnr,chinr,sharpnr,ranr,decnr,mjdstartref,mjdendref,nframesref
0,ZTF18acqeyez,7,1538,58442.381134,60725.385289,True,True,2,,,...,413,13.504,0.013,0.576,-0.02,122.88081,-3.826346,58160.266065,58430.461609,15
1,ZTF18acqeyez,7,1538,58442.381134,60725.385289,True,True,2,,,...,413,13.894,0.011,0.39,-0.012,122.880806,-3.82635,58227.25,58443.5,24
2,ZTF18acqeyfy,6,1530,58442.381134,60725.385289,True,True,3,,,...,413,13.966,0.015,0.288,-0.03,122.937604,-3.645769,58160.266065,58430.461609,15
3,ZTF18acqeyfy,6,1530,58442.381134,60725.385289,True,True,3,,,...,413,13.895,0.013,0.671,-0.017,122.937599,-3.645771,58227.171227,58443.48522,24
4,ZTF18acqeydf,10,1538,58442.381134,60725.385289,True,True,4,,,...,413,12.981,0.011,0.29,-0.007,122.972595,-3.74427,58160.266065,58430.461609,15
5,ZTF18acqeydf,10,1538,58442.381134,60725.385289,True,True,4,,,...,413,13.312,0.012,0.458,-0.014,122.972588,-3.744273,58227.25,58443.5,24
6,ZTF18acqeyfj,24,1532,58442.381134,60725.385289,True,False,10,,,...,413,13.662,0.013,0.418,-0.008,122.987745,-3.657607,58160.266065,58430.461609,15
7,ZTF18acqeyfj,24,1532,58442.381134,60725.385289,True,False,10,,,...,413,14.292,0.015,0.983,-0.034,122.987738,-3.657618,58227.171227,58443.48522,24
8,ZTF18acqeyga,5,1354,58442.381134,60725.385289,True,False,3,,,...,413,14.117,0.012,0.352,-0.018,122.843348,-3.621933,58160.266065,58430.461609,15
9,ZTF18acqeyga,5,1354,58442.381134,60725.385289,True,False,3,,,...,413,14.853,0.015,0.654,-0.021,122.843339,-3.621937,58227.171227,58443.48522,24


In [86]:
# query 
query = '''
-- Get all columns from the 'object' and 'reference' tables for ZTF objects that had a change in the reference
-- Limited to 50 rows
WITH objects_with_ref_change AS
  (SELECT oid
   FROM OBJECT
   WHERE reference_change = TRUE)
SELECT o.*,
       r.*
FROM OBJECT o
JOIN reference r ON o.oid = r.oid
WHERE o.oid IN
    (SELECT oid
     FROM objects_with_ref_change)
LIMIT 50;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (50, 37)


Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,field,magnr,sigmagnr,chinr,sharpnr,ranr,decnr,mjdstartref,mjdendref,nframesref
0,ZTF18abxdpce,680,2544,58263.467824,60951.343785,True,False,436,,,...,868,14.733,0.016,0.382,-0.028,343.297024,72.929842,58216.514213,58352.323854,15
1,ZTF18abxdpce,680,2544,58263.467824,60951.343785,True,False,436,,,...,868,15.249,0.022,0.627,-0.036,343.297043,72.929831,58229.507245,58441.262419,40
2,ZTF18aayunyn,13,1573,58277.370729,60952.183194,True,True,14,,,...,686,15.261,0.011,0.276,-0.004,300.560691,31.872731,58205.514028,58272.400069,15
3,ZTF18aayunyn,13,1573,58277.370729,60952.183194,True,True,14,,,...,686,14.745,0.016,0.446,0.02,300.56069,31.87273,58198.529977,58287.458599,15
4,ZTF18abaeads,1403,5015,58224.441493,60978.159838,True,False,935,,,...,849,18.756,0.012,0.178,0.003,288.775499,72.226734,58203.402199,58278.383021,15
5,ZTF18abaeads,1403,5015,58224.441493,60978.159838,True,False,935,,,...,850,18.69,0.016,0.62,-0.024,288.775538,72.22674,58230.488206,58334.307141,15
6,ZTF18abaeads,1403,5015,58224.441493,60978.159838,True,False,935,,,...,1877,17.795,0.02,0.901,-0.015,288.775513,72.226723,58356.25,59150.25,16
7,ZTF18abaeads,1403,5015,58224.441493,60978.159838,True,False,935,,,...,850,17.728,0.019,0.725,-0.041,288.775512,72.226727,58216.499815,58310.408715,15
8,ZTF18abaeads,1403,5015,58224.441493,60978.159838,True,False,935,,,...,849,17.808,0.019,1.179,-0.032,288.77551,72.226741,58186.519086,58278.302095,15
9,ZTF18ablesbq,395,3884,58218.508518,60960.150556,True,False,149,,,...,542,17.109001,0.02,0.759,-0.015,302.309248,13.159155,58230.497407,58341.312269,15


In [87]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['oid'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['candid'])) == set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['candid'])))

False
False


In [88]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['oid'])))

{'ZTF18abccqua', 'ZTF18abcphfj', 'ZTF18aayunyn', 'ZTF18abxdpce', 'ZTF18abmodvy', 'ZTF18acqeyky', 'ZTF18aaxcqsf', 'ZTF18acqeymz', 'ZTF18acqeyfj', 'ZTF18acqeylj', 'ZTF18aazmhpg', 'ZTF18acqeyka', 'ZTF18abmjhoc', 'ZTF18acqeyez', 'ZTF18acqeyio', 'ZTF22aaredsy', 'ZTF18abmqtxx', 'ZTF18acqeyfy', 'ZTF18acqeymv', 'ZTF18acqeyga', 'ZTF18acqeylh', 'ZTF18abhqdcz', 'ZTF18acqeydf', 'ZTF18abaeads'}
{'ZTF18aazuvty', 'ZTF18acqfrhy', 'ZTF18abcxamk', 'ZTF18aayunyn', 'ZTF18abxdpce', 'ZTF18aciabns', 'ZTF18acpbgzw', 'ZTF18acqeyij', 'ZTF18abmapgd', 'ZTF18acqfric', 'ZTF18acwqowd', 'ZTF18abjhzza', 'ZTF18abfwnbf', 'ZTF18ablesbq', 'ZTF18absgfkj', 'ZTF18abaeads', 'ZTF19acryvzx'}


In [89]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,field,magnr,sigmagnr,chinr,sharpnr,ranr,decnr,mjdstartref,mjdendref,nframesref


In [90]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,ndethist,ncovhist,mjdstarthist,mjdendhist,corrected,stellar,ndet,g_r_max,g_r_max_corr,...,field,magnr,sigmagnr,chinr,sharpnr,ranr,decnr,mjdstartref,mjdendref,nframesref


query 90

"Find at most 100 ZTF objects that have a multiband period lower than 5 days in the 'lc_classifier_1.2.1-P' feature version. Return all columns from the 'probability' table for such objects, including only data for the light curve classifier, with rankings either 1 or 2"

In [112]:
db_test[db_test['req_id']==90].request.values[0]

"Find at most 100 ZTF objects that have a multiband period lower than 5 days in the 'lc_classifier_1.2.1-P' feature version. Return all columns from the 'probability' table for such objects, including only data for the light curve classifier, with rankings either 1 or 2"

In [134]:
# query gold
query = '''
SELECT
    *
FROM
    probability
WHERE
    classifier_name = 'lc_classifier'
    AND ranking <= 2
    AND oid in
(
SELECT
    oid
FROM
    feature
WHERE
    name = 'Multiband_period'
    AND version = 'lc_classifier_1.2.1-P'
    AND value < 5
LIMIT 100
)
ORDER BY probability.oid,
         probability.ranking;
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (358, 6)


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF17aaaessu,lc_classifier,hierarchical_rf_1.1.0,E,0.502964,1
1,ZTF17aaaessu,lc_classifier,lc_classifier_1.1.13,E,0.285520,1
2,ZTF17aaaessu,lc_classifier,lc_classifier_1.1.13,CEP,0.209840,2
3,ZTF17aaaessu,lc_classifier,hierarchical_rf_1.1.0,DSCT,0.244524,2
4,ZTF17aaagqxl,lc_classifier,hierarchical_rf_1.1.0,E,0.317112,1
...,...,...,...,...,...,...
353,ZTF22aabwnvh,lc_classifier,hierarchical_rf_1.1.0,Blazar,0.205800,2
354,ZTF22aacjcsw,lc_classifier,lc_classifier_1.1.13,Periodic-Other,0.403144,1
355,ZTF22aacjcsw,lc_classifier,hierarchical_rf_1.1.0,Periodic-Other,0.639768,1
356,ZTF22aacjcsw,lc_classifier,hierarchical_rf_1.1.0,YSO,0.144624,2


In [135]:
# query 
query = '''
-- Find objects with multiband period < 5 days and return their top 2 probabilities
WITH period_objects AS
  (SELECT DISTINCT oid
   FROM feature
   WHERE name = 'Multiband_period'
     AND value < 5
     AND VERSION = 'lc_classifier_1.2.1-P'
   LIMIT 100)
SELECT p.*
FROM probability p
JOIN period_objects po ON p.oid = po.oid
WHERE p.classifier_name = 'lc_classifier'
  AND p.ranking IN (1,
                    2)
ORDER BY p.oid,
         p.ranking;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (388, 6)


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking
0,ZTF17aaaaaal,lc_classifier,lc_classifier_1.1.13,Periodic-Other,0.522712,1
1,ZTF17aaaaaal,lc_classifier,hierarchical_rf_1.1.0,Periodic-Other,0.630720,1
2,ZTF17aaaaaal,lc_classifier,hierarchical_rf_1.1.0,E,0.154176,2
3,ZTF17aaaaaal,lc_classifier,lc_classifier_1.1.13,E,0.198024,2
4,ZTF17aaaaaan,lc_classifier,lc_classifier_1.1.13,E,0.252448,1
...,...,...,...,...,...,...
383,ZTF17aaaaanb,lc_classifier,hierarchical_rf_1.1.0,E,0.216544,2
384,ZTF17aaaaanc,lc_classifier,hierarchical_rf_1.1.0,E,0.702000,1
385,ZTF17aaaaanc,lc_classifier,lc_classifier_1.1.13,Periodic-Other,0.394980,1
386,ZTF17aaaaanc,lc_classifier,lc_classifier_1.1.13,E,0.264480,2


In [136]:
print(tables_gold.loc[:, ~tables_gold.columns.duplicated()].reset_index(drop=True).equals(tables_i.loc[:, ~tables_i.columns.duplicated()].reset_index(drop=True)))


False


In [137]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['oid'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['classifier_name'])) == set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['classifier_name'])))

False
True


In [138]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['oid'])))

{'ZTF17aaaessu', 'ZTF18abommtg', 'ZTF19abdustn', 'ZTF18aciepmk', 'ZTF18acfrjme', 'ZTF19aaonuqf', 'ZTF19abdydbj', 'ZTF19abdfahf', 'ZTF18aayujpj', 'ZTF21aaguuxw', 'ZTF18abvztqd', 'ZTF18aaynqox', 'ZTF19abbxfxd', 'ZTF18absitkp', 'ZTF19aaonuql', 'ZTF18acsbrqc', 'ZTF21abcdsuv', 'ZTF20abzcyob', 'ZTF21aaaqswa', 'ZTF18abasovn', 'ZTF18abkmmts', 'ZTF18achfgss', 'ZTF19aaxoztn', 'ZTF18acavigy', 'ZTF19aablcgp', 'ZTF20abvygle', 'ZTF18adamzmn', 'ZTF18abilxvt', 'ZTF19abzuxrq', 'ZTF19aarxvzz', 'ZTF18acdasee', 'ZTF18aaypmtr', 'ZTF18adaqdfq', 'ZTF17aacpvbf', 'ZTF19aadouel', 'ZTF19aayrbaq', 'ZTF20ablxsbu', 'ZTF17aabwftj', 'ZTF19aaoohqa', 'ZTF18abscvmd', 'ZTF18adapnnv', 'ZTF22aabwnvh', 'ZTF18abadjvy', 'ZTF18absoocn', 'ZTF18abivpiy', 'ZTF22aacjcsw', 'ZTF18acnambs', 'ZTF18achixag', 'ZTF18aaynqnn', 'ZTF19abdydlu', 'ZTF18abrnfpd', 'ZTF18adaoqcm', 'ZTF18ablpmbs', 'ZTF18acajzhc', 'ZTF18ablqndm', 'ZTF18acwbhzi', 'ZTF18actytgu', 'ZTF18abvgrec', 'ZTF18abgsptz', 'ZTF19abdhfsq', 'ZTF18aayvacc', 'ZTF18abpogdx', 'ZTF18a

In [139]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking


In [140]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,oid,classifier_name,classifier_version,class_name,probability,ranking


query 78

"For the light curve classifier, return the following columns from the 'taxonomy' table: name and version of the classifier, and classes included in it"

In [141]:
db_test[db_test['req_id']==78].request.values[0]

"For the light curve classifier, return the following columns from the 'taxonomy' table: name and version of the classifier, and classes included in it"

In [142]:
# query gold
query = '''
SELECT
    *
FROM
    taxonomy
WHERE
    classifier_name = 'lc_classifier'
'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1, 3)


Unnamed: 0,classifier_name,classifier_version,classes
0,lc_classifier,hierarchical_random_forest_1.0.0,"[SNIa, SNIbc, SNII, SLSN, QSO, AGN, Blazar, CV..."


In [143]:
# query 
query = '''
SELECT classifier_name,
       classifier_version,
       classes
FROM taxonomy
WHERE classifier_name = 'lc_classifier';
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1, 3)


Unnamed: 0,classifier_name,classifier_version,classes
0,lc_classifier,hierarchical_random_forest_1.0.0,"[SNIa, SNIbc, SNII, SLSN, QSO, AGN, Blazar, CV..."


In [144]:
# compare if rows are the same
print(tables_gold.loc[:, ~tables_gold.columns.duplicated()].reset_index(drop=True).equals(tables_i.loc[:, ~tables_i.columns.duplicated()].reset_index(drop=True)))

True


In [145]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['classifier_name'])) == set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['classifier_name'])))

True


In [146]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['classifier_name'])))
print(set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['classifier_name'])))

{'lc_classifier'}
{'lc_classifier'}


In [147]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

TypeError: unhashable type: 'list'

In [None]:
tables_i[tables_i.duplicated(keep=False)]


TypeError: unhashable type: 'list'

query 39

In [148]:
# query gold
query = '''
SELECT
  sq.oid, sq.meanra, sq.meandec,
  detection.candid, detection.fid as f_id,detection.magpsf, detection.sigmapsf_corr, detection.sigmapsf_corr_ext
FROM
  (
SELECT
    object.oid, object.meanra, object.meandec, object.ndet
FROM
    object INNER JOIN
    probability
    ON object.oid = probability.oid
WHERE
    probability.classifier_name='lc_classifier_stochastic'
    AND probability.class_name='CV/Nova'
    AND probability.ranking = 1
) AS sq
  INNER JOIN detection
  ON sq.oid = detection.oid
WHERE
  sq.ndet < 50
  AND detection.fid = 2
ORDER BY oid
LIMIT 1000

'''
n = 3
tables_gold, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_gold.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_gold.columns.values.tolist(), "shape": tables_gold.shape})
tables_gold

Query executed successfully, shape: (1000, 8)


Unnamed: 0,oid,meanra,meandec,candid,f_id,magpsf,sigmapsf_corr,sigmapsf_corr_ext
0,ZTF17aaaaanb,324.816860,45.593583,537475640115015008,2,19.760700,0.021343,0.031418
1,ZTF17aaaaanb,324.816860,45.593583,607270780115015027,2,19.648392,0.026439,0.034795
2,ZTF17aaaaanb,324.816860,45.593583,613325340115010003,2,19.791729,0.025855,0.043814
3,ZTF17aaaaanb,324.816860,45.593583,536383390115015014,2,19.882864,0.022219,0.032339
4,ZTF17aaaaanb,324.816860,45.593583,529390300115015005,2,19.678644,100.000000,0.019559
...,...,...,...,...,...,...,...,...
995,ZTF17aaadine,101.589363,-3.744657,1716463772315015001,2,17.612800,0.005632,0.011954
996,ZTF17aaadine,101.589363,-3.744657,1128163402315010011,2,16.821800,100.000000,0.013540
997,ZTF17aaadine,101.589363,-3.744657,1426450772315015003,2,17.840300,100.000000,0.008150
998,ZTF17aaadine,101.589363,-3.744657,1413469302315015002,2,17.441300,100.000000,0.007593


In [152]:
# query 
query = '''
-- Get objects classified as CV/Nova by the stochastic classifier with at most 50 detections
WITH cv_nova_objects AS
  (SELECT o.oid
   FROM OBJECT o
   JOIN probability p ON o.oid = p.oid
   WHERE p.classifier_name = 'lc_classifier_stochastic'
     AND p.class_name = 'CV/Nova'
     AND p.ranking = 1 -- Most probable class

     AND o.ndet <= 50 -- At most 50 detections
) -- Get r-band detections for these objects with magnitude information

SELECT d.oid AS object_identifier,
       d.candid AS candidate_identifier,
       d.fid AS f_id,
       d.ra,
       d.dec,
       d.magpsf AS magnitude,
       d.sigmapsf AS magnitude_error,
       d.magpsf_corr AS psf_corrected_magnitude,
       d.sigmapsf_corr AS psf_corrected_magnitude_error,

  (SELECT MAX(magpsf_corr) - MIN(magpsf_corr)
   FROM detection
   WHERE oid = d.oid
     AND fid = 2) AS max_rise
FROM detection d
JOIN cv_nova_objects c ON d.oid = c.oid
WHERE d.fid = 2 -- r-band filter (fid=2)
ORDER BY d.oid
LIMIT 1000;
'''
n = 3
tables_i, error = run_sql_alerce( query, access_time=2, n_tries=n, query_time=False )
if error:
    print(f'Error in query: {error}')
    # query_columns.append({"req_id": row.req_id, "cols": [], "shape": []})
else:
    print(f'Query executed successfully, shape: {tables_i.shape}')
    # query_columns.append({"req_id": row.req_id, "cols": tables_i.columns.values.tolist(), "shape": tables_i.shape})
tables_i

Query executed successfully, shape: (1000, 10)


Unnamed: 0,object_identifier,candidate_identifier,f_id,ra,dec,magnitude,magnitude_error,psf_corrected_magnitude,psf_corrected_magnitude_error,max_rise
0,ZTF17aaaaanb,537475640115015008,2,324.816838,45.593586,19.760700,0.177926,17.878054,0.021343,0.524338
1,ZTF17aaaaanb,607270780115015027,2,324.816914,45.593575,19.648392,0.181106,17.857359,0.026439,0.524338
2,ZTF17aaaaanb,613325340115010003,2,324.816873,45.593583,19.791729,0.166419,18.342743,0.025855,0.524338
3,ZTF17aaaaanb,536383390115015014,2,324.816970,45.593631,19.882864,0.201099,17.898651,0.022219,0.524338
4,ZTF17aaaaanb,529390300115015005,2,324.816837,45.593660,19.678644,0.104125,17.863106,100.000000,0.524338
...,...,...,...,...,...,...,...,...,...,...
995,ZTF17aaadine,1716463772315015001,2,101.589473,-3.744746,17.612800,0.098539,15.322584,0.005632,0.627300
996,ZTF17aaadine,1128163402315010011,2,101.589354,-3.744667,16.821800,0.033791,15.828869,100.000000,0.627300
997,ZTF17aaadine,1426450772315015003,2,101.589364,-3.744588,17.840300,0.080940,15.347773,100.000000,0.627300
998,ZTF17aaadine,1413469302315015002,2,101.589331,-3.744735,17.441300,0.054552,15.300276,100.000000,0.627300


In [153]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])) == set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['object_identifier'])))
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['candid'])) == set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['candidate_identifier'])))

True
True


In [154]:
print(set(sorted(tables_gold.loc[:, ~tables_gold.columns.duplicated()]['oid'])))
print(set(sorted(tables_i.loc[:, ~tables_i.columns.duplicated()]['object_identifier'])))

{'ZTF17aaaaxqy', 'ZTF17aaaaayc', 'ZTF17aaaaght', 'ZTF17aaackne', 'ZTF17aaadapx', 'ZTF17aaaddnz', 'ZTF17aaaawyw', 'ZTF17aaaajqw', 'ZTF17aaaaehi', 'ZTF17aaaaanb', 'ZTF17aaaceht', 'ZTF17aaaaatr', 'ZTF17aaaammr', 'ZTF17aaaafdw', 'ZTF17aaaadni', 'ZTF17aaaaxsz', 'ZTF17aaaadqr', 'ZTF17aaaabwr', 'ZTF17aaaatxv', 'ZTF17aaaberw', 'ZTF17aaaaris', 'ZTF17aaabdwm', 'ZTF17aaabudq', 'ZTF17aaacfxu', 'ZTF17aaackdh', 'ZTF17aaadine', 'ZTF17aaaaoly', 'ZTF17aaaakfu', 'ZTF17aaabmzy', 'ZTF17aaaaofm', 'ZTF17aaachuj', 'ZTF17aaaaofb', 'ZTF17aaaafbn', 'ZTF17aaaafup', 'ZTF17aaabhjz', 'ZTF17aaaaown', 'ZTF17aaabrip', 'ZTF17aaaagiu', 'ZTF17aaabeiw', 'ZTF17aaadasq', 'ZTF17aaacdpp', 'ZTF17aaaamso', 'ZTF17aaabpca', 'ZTF17aaaantx', 'ZTF17aaacjhp', 'ZTF17aaabeof', 'ZTF17aaaaetc', 'ZTF17aaaacbq', 'ZTF17aaaagbn', 'ZTF17aaaammt', 'ZTF17aaabaon', 'ZTF17aaabkfu', 'ZTF17aaaachu', 'ZTF17aaabvoc', 'ZTF17aaadgdr', 'ZTF17aaaaauw', 'ZTF17aaabkjg', 'ZTF17aaaajfc', 'ZTF17aaaanys', 'ZTF17aaadfow', 'ZTF17aaaaoeo', 'ZTF17aaabsdz', 'ZTF17a

In [155]:
# check duplicated rows
tables_gold[tables_gold.duplicated(keep=False)]

Unnamed: 0,oid,meanra,meandec,candid,f_id,magpsf,sigmapsf_corr,sigmapsf_corr_ext


In [156]:
tables_i[tables_i.duplicated(keep=False)]


Unnamed: 0,object_identifier,candidate_identifier,f_id,ra,dec,magnitude,magnitude_error,psf_corrected_magnitude,psf_corrected_magnitude_error,max_rise
