# Notebook to crossmatch an external catalog with the ALeRCE database

```Author: Francisco Förster, Last updated: 20251009```

In [1]:
import pandas as pd
import sqlalchemy as sa
import requests

## Open a connection using the public credentials

In [2]:
credentials_file = "https://raw.githubusercontent.com/alercebroker/usecases/master/alercereaduser_v4.json"
# Bound the wait and surface HTTP errors explicitly; otherwise a stalled
# download hangs the notebook and a 404/500 shows up later as a confusing
# JSON decoding or KeyError failure.
credentials_response = requests.get(credentials_file, timeout=30)
credentials_response.raise_for_status()
params = credentials_response.json()["params"]

In [3]:
# Build the URL from its components instead of concatenating strings, so that
# special characters in the credentials (e.g. "@", "/", ":") cannot be
# misparsed as URL delimiters.
# NOTE: assumes params["host"] is a bare hostname/IP without a ":port" suffix.
db_url = sa.engine.URL.create(
    drivername="postgresql+psycopg2",
    username=params["user"],
    password=params["password"],
    host=params["host"],
    database=params["dbname"],
)
engine = sa.create_engine(db_url)
conn = engine.connect()

## Example external catalog

In [4]:
# External catalog to crossmatch; it must provide the columns
# id_source, ra, and dec
catalog_url = "https://github.com/alercebroker/usecases/blob/master/example_data/watchlist.csv?raw=True"
df = pd.read_csv(catalog_url)
df.head()

Unnamed: 0,id_source,ra,dec
0,source_1,160.183014,33.016467
1,source_2,174.215249,44.837895


In [5]:
def ztf_crossmatch(conn, df, search_radius=1):

    '''
    Crossmatch an external catalog against the ALeRCE `object` table.

    conn: connection to database (anything accepted by pandas.read_sql)
    df: external catalog dataframe (with columns id_source, ra, dec)
    search_radius: search radius in arcsec (default=1)

    The output is a dataframe with the columns source_id, ra, dec (taken
    from the catalog), oid, meanra, meandec (ALeRCE database values),
    dist_arcsec (the crossmatch distance in arcsec) and firstmjd (the time
    of first detection according to the ALeRCE database). A catalog source
    appears once per matching object.

    An empty catalog returns an empty dataframe with those columns without
    querying the database. If building or running the query fails, a message
    is printed and None is returned.
    '''

    result_columns = ["source_id", "ra", "dec", "oid", "meanra", "meandec",
                      "dist_arcsec", "firstmjd"]

    # An empty VALUES list is a SQL syntax error, so answer without querying
    if len(df) == 0:
        return pd.DataFrame(columns=result_columns)

    # Resolve the columns up front so that a missing column is reported
    # directly instead of being mistaken for a database error
    catalog_rows = zip(df.id_source, df.ra, df.dec)

    # Convert the radius into degrees
    radius_deg = search_radius / 3600

    # Prepare the query. The distance column is aliased explicitly instead of
    # relying on the name the database gives to an unnamed expression.
    query = """
    WITH catalog ( source_id, ra, dec) AS (
        VALUES
            {values}
    )
    SELECT 
        c.source_id, c.ra, c.dec, o.oid, o.meanra, o.meandec,
        q3c_dist(c.ra, c.dec, o.meanra, o.meandec) AS q3c_dist, o.firstmjd
    
    FROM object o, catalog c
        /*
           It is REALLY important to first use the catalog then the object
           ra, dec for speed. The radius is in degrees.
        */
    WHERE
        q3c_join(c.ra, c.dec, o.meanra, o.meandec, {radius})
    """

    try:
        # The catalog is embedded in the SQL text, so make every value safe:
        # coordinates must parse as floats, and single quotes inside the
        # source id are doubled (standard SQL string-literal escaping).
        objects = []
        for source_id, ra, dec in catalog_rows:
            safe_id = str(source_id).replace("'", "''")
            objects.append(f"('{safe_id}', {float(ra)!r}, {float(dec)!r})")
        objects_str = ",\n".join(objects)

        # Final query string, radius in degrees
        query_str = query.format(values=objects_str, radius=radius_deg)

        # Do the query
        matches = pd.read_sql(query_str, conn)
    except Exception as exc:
        # `Exception` (not a bare except) so that interrupting a long query
        # with KeyboardInterrupt still stops the notebook
        print("Error accessing the database. Most common causes are timeout " \
              + "errors or wrongly formatted input query.")
        print(f"Details: {type(exc).__name__}: {exc}")
        return None

    # q3c_dist is in degrees; report it in arcsec, keeping the column position
    matches = matches.assign(q3c_dist=matches["q3c_dist"] * 3600)
    return matches.rename(columns={"q3c_dist": "dist_arcsec"})

## Do the crossmatch

Note that the default timeout is 2 min. If your catalog is too large, split it into smaller catalogs and run one query per chunk.

In [6]:
# Crossmatch the example catalog using the default search radius
results = ztf_crossmatch(conn=conn, df=df)
results

Unnamed: 0,source_id,ra,dec,oid,meanra,meandec,dist_arcsec,firstmjd
0,source_1,160.183014,33.016467,ZTF18aaacsup,160.183001,33.016479,0.05765,58423.48456
1,source_1,160.183014,33.016467,ZTF24abrctly,160.183005,33.016208,0.935102,60621.476285
2,source_2,174.215249,44.837895,ZTF25aabsaek,174.215315,44.837997,0.404018,60664.434861
3,source_2,174.215249,44.837895,ZTF18aaapcut,174.215277,44.837916,0.10329,58440.470775


## Close the connection

In [7]:
conn.close()
# Closing the connection only returns it to the engine's pool; dispose of the
# engine as well so the pooled database connections are actually released.
engine.dispose()