## Creating the SQLite Databases Necessary for DES Sorcha

This notebook creates the SQLite databases needed to run **DES** in Sorcha. The code obtains the necessary data from https://github.com/bernardinelli/DESTNOSIM/tree/master/data and converts it to SQLite databases. It creates two files:

1. **Visits database** (`DES_visits.db` by default) from `y6a1c.ccdcorners.fits.gz`
2. **Pointing database** (`DES_TNO.db` by default) from `y6a1c.exposures.positions.fits`

---
To run DES, first create the databases with this notebook, then run `sorcha init` on the command line and select the **DES config file** option. Finally, run Sorcha with:

```bash
sorcha run -c DES_config_file.ini --pd DES_TNO.db --ob orbits_filename.csv -p colours_filename.csv -o ./ -s des --vd DES_visits.db
```

`orbits_filename.csv` and `colours_filename.csv` should be your input files for the objects' orbits and colours, respectively.

In [None]:
import numpy as np
from astropy.io import fits
from sqlalchemy import create_engine
import sqlite3
from astropy.time import Time
import pandas as pd
import requests
from io import BytesIO
import gzip



def process_pointings_to_sqlite(
    use_url,
    fits_path="y6a1c.exposures.positions.fits",
    db_path="DES_TNO.db",
    url="https://github.com/bernardinelli/DESTNOSIM/raw/refs/heads/master/data/y6a1c.exposures.positions.fits",
    timeout=60,
):
    """
    Convert the DES exposure/pointing FITS table into an indexed SQLite database.

    The first table extension (HDU 1) of the FITS file is read, its columns are
    renamed, a ``visitExposureTime`` column is added, the mid-exposure MJD is
    converted from TDB to TAI, and the result is written to the ``observations``
    table of ``db_path`` (replacing any existing table of that name).

    Parameters
    ----------
    use_url : bool
        If True, the FITS file is downloaded from `url`. If False, the local
        file given by `fits_path` is read instead.

    fits_path : str, optional
        Path to the local FITS file (default is "y6a1c.exposures.positions.fits").
        Only used if `use_url` is False.

    db_path : str, optional
        Path where the SQLite database will be saved (default is "DES_TNO.db").

    url : str, optional
        Direct URL to the raw FITS file hosted on GitHub. Only used if
        `use_url` is True.

    timeout : float or tuple, optional
        Timeout passed to ``requests.get`` for the download (default is 60).
        Only used if `use_url` is True.

    Raises
    ------
    requests.HTTPError
        If the download returns an error status (via ``raise_for_status``).
    """
    if use_url:
        # A timeout stops a stalled connection from hanging the notebook forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        fits_source = BytesIO(response.content)
    else:
        fits_source = fits_path

    with fits.open(fits_source) as hdul:
        df = hdul[1].data

    # Build the output columns. Numeric columns are copied into explicit
    # little-endian arrays; 3-element vector columns are split into scalars.
    # NOTE: the key order matters, see the `insert(8, ...)` call below.
    temp = {
        "cov_xx": np.array(df["cov"][:, 0], dtype="<f8"),
        "cov_yy": np.array(df["cov"][:, 1], dtype="<f8"),
        "cov_xy": np.array(df["cov"][:, 2], dtype="<f8"),
        "covwarn": df["covwarn"],
        "fieldDec": np.array(df["dec"], dtype="<f8"),
        "ecl_lat": np.array(df["ecl_lat"], dtype="<f8"),
        "ecl_lon": np.array(df["ecl_lon"], dtype="<f8"),
        "observationId": np.array(df["expnum"], "<i4"),
        "filter": df["filter"],
        "observationMidpointMJD": np.array(df["mjd_mid"], dtype="<f8"),
        "observatory_1": np.array(df["observatory"][:, 0], dtype="<f8"),
        "observatory_2": np.array(df["observatory"][:, 1], dtype="<f8"),
        "observatory_3": np.array(df["observatory"][:, 2], dtype="<f8"),
        "obs_ecl_lon": np.array(df["obs_ecl_lon"], "<f8"),
        "fieldRA": np.array(df["ra"], dtype="<f8"),
        "fiveSigmaDepth": np.array(df["m50"], dtype="<f8"),
        "k": np.array(df["k"], dtype="<f8"),
        "c": np.array(df["c"], dtype="<f8"),
        "velocity_1": np.array(df["velocity"][:, 0], dtype="<f8"),
        "velocity_2": np.array(df["velocity"][:, 1], dtype="<f8"),
        "velocity_3": np.array(df["velocity"][:, 2], dtype="<f8"),
    }

    # Exposure time (presumably seconds): 90 everywhere, except "Y"-filter
    # exposures with a mid-point MJD before 57447, which get 45. The MJD cut is
    # applied to the values as read from the file, before the TAI conversion.
    exo_time_s = np.full(len(df), 90)
    mask = (temp["observationMidpointMJD"] < 57447) & (temp["filter"] == "Y")
    exo_time_s[mask] = 45

    # Convert TDB to TAI
    time = Time(temp["observationMidpointMJD"], format="mjd", scale="tdb")
    temp["observationMidpointMJD"] = time.tai.value

    # Check for NaNs
    if np.any(pd.isnull(temp["observationMidpointMJD"])):
        print("Warning: NaN values found in observationMidpointMJD")

    # Create DataFrame; position 8 places visitExposureTime right before "filter".
    df_hdl1 = pd.DataFrame(temp)
    df_hdl1.insert(8, "visitExposureTime", exo_time_s)

    # Save to SQLite database, then release the engine's pooled connections.
    engine = create_engine(f"sqlite:///{db_path}")
    try:
        df_hdl1.to_sql("observations", engine, if_exists="replace", index=False)
    finally:
        engine.dispose()

    # Replacing the table above also dropped any indexes from a previous run,
    # so plain CREATE INDEX is safe here.
    index_queries = [
        "CREATE INDEX idx_lat_long ON observations(ecl_lat,ecl_lon)",
        "CREATE INDEX idx_filter ON observations(filter)",
        "CREATE INDEX idx_dec_ra ON observations(fieldDec,fieldRA)",
        "CREATE INDEX idx_dec_ra_mjd ON observations(fieldDec,fieldRA,observationMidpointMJD)",
        "CREATE INDEX idx_mjd ON observations(observationMidpointMJD)",
        "CREATE INDEX idx_m50_c_k ON observations(fiveSigmaDepth,c,k)"
    ]

    # `with connection:` only commits or rolls back; it does not close the
    # connection, so close it explicitly to avoid leaking the file handle.
    db = sqlite3.connect(db_path)
    try:
        with db:
            for query in index_queries:
                db.execute(query)
    finally:
        db.close()

    print(f"DES pointing data processed and saved to: {db_path}")



def _to_native_byteorder(values):
    """Return `values` as a NumPy array in the machine's native byte order."""
    arr = np.asarray(values)
    return arr.astype(arr.dtype.newbyteorder("="), copy=False)


def process_ccd_visits_to_sqlite(
    use_url,
    pointings_db_path,
    fits_path="y6a1c.ccdcorners.fits.gz",
    db_path="DES_visits.db",
    url="https://github.com/bernardinelli/DESTNOSIM/raw/refs/heads/master/data/y6a1c.ccdcorners.fits.gz",
    timeout=60,
):
    """
    Convert the DES CCD-corners FITS table into an indexed SQLite database.

    The first table extension (HDU 1) of the FITS file is read and written to
    the ``observations`` table of ``db_path`` (replacing any existing table of
    that name), one row per (exposure, CCD). Each row also gets a ``magLim``
    value: the ``fiveSigmaDepth`` of the exposure with the same number in the
    pointing database.

    Parameters
    ----------
    use_url : bool
        If True, the gzipped FITS file is downloaded from `url`. If False, the
        local file given by `fits_path` is read instead.

    pointings_db_path : str
        Path to an existing pointing database (as written by
        `process_pointings_to_sqlite`) whose ``observations`` table provides
        ``observationId`` and ``fiveSigmaDepth``.

    fits_path : str, optional
        Path to the local FITS file (default is "y6a1c.ccdcorners.fits.gz").
        Only used if `use_url` is False.

    db_path : str, optional
        Path where the SQLite database will be saved (default is "DES_visits.db").

    url : str, optional
        Direct URL to the raw FITS file hosted on GitHub. Only used if
        `use_url` is True.

    timeout : float or tuple, optional
        Timeout passed to ``requests.get`` for the download (default is 60).
        Only used if `use_url` is True.

    Raises
    ------
    requests.HTTPError
        If the download returns an error status (via ``raise_for_status``).
    """
    if use_url:
        # A timeout stops a stalled connection from hanging the notebook forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        with gzip.open(BytesIO(response.content), "rb") as gz:
            with fits.open(gz) as hdul:
                df = hdul[1].data
    else:
        with fits.open(fits_path) as hdul:
            df = hdul[1].data

    # FITS stores numbers big-endian; convert to native order before handing
    # the arrays to pandas, whose index lookups expect native-endian buffers.
    ra = _to_native_byteorder(df["ra"])
    dec = _to_native_byteorder(df["dec"])

    # Columns 0-3 of ra/dec are the four CCD corners, column 4 the CCD centre.
    temp = {
        "visitId": _to_native_byteorder(df["expnum"]),
        "detectorID": _to_native_byteorder(df["ccdnum"]),
        "llcra": ra[:, 0],
        "llcdec": dec[:, 0],
        "lrcra": ra[:, 1],
        "lrcdec": dec[:, 1],
        "urcra": ra[:, 2],
        "urcdec": dec[:, 2],
        "ulcra": ra[:, 3],
        "ulcdec": dec[:, 3],
        "ra": ra[:, 4],   # center RA
        "dec": dec[:, 4], # center Dec
    }

    # Create DataFrame
    df_hdl1 = pd.DataFrame(temp)

    # Read the per-exposure depth once and release the engine afterwards.
    engine_pointings = create_engine(f"sqlite:///{pointings_db_path}")
    try:
        df_depth = pd.read_sql(
            "SELECT observationId, fiveSigmaDepth FROM observations", engine_pointings
        )
    finally:
        engine_pointings.dispose()

    # Series.map looks values up in the mapper's *index*, so the depth must be
    # indexed by exposure number; a default RangeIndex would match by row position.
    # NOTE(review): assumes observationId is unique in the pointing database.
    depth_by_exposure = df_depth.set_index("observationId")["fiveSigmaDepth"].astype("float64")
    df_hdl1["magLim"] = df_hdl1["visitId"].map(depth_by_exposure)

    n_unmatched = int(df_hdl1["magLim"].isna().sum())
    if n_unmatched:
        print(f"Warning: {n_unmatched} CCD visits have no fiveSigmaDepth in {pointings_db_path}")

    # Save to SQLite, then release the engine's pooled connections.
    engine = create_engine(f"sqlite:///{db_path}")
    try:
        df_hdl1.to_sql("observations", engine, if_exists="replace", index=False, chunksize=10000)
    finally:
        engine.dispose()

    # Create a composite index over the exposure number and corner coordinates.
    index_sql = """
    CREATE INDEX IF NOT EXISTS idx_obs 
    ON observations(visitId, llcra, llcdec, lrcra, lrcdec, urcra, urcdec, ulcra, ulcdec)
    """

    # `with connection:` only commits or rolls back; it does not close the
    # connection, so close it explicitly to avoid leaking the file handle.
    db = sqlite3.connect(db_path)
    try:
        with db:
            db.execute(index_sql)
    finally:
        db.close()

    print(f"DES CCD visits data processed and saved to {db_path}")




In [2]:
# Build the pointing database first: the CCD visits step reads
# fiveSigmaDepth from it to fill the magLim column.
pointing_db = "DES_TNO.db"
process_pointings_to_sqlite(use_url=True, db_path=pointing_db)
process_ccd_visits_to_sqlite(use_url=True, pointings_db_path=pointing_db)

DES pointing data processed and saved to: DES_TNO.db
DES CCD visits data processed and saved to DES_visits.db
