In [1]:
# Importing necessary libraries 
import os
import re
import json
import requests
from astropy.table import Table
from urllib.parse import urlencode

### General Structure of Reading JSON files from SLED and FITS Imaging Downloads from Multiple Databases.

In [2]:
def safe_name(name: str) -> str:
    """
    Sanitize object name for safe folder/file creation on Windows.

    :param name: Original object name
    :type name: str
    :return: Sanitized name containing only letters, numbers, underscores, or dashes
    :rtype: str
    """
    return re.sub(r'[^\w\-]', '_', name)

In [3]:
def load_sled_table(json_file: str) -> Table:
    """
    Load JSON file of lensed objects into an Astropy Table.
    Each row corresponds to a unique 'name' entry in the JSON.

    :param json_file: Path to the JSON catalog
    :type json_file: str
    :return: Astropy Table with catalog entries
    :rtype: astropy.table.Table
    """
    with open(json_file, "r") as f:
        data = json.load(f)

    rows = []
    for entry in data:
        rows.append({
            "name": entry.get("name", "Unknown"),
            "ra": float(entry.get("ra", None)),
            "dec": float(entry.get("dec", None)),
            "score": entry.get("score", None),
            "flag": entry.get("flag", "Unknown"),
            "n_img": entry.get("n_img", None),
            "image_sep": entry.get("image_sep", None),
            "lens_type": entry.get("lens_type", "Unknown"),
            "source_type": entry.get("source_type", "Unknown"),
            "imaging": entry.get("imaging", [])
        })

    table = Table(
        rows=rows,
        names=[
            "name", "ra", "dec", "score",
            "flag", "n_img", "image_sep", "lens_type", "source_type", "imaging"
        ]
    )
    return table

In [4]:
def download_fits(url: str, outpath: str) -> None:
    """
    Download a FITS file and save it to `outpath`.

    :param url: URL of the FITS file
    :type url: str
    :param outpath: Local file path to save the FITS
    :type outpath: str
    :return: None
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        with open(outpath, "wb") as f:
            f.write(response.content)
        print(f"Saved {outpath}")
    except requests.HTTPError as e:
        print(f"Failed to fetch {url}: {e}")

In [5]:
def query_panstarrs(ra: float, dec: float, imaging: list, outdir: str, name: str,
                    processed_names: set) -> None:
    """
    Download Pan-STARRS FITS cutouts for each band in `imaging` using ps1filenames.py service.
    If imaging list is empty, download all valid bands for Pan-STARRS1 using the given RA/Dec.
    Files are saved as: ./outdir/<safe_name>/<instrument>/<safe_name>_<band>.fits.
    Duplicate objects (based on sanitized name) are skipped.
    Already-downloaded FITS files are skipped.

    :param ra: Right Ascension of the object
    :type ra: float
    :param dec: Declination of the object
    :type dec: float
    :param imaging: List of imaging dictionaries (each with 'instrument' and 'band')
    :type imaging: list
    :param outdir: Base output directory
    :type outdir: str
    :param name: Original object name
    :type name: str
    :param processed_names: Set of sanitized names already processed to avoid duplicates
    :type processed_names: set
    :return: None
    """
    sanitized_name = safe_name(name)

    if sanitized_name in processed_names:
        print(f"Skipping duplicate object {name} -> {sanitized_name}")
        return
    processed_names.add(sanitized_name)

    base_fitscut = "https://ps1images.stsci.edu/cgi-bin/fitscut.cgi?"
    instrument_name = "Pan-STARRS1"
    valid_bands = ["g", "r", "i", "z", "y"]

    # If imaging is empty, download all valid bands
    if not imaging:
        imaging = [{"instrument": instrument_name, "band": band} for band in valid_bands]

    for img in imaging:
        # Use default instrument if missing
        instrument = img.get("instrument", instrument_name)
        band = img.get("band", None)

        if instrument != instrument_name:
            continue

        # If band missing, skip only if it's truly None
        if not band:
            continue

        band = band.strip().lower()
        if band not in valid_bands:
            continue

        # Output path
        inst_dir = os.path.join(outdir, sanitized_name, instrument)
        os.makedirs(inst_dir, exist_ok=True)
        outpath = os.path.join(inst_dir, f"{sanitized_name}_{band}.fits")

        # Skip if file already exists
        if os.path.exists(outpath):
            print(f"Skipping already downloaded {outpath}")
            continue

        # Query Pan-STARRS filenames
        try:
            url_table = f"https://ps1images.stsci.edu/cgi-bin/ps1filenames.py?ra={ra}&dec={dec}&filters={band}"
            table = Table.read(url_table, format='ascii')
            if len(table) == 0:
                filename = None  # Will download cutout directly with RA/Dec
            else:
                filename = table['filename'][0]
        except Exception as e:
            print(f"Failed to query filenames for {name} band {band}: {e}")
            filename = None

        # FITS cutout parameters
        params = {
            "ra": ra,
            "dec": dec,
            "size": 120,
            "format": "fits"
        }
        if filename:
            params["red"] = filename

        url = base_fitscut + urlencode(params)

        try:
            download_fits(url, outpath)
        except Exception as e:
            print(f"Failed to fetch {url}: {e}")

### Accessing JSON lensed (galaxies, quasars, supernovae) files

In [6]:
if __name__ == "__main__":
    # List of catalogs to process
    catalogs = [
        ("./lensed_galaxies/galaxies.json", "./lensed_galaxies/cutouts"),
        ("./lensed_quasars/quasars.json", "./lensed_quasars/cutouts"),
        ("./lensed_supernovae/supernovae.json", "./lensed_supernovae/cutouts")
    ]

    processed_names = set()

    for json_path, outdir in catalogs:
        table = load_sled_table(json_path)
        print(table)

        for row in table:
            name = row["name"]
            ra = row["ra"]
            dec = row["dec"]
            imaging_list = row["imaging"]  # May be empty

            # Always call query_panstarrs, even if imaging_list is empty
            query_panstarrs(
                ra=ra,
                dec=dec,
                imaging=imaging_list,
                outdir=outdir,
                name=name,
                processed_names=processed_names  # make sure to pass the set
            )

         name          ...
---------------------- ...
         HAHJ0001-5440 ...
         HAHJ0014-3026 ...
ACSJ001423.02-302109.8 ...
ACSJ001426.26-302255.9 ...
         HAHJ0015-3904 ...
   J002240.91+143110.4 ...
     HAHJ002348-244149 ...
     HAHJ002357-244154 ...
            J0025-0144 ...
         HAHJ0026-1105 ...
                   ... ...
        SDSSJ2324+0105 ...
         HAHJ2332-5359 ...
          J2336â€“5352 ...
         HAHJ2338+2704 ...
         HAHJ2341-0000 ...
         HAHJ2342-5413 ...
            J2349-5113 ...
ACSJ235130.60-261459.7 ...
         HAHJ2357+4147 ...
   J235933.53+020823.3 ...
Length = 407 rows
Skipping already downloaded ./lensed_galaxies/cutouts\HAHJ0014-3026\Pan-STARRS1\HAHJ0014-3026_r.fits
Skipping already downloaded ./lensed_galaxies/cutouts\HAHJ0014-3026\Pan-STARRS1\HAHJ0014-3026_y.fits
Skipping already downloaded ./lensed_galaxies/cutouts\HAHJ0014-3026\Pan-STARRS1\HAHJ0014-3026_i.fits
Skipping already downloaded ./lensed_galaxies/cutouts\HAHJ