In [1]:
# Import necessary libraries
import os
import pandas as pd
from astropy.coordinates import SkyCoord
from astroquery.mast import Catalogs
from astropy import units as u
from tqdm import tqdm
import glob
import requests

In [2]:
# Step 1.1: Extract the SDSS DR18 LRG sample using CasJobs
# Query results are saved as sdss_dr18_lrg_sample.csv (sample size: 1,174,900 objects)
# Selection criteria (CasJobs SQL query):

# SELECT
  # s.specobjid,
  # s.z AS redshift,                   
  # s.veldisp,
  # p.ra, p.dec, p.u, p.g, p.r, p.i, p.modelMag_r,
  # s.programname, s.plate, s.fiberid, s.mjd into mydb.LRG_full_catalog from SpecObjAll AS s
# JOIN PhotoObjAll AS p ON s.bestobjid = p.objid
# WHERE
  # s.class = 'GALAXY'
  # AND s.z BETWEEN 0.1 AND 0.7
  # AND s.veldisp > 0 AND s.veldisp < 500
  # AND s.programname IN ('boss', 'eboss')
  # AND (p.r - p.i) > 0.5
  # AND (p.g - p.r) > 0.7
  # AND p.modelMag_r BETWEEN 16 AND 21

In [4]:
# --- Run configuration ------------------------------------------------------
# Input catalogue (one row per SDSS object) that is cross-matched below.
INPUT_CSV = "sdss_dr18_lrg_sample.csv"
# Number of input rows handled (and written to one output file) per batch.
BATCH_SIZE = 10000
# Each finished batch is stored as OUTPUT_FOLDER/OUTPUT_CSV_TEMPLATE.
OUTPUT_FOLDER = "sdss_lrg_queried_objects"
OUTPUT_CSV_TEMPLATE = "sdss_lrg_ps1_matched_batch_{batch_num}.csv"

# Load the full input catalogue once; batches are sliced from this frame.
df = pd.read_csv(INPUT_CSV)
n_objects = df.shape[0]

def query_ps1_batch(batch_df, radius_arcsec=2.5):
    """Cross-match one batch of objects against Pan-STARRS and keep usable matches.

    For every row of ``batch_df`` a cone search is issued through
    ``Catalogs.query_region`` (catalog ``'PanSTARRS'``, data release ``'dr2'``)
    around the row's ``ra``/``dec``. The first returned row is used as the match
    and nine of its fields are copied into new ``ps1_*`` columns. Rows with no
    match, or whose query raised, get ``None`` in all nine columns.

    Parameters
    ----------
    batch_df : pandas.DataFrame
        Must provide ``ra`` and ``dec`` columns; they are passed to ``SkyCoord``
        with ``unit='deg'`` and ``frame='icrs'``. The nine ``ps1_*`` columns are
        added to this frame in place.
    radius_arcsec : float, optional
        Cone-search radius in arcseconds. Defaults to 2.5.

    Returns
    -------
    pandas.DataFrame
        The rows of ``batch_df`` that have at least two non-null values among
        ``ps1_g_mag``, ``ps1_r_mag`` and ``ps1_i_mag``, with a fresh 0..n-1 index.

    Notes
    -----
    Query errors are deliberately not fatal (one bad object must not abort a
    long-running batch), but they are counted and reported so that a network
    outage cannot silently masquerade as "no PS1 counterpart".
    """
    # Output column -> field name read from the matched catalogue row.
    # NOTE(review): a field that is absent from the returned table silently
    # yields None; confirm that the *MeanDepth and *FWHM names exist in the
    # table served for catalog='PanSTARRS', data_release='dr2'.
    ps1_fields = {
        'ps1_g_mag': 'gMeanPSFMag',
        'ps1_r_mag': 'rMeanPSFMag',
        'ps1_i_mag': 'iMeanPSFMag',
        'ps1_g_depth': 'gMeanDepth',
        'ps1_r_depth': 'rMeanDepth',
        'ps1_i_depth': 'iMeanDepth',
        'ps1_g_fwhm': 'gFWHM',
        'ps1_r_fwhm': 'rFWHM',
        'ps1_i_fwhm': 'iFWHM',
    }

    def _field(match, name):
        """Return ``match[name]``, or None when the row has no such field."""
        # Plain item access instead of ``match.get(name)`` so the lookup does
        # not depend on the row type offering a dict-style ``get`` method.
        # NOTE(review): presumably older astropy Row objects lack ``get`` - confirm.
        try:
            return match[name]
        except KeyError:
            return None

    collected = {column: [] for column in ps1_fields}
    n_failed = 0
    last_error = None

    for _, row in tqdm(batch_df.iterrows(), total=len(batch_df)):
        # Start from "no match"; only replaced once a full set of values has
        # been read, so every column always receives exactly one entry per row.
        values = dict.fromkeys(ps1_fields)
        try:
            coord = SkyCoord(ra=row['ra'], dec=row['dec'], unit='deg', frame='icrs')
            result = Catalogs.query_region(
                coord,
                radius=radius_arcsec * u.arcsec,
                catalog='PanSTARRS',
                data_release='dr2',
            )
            if len(result) > 0:
                # NOTE(review): assumes the first returned row is the best
                # (nearest) counterpart - TODO confirm the result ordering.
                best = result[0]
                values = {
                    column: _field(best, field)
                    for column, field in ps1_fields.items()
                }
        except Exception as exc:
            # Best effort: keep going, but remember that this was a failure
            # rather than a genuine non-detection.
            n_failed += 1
            last_error = exc

        for column, value in values.items():
            collected[column].append(value)

    for column, column_values in collected.items():
        batch_df[column] = column_values

    if n_failed:
        print(
            f"Warning: {n_failed} of {len(batch_df)} PS1 queries failed and were "
            f"recorded as unmatched (last error: {last_error!r})"
        )

    # Filter: keep only rows with at least 2 non-null PS1 magnitudes
    # NOTE(review): if the catalogue encodes a missing magnitude as a numeric
    # sentinel (PS1 tables commonly use -999) instead of null, such values
    # count as "non-null" here - confirm and mask them if so.
    mask = (
        batch_df[['ps1_g_mag', 'ps1_r_mag', 'ps1_i_mag']]
        .notnull()
        .sum(axis=1) >= 2
    )
    filtered_df = batch_df[mask].reset_index(drop=True)
    print(f"Filtered from {len(batch_df)} → {len(filtered_df)} rows with ≥ 2 PS1 bands")
    return filtered_df

# Create the output folder once, before any catalogue queries, so that a bad
# path or permission problem shows up immediately instead of after a batch
# has already been queried.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

for i in range(0, n_objects, BATCH_SIZE):
    batch_num = i // BATCH_SIZE + 1
    output_file = os.path.join(OUTPUT_FOLDER, OUTPUT_CSV_TEMPLATE.format(batch_num=batch_num))

    # Resume support: a batch whose output file already exists is not re-queried.
    if os.path.exists(output_file):
        print(f"Batch {batch_num} already done (found {output_file}), skipping...")
        continue

    # Work on a copy because query_ps1_batch adds columns to the frame it is given.
    batch_df = df.iloc[i:i+BATCH_SIZE].copy()
    print(f"\nProcessing batch {batch_num} ({i} to {i + len(batch_df) - 1})...")
    filtered_df = query_ps1_batch(batch_df)

    # Write under a temporary name and rename afterwards: if the run is
    # interrupted mid-write, no truncated file is left under the final name
    # for the resume check above to mistake for a finished batch.
    partial_file = output_file + ".tmp"
    filtered_df.to_csv(partial_file, index=False)
    os.replace(partial_file, output_file)
    print(f"Saved filtered batch {batch_num} to {output_file}")

print("\nAll batches processed!")

Batch 1 already done (found sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_1.csv), skipping...
Batch 2 already done (found sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_2.csv), skipping...
Batch 3 already done (found sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_3.csv), skipping...
Batch 4 already done (found sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_4.csv), skipping...
Batch 5 already done (found sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_5.csv), skipping...
Batch 6 already done (found sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_6.csv), skipping...
Batch 7 already done (found sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_7.csv), skipping...
Batch 8 already done (found sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_8.csv), skipping...
Batch 9 already done (found sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_9.csv), skipping...
Batch 10 already done (found sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_10.csv), skipping..

100%|██████████| 10000/10000 [14:27<00:00, 11.52it/s]


Filtered from 10000 → 9997 rows with ≥ 2 PS1 bands
Saved filtered batch 112 to sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_112.csv

Processing batch 113 (1120000 to 1129999)...


100%|██████████| 10000/10000 [14:12<00:00, 11.74it/s]


Filtered from 10000 → 9997 rows with ≥ 2 PS1 bands
Saved filtered batch 113 to sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_113.csv

Processing batch 114 (1130000 to 1139999)...


100%|██████████| 10000/10000 [14:27<00:00, 11.52it/s]


Filtered from 10000 → 10000 rows with ≥ 2 PS1 bands
Saved filtered batch 114 to sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_114.csv

Processing batch 115 (1140000 to 1149999)...


100%|██████████| 10000/10000 [14:09<00:00, 11.77it/s]


Filtered from 10000 → 9995 rows with ≥ 2 PS1 bands
Saved filtered batch 115 to sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_115.csv

Processing batch 116 (1150000 to 1159999)...


100%|██████████| 10000/10000 [13:55<00:00, 11.97it/s]


Filtered from 10000 → 9990 rows with ≥ 2 PS1 bands
Saved filtered batch 116 to sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_116.csv

Processing batch 117 (1160000 to 1169999)...


100%|██████████| 10000/10000 [14:02<00:00, 11.87it/s]


Filtered from 10000 → 9998 rows with ≥ 2 PS1 bands
Saved filtered batch 117 to sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_117.csv

Processing batch 118 (1170000 to 1174899)...


100%|██████████| 4900/4900 [06:50<00:00, 11.93it/s]

Filtered from 4900 → 4899 rows with ≥ 2 PS1 bands
Saved filtered batch 118 to sdss_lrg_queried_objects\sdss_lrg_ps1_matched_batch_118.csv

All batches processed!



