In [None]:
import os
from astroquery.gaia import Gaia
from astroquery.mast import Catalogs
import numpy as np
import re
import time
from concurrent.futures import ThreadPoolExecutor

# Matches one signed integer or decimal. The look-behind stops a hyphen that
# directly follows a digit (as in "10-20") from being read as a minus sign.
_NUMBER_PATTERN = re.compile(r"(?<!\d)[-+]?(?:\d*\.\d+|\d+)")

# Each area occupies this many consecutive lines in the report.
_LINES_PER_AREA = 4


def _read_bounds(line, line_number):
    """Return the two numbers found on *line* as floats, in order of appearance."""
    values = [float(token) for token in _NUMBER_PATTERN.findall(line)]
    if len(values) != 2:
        raise ValueError(
            f"report line {line_number}: expected exactly two numbers, "
            f"found {len(values)} in {line.strip()!r}"
        )
    return values[0], values[1]


def parse_report(file_path):
    """Parse the sky-area report and return the RA/Dec range of every area.

    The report is read in groups of four lines. Within each group only the
    second line (RA range) and the third line (Dec range) are used; each must
    contain exactly two numbers, taken as minimum then maximum.

    Args:
        file_path: Path of the report file to read.

    Returns:
        A list with one dict per area, holding the float values ``ra_min``,
        ``ra_max``, ``dec_min`` and ``dec_max``.

    Raises:
        ValueError: If the last area is missing its RA or Dec line, or if an
            RA/Dec line does not contain exactly two numbers.
    """
    with open(file_path, 'r') as report_file:
        lines = report_file.readlines()

    # Drop trailing blank lines so a stray newline at the end of the file is
    # not mistaken for the start of another (incomplete) area.
    while lines and not lines[-1].strip():
        lines.pop()

    areas = []
    for start in range(0, len(lines), _LINES_PER_AREA):
        if start + 2 >= len(lines):
            raise ValueError(
                f"report line {start + 1}: area is missing its RA and/or Dec line"
            )
        # Line numbers in error messages are 1-based.
        ra_min, ra_max = _read_bounds(lines[start + 1], start + 2)
        dec_min, dec_max = _read_bounds(lines[start + 2], start + 3)
        areas.append({
            'ra_min': ra_min,
            'ra_max': ra_max,
            'dec_min': dec_min,
            'dec_max': dec_max,
        })

    return areas

def query_gaia_subrange(ra_min, ra_max, dec_min, dec_max, output_dir):
    """Download the Gaia DR3 sources inside one RA/Dec tile and save them.

    Runs an asynchronous job against ``gaiadr3.gaia_source`` selecting every
    column for sources with ``phot_g_mean_mag <= 19`` inside the tile, then
    writes the result table to a ``.fits`` file in *output_dir*. An existing
    file with the same name is overwritten.

    Args:
        ra_min: Lower RA bound of the tile.
        ra_max: Upper RA bound of the tile.
        dec_min: Lower Dec bound of the tile.
        dec_max: Upper Dec bound of the tile.
        output_dir: Directory that receives the output file.
    """
    # BETWEEN includes both end points, so a source lying exactly on an edge
    # shared by two tiles is returned for both of them.
    query = f"""
    SELECT * FROM gaiadr3.gaia_source
    WHERE ra BETWEEN {ra_min} AND {ra_max}
    AND dec BETWEEN {dec_min} AND {dec_max}
    AND phot_g_mean_mag <= 19
    """
    job = Gaia.launch_job_async(query)
    results = job.get_results()

    # Keep the fractional part of each bound in the file name. Truncating to
    # int made neighbouring tiles share one name and overwrite each other.
    # ".10g" still renders integral bounds without a decimal point.
    file_name = (
        f"gaia_area_ra_{ra_min:.10g}_{ra_max:.10g}"
        f"_dec_{dec_min:.10g}_{dec_max:.10g}.fits"
    )
    output_file = os.path.join(output_dir, file_name)
    results.write(output_file, overwrite=True)
    print(f"Saved Gaia data to {output_file}")

def query_panstarrs_subrange(ra_min, ra_max, dec_min, dec_max, output_dir):
    """Download the Pan-STARRS sources inside one RA/Dec tile and save them.

    Queries the ``PanSTARRS`` catalog through ``Catalogs.query_criteria`` and
    writes the returned table to a ``.fits`` file in *output_dir*. An existing
    file with the same name is overwritten.

    Args:
        ra_min: Lower RA bound of the tile.
        ra_max: Upper RA bound of the tile.
        dec_min: Lower Dec bound of the tile.
        dec_max: Upper Dec bound of the tile.
        output_dir: Directory that receives the output file.
    """
    # NOTE(review): the criteria names (ra, dec, mag_psf_g) and the
    # [min, max] range form are passed through unchanged; confirm them against
    # the Pan-STARRS column names and criteria syntax accepted by MAST.
    catalog_data = Catalogs.query_criteria(catalog="PanSTARRS",
                                           ra=[ra_min, ra_max],
                                           dec=[dec_min, dec_max],
                                           mag_psf_g=[None, 19])

    # Keep the fractional part of each bound in the file name. Truncating to
    # int made neighbouring tiles share one name and overwrite each other.
    # ".10g" still renders integral bounds without a decimal point.
    file_name = (
        f"panstarrs_area_ra_{ra_min:.10g}_{ra_max:.10g}"
        f"_dec_{dec_min:.10g}_{dec_max:.10g}.fits"
    )
    output_file = os.path.join(output_dir, file_name)
    catalog_data.write(output_file, overwrite=True)
    print(f"Saved Pan-STARRS data to {output_file}")

def divide_and_query(area, output_dir_gaia, output_dir_panstarrs, *,
                     ra_parts=6, dec_parts=5, delay=1):
    """Split an area into a grid of tiles and query each tile sequentially.

    The RA and Dec ranges are cut into equal-width segments. For every tile
    the Gaia query runs first, then the Pan-STARRS query, followed by a pause
    of *delay* seconds.

    Args:
        area: Dict with the keys ``ra_min``, ``ra_max``, ``dec_min`` and
            ``dec_max``.
        output_dir_gaia: Directory passed to the Gaia query.
        output_dir_panstarrs: Directory passed to the Pan-STARRS query.
        ra_parts: Number of segments along RA (default 6).
        dec_parts: Number of segments along Dec (default 5).
        delay: Seconds to sleep after each tile (default 1).

    Raises:
        ValueError: If *ra_parts* or *dec_parts* is smaller than 1.
    """
    if ra_parts < 1 or dec_parts < 1:
        raise ValueError("ra_parts and dec_parts must both be at least 1")

    # N segments need N + 1 edges.
    ra_edges = np.linspace(area['ra_min'], area['ra_max'], ra_parts + 1)
    dec_edges = np.linspace(area['dec_min'], area['dec_max'], dec_parts + 1)

    # Pair every edge with its successor to get the (start, end) of a segment.
    for ra_start, ra_end in zip(ra_edges[:-1], ra_edges[1:]):
        for dec_start, dec_end in zip(dec_edges[:-1], dec_edges[1:]):
            query_gaia_subrange(ra_start, ra_end, dec_start, dec_end, output_dir_gaia)
            query_panstarrs_subrange(ra_start, ra_end, dec_start, dec_end, output_dir_panstarrs)

            # Pause between tiles to avoid overwhelming the servers.
            time.sleep(delay)

def parallel_query(area, output_dir_gaia, output_dir_panstarrs, *,
                   max_workers=5, ra_parts=6, dec_parts=5):
    """Split an area into a grid of tiles and query the tiles on a thread pool.

    The RA and Dec ranges are cut into equal-width segments. For every tile
    one Gaia task and one Pan-STARRS task are submitted to a
    ``ThreadPoolExecutor``; the function returns once all tasks are done.

    Args:
        area: Dict with the keys ``ra_min``, ``ra_max``, ``dec_min`` and
            ``dec_max``.
        output_dir_gaia: Directory passed to the Gaia query.
        output_dir_panstarrs: Directory passed to the Pan-STARRS query.
        max_workers: Size of the thread pool (default 5).
        ra_parts: Number of segments along RA (default 6).
        dec_parts: Number of segments along Dec (default 5).

    Raises:
        ValueError: If *ra_parts* or *dec_parts* is smaller than 1.
        Exception: Whatever a query task raised; the first failure in
            submission order is re-raised after the pool has shut down.
    """
    if ra_parts < 1 or dec_parts < 1:
        raise ValueError("ra_parts and dec_parts must both be at least 1")

    # N segments need N + 1 edges.
    ra_edges = np.linspace(area['ra_min'], area['ra_max'], ra_parts + 1)
    dec_edges = np.linspace(area['dec_min'], area['dec_max'], dec_parts + 1)

    # NOTE(review): the module-level Gaia and Catalogs clients are shared by
    # all worker threads; confirm that astroquery supports concurrent use.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        # Pair every edge with its successor to get the (start, end) of a segment.
        for ra_start, ra_end in zip(ra_edges[:-1], ra_edges[1:]):
            for dec_start, dec_end in zip(dec_edges[:-1], dec_edges[1:]):
                futures.append(executor.submit(
                    query_gaia_subrange,
                    ra_start, ra_end, dec_start, dec_end, output_dir_gaia))
                futures.append(executor.submit(
                    query_panstarrs_subrange,
                    ra_start, ra_end, dec_start, dec_end, output_dir_panstarrs))

        # result() blocks until the task finishes and re-raises its exception,
        # so a failed query is not silently dropped.
        for future in futures:
            future.result()

def main(report_path, output_dir_gaia, output_dir_panstarrs):
    """Parse the report and run the parallel queries for every area in it.

    Args:
        report_path: Path of the report file describing the sky areas.
        output_dir_gaia: Directory for the Gaia output; created if missing.
        output_dir_panstarrs: Directory for the Pan-STARRS output; created if
            missing.
    """
    areas = parse_report(report_path)

    # exist_ok=True replaces the separate existence check, which could fail
    # if the directory appeared between the check and the creation.
    os.makedirs(output_dir_gaia, exist_ok=True)
    os.makedirs(output_dir_panstarrs, exist_ok=True)

    # Areas are processed one after another; the tiles of each area are
    # queried in parallel.
    for area in areas:
        parallel_query(area, output_dir_gaia, output_dir_panstarrs)

# Usage: edit these settings, then run the cell/script.
report_path = "sky_areas_report.txt"  # Report file listing the sky areas
output_dir_gaia = "gaia_data"  # Destination directory for the Gaia files
output_dir_panstarrs = "panstarrs_data"  # Destination directory for the Pan-STARRS files

# Download the data for every area listed in the report.
main(
    report_path=report_path,
    output_dir_gaia=output_dir_gaia,
    output_dir_panstarrs=output_dir_panstarrs,
)
