In [2]:
import pandas as pd

# Input catalogue. Column 0 is treated as the Gaia source ID and the last
# column must be populated for a row to survive the filtering below.
file_path = "post_AGB.csv"  # Replace with your actual file path

df = pd.read_csv(file_path, delimiter=',')

# IDs allowed through the membership test.
# NOTE(review): these are taken from `df` itself, so the `isin` check below
# currently accepts every row; it only matters if a second ID list is swapped in.
ids_to_keep = set(df.iloc[:, 0].astype(str))

# Stage 1: keep rows whose 'Vickers category' is at most 3
category_ok = df['Vickers category'] <= 3
filtered_df = df[category_ok]

# Stage 2: require a non-empty last column and an allowed Gaia source ID
last_col_present = filtered_df.iloc[:, -1].notna()
id_allowed = filtered_df.iloc[:, 0].astype(str).isin(ids_to_keep)
filtered_df = filtered_df[last_col_present & id_allowed]

# Gaia source IDs of the surviving rows, as a plain Python list
first_column_list = filtered_df.iloc[:, 0].tolist()

print(first_column_list)

[565507868441719424, 2351623413515105920, 4715635535640762240, 532078488709487360, 4686479751449676032, 351149177434709760, 459182413984008448, 513671461473684352, 433515788197481984, 3303343395568710016, 471908436438311680, 173086700992466688, 255225480926107392, 3238918336374596864, 3388902129107252992, 3422437684728294528, 2968265509022275840, 4758015524139610880, 2902505745786910080, 3334854780347915520, 3336558507975208448, 994259335315643520, 3105987960396950784, 3159640386918214528, 3108327343185135872, 5617989266685365120, 3156171118495247360, 3032030620730261376, 5620444471847839232, 3151417586128916864, 5597822402371118336, 5698817012142459136, 5545800762036628736, 5520238967817034880, 5707613169577769600, 5540178478053582592, 5277809440015969792, 5515266327706463616, 5521628033275348480, 5409069172514684416, 5462428643590805248, 5254793942926363392, 5351904394753372672, 5351069693654349952, 5241806275407841664, 5337582534294739456, 5237007177683569536, 5335866849446446080, 5

In [8]:
# Alias the filtered Gaia IDs (first_column_list) under the name used by the query cells.
gaiadr3_ids = first_column_list
# Cell output: how many Gaia IDs will be queried.
len(gaiadr3_ids)

249

In [10]:
import requests
import pyvo as vo
import json
import numpy as np

# Define the parameters
import os  # needed only for the optional APPLAUSE_TOKEN override below

name = 'APPLAUSE'
url = 'https://www.plate-archive.org/tap'
# SECURITY(review): an API token is committed in this notebook. Prefer setting
# the APPLAUSE_TOKEN environment variable to the full Authorization header
# value ("Token <key>"); the embedded literal is kept only as a fallback so
# existing runs keep working, and should be rotated and removed.
token = os.environ.get('APPLAUSE_TOKEN') or 'Token d62cc50a37a9d01149f6de294ee9ab0193207569'

# Split a sequence into consecutive slices of a fixed maximum length
def chunk_list(data_list, chunk_size):
    """Lazily yield slices of ``data_list`` that hold at most ``chunk_size`` items.

    The final slice may be shorter; an empty input yields nothing.
    """
    offsets = range(0, len(data_list), chunk_size)
    yield from (data_list[offset:offset + chunk_size] for offset in offsets)

# Function to process each chunk
def process_chunk(chunk):
    """Query ``applause_dr4.source_xmatch`` for one batch of Gaia EDR3 IDs.

    Parameters
    ----------
    chunk : iterable
        Gaia EDR3 source IDs. Each one is rendered as a single-quoted literal
        inside the ``IN (...)`` clause.

    Returns
    -------
    The object returned by ``job.fetch_result()`` for the completed async job.

    Raises
    ------
    Whatever ``job.raise_if_error()`` raises when the job ends in an error
    state, plus any exception raised by pyvo/requests while talking to the
    service.
    """
    # Format the gaiaedr3_id list for the SQL query
    # (loop variable renamed so it no longer shadows the builtin `id`)
    ids_str = ', '.join(f"'{gaia_id}'" for gaia_id in chunk)

    # Create the query string
    qstr = f"""
    SELECT plate_id, scan_id, source_id, solution_num, gaiaedr3_id
    FROM applause_dr4.source_xmatch 
    WHERE gaiaedr3_id IN ({ids_str})
    """

    # A fresh session is opened per call; the context manager makes sure it is
    # closed again (previously it was left open on every call, including when
    # the job failed).
    # NOTE(review): assumes fetch_result() has fully read the result before
    # returning, so closing the session afterwards is safe - confirm against pyvo.
    with requests.Session() as tap_session:
        tap_session.headers['Authorization'] = token
        tap_service = vo.dal.TAPService(url, session=tap_session)

        # Submit the query
        lang = 'PostgreSQL'
        job = tap_service.submit_job(qstr, language=lang, QUEUE="1h")
        job.run()

        # Wait until the job reaches one of the terminal phases
        job.wait(phases=["COMPLETED", "ERROR", "ABORTED"], timeout=600.)

        # Raise an error if the job failed
        job.raise_if_error()

        # Fetch results
        return job.fetch_result()

# Report which pyvo version is in use and which TAP service is being queried
print(f'\npyvo version {vo.__version__} \n')
print(f'TAP service {name} \n')

# Accumulator filled by process_chunks_with_retries:
# Gaia EDR3 ID (as str) -> list of plate-match dictionaries
gaiaedr3_to_plates = dict()

# Function to process chunks with retries
def process_chunks_with_retries(gaiadr3_ids, chunk_size, max_retries=3):
    """Query the cross-match table chunk by chunk and fill ``gaiaedr3_to_plates``.

    Parameters
    ----------
    gaiadr3_ids : sequence
        Gaia IDs to look up; split into slices via ``chunk_list``.
    chunk_size : int
        Maximum number of IDs per query.
    max_retries : int, optional
        Number of attempts per chunk before it is skipped (default 3).

    Returns
    -------
    None. Results are merged into the module-level ``gaiaedr3_to_plates``
    dictionary (Gaia ID as ``str`` -> list of dicts with ``plate_id``,
    ``source_id``, ``scan_id`` and ``solution_num``).

    Notes
    -----
    A chunk that still fails after ``max_retries`` attempts is reported on
    stdout and skipped, so the dictionary may be incomplete.
    """
    for chunk in chunk_list(gaiadr3_ids, chunk_size):
        for attempt in range(1, max_retries + 1):
            try:
                results = process_chunk(chunk)
                # Stage this chunk's rows locally first: if anything fails
                # part-way through, nothing has been written to the shared
                # dictionary yet, so a retry cannot append the same rows twice.
                staged = {}
                for row in results:
                    gaiaedr3_id = str(row['gaiaedr3_id'])  # Convert to string
                    staged.setdefault(gaiaedr3_id, []).append({
                        'plate_id': row['plate_id'],
                        'source_id': row['source_id'],
                        'scan_id': row['scan_id'],
                        'solution_num': row['solution_num']
                    })
            except Exception as e:
                print(f"Error processing chunk {chunk} (retry {attempt}/{max_retries}): {e}")
                if attempt == max_retries:
                    print(f"Failed to process chunk after {max_retries} retries. Skipping to next chunk.")
            else:
                # Whole chunk processed cleanly: merge it and stop retrying
                for gaiaedr3_id, plates in staged.items():
                    gaiaedr3_to_plates.setdefault(gaiaedr3_id, []).extend(plates)
                break

# Process all chunks with retries
# (150 IDs per query; results accumulate in the module-level gaiaedr3_to_plates dict)
process_chunks_with_retries(gaiadr3_ids, chunk_size=150)

# Print the resulting dictionary
# print(gaiaedr3_to_plates)


pyvo version 1.5.2 

TAP service APPLAUSE 



In [12]:
import numpy as np

# JSON encoder that understands NumPy scalars and arrays
class NumpyEncoder(json.JSONEncoder):
    """``json.JSONEncoder`` subclass that converts NumPy values to built-ins.

    NumPy integers become ``int``, NumPy floats become ``float`` and arrays
    become lists; anything else is deferred to the base class.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        # Not a NumPy value we handle: let the base class raise TypeError
        return super().default(obj)

# Write the gaiaedr3_to_plates dictionary to disk as indented JSON
plates_json_path = 'pAGB_gaiaedr3_to_plates_fL.txt'
with open(plates_json_path, mode='w') as out_handle:
    json.dump(gaiaedr3_to_plates, out_handle, cls=NumpyEncoder, indent=4, ensure_ascii=False)

print('Dictionary saved to pAGB_gaiaedr3_to_plates_fL.txt')

Dictionary saved to pAGB_gaiaedr3_to_plates_fL.txt


In [14]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import json
import pyvo as vo

# Define the parameters
import os  # needed only for the optional APPLAUSE_TOKEN override below

url = 'https://www.plate-archive.org/tap'
# SECURITY(review): an API token is committed in this notebook. Prefer setting
# the APPLAUSE_TOKEN environment variable to the full Authorization header
# value ("Token <key>"); the embedded literal is kept only as a fallback so
# existing runs keep working, and should be rotated and removed.
token = os.environ.get('APPLAUSE_TOKEN') or 'Token d62cc50a37a9d01149f6de294ee9ab0193207569'

# Break a sequence into back-to-back pieces of bounded length
def chunk_list(data_list, chunk_size):
    """Generate successive pieces of ``data_list``, each at most ``chunk_size`` long."""
    for start in range(0, len(data_list), chunk_size):
        stop = start + chunk_size
        yield data_list[start:stop]

# Read the pAGB_gaiaedr3_to_plates_fL.txt file to obtain source_ids
# (fixed: the `with` statement was missing its trailing colon, which is a SyntaxError)
with open('pAGB_gaiaedr3_to_plates_fL.txt', 'r') as file:
    gaiaedr3_to_plates = json.load(file)

# Extract source_ids from gaiaedr3_to_plates dictionary
# (one entry per plate match, flattened across all Gaia IDs)
source_ids = [entry['source_id'] for entries in gaiaedr3_to_plates.values() for entry in entries]

# Initialize dictionary to store calibration information for each source_id
source_calib_info = {}

# Setup a retry strategy: up to 3 retries with backoff for the listed HTTP status codes
retry_strategy = Retry(
    total=3,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504]
)
adapter = HTTPAdapter(max_retries=retry_strategy)
session = requests.Session()
session.mount("https://", adapter)
session.headers['Authorization'] = token

# Initialize TAP service (one shared session for every query issued from this cell)
tap_service = vo.dal.TAPService(url, session=session)
lang = 'PostgreSQL'

# Run one source_calib query for a batch of source IDs
def process_source_calib_chunk(chunk):
    """Fetch calibration rows from ``applause_dr4.source_calib`` for ``chunk``.

    Only rows with ``phot_calib_flags = 0`` are selected. The query is
    submitted through the module-level ``tap_service`` using the module-level
    ``lang``; the return value is whatever ``job.fetch_result()`` returns.
    """
    # Render every ID as a single-quoted literal for the IN (...) clause
    quoted_ids = [f"'{sid}'" for sid in chunk]
    ids_str = ', '.join(quoted_ids)

    qstr = f"""
    SELECT bpmag, bpmag_error, rpmag, rpmag_error, natmag, natmag_error, natmag_plate, natmag_correction, natmag_residual, source_id, gaiaedr3_id, airmass, zenith_angle, ra_icrs, dec_icrs, phot_calib_flags, gaiaedr3_gmag, gaiaedr3_bp_rp, gaiaedr3_dist, cat_natmag
    FROM applause_dr4.source_calib 
    WHERE source_id IN ({ids_str}) AND phot_calib_flags = 0
    """

    # Submit, start, then block until the job reaches a terminal phase
    terminal_phases = ["COMPLETED", "ERROR", "ABORTED"]
    job = tap_service.submit_job(qstr, language=lang, QUEUE="1h")
    job.run()
    job.wait(phases=terminal_phases, timeout=600.)

    # Surface a failed job as an exception before trying to read results
    job.raise_if_error()
    return job.fetch_result()

# Chunked driver for the source_calib queries, with per-chunk retries
def process_chunks_with_retries(source_ids, chunk_size, max_retries=3):
    """Fetch calibration rows chunk by chunk into ``source_calib_info``.

    Each chunk is attempted up to ``max_retries`` times; failures are printed
    and, once the attempts are exhausted, the chunk is skipped. Rows are
    stored in the module-level ``source_calib_info`` dictionary keyed by
    ``str(source_id)``, so a later row for the same source replaces an
    earlier one. Returns ``None``.

    Note: this reuses the name of the cross-match driver defined in an earlier
    cell and replaces it once this cell has run.
    """
    # Columns copied from each result row, in the order they are stored
    calib_fields = (
        'gaiaedr3_id',
        'bpmag', 'bpmag_error',
        'rpmag', 'rpmag_error',
        'natmag', 'natmag_error', 'natmag_plate',
        'natmag_correction', 'natmag_residual',
        'cat_natmag',
        'airmass', 'zenith_angle',
        'ra_icrs', 'dec_icrs',
        'phot_calib_flags',
        'gaiaedr3_gmag', 'gaiaedr3_bp_rp', 'gaiaedr3_dist',
    )

    for chunk in chunk_list(source_ids, chunk_size):
        for attempt in range(1, max_retries + 1):
            try:
                for row in process_source_calib_chunk(chunk):
                    source_id = str(row['source_id'])
                    source_calib_info[source_id] = {field: row[field] for field in calib_fields}
            except Exception as e:
                print(f"Error processing chunk {chunk} (retry {attempt}/{max_retries}): {e}")
                if attempt == max_retries:
                    print(f"Failed to process chunk after {max_retries} retries. Skipping to next chunk.")
            else:
                break  # chunk handled, move on to the next one

# Process all chunks with retries
# (200 source IDs per query; results accumulate in the module-level source_calib_info dict)
process_chunks_with_retries(source_ids, chunk_size=200)

# Print the resulting dictionary
# print(source_calib_info)

In [16]:
import numpy as np

# Encoder that lets json.dump handle NumPy scalars and arrays
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder mapping NumPy integers, floats and arrays to built-in types."""

    def default(self, obj):
        """Convert ``obj`` if it is a supported NumPy value, else defer to the base class."""
        converters = (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda arr: arr.tolist()),
        )
        for numpy_type, convert in converters:
            if isinstance(obj, numpy_type):
                return convert(obj)
        return super().default(obj)

# Write the source_calib_info dictionary to disk as indented JSON
calib_json_path = 'pAGB_source_calib_info_fL.txt'
with open(calib_json_path, mode='w') as out_handle:
    json.dump(source_calib_info, out_handle, cls=NumpyEncoder, indent=4, ensure_ascii=False)

print('Source calibration information saved to pAGB_source_calib_info_fL.txt')

Source calibration information saved to pAGB_source_calib_info_fL.txt


In [19]:
import requests
import pyvo as vo
import json

# Define the parameters
import os  # needed only for the optional APPLAUSE_TOKEN override below

name = 'APPLAUSE'
url = 'https://www.plate-archive.org/tap'
# SECURITY(review): an API token is committed in this notebook. Prefer setting
# the APPLAUSE_TOKEN environment variable to the full Authorization header
# value ("Token <key>"); the embedded literal is kept only as a fallback so
# existing runs keep working, and should be rotated and removed.
token = os.environ.get('APPLAUSE_TOKEN') or 'Token d62cc50a37a9d01149f6de294ee9ab0193207569'

# Cut a sequence into fixed-size windows (the last one may be shorter)
def chunk_list(data_list, chunk_size):
    """Yield ``data_list[lo:lo + chunk_size]`` for ``lo`` stepping through the input."""
    window_starts = range(0, len(data_list), chunk_size)
    yield from map(lambda lo: data_list[lo:lo + chunk_size], window_starts)

# Function to process the plate details query
def process_plate_details_query(plate_ids):
    """Query ``applause_dr4.plate`` for one batch of plate IDs.

    Parameters
    ----------
    plate_ids : iterable
        Plate IDs. Each one is rendered as a single-quoted literal inside the
        ``IN (...)`` clause.

    Returns
    -------
    The object returned by ``job.fetch_result()`` for the completed async job.

    Raises
    ------
    Whatever ``job.raise_if_error()`` raises when the job ends in an error
    state, plus any exception raised by pyvo/requests while talking to the
    service.
    """
    # Format the plate_id list for the SQL query
    # (loop variable renamed so it no longer shadows the builtin `id`)
    ids_str = ', '.join(f"'{plate_id}'" for plate_id in plate_ids)

    # Create the query string
    qstr = f"""
    SELECT plate_id, plate_num, plate_quality, date_orig, observatory, air_temperature
    FROM applause_dr4.plate
    WHERE plate_id IN ({ids_str})
    """

    # A fresh session is opened per call; the context manager makes sure it is
    # closed again (previously it was left open on every call, including when
    # the job failed).
    # NOTE(review): assumes fetch_result() has fully read the result before
    # returning, so closing the session afterwards is safe - confirm against pyvo.
    with requests.Session() as tap_session:
        tap_session.headers['Authorization'] = token
        tap_service = vo.dal.TAPService(url, session=tap_session)

        # Submit the query
        lang = 'PostgreSQL'
        job = tap_service.submit_job(qstr, language=lang, QUEUE="1h")
        job.run()

        # Wait until the job reaches one of the terminal phases
        job.wait(phases=["COMPLETED", "ERROR", "ABORTED"], timeout=600.)

        # Raise an error if the job failed
        job.raise_if_error()

        # Fetch results
        return job.fetch_result()

# Load the Gaia-ID -> plate-match mapping written by the cross-match step
with open('pAGB_gaiaedr3_to_plates_fL.txt', 'r') as file:
    gaiaedr3_to_plates = json.load(file)

# Collect every distinct plate_id referenced by any match
plate_ids = {
    plate_info['plate_id']
    for plate_list in gaiaedr3_to_plates.values()
    for plate_info in plate_list
}

# Notify the total number of unique plates
total_plates = len(plate_ids)
print(f'Total number of unique plates: {total_plates}')

# Columns copied from each result row, in the order they are stored
plate_fields = ('plate_id', 'plate_num', 'plate_quality', 'date_orig', 'observatory', 'air_temperature')

# Query the plate table 150 IDs at a time and keep one dict per returned row
all_plate_details = []
for chunk in chunk_list(list(plate_ids), 150):
    all_plate_details.extend(
        {field: row[field] for field in plate_fields}
        for row in process_plate_details_query(chunk)
    )

Total number of unique plates: 18902


In [22]:
# Save the unique plate details to a new .txt file in JSON format.
# The path is held in one variable so the confirmation message always names
# the file that was actually written (the message previously dropped "_fL").
plate_details_path = 'pAGB_unique_plate_details_fL.txt'
with open(plate_details_path, 'w') as file:
    json.dump(all_plate_details, file, indent=4, ensure_ascii=False, cls=NumpyEncoder)

print(f'Unique plate details saved to {plate_details_path}')

Unique plate details saved to pAGB_unique_plate_details.txt


In [None]:
import requests
import pyvo

# Housekeeping: delete every COMPLETED async job this account has on the TAP service.
import os  # needed only for the optional APPLAUSE_TOKEN override below

service_url = 'https://www.plate-archive.org/tap'

tap_session = requests.Session()
# SECURITY(review): an API token is committed in this notebook. Prefer setting
# the APPLAUSE_TOKEN environment variable to the full Authorization header
# value ("Token <key>"); the embedded literal is kept only as a fallback so
# existing runs keep working, and should be rotated and removed.
tap_session.headers['Authorization'] = (
    os.environ.get('APPLAUSE_TOKEN') or 'Token d62cc50a37a9d01149f6de294ee9ab0193207569'
)

tap_service = pyvo.dal.TAPService(service_url, session=tap_session)
completed_jobs = tap_service.get_job_list(phases='COMPLETED')
for job_summary in completed_jobs:
    # Re-open the job by URL so that delete() can be called on it
    job = pyvo.dal.AsyncTAPJob(service_url + '/async/' + job_summary.jobid,
                               session=tap_session)
    job.delete()

In [69]:
import json

def add_plate_ids(source_file, gaia_plate_file, output_file):
    """Attach a ``plate_id`` to each calibrated source and write the result as JSON.

    Parameters
    ----------
    source_file : str
        JSON file mapping source-ID strings to dictionaries that may contain
        a ``gaiaedr3_id`` entry.
    gaia_plate_file : str
        JSON file mapping Gaia-ID strings to lists of dictionaries with
        ``source_id`` and ``plate_id`` entries.
    output_file : str
        Path of the JSON file to write.

    Behaviour
    ---------
    * Sources whose ``gaiaedr3_id`` is missing, ``0`` or NaN are written with
      ``plate_id`` set to ``None``.
    * Sources with a usable Gaia ID are written only when a plate entry with
      the same ``source_id`` is listed under that Gaia ID; they get that
      entry's ``plate_id``.
    * NOTE(review): sources with a usable Gaia ID but no matching plate entry
      are omitted from the output - confirm this is intended.
    """
    # Function-scope import: the NaN test previously used `np`, which this
    # cell never imports, so it only worked with leftover kernel state.
    import math

    with open(source_file, 'r') as f:
        source_data = json.load(f)
    with open(gaia_plate_file, 'r') as f:
        gaia_plate_data = json.load(f)

    output_data = {}

    for source_id_str, source_info in source_data.items():
        gaia_id = source_info.get('gaiaedr3_id')

        missing_gaia_id = (
            gaia_id is None
            or gaia_id == 0
            or (isinstance(gaia_id, float) and math.isnan(gaia_id))
        )
        if missing_gaia_id:
            source_info["plate_id"] = None
            output_data[source_id_str] = source_info
            continue

        # Look for the plate entry recorded for this exact source under its Gaia ID
        for plate_info in gaia_plate_data.get(str(gaia_id), []):
            if int(source_id_str) == plate_info.get('source_id'):
                source_info["plate_id"] = plate_info.get("plate_id")
                output_data[source_id_str] = source_info
                break  # Important: stop at the first match

    with open(output_file, 'w') as outfile:
        json.dump(output_data, outfile, indent=4)  # indent for pretty printing

# Inputs: calibration rows per source, and the Gaia-ID -> plate-match mapping
source_file = 'pAGB_source_calib_info_fL.txt'
gaia_plate_file = 'pAGB_gaiaedr3_to_plates_fL.txt'
# Output: the calibration rows with a plate_id added to each kept source
output_file = 'pAGB_source_calib_date.txt'

add_plate_ids(source_file, gaia_plate_file, output_file)

print(f"Output written to {output_file}")

Output written to pAGB_source_calib_date.txt


In [71]:
import json
import numpy as np

def add_plate_dates(source_with_plates_file, plate_metadata_file, output_file):
    """Add a ``date_orig`` entry to every source record and write the result as JSON.

    ``source_with_plates_file`` is a JSON object of source records (each may
    carry a ``plate_id``); ``plate_metadata_file`` is a JSON list of plate
    records with ``plate_id`` and ``date_orig``. A source gets its plate's
    ``date_orig`` when the plate is known, otherwise ``None``.
    """
    with open(source_with_plates_file, 'r') as sources_fh:
        source_data = json.load(sources_fh)
    with open(plate_metadata_file, 'r') as plates_fh:
        plate_metadata = json.load(plates_fh)

    # plate_id -> date_orig lookup; a repeated plate_id keeps the last date seen
    plate_dates = {}
    for plate in plate_metadata:
        plate_dates[plate['plate_id']] = plate['date_orig']

    output_data = {}
    for source_id_str, source_info in source_data.items():
        plate_id = source_info.get('plate_id')
        # Missing plate_id and unknown plate both end up as None
        plate_known = plate_id is not None and plate_id in plate_dates
        source_info['date_orig'] = plate_dates[plate_id] if plate_known else None
        output_data[source_id_str] = source_info

    with open(output_file, 'w') as outfile:
        json.dump(output_data, outfile, indent=4)

# Inputs: sources with plate_id attached, and the per-plate metadata list
source_with_plates_file = 'pAGB_source_calib_date.txt'  # Output from the previous step
plate_metadata_file = 'pAGB_unique_plate_details_fL.txt'
# Output: the same source records with a date_orig entry added to each
output_file = 'pAGB_source_calib_date_plate.txt'

add_plate_dates(source_with_plates_file, plate_metadata_file, output_file)

print(f"Output written to {output_file}")

Output written to pAGB_source_calib_date_plate.txt


In [110]:
import json
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
import matplotlib.dates as mdates
import os

def plot_lightcurves(data_file, output_dir, min_points=2, max_mag=20, max_mag_error=0.5):
    """Plot one Bpmag-versus-date light curve per Gaia ID and save each as a PNG.

    Parameters
    ----------
    data_file : str
        JSON file mapping source IDs to records with ``gaiaedr3_id``,
        ``bpmag``, ``bpmag_error`` and ``date_orig`` entries.
    output_dir : str
        Directory for the PNG files; created if it does not exist.
    min_points : int, optional
        Minimum number of usable observations needed to plot a Gaia ID
        (default 2). IDs with fewer are reported on stdout and skipped.
    max_mag : float, optional
        Observations with ``bpmag`` above this (or below 0) are dropped
        (default 20).
    max_mag_error : float, optional
        Observations with ``bpmag_error`` above this are dropped (default 0.5).

    Notes
    -----
    Records with a missing, zero or NaN Gaia ID, or a missing/NaN magnitude,
    error or date, are skipped silently. Dates must parse as ``%Y-%m-%d``;
    others are reported on stdout and skipped. Files are written as
    ``lightcurve_<gaia_id>.png`` at 400 dpi.
    """
    with open(data_file, 'r') as f:
        data = json.load(f)

    # Gaia ID -> list of (date, bpmag, bpmag_error) tuples that pass the cuts
    gaia_data = {}

    for source_info in data.values():
        gaia_id = source_info.get('gaiaedr3_id')
        bpmag = source_info.get('bpmag')
        bpmag_error = source_info.get('bpmag_error')
        date_str = source_info.get('date_orig')

        # Missing values first, so the NaN and range checks only see numbers
        if gaia_id is None or gaia_id == 0 or bpmag is None or date_str is None or bpmag_error is None:
            continue
        if np.isnan(gaia_id) or np.isnan(bpmag) or np.isnan(bpmag_error):
            continue
        if bpmag < 0 or bpmag > max_mag or bpmag_error > max_mag_error:
            continue

        try:
            date_obj = datetime.strptime(date_str, '%Y-%m-%d').date()
        except ValueError as e:
            print(f"Invalid date format: {date_str}. Error: {e}")
            continue

        gaia_data.setdefault(gaia_id, []).append((date_obj, bpmag, bpmag_error))

    # Create the output directory once, up front, rather than on every iteration
    os.makedirs(output_dir, exist_ok=True)

    for gaia_id, observations in gaia_data.items():
        if len(observations) < min_points:
            print(f"Not enough data points for Gaia ID: {gaia_id} to plot a lightcurve")
            continue

        # Sort by date (stable, so same-day points keep their input order)
        observations = sorted(observations, key=lambda obs: obs[0])
        sorted_dates, sorted_magnitudes, sorted_errors = (list(column) for column in zip(*observations))

        fig, ax = plt.subplots(figsize=(10, 10))
        try:
            ax.errorbar(
                sorted_dates,
                sorted_magnitudes,
                yerr=sorted_errors,
                fmt='o',
                capsize=4,
                markersize=9,
                color='red',
                ecolor='red',
                elinewidth=2,
            )
            ax.set_xlabel('Date', fontsize=12, color='black')
            ax.set_ylabel('Bpmag', fontsize=12, color='black')
            ax.set_title(f'Light Curve for Gaia ID: {gaia_id}', fontsize=14)
            ax.invert_yaxis()  # brighter (smaller magnitude) towards the top

            # Date formatting on x-axis
            ax.xaxis.set_major_locator(mdates.AutoDateLocator())
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
            fig.autofmt_xdate()

            # Finer ticks instead of a grid
            ax.minorticks_on()
            ax.tick_params(axis='both', which='major', labelsize=10, length=6, width=1, colors='black')
            ax.tick_params(axis='both', which='minor', length=3, width=1, colors='black')

            fig.tight_layout()

            output_path = os.path.join(output_dir, f"lightcurve_{gaia_id}.png")
            fig.savefig(output_path, dpi=400)
            print(f"Saved: {output_path}")  # prints the full path
        finally:
            # Always release the figure, even if drawing or saving failed
            plt.close(fig)

# Input: source records with date_orig attached (written by add_plate_dates)
data_file = 'pAGB_source_calib_date_plate.txt'
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere
output_dir = r'C:\Users\arkap\Downloads\TLS\Downloads\A2'  # Raw string for Windows paths
plot_lightcurves(data_file, output_dir)

Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_433515788197481984.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_471908436438311680.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_3108327343185135872.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_3151417586128916864.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_3238918336374596864.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_459182413984008448.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_3589047952995134720.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_1328057763997734144.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_3336558507975208448.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_3334854780347915520.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_3388902129107252992.png
Saved: C:\Users\arkap\Downloads\TLS\Downloads\A2\lightcurve_4334241408966611328

'pwd' is not recognized as an internal or external command,
operable program or batch file.
