In [7]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from astropy.io.votable import parse_single_table
from astropy.table import Table
from astroquery.gaia import Gaia

In [8]:
def chunks(lst, n):
    """Yield successive slices of ``lst`` holding at most ``n`` items each.

    Parameters
    ----------
    lst : sequence
        Any object that supports ``len()`` and slicing (list, tuple, str, ...).
    n : int
        Maximum number of items per chunk; must be positive.

    Yields
    ------
    ``lst[i:i + n]`` for ``i = 0, n, 2n, ...``; the final slice may be shorter.

    Raises
    ------
    ValueError
        If ``n`` is not positive. Because this is a generator, the error
        surfaces when iteration starts, not when ``chunks`` is called.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject it explicitly instead of returning no chunks at all.
    if n <= 0:
        raise ValueError(f'chunk size must be a positive integer, got {n!r}')
    for start in range(0, len(lst), n):
        yield lst[start:start + n]

def extract_dl_ind(datalink_dict, key, figsize = [15,5], fontsize = 12, linewidth = 2, show_legend = True, show_grid = True):
    """Convert the DataLink products stored under ``key`` to tables and plot them.

    Each product in ``datalink_dict[key]`` is converted with its ``to_table()``
    method. A table with a ``time`` column is drawn with ``plot_e_phot``; a table
    with a ``wavelength`` column is drawn with ``plot_sampled_spec``, using ``key``
    as the plot title.

    Parameters
    ----------
    datalink_dict : mapping
        Maps product keys to sequences of objects exposing ``to_table()``.
    key : hashable
        Entry of ``datalink_dict`` to process; also used as the spectrum title.
    figsize, fontsize, linewidth, show_legend, show_grid
        Forwarded to the plotting helpers. ``show_legend`` only affects the
        epoch-photometry plot; spectra are always drawn without a legend.

    Returns
    -------
    The table of the LAST product processed, or ``None`` when
    ``datalink_dict[key]`` is empty.
    """
    # Pre-bind the result so an empty product list returns None instead of
    # raising UnboundLocalError at the return statement.
    dl_out = None
    for product in datalink_dict[key]:
        dl_out = product.to_table()
        columns = dl_out.keys()
        if 'time' in columns:
            plot_e_phot(dl_out, colours = ['green', 'red', 'blue'], title = 'Epoch photometry', fontsize = fontsize, show_legend = show_legend, show_grid = show_grid, figsize = figsize)
        if 'wavelength' in columns:
            # The title is the product key whatever the number of samples; the
            # former length checks (343 / 2401) left it undefined for other sizes.
            plot_sampled_spec(dl_out, color = 'blue', title = key, fontsize = fontsize, show_legend = False, show_grid = show_grid, linewidth = linewidth, legend = '', figsize = figsize)
    return dl_out


def plot_e_phot(inp_table, colours  = ['green', 'red', 'blue'], title = 'Epoch photometry', fontsize = 12, show_legend = True, show_grid = True, figsize = [15,5]):
    """Plot epoch photometry (magnitude versus time) for the G, RP and BP bands.

    Parameters
    ----------
    inp_table : astropy.table.Table
        Must provide ``time``, ``mag`` and ``band`` columns; the ``unit`` of
        ``time`` and ``mag`` is used in the axis labels.
    colours : sequence of str
        Marker colours for the G, RP and BP bands, in that order. Shorter
        sequences are cycled; an empty one falls back to green/red/blue.
    title, fontsize, show_legend, show_grid
        Forwarded to ``make_canvas``.
    figsize : sequence of two numbers
        Size of the new figure, passed to ``plt.figure``.

    Opens a new figure, draws it and calls ``plt.show()``; returns nothing.
    """
    plt.figure(figsize=figsize)
    xlabel   = f'JD date [{inp_table["time"].unit}]'
    ylabel   = f'magnitude [{inp_table["mag"].unit}]'
    gbands   = ['G', 'RP', 'BP']
    # Materialise the colours so they can be indexed and cycled; this avoids the
    # StopIteration the previous iterator raised when fewer than three were given.
    palette  = list(colours) or ['green', 'red', 'blue']

    # Magnitudes decrease for brighter sources, so flip the axis.
    plt.gca().invert_yaxis()
    for idx, band in enumerate(gbands):
        phot_set = inp_table[inp_table['band'] == band]
        plt.plot(phot_set['time'], phot_set['mag'], 'o', label = band, color = palette[idx % len(palette)])
    make_canvas(title = title, xlabel = xlabel, ylabel = ylabel, fontsize= fontsize, show_legend=show_legend, show_grid = show_grid)
    plt.show()

    
def plot_sampled_spec(inp_table, color = 'blue', title = '', fontsize = 14, show_legend = True, show_grid = True, linewidth = 2, legend = '', figsize = [12,4], show_plot = True):
    """Plot a sampled spectrum (flux versus wavelength).

    Parameters
    ----------
    inp_table : astropy.table.Table
        Must provide ``wavelength`` and ``flux`` columns; their ``unit`` is used
        in the axis labels.
    color : str
        Line colour. It is now passed to ``plt.plot``; previously the argument
        was accepted but ignored.
    title, fontsize, show_legend, show_grid
        Forwarded to ``make_canvas``.
    linewidth : number
        Width of the spectrum line.
    legend : str
        Label attached to the line (shown only when ``show_legend`` is true).
    figsize : sequence of two numbers
        Size of the new figure; only used when ``show_plot`` is true.
    show_plot : bool
        When true, a new figure is opened and ``plt.show()`` is called. When
        false, the line is added to the current figure and nothing is shown, so
        several spectra can be overlaid by the caller.
    """
    if show_plot:
        plt.figure(figsize=figsize)
    xlabel   = f'Wavelength [{inp_table["wavelength"].unit}]'
    ylabel   = f'Flux [{inp_table["flux"].unit}]'
    plt.plot(inp_table['wavelength'], inp_table['flux'], '-', linewidth = linewidth, label = legend, color = color)
    make_canvas(title = title, xlabel = xlabel, ylabel = ylabel, fontsize= fontsize, show_legend=show_legend, show_grid = show_grid)
    if show_plot:
        plt.show()


def make_canvas(title = '', xlabel = '', ylabel = '', show_grid = False, show_legend = False, fontsize = 12):
    """Decorate the current pyplot axes with a title, axis labels, ticks, grid and legend.

    All text (title, axis labels, tick labels) uses ``fontsize``; the legend,
    when requested, uses 75 % of it. The grid and the legend are drawn only
    when ``show_grid`` / ``show_legend`` are true.
    """
    # Title and axis labels share the same call signature.
    text_setters = (
        (plt.title,  title),
        (plt.xlabel, xlabel),
        (plt.ylabel, ylabel),
    )
    for apply_text, text in text_setters:
        apply_text(text, fontsize = fontsize)

    # Tick labels on both axes get the same font size.
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(fontsize = fontsize)

    if show_grid:
        plt.grid()
    if show_legend:
        plt.legend(fontsize = fontsize*0.75)

In [None]:
# Authenticate with the Gaia archive. The cross-match query in the next cell reads
# a table from a user schema (user_abhatnag.dr2_sources).
# NOTE(review): with no arguments this presumably prompts for credentials interactively,
# which would block an unattended "Run All" — confirm.
Gaia.login()

In [None]:
# Positional cross-match of the user table `user_abhatnag.dr2_sources` (alias gaia2)
# against `gaiadr3.gaia_source_lite` (alias gaia3): a pair is kept when the gaia2
# position lies inside a circle of radius 0.00028 centred on the gaia3 position.
# The separation of each pair is returned as `ang_sep`, next to the selected DR3 columns.
# NOTE(review): ADQL geometry is normally expressed in degrees, so 0.00028 is
# presumably ~1 arcsec — confirm.
query = """SELECT gaia3.source_id,gaia3.ra,gaia3.dec,gaia3.parallax,gaia3.pmra,gaia3.pmdec,gaia3.astrometric_params_solved,gaia3.ruwe,gaia3.bp_rp,gaia3.radial_velocity,gaia3.in_qso_candidates,gaia3.in_galaxy_candidates,gaia3.non_single_star,gaia3.has_xp_continuous,gaia3.has_xp_sampled,gaia3.has_rvs,gaia3.has_epoch_photometry,gaia3.has_epoch_rv,gaia3.has_mcmc_gspphot,gaia3.has_mcmc_msc,gaia3.teff_gspphot , DISTANCE(
            POINT(gaia2.ra, gaia2.dec),
            POINT(gaia3.ra, gaia3.dec)) AS ang_sep
            FROM user_abhatnag.dr2_sources AS gaia2
            JOIN gaiadr3.gaia_source_lite AS gaia3 
            ON 1 = CONTAINS(
            POINT(gaia2.ra, gaia2.dec),
            CIRCLE(gaia3.ra, gaia3.dec, 0.00028))"""

# Submit the query as an asynchronous job and keep its result in `data`.
job = Gaia.launch_job_async(query)
data = job.get_results()

In [13]:
# Load the source lists from CSV files stored relative to the notebook's working
# directory. Both frames must contain a `source_id` column, which the download
# cells below read.
df_rvs = pd.read_csv('../datasets/Gaia DR3/df_rvs.csv')
df_xp = pd.read_csv('../datasets/Gaia DR3/df_xp.csv')

In [17]:
# Convert the pandas frames to astropy Tables; the chunking cells below take the
# `source_id` column from these tables.
table_xp = Table.from_pandas(df_xp)
table_rvs = Table.from_pandas(df_rvs)

# Download XP spectra

In [19]:
# Split the XP source IDs into batches that the DataLink service accepts.
dl_threshold = 5000               # DataLink server threshold: products for more than 5000 sources cannot be downloaded in a single call.
ids          = table_xp['source_id']   # table column holding the source IDs to download
ids_chunks   = list(chunks(ids, dl_threshold))   # materialised so len() can be reported and the list reused by the download cell

# Report how the input was partitioned.
print(f'* Input list contains {len(ids)} source_IDs')
print(f'* This list is split into {len(ids_chunks)} chunks of <= {dl_threshold} elements each')

* Input list contains 23636 source_IDs
* This list is split into 5 chunks of <= 5000 elements each


In [21]:
# Download the XP sampled spectra chunk by chunk and store every product as its
# own VOTable file in `output_dir`.
retrieval_type = 'XP_SAMPLED'   # Options are: 'EPOCH_PHOTOMETRY', 'MCMC_GSPPHOT', 'MCMC_MSC', 'XP_SAMPLED', 'XP_CONTINUOUS', 'RVS'
data_structure = 'INDIVIDUAL'   # Options are: 'INDIVIDUAL', 'COMBINED', 'RAW'. INDIVIDUAL is used here because one file is written per product key.
data_release   = 'Gaia DR3'     # Options are: 'Gaia DR3' (default), 'Gaia DR2'
dl_key         = f'{retrieval_type}_{data_structure}.xml'   # initial value only; rebound to each product key by the loop below
output_dir     = '../datasets/Gaia DR3/XP/'

# Create the target directory up front so a missing folder cannot abort the run
# after a (slow) chunk has already been downloaded.
os.makedirs(output_dir, exist_ok=True)

for ii, chunk in enumerate(ids_chunks, start=1):
    print(f'Downloading Chunk #{ii}; N_files = {len(chunk)}')
    datalink  = Gaia.load_data(ids=chunk, data_release = data_release, retrieval_type=retrieval_type, format = 'votable', data_structure = data_structure)
    print(len(datalink))

    for dl_key in datalink.keys():
        # Skip anything that is not the requested product type.
        if retrieval_type not in dl_key:
            continue

        # Only the first product stored under each key is written.
        product = datalink[dl_key][0]

        # Derive a filesystem-friendly name from the product key.
        file_name = f"{dl_key.replace('.xml', '').replace(' ','_').replace('-','_')}.vot"

        print(f'Writing table as: {file_name}')
        product.to_table().write(os.path.join(output_dir, file_name), format = 'votable', overwrite = True)

Downloading Chunk #1; N_files = 5000
5000
Writing table as: XP_SAMPLED_Gaia_DR3_1012746180227105536.vot
Writing table as: XP_SAMPLED_Gaia_DR3_1264373951729187840.vot
Writing table as: XP_SAMPLED_Gaia_DR3_155969302110293248.vot
Writing table as: XP_SAMPLED_Gaia_DR3_156980406131102720.vot
Writing table as: XP_SAMPLED_Gaia_DR3_159026803427715072.vot
Writing table as: XP_SAMPLED_Gaia_DR3_161912501759303296.vot
Writing table as: XP_SAMPLED_Gaia_DR3_172726782733488768.vot
Writing table as: XP_SAMPLED_Gaia_DR3_172729776327036288.vot
Writing table as: XP_SAMPLED_Gaia_DR3_173679204618093184.vot
Writing table as: XP_SAMPLED_Gaia_DR3_1751329750248988544.vot
Writing table as: XP_SAMPLED_Gaia_DR3_176895585371267456.vot
Writing table as: XP_SAMPLED_Gaia_DR3_1780599505693111296.vot
Writing table as: XP_SAMPLED_Gaia_DR3_179379553932894464.vot
Writing table as: XP_SAMPLED_Gaia_DR3_179854508596407552.vot
Writing table as: XP_SAMPLED_Gaia_DR3_179857910210462208.vot
Writing table as: XP_SAMPLED_Gaia_DR3_1

# Download RVS spectra

In [22]:
# Split the RVS source IDs into batches that the DataLink service accepts.
dl_threshold = 5000               # DataLink server threshold: products for more than 5000 sources cannot be downloaded in a single call.
ids          = table_rvs['source_id']   # table column holding the source IDs to download
ids_chunks   = list(chunks(ids, dl_threshold))   # materialised so len() can be reported and the list reused by the download cell
datalink_all = []   # filled by the download cell with one load_data() result per chunk


# Report how the input was partitioned.
print(f'* Input list contains {len(ids)} source_IDs')
print(f'* This list is split into {len(ids_chunks)} chunks of <= {dl_threshold} elements each')

* Input list contains 924 source_IDs
* This list is split into 1 chunks of <= 5000 elements each


In [23]:
# Download the RVS spectra chunk by chunk and store every product as its own
# VOTable file in `output_dir`. The raw load_data() results are also kept in
# `datalink_all`.
retrieval_type = 'RVS'          # Options are: 'EPOCH_PHOTOMETRY', 'MCMC_GSPPHOT', 'MCMC_MSC', 'XP_SAMPLED', 'XP_CONTINUOUS', 'RVS'
data_structure = 'INDIVIDUAL'   # Options are: 'INDIVIDUAL', 'COMBINED', 'RAW'. INDIVIDUAL is used here because one file is written per product key.
data_release   = 'Gaia DR3'     # Options are: 'Gaia DR3' (default), 'Gaia DR2'
dl_key         = f'{retrieval_type}_{data_structure}.xml'   # initial value only; rebound to each product key by the loop below
output_dir     = '../datasets/Gaia DR3/RVS/'

# Create the target directory up front so a missing folder cannot abort the run
# after the download has already completed.
os.makedirs(output_dir, exist_ok=True)

for ii, chunk in enumerate(ids_chunks, start=1):
    print(f'Downloading Chunk #{ii}; N_files = {len(chunk)}')
    datalink  = Gaia.load_data(ids=chunk, data_release = data_release, retrieval_type=retrieval_type, format = 'votable', data_structure = data_structure)
    # NOTE: `datalink_all` is created in the previous cell, so re-running only
    # this cell appends the same chunks a second time.
    datalink_all.append(datalink)

    for dl_key in datalink.keys():
        # Skip anything that is not the requested product type.
        if retrieval_type not in dl_key:
            continue

        # Only the first product stored under each key is written.
        product = datalink[dl_key][0]

        # Derive a filesystem-friendly name from the product key.
        file_name = f"{dl_key.replace('.xml', '').replace(' ','_').replace('-','_')}.vot"

        print(f'Writing table as: {file_name}')
        product.to_table().write(os.path.join(output_dir, file_name), format = 'votable', overwrite = True)

Downloading Chunk #1; N_files = 924
Writing table as: RVS_Gaia_DR3_1101876819022885504.vot
Writing table as: RVS_Gaia_DR3_1202908537318924672.vot
Writing table as: RVS_Gaia_DR3_1264373951729187840.vot
Writing table as: RVS_Gaia_DR3_128904067476795392.vot
Writing table as: RVS_Gaia_DR3_1398436236272992256.vot
Writing table as: RVS_Gaia_DR3_176895585371267456.vot
Writing table as: RVS_Gaia_DR3_1803832633067414144.vot
Writing table as: RVS_Gaia_DR3_1804853082938044416.vot
Writing table as: RVS_Gaia_DR3_1808075236135604736.vot
Writing table as: RVS_Gaia_DR3_1815850192083365120.vot
Writing table as: RVS_Gaia_DR3_1820154058573970944.vot
Writing table as: RVS_Gaia_DR3_1824230738491806208.vot
Writing table as: RVS_Gaia_DR3_182635070426723840.vot
Writing table as: RVS_Gaia_DR3_1828217086584470400.vot
Writing table as: RVS_Gaia_DR3_1828386961117254272.vot
Writing table as: RVS_Gaia_DR3_1834497634423975808.vot
Writing table as: RVS_Gaia_DR3_1859834333694811008.vot
Writing table as: RVS_Gaia_DR3_1