In [2]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from astropy.io.votable import parse_single_table
from astropy.table import Table
from astroquery.gaia import Gaia

In [3]:
def chunks(lst, n):
    """
    Split an input sequence into consecutive chunks of size <= n.

    Parameters
    ----------
    lst : sequence
        Any object that supports ``len()`` and slicing.
    n : int
        Maximum number of elements per chunk. Must be positive.

    Returns
    -------
    generator
        Yields ``lst[i:i + n]`` for i = 0, n, 2n, ...; the last chunk may be
        shorter than ``n``. Nothing is yielded for an empty input.

    Raises
    ------
    ValueError
        If ``n`` is zero or negative. A negative step would otherwise produce
        no chunks at all, silently dropping every element.
    """
    # Validate up front (not lazily on first iteration) so a bad chunk size is
    # reported at the call site.
    if n <= 0:
        raise ValueError(f'chunk size n must be a positive integer, got {n!r}')
    return (lst[i:i + n] for i in range(0, len(lst), n))

def extract_dl_ind(datalink_dict, key, figsize = [15,5], fontsize = 12, linewidth = 2, show_legend = True, show_grid = True):
    """
    Convert the DataLink products stored under ``key`` to tables and plot them.

    Every product in ``datalink_dict[key]`` is converted with ``to_table()``.
    A table with a 'time' column is drawn with ``plot_e_phot``; a table with a
    'wavelength' column is drawn with ``plot_sampled_spec``, titled with ``key``.

    Parameters
    ----------
    datalink_dict : mapping
        Maps product keys to iterables of objects exposing ``to_table()``.
    key : hashable
        Entry of ``datalink_dict`` to process; also used as the spectrum title.
    figsize, fontsize, show_grid :
        Forwarded to both plotters.
    linewidth :
        Forwarded to ``plot_sampled_spec`` only.
    show_legend :
        Forwarded to ``plot_e_phot`` only; spectra are always drawn without a legend.

    Returns
    -------
    The table of the LAST product processed, or ``None`` when
    ``datalink_dict[key]`` contains no products.
    """
    dl_out = None   # Returned as-is when there is nothing to iterate over.
    for product in datalink_dict[key]:
        dl_out  = product.to_table()
        columns = dl_out.keys()
        if 'time' in columns:
            plot_e_phot(dl_out, colours  = ['green', 'red', 'blue'], title = 'Epoch photometry', fontsize = fontsize, show_legend = show_legend, show_grid = show_grid, figsize = figsize)
        if 'wavelength' in columns:
            # The title is the key whatever the table length. It used to be set
            # only for tables of 343 or 2401 rows, leaving it undefined otherwise.
            plot_sampled_spec(dl_out, color = 'blue', title = key, fontsize = fontsize, show_legend = False, show_grid = show_grid, linewidth = linewidth, legend = '', figsize = figsize)
    return dl_out


def plot_e_phot(inp_table, colours  = ['green', 'red', 'blue'], title = 'Epoch photometry', fontsize = 12, show_legend = True, show_grid = True, figsize = [15,5]):
    """
    Plot epoch photometry for the G, RP and BP bands in a new figure.

    'inp_table' MUST be an Astropy-table object providing 'time', 'mag' and
    'band' columns. 'colours' supplies one colour per band, consumed in the
    order G, RP, BP. The figure is shown before returning.
    """
    plt.figure(figsize=figsize)
    time_label   = f'JD date [{inp_table["time"].unit}]'
    mag_label    = f'magnitude [{inp_table["mag"].unit}]'
    colour_cycle = iter(colours)

    # The y axis is inverted (the usual convention for magnitudes).
    axes = plt.gca()
    axes.invert_yaxis()

    for band_name in ('G', 'RP', 'BP'):
        in_band   = inp_table['band'] == band_name
        band_rows = inp_table[in_band]
        plt.plot(band_rows['time'], band_rows['mag'], 'o', label = band_name, color = next(colour_cycle))

    make_canvas(title = title, xlabel = time_label, ylabel = mag_label, fontsize= fontsize, show_legend=show_legend, show_grid = show_grid)
    plt.show()

    
def plot_sampled_spec(inp_table, color = 'blue', title = '', fontsize = 14, show_legend = True, show_grid = True, linewidth = 2, legend = '', figsize = [12,4], show_plot = True):
    """
    RVS & XP sampled spectrum plotter.

    'inp_table' MUST be an Astropy-table object providing 'wavelength' and
    'flux' columns; their units are used in the axis labels.

    Parameters
    ----------
    color :
        Line colour handed to matplotlib. Pass ``None`` to fall back to
        matplotlib's colour cycle (useful when overlaying several spectra).
    title, fontsize, show_legend, show_grid :
        Forwarded to ``make_canvas``.
    linewidth, legend :
        Line width and legend label of the plotted curve.
    figsize :
        Size of the new figure; only used when ``show_plot`` is True.
    show_plot : bool
        When True a new figure is created and shown. When False the curve is
        added to the current figure and nothing is shown.
    """
    if show_plot:
        plt.figure(figsize=figsize)
    xlabel   = f'Wavelength [{inp_table["wavelength"].unit}]'
    ylabel   = f'Flux [{inp_table["flux"].unit}]'
    # 'color' was previously accepted but never forwarded, so the requested
    # colour was silently ignored.
    plt.plot(inp_table['wavelength'], inp_table['flux'], '-', color = color, linewidth = linewidth, label = legend)
    make_canvas(title = title, xlabel = xlabel, ylabel = ylabel, fontsize= fontsize, show_legend=show_legend, show_grid = show_grid)
    if show_plot:
        plt.show()


def make_canvas(title = '', xlabel = '', ylabel = '', show_grid = False, show_legend = False, fontsize = 12):
    """
    Decorate the current matplotlib axes: title, axis labels and tick labels
    all use 'fontsize'; a grid and a legend (at 75% of 'fontsize') are added
    on request.
    """
    text_style = {'fontsize': fontsize}

    plt.title(title, **text_style)
    plt.xlabel(xlabel, **text_style)
    plt.ylabel(ylabel, **text_style)
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(**text_style)

    if show_grid:
        plt.grid()
    if show_legend:
        legend_size = fontsize*0.75
        plt.legend(fontsize = legend_size)

In [None]:
# Log in to the Gaia archive. No credentials are passed here, so nothing is hard-coded in the notebook.
# NOTE(review): presumably needed because the query below reads a user-space table (user_abhatnag.*) — confirm.
Gaia.login()

In [None]:
# ADQL positional cross-match: joins the user table user_abhatnag.dr2_sources (alias gaia2) to
# gaiadr3.gaia_source_lite (alias gaia3) wherever the gaia2 position lies inside a circle of radius
# 0.00028 centred on the gaia3 position. Returns the listed gaia3 columns plus the separation
# between the two positions as 'ang_sep'.
# NOTE(review): the radius is presumably in degrees (~1 arcsec) — confirm against the archive's ADQL documentation.
query = """SELECT gaia3.source_id,gaia3.ra,gaia3.dec,gaia3.parallax,gaia3.pmra,gaia3.pmdec,gaia3.astrometric_params_solved,gaia3.ruwe,gaia3.bp_rp,gaia3.radial_velocity,gaia3.in_qso_candidates,gaia3.in_galaxy_candidates,gaia3.non_single_star,gaia3.has_xp_continuous,gaia3.has_xp_sampled,gaia3.has_rvs,gaia3.has_epoch_photometry,gaia3.has_epoch_rv,gaia3.has_mcmc_gspphot,gaia3.has_mcmc_msc,gaia3.teff_gspphot , DISTANCE(
            POINT(gaia2.ra, gaia2.dec),
            POINT(gaia3.ra, gaia3.dec)) AS ang_sep
            FROM user_abhatnag.dr2_sources AS gaia2
            JOIN gaiadr3.gaia_source_lite AS gaia3 
            ON 1 = CONTAINS(
            POINT(gaia2.ra, gaia2.dec),
            CIRCLE(gaia3.ra, gaia3.dec, 0.00028))"""

# Submit the query as an asynchronous job, then fetch its results into 'data'.
job = Gaia.launch_job_async(query)
data = job.get_results()

In [4]:
# Load the cross-matched source list from disk; the bare name on the last line renders the frame.
xmatch_csv_path = '../data/Gaia DR3/external/10.10510004-6361200912659/sources_xmatch.csv'
df_xp = pd.read_csv(xmatch_csv_path)
df_xp

Unnamed: 0,angDist,_RAJ2000,_DEJ2000,Cl,Sp,ALS,logFbol,Dist,2XMM,CRpn,...,B_AG,E(BP-RP),b_E(BP-RP),B_E(BP-RP),Lib,RAJ2000,DEJ2000,e_RAJ2000,e_DEJ2000,RADEcorJ2000
0,1.401283,1.513333,63.679444,1,S,6036,-5.74,0.7,J000603.2+634046,0.3020,...,1.2775,0.6823,0.6793,0.6963,OB,1.514121,63.679659,0.416877,0.486704,-0.4612
1,0.725315,35.468750,56.905000,1,S,7124,-6.10,1.2,J022152.5+565418,,...,1.1281,0.6060,0.5179,0.6140,A,35.468384,56.905009,0.240032,0.324842,0.0663
2,1.776828,40.186250,61.282222,1,S,7374,-5.68,1.8,J024044.7+611656,,...,,,,,,40.187289,61.282221,3.570188,3.479530,-0.0027
3,4.644194,40.186250,61.282222,1,S,7374,-5.68,1.8,J024044.7+611656,,...,,,,,,40.188868,61.281915,2.671653,3.013517,0.0098
4,4.769103,40.186250,61.282222,1,S,7374,-5.68,1.8,J024044.7+611656,,...,,,,,,40.183711,61.281705,0.730981,1.214668,0.7862
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557,3.435575,253.581667,-41.819722,6,,3810,,0.2,J165419.6-414911,0.0265,...,,,,,,253.580819,-41.818999,1.128849,0.804012,0.6843
558,3.903248,253.581667,-41.819722,6,,3810,,0.2,J165419.6-414911,0.0265,...,,,,,,253.582770,-41.820429,50.403885,41.432550,0.9989
559,4.168521,253.581667,-41.819722,6,,3810,,0.2,J165419.6-414911,0.0265,...,,,,,,253.580214,-41.819312,48.033623,25.178572,-0.2583
560,0.253371,304.885000,38.731667,6,,11163,-5.44,0.3,J201932.4+384354,0.2360,...,,,,,,304.885095,38.731655,0.252124,0.297439,-0.0793


In [23]:
# Build 'source_id' by prefixing the 'gaia_dr3' column (cast to string) with 'Gaia DR3 '.
# Note that this adds the column to df_xp in place.
dr3_numbers = df_xp['gaia_dr3'].astype(str)
df_xp['source_id'] = 'Gaia DR3 ' + dr3_numbers
df_xp['source_id']

0     Gaia DR3 2059863536357618816
1     Gaia DR3 2059073159271061632
2     Gaia DR3 2059219875373718016
3     Gaia DR3 1908368593700046720
4     Gaia DR3 1908095850396090880
5     Gaia DR3 1931240295282530688
6     Gaia DR3 1934565905639464448
7     Gaia DR3 1934938537001944960
8     Gaia DR3 2168287095193315328
9     Gaia DR3 1970219769209144320
10    Gaia DR3 2208114399949151616
11    Gaia DR3 2207626938340489728
12    Gaia DR3 2207645423880737152
13    Gaia DR3 2020566853331230720
14    Gaia DR3 2020088874998745600
Name: source_id, dtype: object

In [14]:
# Convert the pandas DataFrame into an Astropy Table.
table_xp = Table.from_pandas(df_xp)

# Download XP spectra

In [5]:
# DataLink server threshold: it is not possible to download products for more than 5000 sources in one single call.
dl_threshold = 5000
ids          = df_xp['DR3Name']
ids_chunks   = [*chunks(ids, dl_threshold)]

print(f'* Input list contains {len(ids)} source_IDs',
      f'* This list is split into {len(ids_chunks)} chunks of <= {dl_threshold} elements each',
      sep = '\n')

* Input list contains 562 source_IDs
* This list is split into 1 chunks of <= 5000 elements each


In [6]:
retrieval_type = 'XP_SAMPLED'   # Options are: 'EPOCH_PHOTOMETRY', 'MCMC_GSPPHOT', 'MCMC_MSC', 'XP_SAMPLED', 'XP_CONTINUOUS', 'RVS'
data_structure = 'INDIVIDUAL'   # Options are: 'INDIVIDUAL', 'COMBINED', 'RAW'
data_release   = 'Gaia DR3'     # Options are: 'Gaia DR3' (default), 'Gaia DR2'
# NOTE(review): this value is replaced by the loop variable of the same name below and is never read in this cell.
dl_key         = f'{retrieval_type}_{data_structure}.xml'
output_dir     = '../data/Gaia DR3/external/10.10510004-6361200912659/'

# Create the destination up front so that writing the first product cannot fail on a missing directory.
os.makedirs(output_dir, exist_ok = True)

for ii, chunk in enumerate(ids_chunks, start = 1):
    print(f'Downloading Chunk #{ii}; N_files = {len(chunk)}')
    datalink = Gaia.load_data(ids=chunk, data_release = data_release, retrieval_type=retrieval_type, format = 'votable', data_structure = data_structure)
    # Number of entries returned; this can be smaller than the number of requested IDs.
    print(len(datalink))

    for dl_key in datalink.keys():
        # Only keep the products of the requested type.
        if retrieval_type in dl_key:
            # Each key maps to a list of products; only the first one is written.
            product = datalink[dl_key][0]

            file_name = f"{dl_key.replace('.xml', '').replace(' ','_').replace('-','_')}.vot"

            print(f'Writing table as: {file_name}')
            product.to_table().write(os.path.join(output_dir, file_name), format = 'votable', overwrite = True)

Downloading Chunk #1; N_files = 562
278
Writing table as: XP_SAMPLED_Gaia_DR3_168726523211215360.vot
Writing table as: XP_SAMPLED_Gaia_DR3_182071570715713024.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2007416556811088768.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2007418961992863744.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2007423050801646720.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2007440058871856768.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2057561124621727616.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2057563358004716928.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2058844907537751680.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2059130368252069888.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2061286540593527808.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2061288572130879616.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2061291595788070656.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2062049262381982976.vot
Writing table as: XP_SAMPLED_Gaia_DR3_2064671253991921408.vot
Writing table as: XP_SAMPLED_Gai

# Download RVS spectra

In [22]:
# DataLink server threshold: it is not possible to download products for more than 5000 sources in one single call.
dl_threshold = 5000
# NOTE(review): table_rvs is not created in any cell visible here — confirm where it is defined.
ids          = table_rvs['source_id']
ids_chunks   = [*chunks(ids, dl_threshold)]
datalink_all = list()


print(f'* Input list contains {len(ids)} source_IDs',
      f'* This list is split into {len(ids_chunks)} chunks of <= {dl_threshold} elements each',
      sep = '\n')

* Input list contains 924 source_IDs
* This list is split into 1 chunks of <= 5000 elements each


In [23]:
retrieval_type = 'RVS'          # Options are: 'EPOCH_PHOTOMETRY', 'MCMC_GSPPHOT', 'MCMC_MSC', 'XP_SAMPLED', 'XP_CONTINUOUS', 'RVS'
data_structure = 'INDIVIDUAL'   # Options are: 'INDIVIDUAL', 'COMBINED', 'RAW'
data_release   = 'Gaia DR3'     # Options are: 'Gaia DR3' (default), 'Gaia DR2'
# NOTE(review): this value is replaced by the loop variable of the same name below and is never read in this cell.
dl_key         = f'{retrieval_type}_{data_structure}.xml'
output_dir     = '../datasets/Gaia DR3/RVS/'

# Create the destination up front so that writing the first product cannot fail on a missing directory.
os.makedirs(output_dir, exist_ok = True)

for ii, chunk in enumerate(ids_chunks, start = 1):
    print(f'Downloading Chunk #{ii}; N_files = {len(chunk)}')
    datalink = Gaia.load_data(ids=chunk, data_release = data_release, retrieval_type=retrieval_type, format = 'votable', data_structure = data_structure)
    # Keep the raw DataLink result of every chunk for later use.
    datalink_all.append(datalink)

    for dl_key in datalink.keys():
        # Only keep the products of the requested type.
        if retrieval_type in dl_key:
            # Each key maps to a list of products; only the first one is written.
            product = datalink[dl_key][0]
            file_name = f"{dl_key.replace('.xml', '').replace(' ','_').replace('-','_')}.vot"

            print(f'Writing table as: {file_name}')
            product.to_table().write(os.path.join(output_dir, file_name), format = 'votable', overwrite = True)

Downloading Chunk #1; N_files = 924
Writing table as: RVS_Gaia_DR3_1101876819022885504.vot
Writing table as: RVS_Gaia_DR3_1202908537318924672.vot
Writing table as: RVS_Gaia_DR3_1264373951729187840.vot
Writing table as: RVS_Gaia_DR3_128904067476795392.vot
Writing table as: RVS_Gaia_DR3_1398436236272992256.vot
Writing table as: RVS_Gaia_DR3_176895585371267456.vot
Writing table as: RVS_Gaia_DR3_1803832633067414144.vot
Writing table as: RVS_Gaia_DR3_1804853082938044416.vot
Writing table as: RVS_Gaia_DR3_1808075236135604736.vot
Writing table as: RVS_Gaia_DR3_1815850192083365120.vot
Writing table as: RVS_Gaia_DR3_1820154058573970944.vot
Writing table as: RVS_Gaia_DR3_1824230738491806208.vot
Writing table as: RVS_Gaia_DR3_182635070426723840.vot
Writing table as: RVS_Gaia_DR3_1828217086584470400.vot
Writing table as: RVS_Gaia_DR3_1828386961117254272.vot
Writing table as: RVS_Gaia_DR3_1834497634423975808.vot
Writing table as: RVS_Gaia_DR3_1859834333694811008.vot
Writing table as: RVS_Gaia_DR3_1