# Retrieving Clusters from Gaia

In [2]:
!pip install astroquery

Collecting astroquery
  Downloading astroquery-0.4.6-py3-none-any.whl (4.5 MB)
     ---------------------------------------- 4.5/4.5 MB 3.8 MB/s eta 0:00:00
Collecting pyvo>=1.1
  Downloading pyvo-1.4-py3-none-any.whl (885 kB)
     -------------------------------------- 885.7/885.7 kB 8.0 MB/s eta 0:00:00
Installing collected packages: pyvo, astroquery
Successfully installed astroquery-0.4.6 pyvo-1.4


In [3]:
from astroquery.gaia import Gaia

# Import data science packages
import pandas as pd

# Import NumPy to do mathy stuff
import numpy as np

print('Modules imported!')

Modules imported!


In [4]:
# Suppress warnings. Comment this out (put #'s at the start of the lines) if you wish to see the warning messages
import warnings
warnings.filterwarnings('ignore')
print('Warnings suppressed!')



In [5]:
from datetime import datetime

def timer_start():
    global start_time
    start_time = datetime.now()

def timer_stop():
    time_elapsed = datetime.now() - start_time

    da, remainder  = divmod(time_elapsed.total_seconds(), 24*3600)
    hrs, remainder = divmod(remainder, 3600)
    mins, secs = divmod(remainder, 60)

    if da:
        print(f'{int(da)} days {int(hrs)} hours {int(mins)} minutes {int(secs)} seconds elapsed')
    elif hrs:
        print(f'{int(hrs)} hours {int(mins)} minutes {int(secs)} seconds elapsed')
    elif mins:
        print(f'{int(mins)} minutes {int(secs)} seconds elapsed')
    elif secs >= 1.0:
        print(f'{int(secs)} seconds elapsed')
    else:
        print(f'{secs:.2} seconds elapsed')
        
print('Timer functions loaded!')

Timer functions loaded!


# Investigating what's available in Gias

In [6]:
#load and look at the available Gaia tables

timer_start()
tables = Gaia.load_tables(only_names=False)
timer_stop()

INFO: Retrieving tables... [astroquery.utils.tap.core]
INFO: Parsing tables... [astroquery.utils.tap.core]
INFO: Done. [astroquery.utils.tap.core]
1 minutes 9 seconds elapsed


In [7]:
# print the ith table name and description
i=93
print(tables[i].get_qualified_name())
print(tables[i].description)

gaiadr3.gaiadr3.oa_neuron_xp_spectra
This is the table hosting the prototype BP/RP spectrum corresponding to each of the neurons of the Self-Organised-Map produced by the Apsis module OA. Other neuron attributes, such as statistics on various parameters, are available in another table: {\tt OaNeuronInformation}. See Section~\ref{ssec:cu8par_apsis_oa} for further details.


In [8]:
# print all table names and descriptions in gaia database
for n, table in enumerate(tables):
    # print(f'{n} {table.get_qualified_name()}\n', table.description.replace("\n", " "), '\n') # this looks better in Anaconda
    print(f'{n} {table.get_qualified_name()[:50]:50}', table.description.replace("\n", " ")) # this looks better in Google CoLab

0 external.external.apassdr9                         The AAVSO Photometric All-Sky Survey - Data Release 9     This publication makes use of data products from the AAVSO     Photometric All Sky Survey (APASS). Funded by the Robert Martin Ayers     Sciences Fund and the National Science Foundation. Original catalogue released by Henden et al. 2015 AAS Meeting #225, id.336.16. Data retrieved using the VizieR catalogue access tool, CDS, Strasbourg, France. The original description of the VizieR service was published in A&AS 143, 23. VizieR catalogue II/336.
1 external.external.gaiadr2_astrophysical_parameters Fouesneau et al. (2022) Gaia DR2 astrophysical parameters. "Astrophysical Parameters from Gaia DR2, 2MASS & AllWISE", Fouesneau et al., 2022, A&A (https://ui.adsabs.harvard.edu/abs/2022arXiv220103252F/abstract). Data replicated from the gdr2ap.main table at the GAVO Data Centre TAP service https://dc.g-vo.org/tap and TAP metadata as of January 2022.  Original table description: Stell

In [9]:
# Build a sample query. Specifying "TOP 20" limits the results to 20 rows.
myquery = 'SELECT TOP 20 * FROM gaiadr2.gaia_source'

# Run the query and store the results
timer_start()
job = Gaia.launch_job(myquery, dump_to_file=False)
timer_stop()

1 seconds elapsed


In [10]:
print(job)

<Table length=20>
              name                dtype       unit                                          description                                      n_bad
-------------------------------- ------- ------------- ------------------------------------------------------------------------------------- -----
                     solution_id   int64                                                                                 Solution Identifier     0
                     DESIGNATION  object                                         Unique source designation (unique across all Data Releases)     0
                       source_id   int64                                  Unique source identifier (unique within a particular Data Release)     0
                    random_index   int64                                                                 Random index used to select subsets     0
                       ref_epoch float64            yr                                       

In [11]:
# Convert our AstroPy data into a pandas dataframe
sample_df = (job.get_results()).to_pandas()

In [12]:
# Check that we got a pandas dataframe
type(sample_df)

pandas.core.frame.DataFrame

In [13]:
# Take a look at the first 5 rows
sample_df.head()

Unnamed: 0,solution_id,DESIGNATION,source_id,random_index,ref_epoch,ra,ra_error,dec,dec_error,parallax,...,e_bp_min_rp_percentile_lower,e_bp_min_rp_percentile_upper,flame_flags,radius_val,radius_percentile_lower,radius_percentile_upper,lum_val,lum_percentile_lower,lum_percentile_upper,datalink_url
0,1635721458409799680,Gaia DR2 4464195329654279808,4464195329654279808,56394603,2015.5,243.363045,0.111034,13.332208,0.089123,0.164391,...,,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...
1,1635721458409799680,Gaia DR2 4464157602662621312,4464157602662621312,28197301,2015.5,245.760725,0.357744,15.362548,0.24214,-0.21103,...,,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...
2,1635721458409799680,Gaia DR2 4464174164056625024,4464174164056625024,14098650,2015.5,246.293908,0.338131,15.472578,0.24126,0.474643,...,,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...
3,1635721458409799680,Gaia DR2 4464158186777864192,4464158186777864192,1006853251,2015.5,246.116657,0.672287,15.149298,0.366033,0.295954,...,,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...
4,1635721458409799680,Gaia DR2 4464194092703698048,4464194092703698048,1325455136,2015.5,243.321309,0.380325,13.330618,0.31249,0.956317,...,,,,,,,,,,https://gea.esac.esa.int/data-server/datalink/...


In [14]:
# alternate method for looking at column names
for col in sample_df.columns:
    print(col)

solution_id
DESIGNATION
source_id
random_index
ref_epoch
ra
ra_error
dec
dec_error
parallax
parallax_error
parallax_over_error
pmra
pmra_error
pmdec
pmdec_error
ra_dec_corr
ra_parallax_corr
ra_pmra_corr
ra_pmdec_corr
dec_parallax_corr
dec_pmra_corr
dec_pmdec_corr
parallax_pmra_corr
parallax_pmdec_corr
pmra_pmdec_corr
astrometric_n_obs_al
astrometric_n_obs_ac
astrometric_n_good_obs_al
astrometric_n_bad_obs_al
astrometric_gof_al
astrometric_chi2_al
astrometric_excess_noise
astrometric_excess_noise_sig
astrometric_params_solved
astrometric_primary_flag
astrometric_weight_al
astrometric_pseudo_colour
astrometric_pseudo_colour_error
mean_varpi_factor_al
astrometric_matched_observations
visibility_periods_used
astrometric_sigma5d_max
frame_rotator_object_type
matched_observations
duplicated_source
phot_g_n_obs
phot_g_mean_flux
phot_g_mean_flux_error
phot_g_mean_flux_over_error
phot_g_mean_mag
phot_bp_n_obs
phot_bp_mean_flux
phot_bp_mean_flux_error
phot_bp_mean_flux_over_error
phot_bp_mean_ma

# Queryig Gias for Pleiades Cluster data

In [15]:
# querying in a region of 1.833 deg around the specified coordinate.

In [16]:
from astropy.coordinates import SkyCoord
SkyCoord.from_name("Pleiades")

<SkyCoord (ICRS): (ra, dec) in deg
    (56.601, 24.114)>

In [None]:
timer_start()
job = Gaia.launch_job("SELECT phot_g_mean_mag as gmag, ra, dec, parallax as plx, bp_rp, lum_val, teff_val, radius_val \
FROM gaiadr2.gaia_source \
WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE('ICRS',56.75,24.11667,1.833))=1 \
AND parallax IS NOT NULL AND abs(parallax)>0 \
AND parallax_over_error>10 \
AND abs(pmra_error/pmra)<0.10 \
AND abs(pmdec_error/pmdec)<0.10 \
AND pmra IS NOT NULL AND abs(pmra)>0 \
AND pmdec IS NOT NULL AND abs(pmdec)>0 \
AND pmra BETWEEN 15 AND 25 \
AND pmdec BETWEEN -55 AND -40;"
, dump_to_file=False)
timer_stop()

In [18]:
print(job)

<Table length=20>
              name                dtype       unit                                          description                                      n_bad
-------------------------------- ------- ------------- ------------------------------------------------------------------------------------- -----
                     solution_id   int64                                                                                 Solution Identifier     0
                     DESIGNATION  object                                         Unique source designation (unique across all Data Releases)     0
                       source_id   int64                                  Unique source identifier (unique within a particular Data Release)     0
                    random_index   int64                                                                 Random index used to select subsets     0
                       ref_epoch float64            yr                                       

In [19]:
df = (job.get_results()).to_pandas()

for col in df.columns:
    print(col)

solution_id
DESIGNATION
source_id
random_index
ref_epoch
ra
ra_error
dec
dec_error
parallax
parallax_error
parallax_over_error
pmra
pmra_error
pmdec
pmdec_error
ra_dec_corr
ra_parallax_corr
ra_pmra_corr
ra_pmdec_corr
dec_parallax_corr
dec_pmra_corr
dec_pmdec_corr
parallax_pmra_corr
parallax_pmdec_corr
pmra_pmdec_corr
astrometric_n_obs_al
astrometric_n_obs_ac
astrometric_n_good_obs_al
astrometric_n_bad_obs_al
astrometric_gof_al
astrometric_chi2_al
astrometric_excess_noise
astrometric_excess_noise_sig
astrometric_params_solved
astrometric_primary_flag
astrometric_weight_al
astrometric_pseudo_colour
astrometric_pseudo_colour_error
mean_varpi_factor_al
astrometric_matched_observations
visibility_periods_used
astrometric_sigma5d_max
frame_rotator_object_type
matched_observations
duplicated_source
phot_g_n_obs
phot_g_mean_flux
phot_g_mean_flux_error
phot_g_mean_flux_over_error
phot_g_mean_mag
phot_bp_n_obs
phot_bp_mean_flux
phot_bp_mean_flux_error
phot_bp_mean_flux_over_error
phot_bp_mean_ma

In [20]:
df.head()
print(len(df))

20


In [21]:
df.to_csv("Pleiades_Cluster.csv", index=None)