# Retrieving Clusters from Gaia 

In [1]:
from astroquery.gaia import Gaia

# Import data science packages
import pandas as pd

# Import NumPy to do mathy stuff
import numpy as np

print('Modules imported!')

Created TAP+ (v1.2.1) - Connection:
	Host: gea.esac.esa.int
	Use HTTPS: True
	Port: 443
	SSL Port: 443
Created TAP+ (v1.2.1) - Connection:
	Host: geadata.esac.esa.int
	Use HTTPS: True
	Port: 443
	SSL Port: 443
Modules imported!


In [2]:
# Silence all Python warnings for the rest of the session.
# To see warning messages again, comment out the two lines below (put a # at the start of each).
import warnings
warnings.simplefilter('ignore')
print('Warnings suppressed!')



In [3]:
from datetime import datetime

def timer_start():
    """Record the current wall-clock time as the start of a timed interval.

    The timestamp is stored in the module-level global ``start_time``, which
    ``timer_stop`` reads. Calling this again simply restarts the interval.
    """
    global start_time
    start_time = datetime.now()


def timer_stop():
    """Print how much time has passed since the last ``timer_start`` call.

    The elapsed time is broken into days, hours, minutes and seconds, and only
    the units from the largest non-zero one downwards are printed. Intervals
    shorter than one second are printed with two decimal places.

    Raises:
        NameError: if ``timer_start`` has not been called yet, because the
            ``start_time`` global does not exist until then.
    """
    time_elapsed = datetime.now() - start_time

    # Peel off whole days, then hours, then minutes; what is left is seconds.
    da, remainder = divmod(time_elapsed.total_seconds(), 24 * 3600)
    hrs, remainder = divmod(remainder, 3600)
    mins, secs = divmod(remainder, 60)

    if da:
        print(f'{int(da)} days {int(hrs)} hours {int(mins)} minutes {int(secs)} seconds elapsed')
    elif hrs:
        print(f'{int(hrs)} hours {int(mins)} minutes {int(secs)} seconds elapsed')
    elif mins:
        print(f'{int(mins)} minutes {int(secs)} seconds elapsed')
    elif secs >= 1.0:
        print(f'{int(secs)} seconds elapsed')
    else:
        # Use fixed-point '.2f'. A bare '.2' means "two significant digits" in
        # general format, which prints values such as '1.2e-05' or '0.0012'.
        print(f'{secs:.2f} seconds elapsed')


print('Timer functions loaded!')

Timer functions loaded!


# Investigating what's available in Gaia
Load and look at the available Gaia tables.

In [4]:
# Fetch the list of tables from the Gaia archive and report how long it took.
# only_names=False is passed to load_tables; presumably this also retrieves the
# table metadata (later cells read .description) — confirm in the astroquery docs.
timer_start()
tables = Gaia.load_tables(only_names=False)
timer_stop()

INFO: Retrieving tables... [astroquery.utils.tap.core]
INFO: Parsing tables... [astroquery.utils.tap.core]
INFO: Done. [astroquery.utils.tap.core]
22 seconds elapsed


In [5]:
# Print the qualified name and description of the i-th entry in `tables`.
# In the recorded run, index 93 was gaiaedr3.gaiaedr3.gsc23_best_neighbour.
# NOTE(review): this is a positional index; it may point at a different table
# if the list returned by the server changes — confirm before relying on it.
i=93
print(tables[i].get_qualified_name())
print(tables[i].description)

gaiaedr3.gaiaedr3.gsc23_best_neighbour
<p>GSC2.3 BestNeighbour table lists each matched Gaia object with its best neighbour in 
the external catalogue.
The cross-match algorithm is not symmetric and searches Gaia sources counterparts in GSC2.3.<br/>
The best neighbour is chosen among good neighbours as the one  with the highest value of 
the figure of merit, which evaluates the ratio between two opposite models/hypotheses: 
the counterpart candidate is a match or it is found by chance.
Good neighbours are nearby objects in the external catalogue whose position is 
compatible within position errors with the Gaia target.<br/>
The cross-match algorithm is positional and exploits the full 5 
parameters covariance matrix of Gaia astrometric solution when available and the
external catalogue positions and position errors. In addition it takes into account the 
external catalogue environment using the local density.<br/>
<br/>
Please note that the cross-match algorithm is a trade-off between 

In [6]:
# List every table in the Gaia database: running index, qualified name, description.
for n, table in enumerate(tables):
    # Alternative layout (name and description on separate lines) that looks better in Anaconda:
    # print(f'{n} {table.get_qualified_name()}\n', table.description.replace("\n", " "), '\n')

    # Layout that looks better in Google CoLab: the name is cut/padded to 50
    # characters and newlines inside the description are flattened to spaces.
    print(
        f'{n} {table.get_qualified_name()[:50]:50}',
        table.description.replace("\n", " "),
    )

0 external.external.apassdr9                         The AAVSO Photometric All-Sky Survey - Data Release 9     This publication makes use of data products from the AAVSO     Photometric All Sky Survey (APASS). Funded by the Robert Martin Ayers     Sciences Fund and the National Science Foundation. Original catalogue released by Henden et al. 2015 AAS Meeting #225, id.336.16. Data retrieved using the VizieR catalogue access tool, CDS, Strasbourg, France. The original description of the VizieR service was published in A&AS 143, 23. VizieR catalogue II/336.
1 external.external.gaiadr2_geometric_distance       Estimating distances from parallaxes IV: Distances to 1.33 billion stars in Gaia Data Release 2. Bailer-Jones et al. 2018 AJ 156:58, https://doi.org/10.3847/1538-3881/aacb21. The catalogue provides distances estimates (and uncertainties therein) for 1.33 billion stars over the whole sky brighter than about G=20.7. These have been estimated using the parallaxes (and their uncertaintie

In [7]:
# Build a sample query against gaiadr2.gaia_source.
# "TOP 20" limits the results to 20 rows; "*" selects every column.
myquery = 'SELECT TOP 20 * FROM gaiadr2.gaia_source'

# Run the query and keep the returned job object in `job`.
# The timer calls around it print how long the request took.
# dump_to_file=False is passed to launch_job; presumably this keeps the results
# in memory instead of writing them to disk — confirm in the astroquery docs.
timer_start()
job = Gaia.launch_job(myquery, dump_to_file=False)
timer_stop()

1 seconds elapsed


In [8]:
# Print the job's text summary; the recorded output lists each result column
# with its dtype, unit, description and n_bad count.
print(job)

<Table length=20>
              name                dtype       unit                                          description                                      n_bad
-------------------------------- ------- ------------- ------------------------------------------------------------------------------------- -----
                     solution_id   int64                                                                                 Solution Identifier     0
                     designation  object                                         Unique source designation (unique across all Data Releases)     0
                       source_id   int64                                  Unique source identifier (unique within a particular Data Release)     0
                    random_index   int64                                                                 Random index used to select subsets     0
                       ref_epoch float64            yr                                       

In [9]:
# Pull the result table out of the job and turn it into a pandas DataFrame
sample_df = job.get_results().to_pandas()

In [10]:
# Check that the conversion gave us a pandas DataFrame.
# As the last expression in the cell, the type is shown as the cell output.
type(sample_df)

pandas.core.frame.DataFrame

In [11]:
# Take a look at the first 5 rows (head() with no argument; 5 rows appear in the recorded output).
# Wide frames are abbreviated with "..." in the rendered output.
sample_df.head()

Unnamed: 0,solution_id,designation,source_id,random_index,ref_epoch,ra,ra_error,dec,dec_error,parallax,...,e_bp_min_rp_percentile_lower,e_bp_min_rp_percentile_upper,flame_flags,radius_val,radius_percentile_lower,radius_percentile_upper,lum_val,lum_percentile_lower,lum_percentile_upper,datalink_url
0,1635721458409799680,b'Gaia DR2 6062180144564720000',6062180144564720000,489592866,2015.5,198.511781,1.227588,-59.010536,1.37004,0.957741,...,,,,,,,,,,b'https://gea.esac.esa.int/data-server/datalin...
1,1635721458409799680,b'Gaia DR2 6062193063828005120',6062193063828005120,1196140040,2015.5,198.755073,0.186755,-58.747496,0.243379,0.505523,...,,,,,,,,,,b'https://gea.esac.esa.int/data-server/datalin...
2,1635721458409799680,b'Gaia DR2 6062190349408279936',6062190349408279936,1372776834,2015.5,198.967387,0.237929,-58.821787,0.323899,0.760715,...,,,,,,,,,,b'https://gea.esac.esa.int/data-server/datalin...
3,1635721458409799680,b'Gaia DR2 6062170970510395904',6062170970510395904,686388417,2015.5,198.918314,0.485605,-59.053246,0.626549,0.441547,...,,,,,,,,,,b'https://gea.esac.esa.int/data-server/datalin...
4,1635721458409799680,b'Gaia DR2 6062210484221229312',6062210484221229312,1245338928,2015.5,197.721047,2.266964,-58.966452,2.414745,,...,,,,,,,,,,b'https://gea.esac.esa.int/data-server/datalin...


In [12]:
# Another way to see the column names: print them one per line.
# Iterating over a DataFrame yields its column labels.
for col in sample_df:
    print(col)

solution_id
designation
source_id
random_index
ref_epoch
ra
ra_error
dec
dec_error
parallax
parallax_error
parallax_over_error
pmra
pmra_error
pmdec
pmdec_error
ra_dec_corr
ra_parallax_corr
ra_pmra_corr
ra_pmdec_corr
dec_parallax_corr
dec_pmra_corr
dec_pmdec_corr
parallax_pmra_corr
parallax_pmdec_corr
pmra_pmdec_corr
astrometric_n_obs_al
astrometric_n_obs_ac
astrometric_n_good_obs_al
astrometric_n_bad_obs_al
astrometric_gof_al
astrometric_chi2_al
astrometric_excess_noise
astrometric_excess_noise_sig
astrometric_params_solved
astrometric_primary_flag
astrometric_weight_al
astrometric_pseudo_colour
astrometric_pseudo_colour_error
mean_varpi_factor_al
astrometric_matched_observations
visibility_periods_used
astrometric_sigma5d_max
frame_rotator_object_type
matched_observations
duplicated_source
phot_g_n_obs
phot_g_mean_flux
phot_g_mean_flux_error
phot_g_mean_flux_over_error
phot_g_mean_mag
phot_bp_n_obs
phot_bp_mean_flux
phot_bp_mean_flux_error
phot_bp_mean_flux_over_error
phot_bp_mean_ma

### Querying Gaia for Pleiades Cluster data 

We will query a circular region of radius 1.833 deg centred on the coordinates of the Pleiades (RA 56.75 deg, Dec 24.11667 deg), which are looked up in the next cell.

In [13]:
# NOTE(review): this import sits in the middle of the notebook; consider moving
# it to the import cell at the top so all dependencies are visible in one place.
from astropy.coordinates import SkyCoord
# Look up the sky coordinates for the name "Pleiades". As the last expression in
# the cell, the resulting SkyCoord is displayed; the recorded output gives
# ICRS (ra, dec) = (56.75, 24.11666667) deg, the centre used in the query below.
SkyCoord.from_name("Pleiades")

<SkyCoord (ICRS): (ra, dec) in deg
    (56.75, 24.11666667)>

In [19]:
# Query gaiadr2.gaia_source for sources inside a circle of radius 1.833 around
# (56.75, 24.11667) in ICRS, keeping only rows that pass the parallax and
# proper-motion cuts below. The adjacent string literals are joined by Python
# into a single one-line query string.
timer_start()
job = Gaia.launch_job(
    # Columns to return (some are renamed with "as").
    "SELECT phot_g_mean_mag as gmag, ra, dec, parallax as plx, bp_rp, lum_val, teff_val, radius_val "
    "FROM gaiadr2.gaia_source "
    # Positional cut: the source must lie inside the circle.
    "WHERE CONTAINS(POINT('ICRS',ra,dec),CIRCLE('ICRS',56.75,24.11667,1.833))=1 "
    # Parallax must exist, be non-zero, and exceed 10x its error.
    "AND parallax IS NOT NULL AND abs(parallax)>0 "
    "AND parallax_over_error>10 "
    # Proper-motion errors must be under 10% of the value in each component.
    "AND abs(pmra_error/pmra)<0.10 "
    "AND abs(pmdec_error/pmdec)<0.10 "
    "AND pmra IS NOT NULL AND abs(pmra)>0 "
    "AND pmdec IS NOT NULL AND abs(pmdec)>0 "
    # Proper-motion window selected for the cluster.
    "AND pmra BETWEEN 15 AND 25 "
    "AND pmdec BETWEEN -55 AND -40;",
    dump_to_file=False,
)
timer_stop()

4 seconds elapsed


In [20]:
# Print the job's text summary: the result-table column listing (name, dtype,
# unit, description, n_bad) followed by the job fields (Jobid, Phase, Owner,
# Output file, Results), as seen in the recorded output.
print(job)

<Table length=987>
   name     dtype  unit          description          n_bad
---------- ------- ---- ----------------------------- -----
      gmag float32  mag         G-band mean magnitude     0
        ra float64  deg               Right ascension     0
       dec float64  deg                   Declination     0
       plx float64  mas                      Parallax     0
     bp_rp float32  mag                BP - RP colour    16
   lum_val float32 Lsun            stellar luminosity   681
  teff_val float32    K stellar effective temperature   301
radius_val float32 Rsun                stellar radius   681
Jobid: None
Phase: COMPLETED
Owner: None
Output file: sync_20210715145913.xml.gz
Results: None


In [21]:
# Turn the Pleiades query results into a pandas DataFrame
df = job.get_results().to_pandas()

# Print the column names, one per line (iterating a DataFrame yields its column labels)
for col in df:
    print(col)

gmag
ra
dec
plx
bp_rp
lum_val
teff_val
radius_val


In [22]:
# Report the number of rows returned, then preview the first rows.
# Jupyter only displays the value of a cell's *last* expression, so df.head()
# has to come last; placed before the print() its result was silently discarded.
print(len(df))
df.head()

987


In [24]:
# Save the Pleiades selection to a CSV file in the current working directory.
# index=False (the documented boolean form) leaves the DataFrame's row index
# out of the file, so it contains only the queried columns.
df.to_csv("Pleiades_Cluster.csv", index=False)