# Grab Gaia data and format it for Firefly

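Create a conda environment with the dependencies used in this notebook (the `firefly` package imported further down is not part of this command and has to be installed separately):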

```
conda create --name Gaia python=3.10 astropy astroquery jupyter scipy numpy matplotlib pandas h5py eventlet flask flask-socketio requests
```


In [60]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

from astroquery.gaia import Gaia
from astropy.table import Table, Column
import astropy.units as u
from astropy.coordinates import SkyCoord, ICRS
from astropy.io import ascii

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [61]:
# Archive table that the query below selects from (Gaia EDR3 source catalogue).
GaiaCatalog = 'gaiaedr3.gaia_source'

In [233]:
# Query parameters: row cap, faint-end G-magnitude limit, and the columns to fetch.
N = 1e7
gmax = 12
columns = 'ra, dec, parallax, pmra, pmdec, dr2_radial_velocity, phot_g_mean_mag, bp_rp'

# Assemble the query one clause per entry; joining with single spaces gives
# a one-line statement.
clauses = [
    f"SELECT TOP {int(N)} {columns}",
    f"FROM {GaiaCatalog}",
    f"WHERE phot_g_mean_mag<={gmax}",
    "AND parallax IS NOT NULL",
    "AND dr2_radial_velocity IS NOT NULL",
]
cmd = " ".join(clauses)
print(cmd)

# A synchronous job (Gaia.launch_job) is only OK for results with < 2000
# output rows; this selection returns far more, so it is submitted
# asynchronously instead.
job = Gaia.launch_job_async(cmd, dump_to_file=False)

print(job)

SELECT TOP 10000000 ra, dec, parallax, pmra, pmdec, dr2_radial_velocity, phot_g_mean_mag, bp_rp FROM gaiaedr3.gaia_source WHERE phot_g_mean_mag<=12 AND parallax IS NOT NULL AND dr2_radial_velocity IS NOT NULL
INFO: Query finished. [astroquery.utils.tap.core]
<Table length=2346497>
        name         dtype    unit                  description                 n_bad
------------------- ------- -------- ------------------------------------------ -----
                 ra float64      deg                            Right ascension     0
                dec float64      deg                                Declination     0
           parallax float64      mas                                   Parallax     0
               pmra float64 mas / yr Proper motion in right ascension direction     0
              pmdec float64 mas / yr     Proper motion in declination direction     0
dr2_radial_velocity float32   km / s              Radial velocity from Gaia DR2     0
    phot_g_mean_mag float32   

In [234]:
# Retrieve the results table from the finished job and display it.
GaiaData = job.get_results()
GaiaData

ra,dec,parallax,pmra,pmdec,dr2_radial_velocity,phot_g_mean_mag,bp_rp
deg,deg,mas,mas / yr,mas / yr,km / s,mag,mag
float64,float64,float64,float64,float64,float32,float32,float32
282.717638809954,5.678589256718091,4.895651948510564,12.920215499221381,-12.157485102325001,-17.226954,10.219143,0.7910309
285.42400750687676,5.748524590289011,2.8097993718125878,4.172226577840751,-2.456463325250668,10.045121,11.459485,0.8224335
283.2253084293333,5.985893578742719,4.797513455035142,40.134645201165604,-23.7457194885826,22.376074,9.865556,0.7161522
258.16921472992595,-19.17551233809951,0.5471487465757905,1.2252572441859229,4.034427556941061,-20.605927,9.423732,2.068554
276.6671334933571,-23.36800196727716,0.4779239568776744,6.537071073496116,-2.8653532839112232,-39.64584,11.350695,1.9902363
258.1501377276725,-19.157169294215578,0.43959325118390113,1.390618364094999,-4.666901669568685,-26.035835,11.965985,1.837017
285.4302610444678,5.8185759796736205,2.626231713942302,-12.53273381974123,-9.01000902914774,6.69268,11.756206,0.95399
282.7946127450585,5.790625549362154,1.2784787006805898,-2.5938735193501694,-9.114955094605095,3.0134463,11.737728,2.06361
283.12530877550086,6.050877372101879,1.565778039123882,-3.384788451719281,-8.116365858582881,-28.795065,10.15765,1.5731535
...,...,...,...,...,...,...,...


## Convert these ra, dec, parallax coordinates to 3D cartesian

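The distances below are obtained by simply inverting the parallax, which is why only stars with a positive parallax are kept. A more robust alternative may be to use the Bailer-Jones distance estimates instead.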

In [235]:
# Keep only rows with a strictly positive parallax and no NaN in any of the
# quantities that are used further down.
required_columns = ['parallax', 'phot_g_mean_mag', 'bp_rp', 'dr2_radial_velocity']

mask = GaiaData['parallax'] > 0
for column_name in required_columns:
    mask = mask & ~np.isnan(GaiaData[column_name])

useGaiaData = GaiaData[mask]

In [236]:
# Distance in parsec, converted from the parallax with astropy's parallax equivalency.
distance_pc = useGaiaData['parallax'].to(u.parsec, equivalencies=u.parallax())

# Bundle sky position, distance, proper motions and radial velocity into one
# ICRS frame object so the Cartesian components can be read off it.
coordsRV = ICRS(
    ra=useGaiaData['ra'],
    dec=useGaiaData['dec'],
    distance=distance_pc,
    pm_ra_cosdec=useGaiaData['pmra'],
    pm_dec=useGaiaData['pmdec'],
    radial_velocity=useGaiaData['dr2_radial_velocity'],
)
# Uncomment to inspect the Cartesian positions / velocities:
# print(coordsRV.cartesian)
# print(coordsRV.velocity)

In [237]:
# Collect the Cartesian position and velocity components, plus the two
# photometric columns, into a single DataFrame (one row per star).
cartesian_position = coordsRV.cartesian
cartesian_velocity = coordsRV.velocity

outdfRV = pd.DataFrame({
    'x': cartesian_position.x.value,
    'y': cartesian_position.y.value,
    'z': cartesian_position.z.value,
    'vx': cartesian_velocity.d_x.value,
    'vy': cartesian_velocity.d_y.value,
    'vz': cartesian_velocity.d_z.value,
    # plain numpy copies of the table columns
    'bp_rp': np.array(useGaiaData['bp_rp'].data),
    'phot_g_mean_mag': np.array(useGaiaData['phot_g_mean_mag'].data),
})
outdfRV

Unnamed: 0,x,y,z,vx,vy,vz,bp_rp,phot_g_mean_mag
0,44.747089,-198.273863,20.211384,8.686336,18.339788,-13.418920,0.791031,10.219143
1,94.178458,-341.354005,35.647542,9.554099,-8.162692,-3.117365,0.822433,11.459485
2,47.427379,-201.806657,21.737012,44.256740,-14.972933,-21.001991,0.716152,9.865556
3,-353.919672,-1689.582263,-600.317558,12.026454,5.635547,39.783008,2.068554,9.423732
4,223.001941,-1907.768735,-829.912968,58.867778,54.872438,-10.364953,1.990236,11.350695
...,...,...,...,...,...,...,...,...
2344537,-1234.699284,-301.921740,-2261.331976,-23.074748,132.772969,49.683330,2.006218,11.910892
2344538,-692.277165,-202.976038,-1244.584484,1.860059,53.555927,-2.099770,1.668958,11.683310
2344539,-480.489885,-141.519115,-861.667748,3.072682,33.529618,1.114667,1.655364,11.773745
2344540,-136.369876,-39.308765,-245.326095,-6.147136,-10.556231,-18.552521,1.593614,8.536899


In [238]:
# Export this to a csv file for now. index=False keeps the DataFrame's row
# index out of the file, so reading the file back with pd.read_csv does not
# produce a spurious "Unnamed: 0" column.
outdfRV.to_csv('tmpGaia.csv', index=False)

In [239]:
from firefly.data_reader import ArrayReader

In [240]:
# Reload the exported table and pack the position / velocity columns into
# two (n_rows, 3) float arrays, with columns ordered x, y, z.
df = pd.read_csv('tmpGaia.csv')
axes = ['x', 'y', 'z']

coordinates = np.column_stack([df[name] for name in axes]).astype(np.float64, copy=False)
velocities = np.column_stack([df[f'v{name}'] for name in axes]).astype(np.float64, copy=False)
coordinates

array([[  44.74708905, -198.27386341,   20.21138422],
       [  94.1784582 , -341.35400452,   35.64754174],
       [  47.42737877, -201.80665677,   21.73701154],
       ...,
       [-480.48988524, -141.51911534, -861.66774804],
       [-136.36987599,  -39.3087655 , -245.32609517],
       [-498.94824416, -146.43772507, -892.23702308]])

In [241]:
# Per-star scalar fields: the colour and the G magnitude as numpy arrays,
# plus the negated magnitude under the key 'minus_mag'.
fields = {}
for column_name in ('bp_rp', 'phot_g_mean_mag'):
    fields[column_name] = df[column_name].to_numpy()
fields['minus_mag'] = -fields['phot_g_mean_mag']

# Print the value range of every field as a quick sanity check.
for field_name, field_values in fields.items():
    print(field_name, field_values.min(), field_values.max())


bp_rp -0.1259861 7.2997017
phot_g_mean_mag 2.4740036 12.0
minus_mag -12.0 -2.4740036


In [242]:
# Build a Firefly ArrayReader from the in-memory arrays. Writing is deferred
# (write_to_disk=False) so the settings can be adjusted before the explicit
# writeToDisk() call at the end of this cell.
reader = ArrayReader(
    ## don't have to pass as a list with 1 element
    ##  if only 1 particle group
    coordinates,
    velocities,
    'eDR3-RV',  ## name of the particle group; also the key used in reader.settings below
    fields,
    JSONdir='GaiaeDR3', ## if not an absolute path assumes from ~
    clean_JSONdir=True, ## delete old files in the directory
    write_startup=True, ## overwrite the startup file (and turn off PG append)
    write_to_disk=False, ## whether to write to disk at the end of __init__
    ## the three flag lists correspond, in order, to the entries of `fields`
    ## (bp_rp, phot_g_mean_mag, minus_mag)
    field_filter_flags=[True,True,False], ## flags corresponding to fields
    field_colormap_flags=[True,True,False],
    field_radius_flags=[False,False,True],
)

# Per-group viewer settings, keyed by the particle group name.
# NOTE(review): presumably sizeMult scales the rendered particle size and
# radiusVariable=1 selects the first radius-flagged field (minus_mag) —
# confirm against the Firefly settings documentation.
reader.settings['sizeMult']['eDR3-RV'] = 0.1
reader.settings['radiusVariable']['eDR3-RV'] = 1

# Explicit write, since write_to_disk=False was passed to the constructor.
reader.writeToDisk()

JSONdir: /Users/agurvich/GaiaeDR3 -- is not a sub-directory of firefly/static/data. 
This may produce confusing or inoperable results. As such, we will create a symlink for you when you writeToDisk.
filter/colormap/radius flags correspond to: ['bp_rp', 'phot_g_mean_mag', 'minus_mag']
Outputting: eDR3-RV - 2344542/2344542 particles - 4 tracked fields


''

In [244]:
# Copy the Firefly source to a 'Gaia_test' target. With init_gh_pages=True the
# output recorded below shows a new GitHub repository being initialised
# (using a stored token file) and pushed to its main branch.
# NOTE(review): this has remote side effects — run this cell deliberately.
reader.copyFireflySourceToTarget('Gaia_test',init_gh_pages=True)

Initializing a new GitHub repository at /Users/agurvich/Gaia_test with
	GHREPONAME: Gaia_test
	GHUSER: agurvich
	GHOAUTHTOKENPATH: /Users/agurvich/.github.token



  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  5593  100  5573  100    20   5163     18  0:00:01  0:00:01 --:--:--  5217
To github.com:agurvich/Gaia_test.git
 * [new branch]      main -> main
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   622  100   583  100    39   1303     87 --:--:-- --:--:-- --:--:--  1484


['{',
 '  "id": 498967230,',
 '  "node_id": "R_kgDOHb2ivg",',
 '  "name": "Gaia_test",',
 '  "full_name": "agurvich/Gaia_test",',
 '  "private": false,',
 '  "owner": {',
 '    "login": "agurvich",',
 '    "id": 7799423,',
 '    "node_id": "MDQ6VXNlcjc3OTk0MjM=",',
 '    "avatar_url": "https://avatars.githubusercontent.com/u/7799423?v=4",',
 '    "gravatar_id": "",',
 '    "url": "https://api.github.com/users/agurvich",',
 '    "html_url": "https://github.com/agurvich",',
 '    "followers_url": "https://api.github.com/users/agurvich/followers",',
 '    "following_url": "https://api.github.com/users/agurvich/following{/other_user}",',
 '    "gists_url": "https://api.github.com/users/agurvich/gists{/gist_id}",',
 '    "starred_url": "https://api.github.com/users/agurvich/starred{/owner}{/repo}",',
 '    "subscriptions_url": "https://api.github.com/users/agurvich/subscriptions",',
 '    "organizations_url": "https://api.github.com/users/agurvich/orgs",',
 '    "repos_url": "https://api.gi

In [None]:
# Build octrees for the particle data (this cell has no recorded execution).
# NOTE(review): presumably [True] is a per-particle-group flag and the
# npart_min_node / npart_max_node arguments bound the number of particles
# per octree node — confirm against the Firefly documentation.
reader.createOctrees([True],npart_min_node=1e4,npart_max_node=1e5)

In [None]:
## Write the output again, this time with symlink=False:
## the data will be copied to static/data instead of being symlinked.
reader.writeToDisk(symlink=False)