# Aaron's (minimal) example of using Firefly with Gaia data


## Download the Gaia data 

In [1]:
import numpy as np

import hdbscan

from astroquery.gaia import Gaia

from astropy import units as u
from astropy.coordinates import SkyCoord, Distance
from astropy.table import Table

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
%matplotlib inline

Retrieve all the available data in the region of interest.

I will download data in the direction of the open cluster M67 (coordinates: RA = 132.825 deg, Dec = +11.8167 deg) with a search radius of 1 degree.

I will perform an asynchronous query (asynchronous rather than synchronous queries should be performed when retrieving more than 2000 rows), and also require that the parallax and proper motion data be well behaved (to remove likely spurious data).

Note: The query to the archive is with ADQL (Astronomical Data Query Language). For a description of ADQL and more examples see the Gaia DR1 ADQL cookbook: https://gaia.ac.uk/data/gaia-data-release-1/adql-cookbook

In [None]:
# ADQL cone search on gaiadr3.gaia_source: everything inside a circle of radius 1
# centred on (132.825, 11.8167), keeping only rows with positive parallax, small
# proper-motion errors, and non-null, non-zero proper motions.
# The pieces are joined by implicit string concatenation into one query string.
cmd = (
    "SELECT * FROM gaiadr3.gaia_source "
    "    WHERE CONTAINS(POINT('ICRS',gaiadr3.gaia_source.ra, gaiadr3.gaia_source.dec),"
    "    CIRCLE('ICRS', 132.825, 11.8167, 1))=1"
    "    AND parallax>0 "
    "    AND abs(pmra_error)<5 "
    "    AND abs(pmdec_error)<5 "
    "    AND pmra IS NOT NULL AND abs(pmra)>0 "
    "    AND pmdec IS NOT NULL AND abs(pmdec)>0;"
)

# launch the asynchronous job; dump_to_file=False, though this could be saved to a file
job = Gaia.launch_job_async(cmd, dump_to_file=False)

print(job)

This downloads as an astropy table.  Save this to an [ecsv](https://docs.astropy.org/en/stable/io/ascii/ecsv.html) file (to retain the units and masks).  That way I can reuse this data later without having to download it from Gaia again.

In [24]:
# pull the finished query results out of the job as a table
tab = job.get_results()

# cache the table on disk, replacing any earlier copy
results_path = 'Gaia_m67.ecsv'
tab.write(results_path, overwrite=True)

In [2]:
# to read the data back in
# (reloads the table saved to 'Gaia_m67.ecsv', so the Gaia query does not need to be re-run)
tab = Table.read('Gaia_m67.ecsv')

Calculate 3D coordinates

In [None]:
# convert each star's parallax into a distance ...
star_distance = Distance(parallax=tab['parallax'])

# ... and combine it with the sky position to get one 3D coordinate per star
coords_3d = SkyCoord(ra=tab['ra'], dec=tab['dec'], distance=star_distance)

# display the Cartesian representation as the cell output
coords_3d.cartesian

Do the same for the expected cluster center (from literature/internet)

In [4]:
# expected cluster centre: same RA/Dec as the centre of the query circle,
# placed at a distance of 900 pc
coords_3d_center = SkyCoord(ra=132.825 * u.deg, dec=11.8167 * u.deg, distance=900 * u.parsec)

I prefer pandas (and so does Firefly), though note that this will remove the units.  Let's also simplify things by continuing with only the columns we're interested in.

In [None]:
# look at all the column names
# (useful for choosing which columns to keep when converting to pandas)
list(tab.columns)

In [None]:
# columns to carry forward into pandas (everything else in the Gaia table is dropped)
cols = [
    'SOURCE_ID',
    'ra',
    'dec',
    'parallax',
    'pmra',
    'pmdec',
    'radial_velocity',
    'phot_g_mean_mag',
    'bp_rp',
    'ruwe',
    'teff_gspphot'
]
df = tab[cols].to_pandas()

# add the 3D coordinates but center them on the expected cluster center.
# The Cartesian representations are looked up once, and each offset is converted
# explicitly to a plain float array in parsec, so the unit stored in the
# (unit-less) DataFrame is deliberate rather than whatever the Quantity carried.
star_xyz = coords_3d.cartesian
center_xyz = coords_3d_center.cartesian
df['x'] = (star_xyz.x - center_xyz.x).to_value(u.pc)
df['y'] = (star_xyz.y - center_xyz.y).to_value(u.pc)
df['z'] = (star_xyz.z - center_xyz.z).to_value(u.pc)

df

Use [HDBSCAN](https://hdbscan.readthedocs.io/en/latest/) to identify clusters in these data (hopefully one is M67).  We will cluster on ra, dec, parallax, pmra, pmdec.

In [None]:
# the columns we cluster on: sky position, parallax and proper motion
cluster_features = ['ra', 'dec', 'parallax', 'pmra', 'pmdec']

# there are many settings that you can tweak to improve this
clusterer = hdbscan.HDBSCAN(min_cluster_size=10)
clusterer.fit(df[cluster_features])

# shift every label up by one so that I don't have a negative number below
# (HDBSCAN's noise label -1 becomes 0)
df['label'] = clusterer.labels_ + 1

# number of stars assigned to each label
df['label'].value_counts()

In [8]:
# build the palette used for the cluster labels in both matplotlib and firefly:
# take matplotlib's default color cycle and convert every entry to an (r, g, b) tuple
default_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
rgb_colors = list(map(mcolors.to_rgb, default_colors))


In [None]:
# check the CMD (color-magnitude diagram): bp_rp color against G-band magnitude
f, ax = plt.subplots()

# every star in gray as a backdrop
ax.scatter(df['bp_rp'], df['phot_g_mean_mag'], s=1, color='gray')

# overplot each HDBSCAN label in its own color.  The palette index wraps around,
# so having more labels than palette entries reuses colors instead of raising
# an IndexError.
for i, lab in enumerate(df['label'].unique()):
    use_df = df.loc[df['label'] == lab]
    ax.scatter(
        use_df['bp_rp'],
        use_df['phot_g_mean_mag'],
        s=1,
        label=lab,
        color=rgb_colors[i % len(rgb_colors)],
    )

# label the axes with the plotted column names so the figure stands alone
ax.set_xlabel('bp_rp')
ax.set_ylabel('phot_g_mean_mag')
ax.legend()

# magnitudes decrease towards brighter stars, so flip the y axis
ax.invert_yaxis()

## Format Data for Firefly

Here are the docs for [Reader](https://firefly.rcs.northwestern.edu/docs/reference/api/classes/firefly.data_reader.Reader.html#firefly.data_reader.Reader) and [ParticleGroup](https://firefly.rcs.northwestern.edu/docs/reference/api/classes/firefly.data_reader.ParticleGroup.html#firefly.data_reader.ParticleGroup) and also [a tutorial for using these](https://firefly.rcs.northwestern.edu/docs/data_reader/reader_tutorial.html). 

In [10]:
from firefly.data_reader import ParticleGroup, Reader, Settings

There are lots of Firefly [settings](https://firefly.rcs.northwestern.edu/docs/reference/api/classes/firefly.data_reader.Settings.html) that we can change. [Here's a tutorial](https://github.com/ageller/Firefly/blob/main/src/firefly/ntbks/settings_tutorial.ipynb) on how to manipulate these.  Here, I want to allow Firefly to have larger point sizes (there is a limit imposed so that we don't overload your computer... so use this with caution).

In [None]:
# get the default settings
settings = Settings()

# change any that you want (I will change the max point scale)
# a larger maxPointScale lets Firefly draw larger points; use this with caution
settings['maxPointScale'] = 50

# print out all the possible keys and their values
# (a quick way to see which other settings can be changed)
settings.printKeys()

In [None]:
# create the Reader, and add the settings
my_reader = Reader(settings=settings)

# identify the columns we should send to firefly for coloring & filtering
field_names = ['SOURCE_ID','parallax','pmra','pmdec','radial_velocity','phot_g_mean_mag','bp_rp','teff_gspphot','ruwe']

# add all the particle groups (one per HDBSCAN cluster)
for i, lab in enumerate(df['label'].unique()):
    print(lab)
    use_df = df.loc[df['label'] == lab]

    # positions relative to the expected cluster center, one row per star
    coords = use_df[['x','y','z']].to_numpy()

    # one row per field after the transpose; missing values are replaced by the
    # sentinel -999.
    # NOTE(review): to_numpy() on mixed columns presumably yields floats, which
    # cannot hold a 64-bit SOURCE_ID exactly -- confirm before using it as a key.
    fields = np.nan_to_num(use_df[field_names].to_numpy(), nan=-999).T

    # wrap the palette index so that more labels than palette entries reuses
    # colors instead of raising an IndexError; the appended 1 is the alpha channel
    rgba = list(rgb_colors[i % len(rgb_colors)]) + [1]

    group = ParticleGroup(
        'group' + str(lab),
        coords,
        field_arrays=fields,
        field_names=field_names,
        partsColors=rgba,
    )
    my_reader.addParticleGroup(group)
        

## Display Inline

In [14]:
from firefly.server import spawnFireflyServer,quitAllFireflyServers

In [15]:
# define the local port (typically anything in 5000 - 8000 range)
# the same value is used to start the server and to build the localhost URLs
port = 5500

In [None]:
# start a Firefly server on the chosen port, using the 'flask' method;
# the returned value is kept in `process`
process = spawnFireflyServer(port, method = 'flask')

In [None]:
from IPython.display import IFrame

# address of the combined Firefly page served on the local port
url = f'http://localhost:{port:d}/combined'

# embed that page in the notebook as the cell output
IFrame(src=url, width=1000, height=500)

In [None]:
# Send data to the server.
# Wait until it loads to run this command
# (i.e. until the Firefly page in the IFrame has finished loading)
my_reader.sendDataViaFlask()

## Get the selected data in Python

(after using the selection tool)

In [22]:
import requests

In [None]:
# send a get request to receive the currently selected data from Firefly
# for larger amounts of data, you will need to increase the waitTime (in seconds)
# passed via params (the default is 10s)
wait_time = 60
r = requests.get(
    url=f'http://localhost:{port:d}/get_selected_data',
    params={'waitTime': wait_time},
    # client-side timeout, a little longer than waitTime, so this cell raises
    # instead of hanging forever when the server is not responding
    timeout=wait_time + 10,
)
if r.status_code == 200:
    # success
    selection = r.json()
    # as a check
    partsKeys = list(selection.keys())
    print(partsKeys)
    if partsKeys:
        print(selection[partsKeys[0]]['Coordinates_flat'][:100])
    else:
        # nothing came back; avoid an IndexError on partsKeys[0]
        print('No particle groups were returned; make a selection in Firefly and re-run this cell.')
else:
    print('Error: {}'.format(r.status_code), r.content)


In [None]:
# pull out the selected data for the particle group named 'group4'
# (groups were named 'group' + str(label), so this is HDBSCAN label 4;
# change the key if a different cluster is of interest)
part4 = selection['group4']
# list the fields that came back for this group
part4.keys()

In [None]:
# compare the Firefly selection with the original cluster on the CMD
f, ax = plt.subplots()

# all: every star as a gray backdrop
all_color, all_mag = df['bp_rp'], df['phot_g_mean_mag']
ax.scatter(all_color, all_mag, s=1, color='gray', label='all')

# original: the stars that HDBSCAN assigned to label 4
in_label4 = df['label'] == 4
use_df = df.loc[in_label4]
ax.scatter(use_df['bp_rp'], use_df['phot_g_mean_mag'], s=1, color=rgb_colors[2], label='org')

# selection: the points returned from Firefly, drawn larger and in black
ax.scatter(part4['bp_rp'], part4['phot_g_mean_mag'], s=5, color='black', label='selected')

ax.legend()

# flip the magnitude axis (ax is the current axes, so this matches plt.gca())
ax.invert_yaxis()

### Quit the Firefly server

... this doesn't always work in a notebook ... you can also quit the server by resetting the kernel.

In [None]:
# ask Firefly to quit all of its servers; the result is kept in return_code
# (if this does not work from the notebook, restart the kernel instead)
return_code = quitAllFireflyServers()