In [None]:
import numpy as np
import astropy.units as u
from astropy.table import Table, QTable
from astropy.coordinates import SkyCoord
import matplotlib.pyplot as plt

In [None]:
# Read the two ECSV tables used throughout this notebook:
# the full sample (`data1`) and the `cluster1` subset.
data1, cluster1 = (
    Table.read(ecsv_path)
    for ecsv_path in ('../data/data1.ecsv', '../data/cluster1.ecsv')
)

In [None]:
# For reference only (kept commented out): how `data1` and `cluster1` can be rebuilt from `data0`.
# data0 = Table.read('../session_01/data0.ecsv')
# positive_parallaxes = data0['parallax'] > 0
# data1 = data0[positive_parallaxes]
# data1['mg'] = data1['phot_g_mean_mag']+5*np.log10(data1['parallax'])-10
# manual_filter1 = (data1['parallax'] > 5.0*u.mas) & (data1['parallax'] < 5.7*u.mas)
# cluster1 = data1[manual_filter1]

In [None]:
# Feature matrix for the clustering algorithms: one row per star,
# one column per quantity, in the order listed below.
feature_names = ('pmra', 'pmdec', 'parallax')
pos = np.vstack([data1[name].data for name in feature_names]).T
pos.shape

In [None]:
# Preview only the first rows instead of dumping the whole array.
pos[:5]

## KMeans

In [None]:
from sklearn.cluster import KMeans

# K-means starts from randomly chosen centroids, so pin the seed (and the
# number of restarts) to obtain the same groups on every Restart & Run All.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

# Fit the model and get the group index assigned to every row of `pos`.
labels = kmeans.fit_predict(pos)


In [None]:
# Final centroids of the fitted K-means model: one row per cluster, with the
# columns in the same order as `pos` (pmra, pmdec, parallax).
centroids = kmeans.cluster_centers_
centroids

In [None]:
fig, ax = plt.subplots(ncols=1, figsize=(18, 10))

# Faint grey backdrop with every star of the sample.
ax.scatter(data1['pmra'], data1['pmdec'], c='gray', s=1, alpha=0.5)

# Overlay the members of each K-means group; every `ax.plot` call picks the
# next colour of the default cycle, so each group gets its own colour.
for group in np.unique(labels):
    members = data1[labels == group]
    ax.plot(members['pmra'], members['pmdec'], marker='o', ls='', ms=4)

ax.set_aspect('equal')
ax.set_xlabel('Proper motion in right ascension [mas/yr]')
ax.set_ylabel('Proper motion in declination [mas/yr]')
ax.set_xlim(-60, 30)
ax.set_ylim(-60, 30);


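This is not really what we are looking for. K-means simply assigns every star to the nearest of the three centroids (plain Euclidean distance), without taking the local density of points into account, so it cannot isolate a compact overdensity such as a star cluster. We need to find an alternative method.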

## DBSCAN

In [None]:
from astropy.coordinates import Distance
from sklearn.cluster import DBSCAN

# `DistanceMetric` lives in `sklearn.metrics` since scikit-learn 1.0 and the
# old `sklearn.neighbors` alias was removed in 1.3; fall back to the old
# location only for older releases so this cell imports on any version.
try:
    from sklearn.metrics import DistanceMetric
except ImportError:
    from sklearn.neighbors import DistanceMetric

def get_clusters(pos, eps=.005, min_samples=20,sphere=False):
    """Run DBSCAN with the Euclidean metric on `pos` and summarise the result.

    Parameters
    ----------
    pos : array-like
        Samples to cluster; passed unchanged to `DBSCAN.fit`.
    eps, min_samples
        Forwarded to the `DBSCAN` constructor.
    sphere : bool
        Accepted for compatibility but not used by this implementation.

    Returns
    -------
    labels : array
        The `labels_` attribute of the fitted model.
    core_samples_mask : boolean array
        Same shape as `labels`; True at the positions listed in
        `core_sample_indices_`.
    n_clusters : int
        Number of distinct labels, not counting the label -1.

    The number of clusters is also printed.
    """
    model = DBSCAN(eps=eps, min_samples=min_samples, metric='euclidean')
    model.fit(pos)
    labels = model.labels_

    # Boolean mask aligned with `labels` that flags the core samples.
    core_samples_mask = np.zeros(labels.shape, dtype=bool)
    core_samples_mask[model.core_sample_indices_] = True

    # Distinct labels, leaving out -1 (treated as noise) when present.
    n_clusters = len(set(labels) - {-1})
    print('Clusters {0}'.format(n_clusters))
    return labels, core_samples_mask, n_clusters

In [None]:
# Run DBSCAN on the (pmra, pmdec, parallax) matrix `pos`. `eps` is a Euclidean
# distance in those mixed units (presumably mas/yr and mas — confirm).
# NOTE: this overwrites the `labels` previously produced by K-means.
labels, core_samples_mask, n_clusters = get_clusters(pos, eps=2, min_samples=25, sphere=False)

In [None]:
fig, ax = plt.subplots(ncols=1, figsize=(18, 10))

# Faint grey backdrop with every star of the sample.
ax.scatter(data1['pmra'], data1['pmdec'], c='gray', s=1, alpha=0.5)

# Overlay only the DBSCAN groups with a positive label.
# NOTE(review): this skips label 0 as well as the noise label -1 — confirm
# that leaving group 0 out is intended.
for group in set(labels):
    if group <= 0:
        continue
    members = data1[labels == group]
    ax.plot(members['pmra'], members['pmdec'],
            marker='o', ls='', ms=3, label=group)

ax.set_aspect('equal')
ax.set_xlabel('Proper motion in right ascension [mas/yr]')
ax.set_ylabel('Proper motion in declination [mas/yr]')
ax.set_xlim(-50, 20)
ax.set_ylim(-30, 20)

ax.legend()

**TODO:** describe and discuss the DBSCAN result shown above.

In [None]:
fig, ax = plt.subplots(ncols=2, figsize=(18, 6))

# Bin edges do not change between calls: build them once per panel.
parallax_bins = np.arange(0, 8, 0.05)
distance_bins = np.arange(0, 2, 0.01)

# Reference histograms: the full sample and the `cluster1` selection.
ax[0].hist(data1['parallax'],    bins=parallax_bins, label='Full sample')
ax[0].hist(cluster1['parallax'], bins=parallax_bins, label='Cluster')
ax[1].hist(data1['distance'],    bins=distance_bins, label='Full sample')
ax[1].hist(cluster1['distance'], bins=distance_bins, label='Cluster')

# One histogram per DBSCAN group with a positive label
# (label 0 and the noise label -1 are left out).
for group in set(labels):
    if group > 0:
        members = data1[labels == group]  # filter the table once per group
        ax[0].hist(members['parallax'], bins=parallax_bins, label=f'Cluster {group}')
        ax[1].hist(members['distance'], bins=distance_bins, label=f'Cluster {group}')

ax[0].set_xlabel('Parallax [mas]')
ax[1].set_xlabel('Distance [kpc]')

# Draw each legend a single time, after every histogram has been added.
for panel in ax:
    panel.set_ylabel('Number of stars')
    panel.legend()

In [None]:
fig, ax = plt.subplots(ncols=2, figsize=(18, 6))

# Shared bin edges for both proper-motion components.
pm_bins = np.arange(-40, 40, 1)

# DBSCAN groups with a positive label, in a deterministic order
# (label 0 and the noise label -1 are left out).
dbscan_groups = sorted(group for group in set(labels) if group > 0)

panels = (
    (ax[0], 'pmra',  'Proper motion in right ascension [mas/yr]'),
    (ax[1], 'pmdec', 'Proper motion in declination [mas/yr]'),
)
for panel, column, xlabel in panels:
    panel.hist(data1[column],    bins=pm_bins, label='Full sample')
    panel.hist(cluster1[column], bins=pm_bins, label='Cluster')

    # Give every group its own legend entry instead of repeating 'DBSCAN'.
    for group in dbscan_groups:
        panel.hist(data1[column][labels == group], bins=pm_bins, label=f'DBSCAN {group}')

    panel.set_xlabel(xlabel)
    panel.set_ylabel('Number of stars')
    panel.legend()

In [None]:
fig, ax = plt.subplots(ncols=2, figsize=(18, 6))

# Shared bin edges for both proper-motion components.
pm_bins = np.arange(-40, 40, 1)

# Stars that DBSCAN assigned to group 1, i.e. the selection this notebook
# keeps as `cluster2`. (The previous `labels != 1` drew every star *outside*
# that group under the 'DBSCAN' label.)
dbscan_members = labels == 1

for panel, column in zip(ax, ('pmra', 'pmdec')):
    panel.hist(data1[column],                 bins=pm_bins, label='Full sample')
    panel.hist(cluster1[column],              bins=pm_bins, label='Cluster')
    panel.hist(data1[column][dbscan_members], bins=pm_bins, label='DBSCAN')

def str_label(tab, col):
    """Build an axis label of the form "<description> [<unit>]" for a column.

    `tab[col]` must expose a `description` attribute and a `quantity`
    attribute that itself has a `unit`.
    """
    column = tab[col]
    unit = column.quantity.unit
    return "{} [{}]".format(column.description, unit)

# Take the x-axis labels from the column metadata of `data1`.
ax[0].set_xlabel(str_label(data1, 'pmra'))
ax[1].set_xlabel(str_label(data1, 'pmdec'))

# Draw each legend a single time (it was previously requested twice per panel).
for panel in ax:
    panel.set_ylabel('Number of stars')
    panel.legend()

In [None]:
# Keep the stars that DBSCAN assigned to group 1 and store them as an ECSV
# table, replacing any previous copy of the file.
is_member = labels == 1
cluster2 = data1[is_member]
cluster2.write('../data/cluster2.ecsv', format='ascii.ecsv', overwrite=True)

## Photometry

In [None]:
# Inspect which columns the `cluster1` table provides.
cluster1.columns

In [None]:
fig, ax = plt.subplots(ncols=1, figsize=(14, 12))

# Background: every star of the sample.
ax.scatter(data1['bp_rp'], data1['Mg'], c='grey', s=1)

# Cluster members: first in black, then coloured by `teff_val` on top, so that
# members without a usable temperature presumably remain visible in black.
ax.scatter(cluster2['bp_rp'], cluster2['Mg'], c='k', s=40)
teff_points = ax.scatter(cluster2['bp_rp'], cluster2['Mg'], c=cluster2['teff_val'], s=40)

cb = fig.colorbar(teff_points, ax=ax)
cb.set_label("$T_{eff}$ [K]")

ax.set_xlabel('BP - RP colour [mag]')
ax.set_ylabel('Absolute G magnitude $M_G$ [mag]')

ax.set_xlim(-0.5, 3.6)
# Limits given in decreasing order put bright stars at the top; this already
# inverts the axis, so no separate `invert_yaxis()` call is needed.
ax.set_ylim(15, -2.5);

In [None]:
# Count the entries of `radius_val` that are non-zero.
# NOTE(review): if missing radii are stored as NaN they also count as non-zero —
# confirm how missing values are encoded before reading this as "stars with a radius".
np.count_nonzero(cluster2['radius_val'])

In [None]:
fig, ax = plt.subplots(ncols=1, figsize=(14, 12))

# Background: every star of the sample.
ax.scatter(data1['bp_rp'], data1['Mg'], c='grey', s=1)

# Cluster members drawn in three layers, the most complete information last:
colour, abs_mag = cluster2['bp_rp'], cluster2['Mg']
# 1. black, fixed size: shown when neither Teff nor the star radius is available
ax.scatter(colour, abs_mag, c='k', s=40)
# 2. coloured by Teff, fixed size: shown when the star radius is not available
ax.scatter(colour, abs_mag, c=cluster2['teff_val'], s=40)
# 3. coloured by Teff and sized by the star radius: shown when both are available
teff_points = ax.scatter(colour, abs_mag,
                         c=cluster2['teff_val'], s=cluster2['radius_val'] * 40)

cb = fig.colorbar(teff_points, ax=ax)
cb.set_label("$T_{eff}$ [K]")

ax.set_xlabel('BP - RP colour [mag]')
ax.set_ylabel('Absolute G magnitude $M_G$ [mag]')

ax.set_xlim(-0.5, 3.6)
# Limits given in decreasing order put bright stars at the top; this already
# inverts the axis, so no separate `invert_yaxis()` call is needed.
ax.set_ylim(15, -2.5);

In [None]:
import read_mist_models

In [None]:
import os
import zipfile

filename = '../data/MIST_iso_62321da9c816b.iso.cmd'

# Unpack the isochrone archive on first use. `zipfile` replaces the previous
# `cd ../data && unzip ...` shell call: it needs no external tool and raises
# an explicit error if the archive is missing instead of failing silently.
if not os.path.isfile(filename):
    data_dir = os.path.dirname(filename)
    with zipfile.ZipFile(os.path.join(data_dir, 'MIST_isocmd.zip')) as archive:
        archive.extractall(data_dir)

isocmd = read_mist_models.ISOCMD(filename)

In [None]:
# Print the metadata of the loaded isochrone set, one "caption value" line each.
isochrone_summary = (
    ('version: ', isocmd.version),
    ('abundances: ', isocmd.abun),
    ('rotation: ', isocmd.rot),
    ('ages: ', [round(x,2) for x in isocmd.ages]),
    ('number of ages: ', isocmd.num_ages),
    ('available columns: ', isocmd.hdr_list),
)
for caption, value in isochrone_summary:
    print(caption, value)

In [None]:
fig, ax = plt.subplots(ncols=1, figsize=(14, 12))

# Background: every star of the sample; cluster members in black first and
# then coloured by `rv_template_teff` on top.
ax.scatter(data1['bp_rp'], data1['Mg'], c='grey', s=1)
ax.scatter(cluster2['bp_rp'], cluster2['Mg'], c='k', s=40)
teff_points = ax.scatter(cluster2['bp_rp'], cluster2['Mg'],
                         c=cluster2['rv_template_teff'], s=40)

cb = fig.colorbar(teff_points, ax=ax)
cb.set_label("$T_{eff}$ [K]")

# Overlay one isochrone per age value.
# The stars are plotted as BP-RP colour against `Mg`, so the isochrones must
# use the G-band magnitude on the y axis as well (they were previously drawn
# with the RP magnitude, which shifts them with respect to the data).
# NOTE(review): assumes the file provides a 'Gaia_G_EDR3' column next to the
# BP/RP ones — check `isocmd.hdr_list`.
for age in [8.7, 8.8, 8.9, 9.0, 9.1]:
    iso = isocmd.isocmds[isocmd.age_index(age)]  # isochrone table for the desired age
    # We can use phase to select the part of the isochrone for stars in the
    # main sequence and red giant phases.
    phase_mask = (iso['phase'] >= 0) & (iso['phase'] < 3)
    bp_minus_rp = iso['Gaia_BP_EDR3'][phase_mask] - iso['Gaia_RP_EDR3'][phase_mask]
    ax.plot(bp_minus_rp, iso['Gaia_G_EDR3'][phase_mask], label=age)

ax.set_xlabel('Gaia_BP_EDR3 - Gaia_RP_EDR3')
ax.set_ylabel('Gaia_G_EDR3')

ax.set_xlim(-0.5, 3.6)
# Limits given in decreasing order put bright stars at the top; this already
# inverts the axis, so no separate `invert_yaxis()` call is needed.
ax.set_ylim(15, -2.5)

ax.legend();

In [None]:
df = cluster2.to_pandas()  # kept from the original cell; not used by the summary below

print('Average values for the cluster:')
print('Number of members  : {0}'.format(len(cluster2)))

# (caption, column name, float format) for every averaged quantity. The mean and
# the unit are always read from the same column, which avoids copy-paste slips
# such as printing the unit of `ra` next to the declination.
summary_rows = (
    ('Right Ascension',    'ra',       '7.3f'),
    ('Declination',        'dec',      '7.3f'),
    ('Proper motion R.A.', 'pmra',     '7.2f'),
    ('Proper motion Dec.', 'pmdec',    '7.2f'),
    ('Distance',           'distance', '7.3f'),
)
for caption, column, spec in summary_rows:
    values = cluster2[column]
    # Captions are padded to 18 characters so the colons line up.
    print('{0:<18} : {1:{2}} {3}'.format(caption, values.mean(), spec, values.quantity.unit))

### ⛏ Exercise
Compare those values with the ones in the VizieR table `J/A+A/633/A99/table1`. There are two options:  
(a) Download the VizieR table with `astroquery`, as explained on the first day.  
(b) Simply visit https://vizier.cds.unistra.fr/viz-bin/VizieR-5?-ref=VIZ6232587c2323a1&-out.add=.&-source=J/A%2bA/633/A99/table1&recno=834

What is our discrepancy in the distance of the cluster with respect to that publication?

### 🌪 Exercise
Try to improve the proper motion of the cluster by computing the weighted average of the `pmra` and `pmdec` columns. You can do it by converting the column to a numpy array with the `.data` attribute, and then applying `np.average`. Check the documentation.

### 🌪 Exercise
The [Simbad page for NGC 2632 / M44](https://simbad.u-strasbg.fr/simbad/sim-basic?Ident=m44) indicates that:

> Angular size (arcmin): 	118.2 118.2 0 (Opt) D [2020A&A...633A..99C ](https://simbad.cds.unistra.fr/simbad/sim-ref?bibcode=2020A%26A...633A..99C)

Compute the physical size of `cluster2`