# Load data from Vizier

#### Index<a name="index"></a>
1. [Import packages](#imports)
2. [Load data](#loadData)

## 1. Import packages<a name="imports"></a>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from astroquery.vizier import Vizier

In [None]:
import random
import sys
import os


from math import *
import matplotlib as mpl

# Global Matplotlib look for every figure in this notebook: serif text,
# 12 pt fonts, tighter legend spacing and capped error bars.
plt.rc('font', family='serif')
mpl.rcParams.update({
    'font.size': 12,
    'legend.labelspacing': 0.25,
    'legend.fontsize': 12,
    'errorbar.capsize': 4,
})

## 2. Load data<a name="loadData"></a>

In [None]:
# Set the maximum number of rows returned per query on the shared `Vizier`
# object imported above.
# NOTE(review): if the requested table holds more than 30000 rows, the result
# would presumably be cut off at this limit — confirm the cap is intended.
Vizier.ROW_LIMIT = 30000
# Query the 'J/A+A/618/A93/members' table; the result is a list-like
# collection of tables (see the type noted on the right).
catalogs = Vizier.get_catalogs('J/A+A/618/A93/members')  # astroquery.utils.commons.TableList
print(catalogs)

Since `catalogs` contains only one table, we only need `catalogs[0]`. We convert it to a `pandas` `DataFrame` for ease of handling.

In [None]:
# Convert the first table in `catalogs` to a pandas DataFrame.
catalog = catalogs[0].to_pandas()

# Text columns may arrive as raw `bytes`; decode those values to `str`.
# Only object-dtype columns can hold bytes, so numeric columns are skipped
# entirely. `Series.map` is used per column instead of the element-wise
# `DataFrame.applymap`, which is deprecated in recent pandas releases.
for column in catalog.columns:
    if catalog[column].dtype == object:
        catalog[column] = catalog[column].map(
            lambda x: x.decode() if isinstance(x, bytes) else x
        )
catalog.head()

## 3. Create examples folders

In [None]:
# Create the output folders for positive and negative examples.
# `exist_ok=True` makes this idempotent without a separate existence check,
# so re-running the cell (or a concurrent creation) cannot raise.
for folder in ('examples', 'neg_examples'):
    os.makedirs(folder, exist_ok=True)

## 4. Save data in the desired format

In [None]:
# Display toggles used by the cells below:
#   plot_scatter - show the per-cluster scatter plot
#   plot_hist    - show the per-cluster 2D histogram image (it is saved to
#                  disk regardless of this flag)
plot_scatter, plot_hist = True, False

### 4.1. True clusters

In [None]:
# Number of bins per axis of the 2D histogram. Each flattened histogram
# therefore has n_bins * n_bins entries (400 for n_bins = 20).
n_bins = 20

clusters = np.unique(catalog['Cluster'])
number_clusters = len(clusters)

# One flattened 2D histogram (BP-RP vs. Gmag) per cluster, in the same order
# as `clusters`.
true_clusters_2d_hist = []
for cluster in clusters:
    is_cluster = catalog['Cluster'] == cluster
    data_cluster = catalog[is_cluster]

    # Remove stars for which Gaia didn't measure magnitude or colour
    is_nan = np.isnan(data_cluster['BP-RP']) | np.isnan(data_cluster['Gmag'])
    data_cluster = data_cluster[~is_nan]

    gmag = data_cluster['Gmag']
    bp_rp = data_cluster['BP-RP']

    if plot_scatter:
        # Use a dedicated figure so the scatter plot can never end up on the
        # same axes as the histogram image saved below, whatever the backend.
        fig, ax = plt.subplots()
        ax.scatter(bp_rp, gmag, marker='.')
        ax.set_xlabel(r'$B_p - R_p$ (mag)')
        ax.set_ylabel(r'$G$ (mag)')
        ax.set_title(f'Cluster {cluster}')
        ax.set_ylim([18, 8])  # inverted axis: smaller magnitudes at the top
        plt.show()
        plt.close(fig)

    hist, xedges, yedges = np.histogram2d(bp_rp, gmag, bins=n_bins,
                                          density=True)

    # Save the histogram as an image; it is only displayed when requested.
    fig, ax = plt.subplots()
    ax.imshow(hist, interpolation='nearest')
    ax.set_title(f'Cluster {cluster}')
    fig.savefig(f'examples/{cluster}.jpg', bbox_inches='tight')
    if plot_hist:
        plt.show()
    plt.close(fig)

    # Flatten in row-major order without hard-coding the element count.
    hist_linear = hist.ravel()
    true_clusters_2d_hist.append(hist_linear)

true_clusters_2d_hist_file = 'true_clusters_2d_hist.npy'
np.save(true_clusters_2d_hist_file, true_clusters_2d_hist)

Confirm the file was saved correctly by loading it back and comparing it with the data in memory.

In [None]:
# Reload the file and compare it with the in-memory histograms.
# The saved data is a plain numeric array, so pickle support is not needed;
# leaving `allow_pickle` at its default avoids unpickling on load.
true_clusters_2d_hist_saved = np.load(true_clusters_2d_hist_file)
np.allclose(true_clusters_2d_hist_saved, true_clusters_2d_hist)