In [None]:
import os
# Cache locations are exported as environment variables before any project
# import below. NOTE(review): presumably the joaquin package reads these at
# import/config time -- confirm. The absolute paths are machine-specific.
os.environ['APOGEE_CACHE_PATH'] = "/mnt/ceph/users/apricewhelan/apogee-test/"
os.environ['JOAQUIN_CACHE_PATH'] = "/mnt/ceph/users/apricewhelan/projects/joaquin/cache"
import warnings
# `Warning` is the base class of every warning category, so this filter
# silences all warnings for the rest of the session.
warnings.filterwarnings('ignore', category=Warning) 
import pickle

import sys
import pathlib
# Append the resolved parent directory to sys.path (only once).
# NOTE(review): presumably this is what makes the `joaquin` imports below
# resolvable without installing the package -- confirm.
_path = str(pathlib.Path('../').resolve())
if _path not in sys.path:
    sys.path.append(_path)

import corner
from astropy.io import fits
import astropy.coordinates as coord
import astropy.table as at
import astropy.units as u
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from tqdm.auto import tqdm
from sklearn.neighbors import KernelDensity
from scipy.spatial import cKDTree

from joaquin.data import JoaquinData
from joaquin.config import root_cache_path, zone1_K, zone2_K
from joaquin.plot import simple_corner
from joaquin.neighborhoods import get_neighborhood_X

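See `PCA-neighbord-training.ipynb` and `PCA-neighbord-apply.ipynb` first: this notebook loads the cached files `parent_projected_X.npy` and `neighborhoods_projected_X.npy`, which must already exist.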

In [None]:
# Read the two cached projection arrays from the project cache directory.
_parent_X_file = root_cache_path / 'parent_projected_X.npy'
_zones_X_file = root_cache_path / 'neighborhoods_projected_X.npy'

parent_projected_X = np.load(_parent_X_file)
zones_X = np.load(_zones_X_file)

In [None]:
# Open the cached parent sample, then keep only the rows selected by the
# sample's own `stars_mask`.
parent_data = JoaquinData(cache_file='parent-sample')
_all_stars = parent_data.stars
parent_stars = _all_stars[parent_data.stars_mask]

In [None]:
# Display the shapes of the two projected arrays as the cell output.
parent_projected_X.shape, zones_X.shape

In [None]:
# Build a k-d tree over the rows of parent_projected_X; the cells below query
# it with rows of zones_X to find each row's nearest parent-sample neighbours.
tree = cKDTree(parent_projected_X)

In [None]:
# For every row of zones_X, look up its zone2_K nearest neighbours in the
# parent-sample tree and store their indices ordered by increasing distance.
zone_idx = []
for zone_X in zones_X:
    neighbor_dist, neighbor_idx = tree.query(zone_X, k=zone2_K)
    nearest_first = neighbor_dist.argsort()
    zone_idx.append(neighbor_idx[nearest_first])

### Plot HR diagram colored by zone

In [None]:
from scipy.stats import binned_statistic_2d

In [None]:
# One label slot per parent-sample star; NaN means "not assigned to any zone".
colors = np.full(len(parent_stars), np.nan)

# Make the zones disjoint: a star that appears in several neighbourhoods is
# kept only by the first zone (in `zone_idx` order) that contains it.
#
# A boolean "already claimed" mask gives the same partition as testing each
# zone against the concatenation of all previous zones, without rebuilding
# that concatenation on every iteration.
claimed = np.zeros(colors.shape, dtype=bool)
zone_idx_unq = []
for star_idx in zone_idx:
    zone_idx_unq.append(star_idx[~claimed[star_idx]])
    claimed[star_idx] = True

# Give each zone an integer label drawn from a fixed-seed shuffle of
# 0..n_zones-1 (presumably so that consecutive zones do not get consecutive
# colours in the plot). The global NumPy seed is set here on purpose so the
# labelling is reproducible.
zone_labels = np.arange(len(zone_idx_unq))
np.random.seed(42)
np.random.shuffle(zone_labels)
for label, star_idx in zip(zone_labels, zone_idx_unq):
    colors[star_idx] = label

In [None]:
# Stars that were never assigned a zone label are NaN in `colors`; drop them.
mask = np.isfinite(colors)
_teff = parent_stars['TEFF'][mask]
_labels = colors[mask]

# Both panels share the same TEFF bin edges; only the second axis differs.
# NOTE(review): binned_statistic_2d defaults to statistic='mean', so a bin
# holding stars from more than one zone gets the average of their labels.
_teff_edges = np.linspace(3000, 8500, 256)

# TEFF vs. LOGG
bins = (_teff_edges, np.linspace(0, 5.5, 256))
stat1 = binned_statistic_2d(
    _teff, parent_stars['LOGG'][mask], values=_labels, bins=bins)

# TEFF vs. M_H
bins = (_teff_edges, np.linspace(-2.5, 0.5, 256))
stat2 = binned_statistic_2d(
    _teff, parent_stars['M_H'][mask], values=_labels, bins=bins)

In [None]:
# Two panels showing the binned zone labels: TEFF vs. LOGG (left) and
# TEFF vs. M_H (right).
fig, axes = plt.subplots(1, 2, figsize=(12, 6))

for stat, ax in zip([stat1, stat2], axes):
    # `statistic` is indexed (x_bin, y_bin); pcolormesh expects (row=y, col=x),
    # hence the transpose.
    ax.pcolormesh(
        stat.x_edge, stat.y_edge,
        stat.statistic.T,
        cmap='Set2')
    # Axis labels use the catalogue column names that were binned, so the
    # figure can be read without the code that produced it.
    ax.set_xlabel('TEFF')

# Limits are given high-to-low where the axis should be drawn inverted.
axes[0].set_xlim(8500, 3000)
axes[0].set_ylim(5.5, 0)
axes[0].set_ylabel('LOGG')

axes[1].set_xlim(8500, 3000)
axes[1].set_ylim(-2.5, 0.5)
axes[1].set_ylabel('M_H')

fig.tight_layout()

### What fraction of the parent sample stars end up in a zone 1 vs zone 2?

In [None]:
# Indices of the zone1_K nearest parent-sample neighbours of every row of
# zones_X, one entry per row.
all_indices = [tree.query(zone_X, k=zone1_K)[1] for zone_X in zones_X]

# np.unique flattens its input, so this counts the distinct stars that fall
# in at least one neighbourhood.
n_distinct = np.unique(all_indices).shape[0]
n_parent = parent_projected_X.shape[0]
tmp = n_distinct / n_parent
print(f"{tmp*100:.1f}% of stars end up in a zone 1")

In [None]:
# Indices of the zone2_K nearest parent-sample neighbours of every row of
# zones_X, one entry per row.
all_indices = [tree.query(zone_X, k=zone2_K)[1] for zone_X in zones_X]

# np.unique flattens its input, so this counts the distinct stars that fall
# in at least one neighbourhood.
n_distinct = np.unique(all_indices).shape[0]
n_parent = parent_projected_X.shape[0]
tmp = n_distinct / n_parent
print(f"{tmp*100:.1f}% of stars end up in a zone 2")