In [None]:
import os
from os import path

# Third-party
from astropy.io import fits
from astropy.stats import median_absolute_deviation
from astropy.table import Table, join
from astropy.time import Time
import astropy.units as u
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np
%matplotlib inline
import h5py
import pandas as pd
from sqlalchemy import func
import tqdm
from sklearn.cluster import KMeans

from thejoker import JokerSamples

from twoface.config import TWOFACE_CACHE_PATH
from twoface.samples_analysis import unimodal_P, MAP_sample
from twoface.db import (db_connect, AllStar, AllVisit, AllVisitToAllStar, NessRG,
                        StarResult, Status, JokerRun)
from twoface.plot import plot_two_panel, plot_phase_fold, _RV_LBL
from twoface.mass import get_m2_min, mf

In [None]:
# Output directories for the paper figures and tables
plot_path = '../../paper/1-catalog/figures/'
table_path = '../../paper/1-catalog/tables/'

# Cached HDF5 files holding the per-star samples (looked up by APOGEE ID below)
samples_file = path.join(TWOFACE_CACHE_PATH, 'apogee-jitter.hdf5')
mcmc_samples_file = path.join(TWOFACE_CACHE_PATH, 'apogee-jitter-mcmc.hdf5')

# Open a session on the cached APOGEE database; the second return value
# of db_connect is not used in this notebook.
Session, _ = db_connect(path.join(TWOFACE_CACHE_PATH, 'apogee.sqlite'))
session = Session()

In [None]:
# Take a single JokerRun row and reuse its sampler parameters for all stars
# below (they are passed to MAP_sample when building the catalog rows).
run_query = session.query(JokerRun).limit(1)
run = run_query.one()
joker_pars = run.get_joker_params()

In [None]:
# Stars whose StarResult has status_id 1 or 4 and that are flagged as high-K
high_K_stars = (
    session.query(AllStar)
    .join(StarResult)
    .filter(StarResult.status_id.in_([1, 4]))
    .filter(StarResult.high_K)
    .all()
)
len(high_K_stars)

# Make the catalog:

In [None]:
def is_n_modal(data, samples, n_clusters=2, random_state=42):
    """Check whether the period samples split into ``n_clusters`` unimodal groups.

    The samples are clustered in ln(P) with k-means; every resulting cluster
    is then tested with ``unimodal_P``.

    Parameters
    ----------
    data
        The RV data for the star, passed through to ``unimodal_P`` together
        with each cluster of samples.
    samples
        Posterior samples; must provide ``samples['P'].value`` and support
        boolean-mask indexing.
    n_clusters : int, optional
        Number of k-means clusters to split the ln(P) values into.
    random_state : int or None, optional
        Seed for the k-means initialisation, so that repeated runs of the
        notebook produce the same cluster assignment.

    Returns
    -------
    bool
        ``True`` if every cluster passes ``unimodal_P``, otherwise ``False``.
    """
    # k-means expects a 2D (n_samples, n_features) array
    lnP = np.log(samples['P'].value).reshape(-1, 1)
    labels = KMeans(n_clusters=n_clusters,
                    random_state=random_state).fit_predict(lnP)

    # Use the data passed in by the caller -- the previous version silently
    # replaced it with the RV data of whatever global `star` was defined.
    return all(unimodal_P(samples[labels == j], data)
               for j in np.unique(labels))

In [None]:
# For every high-K star, record the number of samples and whether those
# samples pass the two-cluster test in is_n_modal().
nsamples = []
bimodal = []

with h5py.File(samples_file, 'r') as f:
    for star in tqdm.tqdm(high_K_stars):
        samples = JokerSamples.from_hdf5(f[star.apogee_id])
        n_star_samples = len(samples)
        nsamples.append(n_star_samples)

        if n_star_samples > 1:
            # Only load the RV data when the clustering test is actually run
            bimodal.append(is_n_modal(star.apogeervdata(), samples,
                                      n_clusters=2))
        else:
            # A single sample cannot be split into two clusters
            bimodal.append(False)

nsamples = np.array(nsamples)
bimodal = np.array(bimodal)
bimodal.sum()

Most of these only have a few samples:

In [None]:
# Number of samples per star, restricted to the stars flagged in `bimodal`
fig, ax = plt.subplots(1, 1)
ax.hist(nsamples[bimodal], bins='auto')
ax.set_xlabel('number of samples')
ax.set_ylabel('number of stars');

--- 

# Make paper figure:

In [None]:
# Read the catalog written by the `full_catalog.write(...)` cell further down
# in this notebook. The file name here was previously truncated to 'high',
# which does not match the file that cell writes.
full_catalog = Table.read(path.join(table_path, 'high-K-unimodal.fits'))

In [None]:
# with h5py.File(samples_file, 'r') as f:
#     for star in tqdm.tqdm(np.array(high_K_stars)[bimodal][:10]):
#         samples = JokerSamples.from_hdf5(f[star.apogee_id])
#         data = star.apogeervdata()
        
#         _ = plot_two_panel(data, samples, 
#                            plot_data_orbits_kw=dict(highlight_P_extrema=False))

In [None]:
# Fixed seed so the same stars are drawn for the paper figure on every run
np.random.seed(36)

# rcParams applied only while this figure is being built. (Named `rc_params`
# so it is not confused with the `rc` table loaded later in the notebook.)
rc_params = {
    'axes.labelsize': 18,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14
}

# One row of panels per randomly selected star
n_rows = 4
rand_subset = np.random.choice(np.array(high_K_stars)[bimodal], size=n_rows,
                               replace=False)

# The samples file is opened once for the whole figure rather than once
# per star.
with mpl.rc_context(rc_params), h5py.File(samples_file, 'r') as f:
    gs = GridSpec(n_rows, 3)
    fig = plt.figure(figsize=(8., 9.5))
    for j, star in enumerate(rand_subset):
        # Wide left panel and narrow right panel, both handed to plot_two_panel
        ax1 = fig.add_subplot(gs[j, :2])
        ax2 = fig.add_subplot(gs[j, 2])

        if j == 0:
            ax1.set_title('Mildly multi-modal, high-$K$ stars', fontsize=20)

        data = star.apogeervdata()
        samples = JokerSamples.from_hdf5(f[star.apogee_id])

        fig = plot_two_panel(data, samples, axes=[ax1, ax2], tight=False,
                             plot_data_orbits_kw=dict(n_times=16384,
                                                      n_orbits=128,
                                                      highlight_P_extrema=False,
                                                      xlim_choice='data',
                                                      relative_to_t0=True,
                                                      plot_kwargs=dict(linewidth=0.2,
                                                                       rasterized=True)))

        # Label each row with the APOGEE ID, inset 1/20 of the axis range
        # from the top-left corner
        xlim = ax1.get_xlim()
        ylim = ax1.get_ylim()

        ax1.text(xlim[0] + (xlim[1]-xlim[0])/20,
                 ylim[1] - (ylim[1]-ylim[0])/20,
                 star.apogee_id, fontsize=15, va='top', ha='left')

        # Clear the per-row x labels; only the bottom row is labelled below
        ax1.set_xlabel('')
        ax2.set_xlabel('')

    # After the loop ax1/ax2 refer to the bottom row
    ax1.set_xlabel(r'${\rm BMJD} - t_0$ [day]')
    ax2.set_xlabel('period, $P$ [day]')

    fig.tight_layout()
    fig.subplots_adjust(left=0.125, right=0.95, hspace=0.2, wspace=0.4)

    fig.savefig(path.join(plot_path, 'highK-multimodal.pdf'), dpi=250)

---

## Stars whose samples have a small dispersion or a small peak-to-peak (PTP) range in $\ln P$:

In [None]:
# Per-star summary of the ln(P) samples:
#   column 0 = peak-to-peak range, column 1 = standard deviation
ptp_lnP = []
std_lnP = []
with h5py.File(samples_file, 'r') as f:
    for star in tqdm.tqdm(high_K_stars):
        lnP = np.log(f[star.apogee_id]['P'][:])
        ptp_lnP.append(np.ptp(lnP))
        std_lnP.append(np.std(lnP))
stats = np.array(list(zip(ptp_lnP, std_lnP)))

In [None]:
# Compare the peak-to-peak range of ln(P) (stats column 0) against three
# times its standard deviation (stats column 1) for every high-K star.
fig, ax = plt.subplots(1, 1, figsize=(6,6))
ax.scatter(stats[:, 0], 3*stats[:, 1], alpha=0.25, linewidth=0)
ax.set_xlim(-0.02, 10.5)
ax.set_ylim(-0.02, 10.5)
ax.set_xlabel(r'peak-to-peak $\ln P$')
ax.set_ylabel(r'$3 \times {\rm std}(\ln P)$');

In [None]:
# How many stars flagged in `bimodal` have a peak-to-peak ln(P) range below 3?
np.sum(bimodal & (stats[:, 0] < 3))

In [None]:
# Pick the star to inspect explicitly. Previously every selection was
# commented out, so this cell plotted whatever `star` was left over from the
# last loop that happened to run.
#
# Active selection: small peak-to-peak range and small dispersion in ln(P).
# Alternative selections that were tried:
#   (stats[:, 0] > 8) & (stats[:, 1] < 2), element [0]
#   (stats[:, 0] > 2) & (stats[:, 0] < 4) & (stats[:, 1] < 2), element [4]
inspect_mask = (stats[:, 0] < 2) & (stats[:, 1] < 2)
star = np.array(high_K_stars)[inspect_mask][0]

data = star.apogeervdata()
with h5py.File(samples_file, 'r') as f:
    samples = JokerSamples.from_hdf5(f[star.apogee_id])

_ = plot_two_panel(data, samples, plot_data_orbits_kw=dict(highlight_P_extrema=False))

## Produce catalog table:

In [None]:
# NOTE(review): `unimodal_mask` and `converged_mcmc` are not defined anywhere
# in the visible notebook, so this cell fails on a fresh kernel -- the cells
# that create them need to be restored above.
high_K_array = np.array(high_K_stars)
unimodal_stars = high_K_array[unimodal_mask]
unimodal_converged = converged_mcmc[unimodal_mask]

In [None]:
# Column-oriented container for the catalog. The insertion order below fixes
# the column order of the table built from `rows`.
rows = dict()
rows['APOGEE_ID'] = []
for k in JokerSamples._valid_keys:
    rows[k] = []
    rows[k + '_err'] = []
rows['t0'] = []
rows['emcee_converged'] = []
rows['Gelman-Rubin'] = []

with h5py.File(mcmc_samples_file, 'r') as mcmc_f, h5py.File(samples_file, 'r') as joker_f:
    # Wrap the sequence (not the enumerate object) so tqdm knows the total
    for i, star in enumerate(tqdm.tqdm(unimodal_stars)):
        data = star.apogeervdata()
        if star.apogee_id in mcmc_f:
            # Prefer the MCMC samples, together with their Gelman-Rubin stats
            samples = JokerSamples.from_hdf5(mcmc_f[star.apogee_id])
            R = mcmc_f[star.apogee_id]['chain-stats/gelman_rubin'][:]
        else:
            # No MCMC run stored for this star: fall back to the other file.
            # NOTE(review): the length 7 presumably matches the length of the
            # stored gelman_rubin arrays -- confirm.
            samples = JokerSamples.from_hdf5(joker_f[star.apogee_id])
            R = np.full(7, np.nan)

        rows['APOGEE_ID'].append(star.apogee_id)
        MAP = MAP_sample(data, samples, joker_pars)
        for k in samples.keys():
            rows[k].append(MAP[k])
            # Uncertainty estimate: 1.5 x the median absolute deviation
            rows[k+'_err'].append(1.5 * median_absolute_deviation(samples[k]))

        rows['t0'].append(data.t0.tcb.mjd)
        rows['emcee_converged'].append(unimodal_converged[i])
        rows['Gelman-Rubin'].append(R)

# Collapse lists of Quantity objects into single Quantity arrays. Empty lists
# are skipped so that peeking at the first element cannot raise IndexError.
for k in rows:
    if rows[k] and hasattr(rows[k][0], 'unit'):
        rows[k] = u.Quantity(rows[k])

rows['t0'] = Time(rows['t0'], format='mjd', scale='tcb')

In [None]:
# Assemble the per-star columns collected in `rows` into a single Table
tbl = Table(rows)

### Add Ness masses to table:

In [None]:
# Load the Ness table and key it on APOGEE_ID like our catalog
ness_tbl = Table.read('../../data/NessRG.fits')
ness_tbl.rename_column('2MASS', 'APOGEE_ID')

# Keep only the rows for stars that are in our catalog
in_catalog = np.isin(ness_tbl['APOGEE_ID'], tbl['APOGEE_ID'])
ness_tbl = ness_tbl[in_catalog]

# Drop duplicate IDs, keeping the first occurrence of each
unq_idx = np.unique(ness_tbl['APOGEE_ID'], return_index=True)[1]
ness_tbl = ness_tbl[unq_idx]

# Outer join keeps catalog stars that have no Ness entry; the assert checks
# that the join did not add or drop any rows.
tbl_with_ness = join(tbl, ness_tbl, keys='APOGEE_ID', join_type='outer')
assert len(tbl_with_ness) == len(tbl)

### Compute m2_min using Ness mass

In [None]:
# Seeded generator so the Monte Carlo error propagation is reproducible
rnd = np.random.RandomState(seed=42)
N = rnd.normal

# Everything starts as NaN; only stars with a Ness mass are filled in below
n_stars = len(tbl_with_ness)
mass_ratio = np.full(n_stars, np.nan)
mass_ratio_err = np.full(n_stars, np.nan)
m1 = np.full(n_stars, np.nan) * u.Msun
m1_err = np.full(n_stars, np.nan) * u.Msun
m2_min = np.full(n_stars, np.nan) * u.Msun
m2_min_err = np.full(n_stars, np.nan) * u.Msun

n_samples = 1024
P_unit = tbl_with_ness['P'].unit
K_unit = tbl_with_ness['K'].unit
lnM_missing = tbl_with_ness['lnM'].mask
for i, row in tqdm.tqdm(enumerate(tbl_with_ness)):
    # Stars without a Ness mass keep their NaN placeholders
    if lnM_missing[i]:
        continue

    # Draw order matters for reproducibility: primary mass, then P, K, e.
    # NOTE(review): 'e_logM' is used as the Gaussian width of 'lnM' -- confirm
    # both refer to the same logarithm base. The draws for P, K and e are not
    # clipped to any range.
    m1_samples = np.exp(N(row['lnM'], row['e_logM'], size=n_samples)) * u.Msun
    P_samples = N(row['P'], row['P_err'], n_samples) * P_unit
    K_samples = N(row['K'], row['K_err'], n_samples) * K_unit
    e_samples = N(row['e'], row['e_err'], n_samples)
    mass_func = mf(P=P_samples, K=K_samples, e=e_samples)
    m2_mins = get_m2_min(m1_samples, mass_func)

    # Point estimates are medians; uncertainties are 1.5 x MAD of the draws
    med_m1 = np.median(m1_samples)
    med_m2 = np.median(m2_mins)

    m1[i] = med_m1
    m1_err[i] = 1.5*median_absolute_deviation(m1_samples)
    m2_min[i] = med_m2
    m2_min_err[i] = 1.5*median_absolute_deviation(m2_mins)
    mass_ratio[i] = (med_m2 / med_m1).decompose().value
    mass_ratio_err[i] = 1.5*median_absolute_deviation(m2_mins / m1_samples).decompose().value

mass_columns = [('M1', m1), ('M1_err', m1_err),
                ('M2_min', m2_min), ('M2_min_err', m2_min_err)]
for colname, values in mass_columns:
    tbl_with_ness[colname] = values

# All four mass columns are masked wherever M1 was never filled in
no_m1 = np.isnan(m1.value)
for colname, _values in mass_columns:
    tbl_with_ness[colname].mask = no_m1

### Now we load the APOGEE AllStar table to join the APOGEE data with our orbits:

In [None]:
# Directory holding the APOGEE DR14 files. Set the APOGEE_DR14_PATH
# environment variable to override the original author's local default.
apogee_dr14_path = os.environ.get('APOGEE_DR14_PATH',
                                  '/Users/adrian/data/APOGEE_DR14')

allstar_tbl = fits.getdata(path.join(apogee_dr14_path, 'allStar-l31c.2.fits'))
allstar_tbl = allstar_tbl[np.isin(allstar_tbl['APOGEE_ID'], tbl['APOGEE_ID'])]

# trim the duplicates, keeping the first occurrence of each APOGEE_ID
_, unq_idx = np.unique(allstar_tbl['APOGEE_ID'], return_index=True)
allstar_tbl = allstar_tbl[unq_idx]
# every catalog star must have exactly one allStar row left
assert len(allstar_tbl) == len(tbl)

# Rename the allStar 'K' / 'K_ERR' columns so they do not clash with the 'K'
# column of our catalog when the two tables are joined.
allstar_tbl = Table(allstar_tbl)
allstar_tbl.rename_column('K', 'KS')
allstar_tbl.rename_column('K_ERR', 'KS_ERR')

In [None]:
# Attach the allStar columns to each orbit row, matching on APOGEE_ID
# ('inner' is the default join type), then preview the first row.
full_catalog = join(left=tbl_with_ness, right=allstar_tbl,
                    keys='APOGEE_ID', join_type='inner')
full_catalog[:1]

### Finally, add a binary flag "DR14RC" if in DR14 RC catalog

In [None]:
# Directory holding the APOGEE DR14 files. Set the APOGEE_DR14_PATH
# environment variable to override the original author's local default.
apogee_dr14_path = os.environ.get('APOGEE_DR14_PATH',
                                  '/Users/adrian/data/APOGEE_DR14')

# DR14 RC catalog, used below to flag catalog stars that appear in it
rc = Table.read(path.join(apogee_dr14_path, 'apogee-rc-DR14.fits'))

In [None]:
# True for catalog stars whose APOGEE_ID also appears in the RC table
in_dr14_rc = np.isin(full_catalog['APOGEE_ID'], rc['APOGEE_ID'])
full_catalog['DR14RC'] = in_dr14_rc

TODO: describe in the README distributed with the data that the catalog should be read with `Table.read('', astropy_native=True)`

In [None]:
# Number of rows in the joined catalog
len(full_catalog)

In [None]:
# Replace masked primary masses with NaN before the table is written out
m1_is_masked = full_catalog['M1'].mask
full_catalog['M1'][m1_is_masked] = np.nan

In [None]:
# Write the final catalog next to the paper tables, replacing any old copy
catalog_file = path.join(table_path, 'high-K-unimodal.fits')
full_catalog.write(catalog_file, overwrite=True)

In [None]:
# Print one LaTeX table row (name, unit, description placeholder) for a
# hand-picked subset of the catalog columns.
catalog_colnames = full_catalog.colnames
for name in catalog_colnames[:18] + catalog_colnames[33:37]:
    # Column name in \texttt{}, with underscores escaped, padded to 26 chars
    c1 = '\\texttt{{{0}}}'.format(name.replace('_', '\\_')).ljust(26)

    # Unit in inline LaTeX; left blank when it cannot be formatted
    try:
        c2 = '{0:latex_inline}'.format(full_catalog[name].unit)
    except (TypeError, AttributeError):
        c2 = ''
    c2 = c2.ljust(24)

    print('{0} & {1} & <description> \\\\'.format(c1, c2))

---

In [None]:
# Keep only the rows whose 'emcee_converged' flag is set
converged_flag = full_catalog['emcee_converged']
emcee_converged = full_catalog[converged_flag]

In [None]:
# Directory for the per-star diagnostic plots; created if it does not exist
_path = '../../plots/emcee_converged'
os.makedirs(_path, exist_ok=True)

In [None]:
# Save a two-panel diagnostic plot for every star in `emcee_converged`
with h5py.File(mcmc_samples_file, 'r') as mcmc_f, h5py.File(samples_file, 'r') as f:
    for row in emcee_converged:
        star = AllStar.get_apogee_id(session, row['APOGEE_ID'])
        data = star.apogeervdata()

        # Prefer the MCMC samples when they exist for this star, and report
        # which file the samples came from
        has_mcmc = star.apogee_id in mcmc_f
        samples_source = mcmc_f if has_mcmc else f
        samples = JokerSamples.from_hdf5(samples_source[star.apogee_id])
        print('mcmc' if has_mcmc else 'thejoker')

        samples.t0 = data.t0

        orbits_kw = dict(n_times=16384, highlight_P_extrema=False)
        fig = plot_two_panel(data, samples, plot_data_orbits_kw=orbits_kw)
        fig.axes[0].set_title(star.apogee_id)
        fig.tight_layout()

        out_file = path.join(_path, '{0}.png'.format(star.apogee_id))
        fig.savefig(out_file, dpi=200)
        # close each figure so they do not pile up in memory
        plt.close(fig)

By-eye vetting: the following stars look suspicious:

In [None]:
# APOGEE IDs flagged as suspicious during by-eye vetting; they are excluded
# from the summary plots below.
suspicious_ids = ['2M05224382+4300425',
                  '2M08505498+1156503',
                  '2M10264342+1340172',
                  '2M10513288-0250550',
                  '2M14574438+2106271',
                  '2M16131259+5043080',
                  '2M17121495+3211467',
                  '2M17212080+6003296',
                  '2M18571262-0328064',
                  '2M21260907+1100178',
                  '2M21374395+4304268']

In [None]:
# Converged stars that were not flagged as suspicious
not_suspicious = np.isin(emcee_converged['APOGEE_ID'], suspicious_ids,
                         invert=True)
derp = emcee_converged[not_suspicious]

In [None]:
# Period vs. surface gravity (y axis inverted: logg runs from 4 down to 0)
fig, ax = plt.subplots(1, 1, figsize=(6,6))

ax.errorbar(derp['P'], derp['LOGG'],
            xerr=derp['P_err'], yerr=derp['LOGG_ERR'],
            marker='o', linestyle='none', alpha=0.8)

ax.set_xscale('log')
ax.set_ylim(4., 0)
ax.set_xlabel('P')
ax.set_ylabel('logg')

# -----
# Period vs. eccentricity

fig, ax = plt.subplots(1, 1, figsize=(6,6))

ax.errorbar(derp['P'], derp['e'],
            xerr=derp['P_err'], yerr=derp['e_err'],
            marker='o', linestyle='none', alpha=0.8)

ax.set_xscale('log')
ax.set_ylim(0, 1)
ax.set_xlabel('P')
ax.set_ylabel('e')

# -----
# Primary mass vs. minimum mass ratio, and the mass-ratio histogram

fig, axes = plt.subplots(1, 2, figsize=(10, 5))

mass_ratio = derp['M2_min']/derp['M1']
# First-order error propagation for a quotient q = M2/M1:
#   (sigma_q / q)^2 = (sigma_M1 / M1)^2 + (sigma_M2 / M2)^2
# The previous version added the two absolute mass errors in quadrature,
# which is not the uncertainty of the ratio.
mass_ratio_yerr = np.abs(mass_ratio) * np.sqrt((derp['M1_err']/derp['M1'])**2 +
                                               (derp['M2_min_err']/derp['M2_min'])**2)

ax = axes[0]
ax.errorbar(derp['M1'], mass_ratio,
            xerr=derp['M1_err'], yerr=mass_ratio_yerr,
            marker='o', linestyle='none', alpha=0.8)
ax.set_xlabel('M1')
ax.set_ylabel('M2/M1')

ax = axes[1]
# stars without a mass estimate have a non-finite ratio and are left out
ax.hist(mass_ratio[np.isfinite(mass_ratio)], bins='auto')
ax.set_xlabel('M2/M1')

In [None]:
# `rc_mask` was never defined in this notebook. Use the boolean 'DR14RC'
# column added to the catalog above.
# NOTE(review): confirm this is the selection that was originally intended.
rc_mask = derp['DR14RC']

# Plot the stars flagged DR14RC that have P < 20
with h5py.File(mcmc_samples_file, 'r') as mcmc_f, h5py.File(samples_file, 'r') as f:
    for row in derp[rc_mask & (derp['P'] < 20)]:
        star = AllStar.get_apogee_id(session, row['APOGEE_ID'])
        data = star.apogeervdata()

        # Prefer the MCMC samples when they exist for this star
        if star.apogee_id in mcmc_f:
            samples = JokerSamples.from_hdf5(mcmc_f[star.apogee_id])
            print('mcmc')
        else:
            samples = JokerSamples.from_hdf5(f[star.apogee_id])
            print('thejoker')

        samples.t0 = data.t0

        fig = plot_two_panel(data, samples,
                             plot_data_orbits_kw=dict(n_times=16384,
                                                      highlight_P_extrema=False))
        # The title shows the period of the first stored sample
        fig.axes[0].set_title('P = {0:.2f}'.format(samples['P'][0]))
        fig.tight_layout()

In [None]:
# Show the rows flagged DR14RC with P < 20. The 'DR14RC' column is used
# directly because `rc_mask` is not defined by any cell above.
# NOTE(review): confirm this is the selection that was originally intended.
derp[derp['DR14RC'] & (derp['P'] < 20)]