In [None]:
import os

from astropy.io import fits
import astropy.table as at
from astropy.timeseries import BoxLeastSquares
from astropy.constants import G
import astropy.coordinates as coord
import astropy.units as u
import h5py
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import requests
from tqdm.notebook import tqdm

import thejoker as tj
from hq.data import get_rvdata

In [None]:
# Catalog of binary candidates with metadata, read as a plain astropy Table.
binaries = at.Table.read(
    '../catalogs/lnK0.0_logL4.6_metadata_gaia_starhorse.fits'
)

# "Gold sample" catalog: read it, replace masked entries with their fill
# values, and convert the result to a QTable.
gold = at.Table.read('../catalogs/gold_sample.fits')
gold = at.QTable(gold.filled())

In [None]:
# Per-visit APOGEE table, used later to look up the visits of each APOGEE_ID.
# NOTE(review): hard-coded absolute path on a specific machine -- consider a
# configurable data directory.
allvisit = fits.getdata('/mnt/home/apricewhelan/data/APOGEE_DR16/allVisit-r12-l33.fits')

In [None]:
# Kepler and K2 cross-match tables (file names suggest a Gaia DR2 match with a
# 4 arcsec radius -- TODO confirm).
kepler = at.Table.read('/mnt/home/apricewhelan/data/Gaia-Kepler/kepler_dr2_4arcsec.fits')
k2 = at.Table.read('/mnt/home/apricewhelan/data/Gaia-Kepler/k2_dr2_4arcsec.fits')

# Keep only rows whose match distance column is below 1 (presumably arcsec).
kepler, k2 = (
    tbl[tbl[dist_col] < 1.]
    for tbl, dist_col in ((kepler, 'kepler_gaia_ang_dist'),
                          (k2, 'k2_gaia_ang_dist'))
)

In [None]:
# Magnitude difference between Gaia G and the Kepler band (Kp) as a function
# of effective temperature, for the Kepler (left) and K2 (right) cross-matches.
fig, axes = plt.subplots(1, 2, figsize=(12, 5),
                         sharex=True, sharey=True)

ax = axes[0]
# dmag is G - Kp (Gaia minus Kepler magnitude); the axis label must use the
# same sign convention because the y-limits and guide lines are asymmetric.
dmag = kepler['phot_g_mean_mag'] - kepler['kepmag']
ax.plot(kepler['teff'], dmag,
        ls='none', marker='o',
        mew=0, ms=1.5, alpha=0.2)
ax.set_xlabel(r'$T_{\rm eff}$')
ax.set_ylabel(r'$G - {\rm Kp}$ [mag]')
# Axes are shared, so these limits apply to both panels.
ax.set_xlim(2000, 12000)
ax.set_ylim(-3, 10)

ax = axes[1]
dmag = k2['phot_g_mean_mag'] - k2['k2_kepmag']
ax.plot(k2['k2_teff'], dmag,
        ls='none', marker='o',
        mew=0, ms=1.5, alpha=0.2)
ax.set_xlabel(r'$T_{\rm eff}$')
ax.set_ylabel(r'K2 $G - {\rm Kp}$ [mag]')

# Guide lines at +/- 0.5 mag on both panels.
for ax in axes:
    ax.axhline(-0.5)
    ax.axhline(0.5)

fig.set_facecolor('w')

In [None]:
# Cross-match separation versus absolute G - Kp magnitude difference, for the
# Kepler (left) and K2 (right) cross-matches.
fig, axes = plt.subplots(1, 2, figsize=(12, 5),
                         sharex=True, sharey=True)

ax = axes[0]
dmag = kepler['phot_g_mean_mag'] - kepler['kepmag']
# The absolute value is plotted, so the label is |G - Kp| rather than a
# signed difference.
ax.plot(np.abs(dmag), kepler['kepler_gaia_ang_dist'],
        ls='none', marker='o', mew=0, ms=1.5,
        alpha=0.25)
ax.set_xlabel(r'$|G - {\rm Kp}|$ [mag]')
ax.set_ylabel('Kepler–Gaia sep. [arcsec]')

ax = axes[1]
dmag = k2['phot_g_mean_mag'] - k2['k2_kepmag']
ax.plot(np.abs(dmag), k2['k2_gaia_ang_dist'],
        ls='none', marker='o', mew=0, ms=1.5,
        alpha=0.25)
ax.set_xlabel(r'$|G - {\rm Kp}|$ [mag]')
ax.set_ylabel('K2–Gaia sep. [arcsec]')

# Mark a 1.5 mag difference on both panels.
for ax in axes:
    ax.axvline(1.5, color='tab:red')

fig.set_facecolor('w')
fig.tight_layout()

In [None]:
# Build a single Kepler+K2 "master" table keyed on Gaia source_id.

# Keep rows whose |G - Kp| magnitude difference is small.
# NOTE(review): the Kepler cut is 1.0 mag while the K2 cut (and the red guide
# line drawn in the separation plot) is 1.5 mag -- confirm this is intended.
tmp_kepler = kepler[np.abs(kepler['phot_g_mean_mag'] - kepler['kepmag']) < 1.]
tmp_k2 = k2[np.abs(k2['phot_g_mean_mag'] - k2['k2_kepmag']) < 1.5]
# Give the K2 designation column the same name as in the Kepler table so the
# two columns line up when stacked (in-place rename on the filtered K2 table).
tmp_k2.rename_column('tm_name', 'tm_designation')

master = at.vstack((tmp_kepler, tmp_k2))
# Cast designations to str and drop rows equal to 'N/A' (presumably rows with
# no designation; 'N/A' is numpy's default string fill value -- TODO confirm).
master['tm_designation'] = master['tm_designation'].astype(str)
master = master[master['tm_designation'] != 'N/A']

# De-duplicate: keep the first occurrence of each source_id (rows end up
# ordered by source_id because np.unique returns sorted unique values).
_, idx = np.unique(master['source_id'], return_index=True)
master = master[idx]

In [None]:
# Match the gold sample to the Kepler/K2 master table on Gaia source_id.
# Column names present in both tables keep their name for `gold` and get a
# '2' suffix for `master`.
gold_master = at.join(
    gold, master,
    keys='source_id',
    table_names=['', '2'],
    uniq_col_name='{col_name}{table_name}',
)

# Keep rows where n_visits - NVISITS is greater than -3.
visit_diff_ok = (gold_master['n_visits'] - gold_master['NVISITS']) > -3
gold_master = gold_master[visit_diff_ok]
del visit_diff_ok

## Known EBs:

In [None]:
# Read the eclipsing-binary catalog CSV (header on line index 7, comma
# separated) and count how many of its KIC ids appear in gold_master.
kebs = at.Table.read(
    '/mnt/home/apricewhelan/data/Gaia-Kepler/Kirk2016-Kepler-EBs.csv',
    format='ascii.commented_header',
    delimiter=',',
    header_start=7,
)
np.isin(kebs['KIC'], gold_master['kepid']).sum()

In [None]:
# Match the full binaries catalog to the Kepler/K2 master table on source_id
# (duplicate column names from `master` get a '2' suffix).
binaries_master = at.join(
    binaries, master,
    keys='source_id',
    table_names=['', '2'],
    uniq_col_name='{col_name}{table_name}',
)
# Keep sources with at least 5 visits and convert to a QTable.
binaries_master = binaries_master[binaries_master['n_visits'] >= 5]
binaries_master = at.QTable(binaries_master)

In [None]:
# Number of rows in the gold-sample match and in the full-catalog match.
len(gold_master), len(binaries_master)

In [None]:
# Upper limit on the MAP orbital period used to select the samples below.
P_threshold = 1000*u.day

In [None]:
# Gold-sample systems that have a K2 campaign entry (column not masked) and a
# MAP period below the threshold.
k2_mask = (
    ~gold_master['k2_campaign_str'].mask
    & (gold_master['MAP_P'] < P_threshold)
)
k2_sample = gold_master[k2_mask]
k2_sample['epic_number', 'MAP_P', 'MAP_e']

In [None]:
# Gold-sample systems that have a Kepler id (column not masked) and a MAP
# period below the threshold.
kep_mask = (
    ~gold_master['kepid'].mask
    & (gold_master['MAP_P'] < P_threshold)
)
kep_sample = gold_master[kep_mask]
kep_sample['kepid', 'MAP_P', 'MAP_e']

In [None]:
# kep_sample['m2_min_50'][kep_sample['m2_min_50'].argsort()].to(u.Mjup)
# Display period and eccentricity of the Kepler sample, ordered by
# increasing MAP eccentricity.
kep_sample[kep_sample['MAP_e'].argsort()]['MAP_P', 'MAP_e']

In [None]:
# Sources from the full binaries catalog that have a Kepler id and are NOT
# already part of the gold-sample match.
full_kep_mask = ~binaries_master['kepid'].mask
full_kep_sample = binaries_master[
    full_kep_mask
    & np.isin(binaries_master['APOGEE_ID'], gold_master['APOGEE_ID'],
              invert=True)
]
len(full_kep_sample)

--- 

## Download Kepler/K2 light curves

In [None]:
import lightkurve as lk
from astropy.time import Time

In [None]:
def get_transit_period(lc, rv_period=None):
    """Find the strongest Box Least Squares (BLS) period in a light curve.

    Parameters
    ----------
    lc : light-curve object
        Must expose ``astropy_time`` (an astropy ``Time``) and ``flux``.
    rv_period : `~astropy.units.Quantity`, optional
        If given, the period search is restricted to a log-uniform grid
        spanning ``exp(-1)`` to ``exp(+1)`` times this period (in days).
        Otherwise a log-uniform grid from 1.5 to 200 days is used.

    Returns
    -------
    transit_time : `~astropy.time.Time`
        Mid-transit time of the highest BLS peak (JD, TCB scale).
    period : float
        Period of the highest BLS peak, in days.
    """
    # Times as TCB Julian dates; flux as relative deviation from the median,
    # in parts per thousand.
    x = lc.astropy_time.tcb.jd
    y = lc.flux
    y = (y / np.nanmedian(y) - 1) * 1e3

    # Work relative to the first valid timestamp. nanmin is used so a single
    # NaN timestamp cannot turn every time into NaN.
    x_ref = np.nanmin(x)
    x = x - x_ref

    # BLS cannot handle non-finite samples: require finite time AND flux.
    m = np.isfinite(x) & np.isfinite(y)
    bls = BoxLeastSquares(x[m], y[m])

    if rv_period is None:
        period_grid = np.exp(np.linspace(np.log(1.5), np.log(200), 10000))
    else:
        logP = np.log(rv_period.to_value(u.day))
        period_grid = np.exp(np.linspace(logP-1, logP+1, 10000))

    # Fixed trial transit duration of 0.1 (same units as the time axis: days).
    bls_power = bls.power(period_grid, 0.1, oversample=10)

    # Keep the highest peak of the BLS periodogram.
    index = np.argmax(bls_power.power)
    bls_period = bls_power.period[index]
    bls_t0 = bls_power.transit_time[index]

    return Time(bls_t0 + x_ref, format='jd', scale='tcb'), bls_period

In [None]:
# Number of gold-sample targets with Kepler and with K2 data, respectively.
len(kep_sample), len(k2_sample)

In [None]:
# row = kep_sample[5]
# Pick the Kepler-sample row that sorts last in MAP eccentricity
# (i.e. the highest-eccentricity system).
row = kep_sample[kep_sample['MAP_e'].argsort()[-1]]

In [None]:
# Show the identifying and orbital columns of the selected row.
row['APOGEE_ID', 'kepid', 'm2_min_50', 'MAP_P', 'MAP_e']

In [None]:
# tpf = lk.search_targetpixelfile(f"EPIC {row['epic_number']}", 
#                                 campaign=int(row['k2_campaign_str']), 
#                                 cadence='short').download()
# tpf = lk.search_targetpixelfile(f"KIC {row['kepid']}").download_all()

In [None]:
# Search for and download every Kepler light-curve file for the selected
# target, then stitch the PDCSAP flux of all files into one light curve.
lcfs = (
    lk.search_lightcurvefile(f"KIC {row['kepid']}", mission='Kepler')
    .download_all()
)
stitched_lc = lcfs.PDCSAP_FLUX.stitch()

In [None]:
# BLS search restricted to periods around the MAP RV period of this system.
transit_time, transit_P = get_transit_period(stitched_lc, row['MAP_P'])

In [None]:
# Phase-fold the stitched light curve on the BLS period (phase in [-0.5, 0.5),
# zero at the BLS transit time) and colour points by observation time.
fig, ax = plt.subplots(1, 1, figsize=(12, 6))

phase = (stitched_lc.astropy_time - transit_time).jd / transit_P
phase = (phase + 0.5) % 1. - 0.5

cc = ax.scatter(phase, stitched_lc.flux,
                c=stitched_lc.time,
                s=1, alpha=0.2, lw=1)
cb = fig.colorbar(cc)
cb.set_label('Kepler time', fontsize=14)

ax.set_xlim(-0.5, 0.5)
ax.set_xlabel('Phase')
ax.set_ylabel('Normalized Flux')

# Title: target id, MAP period and eccentricity, and the m2_min_50 value in
# its native unit and in Jupiter masses.
ax.set_title(
    "".join([
        f"KIC {row['kepid']};  ",
        f"P={row['MAP_P']:.2f};  ",
        f"e={row['MAP_e']:.2f};  ",
        f"{row['m2_min_50']:.2f};  ",
        f"{row['m2_min_50'].to(u.Mjup):.1f};  ",
    ]),
    fontsize=14,
)

fig.set_facecolor('w')
fig.tight_layout()

## Make all plots:

In [None]:
from scipy.stats import binned_statistic

In [None]:
def plot_folded(lc, rv_data, P, t0,
                kepid, row, fold_text=''):
    """Plot a phase-folded light curve above the phase-folded radial velocities.

    Parameters
    ----------
    lc : light-curve object
        Must expose ``astropy_time``, ``time`` and ``flux``.
    rv_data : RV data object
        Must expose ``t`` (times), ``rv`` and ``rv_err`` (velocity quantities).
    P : `~astropy.units.Quantity`
        Period to fold both data sets on (convertible to days).
    t0 : `~astropy.time.Time`
        Reference epoch that defines phase zero.
    kepid : str
        Target label shown in the title.
    row : table row
        Must provide 'APOGEE_ID', 'MAP_P', 'MAP_e', 'm2_min_50', 'parallax',
        'M_H', 'TEFF', 'LOGG', 'MAP_v0' and 'MAP_K'.
    fold_text : str, optional
        Annotation placed in the upper-right corner of the light-curve panel.

    Returns
    -------
    fig, axes
        The figure and its two axes (light curve, radial velocity).
    """
    fig, axes = plt.subplots(2, 1, figsize=(12, 9), sharex=True)

    period_day = P.to_value(u.day)

    # --- Light curve panel ---
    ax = axes[0]
    phase = ((lc.astropy_time - t0).jd / period_day + 0.5) % 1. - 0.5

    # Loop-invariant pieces of the binned mean curve.
    finite = np.isfinite(lc.flux)
    phase_bins = np.arange(-0.75, 0.75+1e-3, 0.02)

    # Draw the data three times, shifted by one cycle, so the curve wraps
    # continuously across the +/-0.5 phase boundary.
    for i in [-1, 0, 1]:
        ax.scatter(phase + i, lc.flux,
                   s=1, alpha=0.2, lw=1,
                   c=lc.time)

        stat = binned_statistic(phase[finite] + i, lc.flux[finite],
                                statistic='mean', bins=phase_bins)
        ctr = 0.5 * (stat.bin_edges[:-1] + stat.bin_edges[1:])
        ax.plot(ctr, stat.statistic,
                marker='o', color='k', ls='-', alpha=0.5,
                zorder=100, mew=0, ms=3, drawstyle='steps-mid')

    ax.axhline(1, marker='', zorder=100, color='w', alpha=0.4)
    ax.set_xlim(-0.75, 0.75)
    ax.set_ylabel('Normalized Flux')

    ax.set_title(f"{kepid}    {row['APOGEE_ID']}",
                 fontsize=18)

    # Y-limits from the flux percentiles of THIS light curve (the `lc`
    # argument, not a notebook-level global), padded by half the range.
    ylim = np.nanpercentile(lc.flux, [0.01, 99.])
    yptp = ylim[1] - ylim[0]
    ylim = (ylim[0] - 0.5*yptp, ylim[1] + 0.5*yptp)
    ax.set_ylim(ylim)

    yspan = ylim[1] - ylim[0]
    ax.text(0.7, ylim[1] - yspan/20, fold_text,
            ha='right', va='top', color='#777777')

    # Raw f-string where LaTeX backslashes appear, to avoid invalid escape
    # sequences ("\," and "\s"); the resulting text is unchanged.
    info_text = (f"$P={row['MAP_P'].value:.2f}$ {u.day:latex_inline}\n" +
                 f"$e={row['MAP_e']:.2f}$\n" +
                 rf"$M_2\,\sin i={row['m2_min_50'].value:.2f}$ {u.Msun:latex_inline}$=" +
                 f"{row['m2_min_50'].to_value(u.Mjup):.1f}$ {u.Mjup:latex_inline}\n" +
                 r"$\varpi " + f"= {row['parallax']:.2f}$")
    ax.text(-0.7, ylim[1] - yspan/20, info_text,
            ha='left', va='top', color='#222222',
            fontsize=14, bbox=dict(facecolor='w', alpha=0.7))

    info_text = (r"$[{\rm M}/{\rm H}] = " + f"{row['M_H']:.2f}$\n" +
                 r"$T_{\rm eff}=" + f"{row['TEFF']:.0f}$\n" +
                 r"$\log g = " + f"{row['LOGG']:.2f}$")
    ax.text(-0.7, ylim[0] + yspan/20, info_text,
            ha='left', va='bottom', color='#222222',
            fontsize=14, bbox=dict(facecolor='w', alpha=0.7))

    # --- Radial velocity panel ---
    ax = axes[1]
    phase = ((rv_data.t - t0).jd / period_day + 0.5) % 1. - 0.5
    ax.errorbar(phase, (rv_data.rv - row['MAP_v0']).to_value(u.km/u.s),
                yerr=rv_data.rv_err.to_value(u.km/u.s),
                marker='o', ls='none')
    ax.axhline(0, zorder=-100, color='#cccccc')

    ax.set_xlabel('Phase')
    ax.set_ylabel(f'$v_r - v_0$ [{u.km/u.s:latex_inline}]')
    # Symmetric velocity range of +/- 1.5 times the MAP semi-amplitude.
    K = row['MAP_K'].to_value(u.km/u.s)
    ax.set_ylim(-1.5*K, 1.5*K)

    fig.set_facecolor('w')
    fig.tight_layout()
    return fig, axes

In [None]:
# Output directory for the per-target figures; created if it does not exist.
plot_path = '../plots/kepler/'
os.makedirs(plot_path, exist_ok=True)

In [None]:
# Make one folded light-curve + RV figure per Kepler target in the gold sample.
for row in kep_sample:
    plot_filename = os.path.join(plot_path, f"kepler-lc-{row['APOGEE_ID']}.png")

#     if os.path.exists(plot_filename):
#         continue

    lcfs = lk.search_lightcurvefile(f"KIC {row['kepid']}", mission='Kepler').download_all()
    if lcfs is None:
        # lightkurve's download_all() returns None for an empty search
        # result; skip instead of failing with AttributeError below.
        print(f"No light curve found for {row['APOGEE_ID']}")
        continue

    try:
        stitched_lc = lcfs.PDCSAP_FLUX.stitch()
    except ValueError:
        print(f"Weirdo light curve for {row['APOGEE_ID']}")
        # stitched_lc = lcfs[0].PDCSAP_FLUX.stitch()
        continue

    # APOGEE visits of this source: needed for the RV data and, when folding
    # on the RV period, for the reference epoch.
    visits = allvisit[allvisit['APOGEE_ID'] == row['APOGEE_ID']]

    transit_time, transit_P = get_transit_period(stitched_lc, row['MAP_P'])
    frac_dP = np.abs(transit_P-row['MAP_P'].value) / row['MAP_P'].value

    if frac_dP < 0.1:  # use transit P
        # BLS period agrees with the RV period to within 10%.
        fold_text = 'folded on transit period'
        P = transit_P * u.day
        t0 = transit_time

    else:  # use RV P
        fold_text = 'folded on RV period'
        P = row['MAP_P']
        # Epoch: earliest visit JD plus the fraction of a period given by the
        # MAP M0 phase angle.
        t0 = Time(visits['JD'].min() + row['MAP_M0'].to_value(u.radian)/(2*np.pi) * P.to_value(u.day),
                  format='jd', scale='tcb')

    rv_data = get_rvdata(visits)
    fig, axes = plot_folded(stitched_lc, rv_data=rv_data,
                            P=P, t0=t0,
                            kepid=f"KIC {row['kepid']}", row=row,
                            fold_text=fold_text)

    fig.savefig(plot_filename, dpi=250)
    # Close each figure so memory does not grow over the loop.
    plt.close(fig)

In [None]:
# for row in k2_sample:
#     plot_filename = os.path.join(plot_path, f"k2-lc-{row['APOGEE_ID']}.png")
# #     if os.path.exists(plot_filename):
# #         continue
    
#     lcfs = lk.search_lightcurvefile(f"EPIC {row['epic_number']}", mission='K2').download_all()
#     try:
#         stitched_lc = lcfs.PDCSAP_FLUX.stitch()
#     except ValueError:
#         print(f"Weirdo light curve for {row['APOGEE_ID']}")
#         # stitched_lc = lcfs[0].PDCSAP_FLUX.stitch()
#         continue
    
#     transit_time, transit_P = get_transit_period(stitched_lc, row['MAP_P'])
#     frac_dP = np.abs(transit_P-row['MAP_P'].value) / row['MAP_P'].value
    
#     if frac_dP < 0.1:  # use transit P
#         fold_text = 'folded on transit period'
#         P = transit_P * u.day
#         t0 = transit_time

#     else:  # use RV P
#         fold_text = 'folded on RV period'
#         visits = allvisit[allvisit['APOGEE_ID'] == row['APOGEE_ID']]
#         P = row['MAP_P']
#         t0 = Time(visits['JD'].min() + row['MAP_M0'].to_value(u.radian)/(2*np.pi) * P.to_value(u.day),
#                   format='jd', scale='tcb')
    
#     visits = allvisit[allvisit['APOGEE_ID'] == row['APOGEE_ID']]
#     rv_data = get_rvdata(visits)
#     fig, ax = plot_folded(stitched_lc, rv_data=rv_data, 
#                           P=P, t0=t0,
#                           kepid=f"EPIC {row['epic_number']}", row=row,
#                           fold_text=fold_text)
    
#     fig.savefig(plot_filename, dpi=250)
#     plt.close(fig)

## From full catalog, find binaries with support at P < 100 days:

In [None]:
import tables as tb
import exoplanet as xo

In [None]:
def get_lomb_period(lc):
    """Return the period of the strongest Lomb-Scargle peak of a light curve.

    The search covers periods from 2 to 100 (in days, since times are Julian
    dates) using ``exoplanet``'s ``lomb_scargle_estimator``.

    Parameters
    ----------
    lc : light-curve object
        Must expose ``astropy_time`` (an astropy ``Time``) and ``flux``.

    Returns
    -------
    period : float
        Period of the highest periodogram peak.

    Raises
    ------
    IndexError
        If the estimator reports no peaks.
    """
    # Times as TCB Julian dates; flux as relative deviation from the median,
    # in parts per thousand.
    x = lc.astropy_time.tcb.jd
    y = lc.flux
    y = (y / np.nanmedian(y) - 1) * 1e3

    # Measure time from the first valid timestamp; nanmin keeps a single NaN
    # timestamp from turning every time into NaN.
    x = x - np.nanmin(x)

    # Drop samples with a non-finite time or flux.
    m = np.isfinite(x) & np.isfinite(y)
    results = xo.estimators.lomb_scargle_estimator(
        x[m], y[m],
        max_peaks=1, min_period=2., max_period=100.0, samples_per_peak=50
    )

    return results["peaks"][0]["period"]

In [None]:
def plot_folded_multi(lc, rv_data, Ps, t0,
                      kepid, row, labels):
    """Plot a light curve folded on several trial periods, plus the RV data.

    One light-curve panel is drawn per entry of ``Ps``; a final panel shows
    the radial velocities.

    Parameters
    ----------
    lc : light-curve object
        Must expose ``astropy_time``, ``time`` and ``flux``.
    rv_data : RV data object
        Must expose ``t`` (times), ``rv`` and ``rv_err`` (velocity quantities).
    Ps : sequence of `~astropy.units.Quantity`
        Trial periods (convertible to days); must not be empty.
    t0 : `~astropy.time.Time`
        Reference epoch that defines phase zero in every panel.
    kepid : str
        Target label shown in the title.
    row : table row
        Must provide 'APOGEE_ID', 'MAP_P', 'MAP_e', 'm2_min_50', 'parallax',
        'M_H', 'TEFF', 'LOGG', 'MAP_v0' and 'MAP_K'.
    labels : sequence of str
        One annotation per entry of ``Ps``.

    Returns
    -------
    fig, axes
        The figure and its ``len(Ps) + 1`` axes.
    """
    if len(Ps) == 0:
        raise ValueError("Ps must contain at least one period")

    fig, axes = plt.subplots(1 + len(Ps), 1, figsize=(12, 12), sharex=True)

    # Y-limits from the flux percentiles of THIS light curve (the `lc`
    # argument, not a notebook-level global), padded by half the range.
    # They are the same for every panel, so compute them once.
    ylim = np.nanpercentile(lc.flux, [0.01, 99.])
    yptp = ylim[1] - ylim[0]
    ylim = (ylim[0] - 0.5*yptp, ylim[1] + 0.5*yptp)
    yspan = ylim[1] - ylim[0]

    for n, P in enumerate(Ps):
        ax = axes[n]
        phase = ((lc.astropy_time - t0).jd / P.to_value(u.day) + 0.5) % 1. - 0.5

        # Repeat the data shifted by one cycle on each side so the curve
        # wraps continuously across the +/-0.5 phase boundary.
        for i in [-1, 0, 1]:
            ax.scatter(phase + i, lc.flux,
                       s=1, alpha=0.2, lw=1,
                       c=lc.time)

        ax.axhline(1, marker='', zorder=100, color='w', alpha=0.4)

        ax.set_xlim(-0.75, 0.75)
        ax.set_ylabel('Normalized Flux', fontsize=12)
        ax.set_ylim(ylim)

        ax.text(0.7, ylim[1] - yspan/20, labels[n],
                ha='right', va='top', color='#777777')

        if n == 0:
            ax.set_title(f"{kepid}    {row['APOGEE_ID']}",
                         fontsize=18)

            # Raw f-string where LaTeX backslashes appear, to avoid invalid
            # escape sequences ("\," and "\s"); the text is unchanged.
            info_text = (f"$P={row['MAP_P'].value:.2f}$ {u.day:latex_inline}\n" +
                         f"$e={row['MAP_e']:.2f}$\n" +
                         rf"$M_2\,\sin i={row['m2_min_50'].value:.2f}$ {u.Msun:latex_inline}$=" +
                         f"{row['m2_min_50'].to_value(u.Mjup):.1f}$ {u.Mjup:latex_inline}\n" +
                         r"$\varpi " + f"= {row['parallax']:.2f}$")
            ax.text(-0.7, ylim[1] - yspan/20, info_text,
                    ha='left', va='top', color='#222222',
                    fontsize=12, bbox=dict(facecolor='w', alpha=0.7))

            info_text = (r"$[{\rm M}/{\rm H}] = " + f"{row['M_H']:.2f}$\n" +
                         r"$T_{\rm eff}=" + f"{row['TEFF']:.0f}$\n" +
                         r"$\log g = " + f"{row['LOGG']:.2f}$")
            ax.text(-0.7, ylim[0] + yspan/20, info_text,
                    ha='left', va='bottom', color='#222222',
                    fontsize=12, bbox=dict(facecolor='w', alpha=0.7))

    # --- Radial velocity panel ---
    # The RV data are folded on the LAST period in Ps (this was previously
    # implicit, via the loop variable leaking out of the loop above).
    # NOTE(review): folding on the RV period may be what is intended -- confirm.
    rv_fold_P = Ps[-1]
    ax = axes[len(Ps)]
    phase = ((rv_data.t - t0).jd / rv_fold_P.to_value(u.day) + 0.5) % 1. - 0.5
    ax.errorbar(phase, (rv_data.rv - row['MAP_v0']).to_value(u.km/u.s),
                yerr=rv_data.rv_err.to_value(u.km/u.s),
                marker='o', ls='none')
    ax.axhline(0, zorder=-100, color='#cccccc')

    ax.set_xlabel('Phase')
    ax.set_ylabel(f'$v_r - v_0$ [{u.km/u.s:latex_inline}]')
    # Symmetric velocity range of +/- 1.5 times the MAP semi-amplitude.
    K = row['MAP_K'].to_value(u.km/u.s)
    ax.set_ylim(-1.5*K, 1.5*K)

    fig.set_facecolor('w')
    fig.tight_layout()
    return fig, axes

In [None]:
# Read the whole `samples` node of the HDF5 samples file into memory
# (it is indexed below by its 'APOGEE_ID' and 'P' fields).
with tb.open_file('../catalogs/lnK0.0_logL4.6_samples.hdf5', 'r') as f:
    all_samples = f.root.samples.read()

In [None]:
# Collect APOGEE_IDs for which more than half of the samples have P < 100.
good_apids = []
for row in full_kep_sample:
    # Cast the id to the dtype stored in the samples array so the equality
    # comparison below matches.
    apid = np.array(row['APOGEE_ID']).astype(all_samples['APOGEE_ID'].dtype)
    samples = all_samples[all_samples['APOGEE_ID'] == apid]

    # No samples for this source: skip it rather than computing 0/0
    # (which yields NaN and a RuntimeWarning).
    if len(samples) == 0:
        continue

    frac = (samples['P'] < 100.).sum() / len(samples)
    if frac > 0.5:
        good_apids.append(row['APOGEE_ID'])

In [None]:
# Number of sources selected by the short-period criterion above.
len(good_apids)

In [None]:
# For each selected source, fold the Kepler light curve on the RV,
# Lomb-Scargle and BLS periods and save a multi-panel figure.
os.makedirs(os.path.join(plot_path, 'multimodal'), exist_ok=True)

for row in full_kep_sample[np.isin(full_kep_sample['APOGEE_ID'].astype(str), good_apids)]:
    plot_filename = os.path.join(plot_path, 'multimodal', f"kepler-lc-{row['APOGEE_ID']}.png")
    # Skip targets whose figure already exists so the cell can be re-run.
    if os.path.exists(plot_filename):
        continue

    lcfs = lk.search_lightcurvefile(f"KIC {row['kepid']}", mission='Kepler').download_all()
    if lcfs is None:
        # lightkurve's download_all() returns None for an empty search
        # result; skip instead of failing with AttributeError below.
        print(f"No light curve found for {row['APOGEE_ID']}")
        continue

    try:
        stitched_lc = lcfs.PDCSAP_FLUX.stitch()
    except ValueError:
        print(f"Weirdo light curve for {row['APOGEE_ID']}")
        # stitched_lc = lcfs[0].PDCSAP_FLUX.stitch()
        continue

    # Three candidate periods: MAP RV period, Lomb-Scargle peak, and BLS peak
    # (BLS searched over its default grid, not restricted around the RV period).
    rv_P = row['MAP_P']
    transit_time, transit_P = get_transit_period(stitched_lc)
    lomb_P = get_lomb_period(stitched_lc) * u.day

    visits = allvisit[allvisit['APOGEE_ID'] == row['APOGEE_ID']]
    rv_data = get_rvdata(visits)
    fig, axes = plot_folded_multi(stitched_lc, rv_data,
                                  [rv_P, lomb_P, transit_P*u.day],
                                  transit_time,
                                  f"KIC {row['kepid']}", row,
                                  labels=['RV period', 'Lomb-Scargle period', 'BLS period'])

    fig.savefig(plot_filename, dpi=250)
    # Close each figure so memory does not grow over the loop.
    plt.close(fig)