In [1]:
import numpy as np
import matplotlib.pyplot as plt

import astropy.stats
from   astropy.io import fits as pyfits
import csv
import sys
import os
from   timeit import default_timer as timer
import warnings
import corner

from alderaan.constants import *
import alderaan.io as io

# begin timing the full notebook run
global_start_time = timer()

# silence FutureWarnings for the rest of the session
warnings.filterwarnings(action='ignore', category=FutureWarning)

# empty the stdout buffer so progressbar output is not interleaved with earlier text
sys.stdout.flush()

In [2]:
# choose the mission and target to analyze
MISSION = "Kepler"
TARGET  = "K02150"

# root of the project tree; every other path is built from it
PRIMARY_DIR = '/Users/research/projects/alderaan/'

# fits file holding the saved transit-shape traces for this target
TRACE_FILE = PRIMARY_DIR + 'Traces/' + TARGET + '/' + TARGET + '_transit_shape.fits'

# catalog location (plus true TTV directory for simulations) depends on the mission
if MISSION == "Kepler":
    CSV_FILE = PRIMARY_DIR + "Catalogs/cumulative_koi_catalog.csv"

elif MISSION == "Simulated":
    CSV_FILE = PRIMARY_DIR + "Catalogs/simulated_catalog.csv"
    TRUE_TTV_DIR = PRIMARY_DIR + "Simulations/TTVs/"

# Make sure the necessary paths exist

In [None]:
# directory in which to find lightcurve data
if MISSION == 'Kepler': DOWNLOAD_DIR = PRIMARY_DIR + 'MAST_downloads/'
if MISSION == 'Simulated': DOWNLOAD_DIR = PRIMARY_DIR + 'Simulations/'

# directories in which to place pipeline outputs    
FIGURE_DIR    = PRIMARY_DIR + 'Figures/' + TARGET + '/'
TRACE_DIR     = PRIMARY_DIR + 'Traces/' + TARGET + '/'
QUICK_TTV_DIR = PRIMARY_DIR + 'QuickTTVs/' + TARGET + '/'
DLC_DIR       = PRIMARY_DIR + 'Detrended_lightcurves/' + TARGET + '/'
NOISE_DIR     = PRIMARY_DIR + 'Noise_models/' + TARGET + '/'

# create any output directory that does not exist yet
# makedirs also creates missing parent directories (e.g. 'Figures/'), and
# exist_ok=True makes the cell safe to re-run without a separate existence check
for output_dir in (FIGURE_DIR, TRACE_DIR, QUICK_TTV_DIR, DLC_DIR, NOISE_DIR):
    os.makedirs(output_dir, exist_ok=True)

# Read in pre-constrained stellar parameters

In [None]:
# Load the target catalog from the csv file
print('Reading in data from csv file')

# parse the csv file containing info on targets
csv_keys, csv_values = io.read_csv_file(CSV_FILE)

# gather every csv column into a dictionary keyed by column name
target_dict = {key: io.get_csv_data(key, csv_keys, csv_values) for key in csv_keys}


# work out which KOI identifier to look up for this target
if MISSION not in ('Kepler', 'Simulated'):
    raise ValueError("MISSION must be 'Kepler' or 'Simulated'")

# simulated targets swap their leading character for "K"
KOI_ID = TARGET if MISSION == 'Kepler' else "K" + TARGET[1:]


# boolean mask selecting the catalog rows that belong to this target
use = np.array(target_dict['koi_id']) == KOI_ID

# integer-valued GLOBAL quantities (one entry per matching row)
KIC, NPL = (
    np.array(target_dict[key], dtype='int')[use]
    for key in ('kic_id', 'npl')
)

# float-valued GLOBAL quantities: stellar radius, log-density with errors, limb darkening
(RSTAR_TRUE,
 LOGRHO_TRUE, LOGRHO_ERR1_TRUE, LOGRHO_ERR2_TRUE,
 U1_TRUE, U2_TRUE) = (
    np.array(target_dict[key], dtype='float')[use]
    for key in ('rstar', 'logrho', 'logrho_err1', 'logrho_err2', 'limbdark_1', 'limbdark_2')
)

In [None]:
# do some consistency checks
def _single_catalog_value(values, name):
    """
    Collapse the catalog entries selected for this target to the one value they share.

    Parameters
    ----------
    values : array-like or scalar
        Entries pulled from the catalog for every row matching the target.
        A scalar is accepted so that re-running this cell (after the globals
        have already been collapsed) does not fail.
    name : str
        Quantity label inserted into the error message.

    Returns
    -------
    The first element of `values`, once all elements are confirmed equal.

    Raises
    ------
    ValueError
        If no entries were selected, or if the entries are not all identical.
    """
    values = np.atleast_1d(values)

    # an empty selection means the target was not found in the catalog
    if values.size == 0:
        raise ValueError('No entries found for {0} in the csv input file'.format(name))

    if not np.all(values == values[0]):
        raise ValueError('There are inconsistencies with {0} in the csv input file'.format(name))

    return values[0]


KIC = _single_catalog_value(KIC, 'KIC')
NPL = _single_catalog_value(NPL, 'NPL')

RSTAR_TRUE = _single_catalog_value(RSTAR_TRUE, 'RSTAR')

LOGRHO_TRUE      = _single_catalog_value(LOGRHO_TRUE, 'LOGRHO')
LOGRHO_ERR1_TRUE = _single_catalog_value(LOGRHO_ERR1_TRUE, 'LOGRHO_ERR1')
LOGRHO_ERR2_TRUE = _single_catalog_value(LOGRHO_ERR2_TRUE, 'LOGRHO_ERR2')

U1_TRUE = _single_catalog_value(U1_TRUE, 'U1')
U2_TRUE = _single_catalog_value(U2_TRUE, 'U2')

In [None]:
# convert the log10 stellar density to linear units
RHO_TRUE = 10 ** LOGRHO_TRUE

# linear offsets obtained by shifting the log-density by each quoted error
RHO_ERR1_TRUE, RHO_ERR2_TRUE = (
    10 ** (LOGRHO_TRUE + log_err) - RHO_TRUE
    for log_err in (LOGRHO_ERR1_TRUE, LOGRHO_ERR2_TRUE)
)

# single symmetric uncertainty: root-mean-square of the two offsets
RHO_ERR_TRUE = np.sqrt(RHO_ERR1_TRUE**2 + RHO_ERR2_TRUE**2)/np.sqrt(2)

# Get shape model posteriors

In [None]:
# show a summary of the extensions stored in the trace file
with pyfits.open(TRACE_FILE) as trace:
    # info() writes the summary to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None"
    trace.info()

In [None]:
# Load the saved posterior traces from the fits file

with pyfits.open(TRACE_FILE) as trace:
    header  = trace[0].header
    hdulist = pyfits.HDUList(trace)

    # the 'P' extension is 2D: (number of draws, number of planets)
    NDRAWS, NPL = trace['P'].shape

    # physical parameters
    P, T0, R, DUR, RHO = (trace[name].data for name in ('P', 'T0', 'R', 'DUR', 'RHO'))

    # basis parameters
    C0, C1, B = (trace[name].data for name in ('C0', 'C1', 'B'))

    # dividing by ln(10) rescales a natural log to log10
    # (presumably the stored values are natural logs -- confirm against the writer)
    LOG_R   = trace['LOG_R'].data/np.log(10)
    LOG_DUR = trace['LOG_DUR'].data/np.log(10)

    # limb darkening parameters: first and second columns of 'U'
    U  = trace['U'].data
    U1 = U[:,0]
    U2 = U[:,1]

    # grazing coordinate
    GAMMA = trace["GAMMA"].data

    # TTV parameters: one 'TTS_<index>' extension per planet
    TTS = [trace['TTS_{0}'.format(npl)].data for npl in range(NPL)]

In [None]:
# Read the GP noise hyperparameters from the trace file
with pyfits.open(TRACE_FILE) as trace:
    header  = trace[0].header
    hdulist = pyfits.HDUList(trace)
    
    NDRAWS, NPL = trace['R'].shape
    
    
    # GP parameters; columns stay all-zero when the matching extension is absent
    LOGSW4 = np.zeros((NDRAWS,4))
    LOGW0  = np.zeros((NDRAWS,4))
    LOGQ   = np.zeros((NDRAWS,4))
    
    for z in range(4):
        for hyper_name, hyper_array in (('LOGSW4', LOGSW4), ('LOGW0', LOGW0), ('LOGQ', LOGQ)):
            try:
                hyper_array[:,z] = trace['{0}_{1}'.format(hyper_name, z)].data
            except KeyError:
                # extension not present in this trace file -- leave the zeros in place;
                # any other failure (e.g. a shape mismatch) is allowed to surface
                pass

In [None]:
# print a (center, spread) summary of period, radius ratio, and impact parameter per planet
for npl in range(NPL):
    period_draws, radius_draws, impact_draws = P[:,npl], R[:,npl], B[:,npl]

    # period spread uses the standard deviation; the other two use the MAD-based estimate
    per = (np.median(period_draws), np.std(period_draws))
    r   = (np.median(radius_draws), astropy.stats.mad_std(radius_draws))
    b   = (np.median(impact_draws), astropy.stats.mad_std(impact_draws))

    print("\nPLANET {0}".format(npl))
    print("  period = {:.3f} +/- {:.3f}\t[days]".format(*per))
    print("  rp/Rs  = {:.3f} +/- {:.3f}".format(*r))
    print("  impact = {:.3f} +/- {:.3f}".format(*b))

# For simulated data, read in ground truths

In [None]:
if MISSION == "Simulated":
    # Load the catalog holding the simulated ground-truth values
    print('Reading in simulated "ground truth" data from csv file\n')

    # parse the csv file containing info on targets
    csv_keys, csv_values = io.read_csv_file(CSV_FILE)

    # gather every csv column into a dictionary keyed by column name
    target_dict = {key: io.get_csv_data(key, csv_keys, csv_values) for key in csv_keys}


    # select the catalog rows for this target and establish GLOBAL variables
    KOI_ID = "K" + TARGET[1:]
    use = np.array(target_dict['koi_id']) == KOI_ID

    KIC = np.array(target_dict['kic_id'], dtype='int')[use]

    # limb darkening truths
    u1_true, u2_true = (
        np.array(target_dict[key], dtype='float')[use]
        for key in ('limbdark_1', 'limbdark_2')
    )

    # per-planet truths
    P_true, T0_true, rp_true, b_true = (
        np.array(target_dict[key], dtype='float')[use]
        for key in ('period', 'epoch', 'prad', 'impact')
    )


    # star-level quantities must agree across all rows; collapse each to a scalar
    if not all(k == KIC[0] for k in KIC):
        raise ValueError('There are inconsistencies with KIC in the csv input file')
    KIC = KIC[0]

    if not all(u == u1_true[0] for u in u1_true):
        raise ValueError('There are inconsistencies with U1 in the csv input file')
    u1_true = u1_true[0]

    if not all(u == u2_true[0] for u in u2_true):
        raise ValueError('There are inconsistencies with U2 in the csv input file')
    u2_true = u2_true[0]


    # reorder the planet truths by increasing period
    order = np.argsort(P_true)
    P_true, T0_true, rp_true, b_true = (
        truth[order] for truth in (P_true, T0_true, rp_true, b_true)
    )


    print("true radii:", rp_true)
    print("true impact:", b_true)

# Limb darkening

In [None]:
# corner plot of the two limb darkening coefficients (one column per coefficient)
labels = ['U1', 'U2']
data = np.stack([U1, U2], axis=1)

# ground-truth markers are only available for simulated data
truths = [u1_true, u2_true] if MISSION == "Simulated" else None

fig = corner.corner(data, labels=labels, truths=truths)

# Basis parameters

In [None]:
# one corner plot per planet showing the sampled basis parameters
labels = [r"$C_0$", r"$C_1$", r"$b$", r"$\log r$", r"$\log T$"]

for npl in range(NPL):
    basis_columns = [C0[:,npl], C1[:,npl], B[:,npl], LOG_R[:,npl], LOG_DUR[:,npl]]
    data = np.stack(basis_columns, axis=1)

    # each planet is drawn in its own matplotlib cycle color
    fig = corner.corner(data, labels=labels, color='C{0}'.format(npl), truth_color="k");

# Physical parameters

In [None]:
# one corner plot per planet showing the physical parameters
for npl in range(NPL):
    labels = ["P", r"$t_0$", "r", "b", r"$\rho_{circ}$", "T"]
    physical_columns = [P[:,npl], T0[:,npl], R[:,npl], B[:,npl], RHO[:,npl], DUR[:,npl]]
    data = np.stack(physical_columns, axis=1)

    # for simulations, mark the true radius and impact parameter only
    truths = None
    if MISSION == "Simulated":
        truths = [None, None, rp_true[npl], b_true[npl], None, None]

    fig = corner.corner(data, labels=labels, truths=truths, color='C{0}'.format(npl), truth_color="k");

# Closer investigation of near-grazing transits

In [None]:
# per-planet corner plot restricted to impact parameter, radius ratio, and duration
labels = ["b", "r", "T"]

for npl in range(NPL):
    data = np.stack([B[:,npl], R[:,npl], DUR[:,npl]], axis=1)
    fig = corner.corner(data, labels=labels, color='C{0}'.format(npl));

In [None]:
# per-planet histogram of the grazing coordinate
for npl in range(NPL):
    g = GAMMA[:,npl]

    # 55 bin edges from -1 up to the 99.9th percentile, trimming the extreme upper tail
    upper_edge = np.percentile(g, 99.9)
    bins = np.linspace(-1, upper_edge, 55)

    plt.figure()
    plt.hist(g, bins=bins, density=True, histtype="step", color="C{0}".format(npl), lw=2)
    # dotted reference line at gamma = 1
    plt.axvline(1, ls=":", color="k")
    plt.title(r"$\gamma$", fontsize=24)
    plt.show()

# Is the distribution of $g \equiv (\rho_{\rm circ}/\rho_{\rm obs})^{1/3}$ multimodal?

In [None]:
# log10 of the density ratio divided by 3, i.e. log10 of its cube root, for each planet
data = [(np.log10(RHO[:,npl]/RHO_TRUE))/3 for npl in range(NPL)]

# planets are labeled starting from 1
labels = [r"$\log(g_{0})$".format(npl+1) for npl in range(NPL)]

# arrange as (draws, planets) for corner
data = np.stack(data, axis=1)

fig = corner.corner(data, labels=labels, color="k", truth_color="C0")

# Noise hyperparameters

In [None]:
# one corner plot per hyperparameter index, showing whichever GP hyperparameters were loaded
for z in range(4):
    data  = []
    labels = []

    hyper_columns = (('LogSw4', LOGSW4[:,z]), ('Logw0', LOGW0[:,z]), ('LogQ', LOGQ[:,z]))

    for hyper_label, hyper_draws in hyper_columns:
        # a column that is entirely zero is treated as "not available" and skipped
        if np.any(hyper_draws != 0):
            data.append(hyper_draws)
            labels.append(hyper_label)

    # nothing to plot for this index; skip explicitly rather than
    # swallowing every exception, so genuine plotting errors surface
    if not data:
        continue

    data = np.stack(data).swapaxes(0,1)
    fig = corner.corner(data, labels=labels)

# Try reweighting for eccentricity posteriors

In [None]:
from chainconsumer import ChainConsumer

# ChainConsumer fails to locate LaTeX unless matplotlib has already drawn a figure.
# Rendering a tiny throwaway plot is a stopgap until a proper fix is found.
plt.figure(figsize=(1, 1))
plt.plot(np.array([0.0, 1.0]), color="k", linestyle=":")
plt.show()

In [None]:
# duplicate chains by this amount
upsample = 1000 

# preconstrained density (e.g. from GaiaDR2/Berger+2020 isochrones)
# stored as (central value, symmetric uncertainty)
rho_obs = (RHO_TRUE, RHO_ERR_TRUE)

for npl in range(NPL):
    
    # repeat every circular-density draw `upsample` times so each one is
    # paired with many independent (e, omega) samples
    rho_circ = np.repeat(RHO[:,npl], upsample)

    # draw (e, omega) uniformly: e in [0, 1), omega in [-pi/2, 3pi/2)
    ecc = np.random.uniform(0, 1, len(rho_circ))
    omega = np.random.uniform(-0.5*np.pi, 1.5*np.pi, len(rho_circ))

    # density implied by each (e, omega) sample: rho = rho_circ / g^3
    g = (1 + ecc * np.sin(omega)) / np.sqrt(1 - ecc ** 2)
    rho = rho_circ / g ** 3

    # Gaussian log-weight of the implied density against the preconstrained one;
    # subtracting the maximum before exponentiating makes the largest weight exactly 1
    log_weight = -0.5 * ((rho - rho_obs[0]) / rho_obs[1]) ** 2
    weight = np.exp(log_weight - np.max(log_weight))
    
    
    # now plot it (omega is shown in degrees)
    # np.degrees is used so the conversion does not rely on a bare `pi`
    # being provided by the star import at the top of the notebook
    c = ChainConsumer()
    c.add_chain(np.vstack((ecc, np.degrees(omega))).T, weights=weight, parameters=[r'$e$',r'$\omega$'])
    fig = c.plotter.plot()
    fig.set_size_inches(3 + fig.get_size_inches())

# Can we set priors on eccentricity to recover inclination?

In [None]:
# number of times each posterior draw is repeated
upsample = 1000 

# preconstrained density (e.g. from GaiaDR2/Berger+2020 isochrones)
rho_obs = (RHO_TRUE, RHO_ERR_TRUE)

for npl in range(NPL):
    
    # repeat every circular-density draw so each is paired with many (e, omega) samples
    rho_circ = np.repeat(RHO[:,npl], upsample)
    n_samples = len(rho_circ)
    
    # zero-mean normal draws of (e sin w, e cos w) with scale sig_e
    # give a Rayleigh-distributed eccentricity
    sig_e = 0.0355
    esinw, ecosw = np.random.normal(loc=0, scale=sig_e, size=2*n_samples).reshape(2,-1)
    ecc = np.sqrt(esinw**2 + ecosw**2)
    omega = np.arctan2(esinw, ecosw)
    
    # replace any draw with e >= 1 until none remain
    unbound = ecc >= 1
    while np.any(unbound):
        print("redrawing", np.sum(unbound))
        esinw, ecosw = np.random.normal(loc=0, scale=sig_e, size=2*np.sum(unbound)).reshape(2,-1)
        omega[unbound] = np.arctan2(esinw, ecosw)
        ecc[unbound] = np.sqrt(esinw**2 + ecosw**2)
        unbound = ecc >= 1
    
    
    # density implied by each (e, omega) sample: rho = rho_circ / g^3
    g = (1 + ecc * np.sin(omega)) / np.sqrt(1 - ecc ** 2)
    rho = rho_circ / g ** 3

    # Gaussian log-weight against the preconstrained density, scaled so the largest weight is 1
    log_weight = -0.5 * ((rho - rho_obs[0]) / rho_obs[1]) ** 2
    weight = np.exp(log_weight - np.max(log_weight))

    # repeat the impact parameter draws to line up with the upsampled chain
    b = np.repeat(B[:,npl], upsample)
    
    # weighted joint posterior of eccentricity and impact parameter
    c = ChainConsumer()
    c.add_chain(np.vstack((ecc,b)).T, weights=weight,parameters=[r'$e$',r'$b$'])
    fig = c.plotter.plot()
    fig.set_size_inches(3 + fig.get_size_inches())