In [9]:
import numpy as np
import matplotlib.pyplot as plt

import astropy.stats
from   astropy.io import fits as pyfits
import csv
import sys
import os
from   timeit import default_timer as timer
import warnings
import corner

from chainconsumer import ChainConsumer

from alderaan.constants import *
import alderaan.io as io

# Push any pending stdout text out now so later progress-bar output
# does not get interleaved with it.
sys.stdout.flush()

# Suppress every FutureWarning for the remainder of the session.
warnings.simplefilter('ignore', category=FutureWarning)

# Reference timestamp used to measure the total runtime of the program.
global_start_time = timer()

In [10]:
# select mission, target, and paths
MISSION = "Kepler"        # supported values: "Kepler" or "Simulated"
TARGET  = "K01426-01"     # first six characters are the KOI id, last two the planet number
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
# Downstream cells build paths by string concatenation, so keep the trailing slash.
PRIMARY_DIR = '/Users/research/projects/alderaan/'

# Pick the target catalog for the chosen mission.  Validate MISSION here:
# without the `else`, an unsupported value leaves CSV_FILE undefined and the
# notebook fails later with an unrelated NameError.
if MISSION == "Simulated":
    CSV_FILE = PRIMARY_DIR + "Catalogs/simulated_catalog.csv"
    TRUE_TTV_DIR = PRIMARY_DIR + "Simulations/TTVs/"

elif MISSION == "Kepler":
    CSV_FILE = PRIMARY_DIR + "Catalogs/cumulative_koi_catalog.csv"

else:
    raise ValueError("MISSION must be 'Kepler' or 'Simulated'")

In [11]:
# Split TARGET into its pieces: the first six characters give the KOI id,
# the last two characters give the planet number.
KOI_ID, PLANET_NO = TARGET[:6], int(TARGET[-2:])

# echo the parsed identifiers as the cell output
(TARGET, KOI_ID, PLANET_NO)

('K01426-01', 'K01426', 1)

# Make sure the necessary paths exist

In [12]:
# directory in which to find lightcurve data
if MISSION == 'Kepler':
    DOWNLOAD_DIR = PRIMARY_DIR + 'MAST_downloads/'
elif MISSION == 'Simulated':
    DOWNLOAD_DIR = PRIMARY_DIR + 'Simulations/'

# directories in which to place pipeline outputs (one sub-directory per KOI)
FIGURE_DIR    = PRIMARY_DIR + 'Figures/' + KOI_ID + '/'
TRACE_DIR     = PRIMARY_DIR + 'Traces/' + KOI_ID + '/'
QUICK_TTV_DIR = PRIMARY_DIR + 'QuickTTVs/' + KOI_ID + '/'
DLC_DIR       = PRIMARY_DIR + 'Detrended_lightcurves/' + KOI_ID + '/'
NOISE_DIR     = PRIMARY_DIR + 'Noise_models/' + KOI_ID + '/'

# Create any output directory that does not exist yet.
# os.makedirs also creates missing parent directories (os.mkdir raises
# FileNotFoundError when e.g. 'Figures/' itself is absent), and exist_ok=True
# removes the check-then-create race of testing os.path.exists first.
for _out_dir in (FIGURE_DIR, TRACE_DIR, QUICK_TTV_DIR, DLC_DIR, NOISE_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# Read in pre-constrained stellar parameters

In [13]:
# Read in the data from csv file
print('Reading in data from csv file')

# read in a csv file containing info on targets
csv_keys, csv_values = io.read_csv_file(CSV_FILE)

# put these csv data into a dictionary keyed by column name
target_dict = {k: io.get_csv_data(k, csv_keys, csv_values) for k in csv_keys}


# Kepler ids are used as-is; simulated ids are matched against the catalog
# with their first character replaced by "K".
if MISSION == 'Simulated':
    KOI_ID = "K" + KOI_ID[1:]

elif MISSION != 'Kepler':
    raise ValueError("MISSION must be 'Kepler' or 'Simulated'")


# boolean mask selecting the catalog rows that belong to this target
use = np.array(target_dict['koi_id']) == KOI_ID

# Fail early with a clear message if the target is missing from the catalog;
# otherwise every array below is empty and later indexing raises an opaque IndexError.
if not np.any(use):
    raise ValueError("No rows with koi_id '{0}' found in {1}".format(KOI_ID, CSV_FILE))


def _catalog_column(key, dtype):
    """Return catalog column `key`, cast to `dtype`, restricted to this target's rows."""
    return np.array(target_dict[key], dtype=dtype)[use]


# pull relevant quantities and establish GLOBAL variables
# (one entry per matching catalog row at this point)
KIC = _catalog_column('kic_id', 'int')
NPL = _catalog_column('npl', 'int')

RSTAR_TRUE = _catalog_column('rstar', 'float')

LOGRHO_TRUE      = _catalog_column('logrho', 'float')
LOGRHO_ERR1_TRUE = _catalog_column('logrho_err1', 'float')
LOGRHO_ERR2_TRUE = _catalog_column('logrho_err2', 'float')

U1_TRUE = _catalog_column('limbdark_1', 'float')
U2_TRUE = _catalog_column('limbdark_2', 'float')

Reading in data from csv file


In [14]:
# do some consistency checks
def _collapse_to_scalar(values, label):
    """Return the single value shared by every entry of `values`.

    Parameters
    ----------
    values : array-like or scalar
        Per-row values pulled from the csv input file.  A scalar is accepted
        so that re-running this cell (after the globals have already been
        collapsed) does not fail.
    label : str
        Quantity name inserted into the error message.

    Raises
    ------
    ValueError
        If `values` is empty, or if its entries are not all equal.
    """
    values = np.atleast_1d(values)

    # all() over an empty selection is vacuously True, which would otherwise
    # fall through to an IndexError on values[0]
    if values.size == 0:
        raise ValueError('No entries found for {0} in the csv input file'.format(label))

    if not np.all(values == values[0]):
        raise ValueError('There are inconsistencies with {0} in the csv input file'.format(label))

    return values[0]


# every row for this target must agree on the stellar properties;
# collapse each per-row array to that one shared value
KIC = _collapse_to_scalar(KIC, 'KIC')
NPL = _collapse_to_scalar(NPL, 'NPL')

RSTAR_TRUE = _collapse_to_scalar(RSTAR_TRUE, 'RSTAR')

LOGRHO_TRUE      = _collapse_to_scalar(LOGRHO_TRUE, 'LOGRHO')
LOGRHO_ERR1_TRUE = _collapse_to_scalar(LOGRHO_ERR1_TRUE, 'LOGRHO_ERR1')
LOGRHO_ERR2_TRUE = _collapse_to_scalar(LOGRHO_ERR2_TRUE, 'LOGRHO_ERR2')

U1_TRUE = _collapse_to_scalar(U1_TRUE, 'U1')
U2_TRUE = _collapse_to_scalar(U2_TRUE, 'U2')

In [15]:
# Convert the log10 density and its two one-sided log errors to linear units.
RHO_TRUE = 10**LOGRHO_TRUE

# each linear error is the offset of the shifted value from the central value
RHO_ERR1_TRUE, RHO_ERR2_TRUE = (
    10**(LOGRHO_TRUE + log_err) - RHO_TRUE
    for log_err in (LOGRHO_ERR1_TRUE, LOGRHO_ERR2_TRUE)
)

# single symmetric uncertainty: root-mean-square of the two one-sided errors
RHO_ERR_TRUE = np.sqrt(RHO_ERR1_TRUE**2 + RHO_ERR2_TRUE**2)/np.sqrt(2)

# Get shape model posteriors

In [None]:
# FITS file holding the shape-model trace for this planet
trace_file = "".join([TRACE_DIR, KOI_ID, "_{:02d}_shape_C.fits".format(PLANET_NO)])

# maps each extension's EXTNAME to its data array
trace = {}

with pyfits.open(trace_file) as hdulist:
    # HDU 0 is skipped; every later extension is stored under its EXTNAME
    # with length-one axes squeezed out
    for idx in range(1, len(hdulist)):
        hdu = hdulist[idx]
        trace[hdu.header["EXTNAME"]] = np.squeeze(hdu.data)

In [None]:
npl = PLANET_NO


def _median_and_scatter(samples):
    """Return (median, MAD-based standard deviation) of `samples`."""
    return (np.median(samples), astropy.stats.mad_std(samples))


# (central value, robust scatter) for each summarized trace parameter
per = _median_and_scatter(trace["P"])
r   = _median_and_scatter(trace["R"])
b   = _median_and_scatter(trace["B"])

# report the summary
print("\nPLANET {0}".format(npl))
print("  period = {:.3f} +/- {:.3f}\t[days]".format(*per))
print("  rp/Rs  = {:.3f} +/- {:.3f}".format(*r))
print("  impact = {:.3f} +/- {:.3f}".format(*b))

# For simulated data, read in ground truths

In [None]:
if MISSION == "Simulated":
    # Re-read the simulated catalog and pull out the injected ("ground truth")
    # parameters for this target.
    print('Reading in simulated "ground truth" data from csv file\n')

    # read in a csv file containing info on targets
    csv_keys, csv_values = io.read_csv_file(CSV_FILE)

    # put these csv data into a dictionary
    target_dict = {}
    for k in csv_keys: 
        target_dict[k] = io.get_csv_data(k, csv_keys, csv_values)


    # pull relevant quantities and establish GLOBAL variables
    # Build the lookup id from the 6-character KOI part of TARGET only.
    # TARGET[1:] would keep the trailing "-NN" planet suffix, which matches no
    # row of the 'koi_id' column and overwrites KOI_ID with a value that differs
    # from the one used for the stellar-parameter lookup earlier in the notebook.
    KOI_ID = "K" + TARGET[1:6]

    use = np.array(target_dict['koi_id']) == KOI_ID

    # fail with a clear message instead of an IndexError on KIC[0] below
    if not np.any(use):
        raise ValueError("No rows with koi_id '{0}' found in {1}".format(KOI_ID, CSV_FILE))

    KIC = np.array(target_dict['kic_id'], dtype='int')[use]

    u1_true = np.array(target_dict['limbdark_1'], dtype='float')[use]
    u2_true = np.array(target_dict['limbdark_2'], dtype='float')[use]

    # per-row planet quantities (one entry per matching catalog row)
    P_true  = np.array(target_dict['period'], dtype='float')[use]
    T0_true = np.array(target_dict['epoch'],  dtype='float')[use]
    rp_true = np.array(target_dict['prad'], dtype='float')[use]
    b_true  = np.array(target_dict['impact'], dtype='float')[use]
    
    
    # do some consistency checks: star-level values must agree across rows
    if all(k == KIC[0] for k in KIC): KIC = KIC[0]
    else: raise ValueError('There are inconsistencies with KIC in the csv input file')

    if all(u == u1_true[0] for u in u1_true): u1_true = u1_true[0]
    else: raise ValueError('There are inconsistencies with U1 in the csv input file')

    if all(u == u2_true[0] for u in u2_true): u2_true = u2_true[0]
    else: raise ValueError('There are inconsistencies with U2 in the csv input file')
        
        
    # sort planet truths by period
    order = np.argsort(P_true)

    P_true  = P_true[order]
    T0_true = T0_true[order]
    rp_true = rp_true[order]
    b_true  = b_true[order]
    
    
    print("true radii:", rp_true)
    print("true impact:", b_true)

# Make corner plots

In [None]:
# Corner plot of the weighted shape-model samples.
labels = [r'$\log r$', r'$b$', r'$\log T$']

# one column per plotted parameter, in the same order as `labels`
data = np.stack([trace[key] for key in ("LOG_R", "B", "LOG_DUR")]).T

c = ChainConsumer()
c.add_chain(data, weights=trace["WEIGHTS"], parameters=labels)
fig = c.plotter.plot()

# enlarge the default figure by 3 inches in each dimension
fig.set_size_inches(fig.get_size_inches() + 3)

In [None]:
# duplicate chains by this amount
upsample = 100

# preconstrained density (e.g. from GaiaDR2/Berger+2020 isochrones)
rho_obs = (RHO_TRUE, RHO_ERR_TRUE)

# seeded generator so the reweighted posterior is reproducible across re-runs
rng = np.random.default_rng(42)


# draw samples in (e,w) and calculate log(weight) for a given Rayleigh scale
rho_circ = np.repeat(trace["RHO"], upsample)

# scale of the zero-mean normal draws for (e sin w, e cos w)
sig_e = 0.0355
esinw, ecosw = rng.normal(loc=0, scale=sig_e, size=(2, len(rho_circ)))
ecc = np.sqrt(esinw**2 + ecosw**2)
omega = np.arctan2(esinw, ecosw)

# Redraw any sample with e >= 1 until none remain.  The redrawn values are
# written back in place so esinw/ecosw stay aligned with ecc/omega.
bad = ecc >= 1
while np.any(bad):
    n_bad = np.sum(bad)
    print("redrawing", n_bad)
    esinw[bad], ecosw[bad] = rng.normal(loc=0, scale=sig_e, size=(2, n_bad))
    omega[bad] = np.arctan2(esinw[bad], ecosw[bad])
    ecc[bad] = np.sqrt(esinw[bad]**2 + ecosw[bad]**2)
    bad = ecc >= 1


# density implied by each (e,w) draw: the circular-orbit density divided by g**3
g = (1 + ecc * np.sin(omega)) / np.sqrt(1 - ecc ** 2)
rho = rho_circ / g ** 3

# Gaussian weight of each implied density against the preconstrained value;
# subtracting the maximum before exponentiating avoids underflow to all zeros
log_weight = -0.5 * ((rho - rho_obs[0]) / rho_obs[1]) ** 2
w_rho = np.exp(log_weight - np.max(log_weight))
w_rho /= np.sum(w_rho)


# upsample the remaining trace quantities so they line up with the (e,w) draws
log_r = np.repeat(trace["LOG_R"], upsample)
b     = np.repeat(trace["B"], upsample)
log_T = np.repeat(trace["LOG_DUR"], upsample)
w_umb = np.repeat(trace["WEIGHTS"], upsample)

In [None]:
# Corner plot of the upsampled samples, weighted by the product of the
# density weights and the upsampled trace weights.
labels = [r'$\log r$', r'$b$', r'$\log T$']
data = np.transpose(np.stack((log_r, b, log_T)))

c = ChainConsumer()
c.add_chain(data, weights=w_rho*w_umb, parameters=labels)
fig = c.plotter.plot()

# enlarge the default figure by 3 inches in each dimension
fig.set_size_inches(fig.get_size_inches() + 3)

# Try reweighting for eccentricity posteriors

In [None]:
# ChainConsumer is already imported in the notebook's import cell, so it is
# not re-imported here.

# For some reason, ChainConsumer can't find Latex unless a matplotlib plot is made first.
# This throwaway 1x1-inch figure is a workaround -- TODO: find a real solution
plt.figure(figsize=(1,1))
plt.plot(np.linspace(0,1,2), "k:")
plt.show()

In [None]:
# duplicate chains by this amount
upsample = 1000 

# preconstrained density (e.g. from GaiaDR2/Berger+2020 isochrones)
rho_obs = (RHO_TRUE, RHO_ERR_TRUE)

# seeded generator so the reweighted posterior is reproducible across re-runs
rng = np.random.default_rng(42)

# The trace loaded above holds samples for a single planet, so there is no
# per-planet loop.  This cell previously indexed an array `RHO[:, npl]` that
# is not defined anywhere in this notebook.
# draw samples in (e,w) and calculate log(weight)
rho_circ = np.repeat(trace["RHO"], upsample)
ecc = rng.uniform(0, 1, len(rho_circ))
omega = rng.uniform(-0.5*np.pi, 1.5*np.pi, len(rho_circ))

# density implied by each (e,w) draw: the circular-orbit density divided by g**3
g = (1 + ecc * np.sin(omega)) / np.sqrt(1 - ecc ** 2)
rho = rho_circ / g ** 3

# Gaussian weight against the preconstrained density, shifted by its maximum
# before exponentiating so the largest weight is exactly 1
log_weight = -0.5 * ((rho - rho_obs[0]) / rho_obs[1]) ** 2
weight = np.exp(log_weight - np.max(log_weight))

# The trace samples carry their own weights (used for the other corner
# plots in this notebook); fold them in so this plot is weighted consistently.
w_trace = np.repeat(trace["WEIGHTS"], upsample)


# now plot it (omega converted from radians to degrees)
c = ChainConsumer()
c.add_chain(np.vstack((ecc, np.degrees(omega))).T, weights=weight*w_trace, parameters=[r'$e$',r'$\omega$'])
fig = c.plotter.plot()
fig.set_size_inches(3 + fig.get_size_inches())

# Can we set priors on eccentricity to recover inclination?

In [None]:
# duplicate chains by this amount
upsample = 1000 

# preconstrained density (e.g. from GaiaDR2/Berger+2020 isochrones)
rho_obs = (RHO_TRUE, RHO_ERR_TRUE)

# seeded generator so the reweighted posterior is reproducible across re-runs
rng = np.random.default_rng(42)

# The trace loaded above holds samples for a single planet, so there is no
# per-planet loop.  This cell previously indexed arrays `RHO[:, npl]` and
# `B[:, npl]` that are not defined anywhere in this notebook.
# draw samples in (e,w) and calculate log(weight) for a given Rayleigh scale
rho_circ = np.repeat(trace["RHO"], upsample)

# scale of the zero-mean normal draws for (e sin w, e cos w)
sig_e = 0.0355
esinw, ecosw = rng.normal(loc=0, scale=sig_e, size=(2, len(rho_circ)))
ecc = np.sqrt(esinw**2 + ecosw**2)
omega = np.arctan2(esinw, ecosw)

# Redraw any sample with e >= 1 until none remain.  The redrawn values are
# written back in place so esinw/ecosw stay aligned with ecc/omega.
bad = ecc >= 1
while np.any(bad):
    n_bad = np.sum(bad)
    print("redrawing", n_bad)
    esinw[bad], ecosw[bad] = rng.normal(loc=0, scale=sig_e, size=(2, n_bad))
    omega[bad] = np.arctan2(esinw[bad], ecosw[bad])
    ecc[bad] = np.sqrt(esinw[bad]**2 + ecosw[bad]**2)
    bad = ecc >= 1


# density implied by each (e,w) draw: the circular-orbit density divided by g**3
g = (1 + ecc * np.sin(omega)) / np.sqrt(1 - ecc ** 2)
rho = rho_circ / g ** 3

# Gaussian weight against the preconstrained density, shifted by its maximum
# before exponentiating so the largest weight is exactly 1
log_weight = -0.5 * ((rho - rho_obs[0]) / rho_obs[1]) ** 2
weight = np.exp(log_weight - np.max(log_weight))

# The trace samples carry their own weights (used for the other corner
# plots in this notebook); fold them in so this plot is weighted consistently.
w_trace = np.repeat(trace["WEIGHTS"], upsample)

# upsample impact parameter
b = np.repeat(trace["B"], upsample)

# now plot it
c = ChainConsumer()
c.add_chain(np.vstack((ecc,b)).T, weights=weight*w_trace, parameters=[r'$e$',r'$b$'])
fig = c.plotter.plot()
fig.set_size_inches(3 + fig.get_size_inches())