# Instructions

The overall goal here is to find a Kepler target, get the lightcurve data, clean and normalize the data, then fit the transits. The bolded instructions below are the tasks which require the most human work, while most of the others will just require running the code which automatically performs the task. 

What needs to be turned in:
* This notebook with all of your code work edits
* Verify your solved period with a reliable online resource. Show that outsourced period and a link to the source(s). If your period is significantly different (>1 day), attempt to explain why. (you can write all this in the cell below)

**The steps:**

1) **Find a target star which hosts a giant planet companion which was observed by the Kepler mission**

2) Download the lightcurve fits files

3) Load the lightcurve data into your code

4) Normalize the lightcurve

5) Approximate the orbital period and the t0 using the box least squares method (BLS)

6) **Approximate the parameters of the transit fit**

7) Solve the transit fit using MCMC

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import csaps

In [3]:
%matplotlib notebook

# Filter and normalization function definitions

In [4]:
def normalize_region(wl, flux, ferr, sds, plot_q=True, ss_smo=1e-3):
    """Divide a 1-D series by a smoothing-spline fit of its outlier-free points.

    The x-axis is called ``wl`` for historical reasons; in this notebook it is
    the light-curve time axis (the plots are labelled accordingly).

    Parameters
    ----------
    wl : x-axis samples.
    flux : values to normalize (not modified; a copy is handed to the filter).
    ferr : uncertainties on ``flux``; scaled by the same fit. They are not
        used by the outlier filter, which receives uniform weights.
    sds : standard-deviation multipliers, one per ``stdFiltIt`` iteration.
    plot_q : when true, show the filter plot, the fit and the normalized result.
    ss_smo : smoothing level used both by the filter and by the final spline.

    Returns
    -------
    (flux_norm, ferr_norm) : ``flux`` and ``ferr`` divided by the fitted curve.
    """
    # Only the mask of surviving points is needed from the iterative filter.
    _filtered, keep = stdFiltIt(wl, np.copy(flux),
                                np.ones_like(flux),
                                sds, smo=ss_smo, plot_q=plot_q)

    # Fit the kept points only, then evaluate the spline on the full axis.
    spline = csaps.UnivariateCubicSmoothingSpline(wl[keep], flux[keep],
                                                  smooth=ss_smo)
    baseline = spline(wl)

    flux_norm = flux / baseline
    ferr_norm = ferr / baseline

    if not plot_q:
        return flux_norm, ferr_norm

    # Figure: raw data with the fitted baseline on top.
    plt.figure(figsize=(10, 5))
    plt.plot(wl, flux, '.-k')
    plt.plot(wl, baseline, '-r')
    plt.title('Fit')
    plt.xlabel('time')
    plt.ylabel('Flux')
    plt.grid(True)
    plt.show()

    # Figure: normalized data with its error band and the unit level.
    grey, light_grey = [0.5, 0.5, 0.5], [0.85, 0.85, 0.85]
    plt.figure(figsize=(10, 5))
    plt.plot(wl, flux_norm, '-', c=grey, linewidth=1, label='Flux')
    plt.fill_between(wl, flux_norm - ferr_norm, flux_norm + ferr_norm,
                     step='mid',
                     color=light_grey, label='Flux Error')
    plt.axhline(1.0, color='m', linewidth=1, label='Continuum')
    plt.title('Normalized')
    plt.xlabel('time [day]')
    plt.ylabel('Flux [ppt]')
    plt.grid(True)
    plt.show()

    return flux_norm, ferr_norm


def wlr2ir(wl, wl_bounds):
    """Return the indices of ``wl`` that span the interval ``wl_bounds``.

    Each bound is snapped to the nearest sample of ``wl`` (NaN samples are
    ignored; on a tie the first nearest sample wins) and the inclusive index
    range between the two snapped positions is returned.

    Parameters
    ----------
    wl : 1-D array-like of axis values.
    wl_bounds : pair ``(lower, upper)`` of axis values.

    Returns
    -------
    numpy.ndarray
        Integer indices ``lo, lo + 1, ..., hi``. Empty when the upper bound
        snaps to an index below the lower bound.

    Raises
    ------
    ValueError
        If ``wl`` contains no finite values (raised by ``np.nanargmin``).
    """
    wl = np.asarray(wl)

    # nanargmin yields one scalar index per bound, so np.arange always
    # receives scalars even when several samples are equally close.
    lo = np.nanargmin(np.abs(wl - wl_bounds[0]))
    hi = np.nanargmin(np.abs(wl - wl_bounds[1]))

    return np.arange(lo, hi + 1)


def stdFiltIt(wl, arr_1d, weights_1d, sdms, smo, plot_q):
    """Iteratively reject outliers around a smoothing-spline fit.

    For each multiplier in ``sdms`` a cubic smoothing spline is fitted to the
    currently finite points, and every point further than
    ``multiplier * std(residuals)`` from the fit is rejected.

    Parameters
    ----------
    wl : x-axis samples.
    arr_1d : values to filter. NOTE: modified in place -- rejected points are
        overwritten with NaN -- so pass a copy to keep the original.
    weights_1d : per-point weights; only their finiteness is used.
    sdms : iterable of standard-deviation multipliers, one per iteration.
        May be empty, in which case only non-finite points are masked.
    smo : smoothing level passed to the spline.
    plot_q : when true, plot the data, the rejected points and the
        acceptance band of every iteration.

    Returns
    -------
    (arr_1d, gi) : the (mutated) input array and the boolean mask of points
        that survived the last iteration.
    """
    if plot_q:
        plt.figure(figsize=(10, 5))
        plt.title('Filter plot')
        plt.plot(wl, arr_1d, '.k')

    # Defined before the loop so an empty ``sdms`` still returns a valid mask
    # instead of failing on an unbound name.
    gi = np.logical_and(np.isfinite(weights_1d), np.isfinite(arr_1d))

    for sdm in sdms:

        # Points rejected in earlier iterations are NaN by now and drop out.
        gi = np.logical_and(np.isfinite(weights_1d), np.isfinite(arr_1d))

        fit = csaps.UnivariateCubicSmoothingSpline(wl[gi], arr_1d[gi], smooth=smo)(wl)

        # Half-width of the acceptance band for this iteration.
        dy_sd = np.std(arr_1d[gi] - fit[gi]) * sdm

        gi = np.logical_and(np.abs(arr_1d - fit) <= dy_sd, gi)

        if plot_q:
            plt.plot(wl[np.logical_not(gi)], arr_1d[np.logical_not(gi)], 'xr')
            plt.plot(wl, fit + dy_sd, ':g')
            plt.plot(wl, fit - dy_sd, ':r')
            plt.grid(True)

        arr_1d[np.logical_not(gi)] = np.nan

    if plot_q:
        plt.show()

    return arr_1d, gi

# Load a lightcurve

In [5]:
from astropy.io import fits
from os import listdir

import lightkurve as lk

# Directory holding the light-curve FITS files of one target.
# Alternative target: '/media/etdisk1/kepdata/data/lightcurves/011446443/'
data_dir0 = '/media/etdisk1/kepdata/data/lightcurves/005792202/' # http://www.openexoplanetcatalogue.com/planet/Kepler-27%20c/

###

# Position of the file to load within the sorted directory listing.
kid = 3

# os.listdir returns entries in arbitrary order; sorting makes `kid` select
# the same file on every run and every machine.
data_dir = data_dir0 + sorted(listdir(data_dir0))[kid]

# Close the file as soon as the columns are read. The columns are copied so
# they stay valid after the file is closed.
# NOTE(review): columns 0, 7 and 8 are presumably TIME, PDCSAP_FLUX and
# PDCSAP_FLUX_ERR of a Kepler light-curve file -- confirm against the header.
with fits.open(data_dir) as hdu:
    table = hdu[1].data
    time = np.array(table.field(0))
    flux = np.array(table.field(7))
    flux_err = np.array(table.field(8))

lc = lk.LightCurve(time=time, flux=flux, flux_err=flux_err)



# Or download a light curve

**Search for data of a specific target**

In [6]:
target_name = '005792202'

#target_name = 'Kepler-432'

# target_name = 'Kepler-88'

# target_name = 'Kepler-470'

In [7]:
# Query the archive for long-cadence light-curve files of `target_name`.
# NOTE(review): `search_lightcurvefile` is, as far as I know, deprecated in
# lightkurve 2.x in favour of `search_lightcurve` -- confirm for the installed version.
from lightkurve.search import search_lightcurvefile

# `limit=10` caps the number of returned products at ten; raise it to
# retrieve more observations of the same target.
res = search_lightcurvefile(target_name, radius=None, 
                            cadence='long', mission=('Kepler', 'K2', 'TESS'), 
                            quarter=None, month=None, campaign=None, sector=None, limit=10)

# Last expression of the cell: displays the search-result table.
res

#,observation,target_name,productFilename,distance
0,Kepler Quarter 1,kplr005792202,kplr005792202-2009166043257_llc.fits,0.0
1,Kepler Quarter 2,kplr005792202,kplr005792202-2009259160929_llc.fits,0.0
2,Kepler Quarter 3,kplr005792202,kplr005792202-2009350155506_llc.fits,0.0
3,Kepler Quarter 4,kplr005792202,kplr005792202-2010078095331_llc.fits,0.0
4,Kepler Quarter 5,kplr005792202,kplr005792202-2010174085026_llc.fits,0.0
5,Kepler Quarter 6,kplr005792202,kplr005792202-2010265121752_llc.fits,0.0
6,Kepler Quarter 7,kplr005792202,kplr005792202-2010355172524_llc.fits,0.0
7,Kepler Quarter 8,kplr005792202,kplr005792202-2011073133259_llc.fits,0.0
8,Kepler Quarter 9,kplr005792202,kplr005792202-2011177032512_llc.fits,0.0
9,Kepler Quarter 10,kplr005792202,kplr005792202-2011271113734_llc.fits,0.0


**Download the data and stitch all the quarterly data together into one light curve**

In [8]:
import warnings

# quality_bitmask='hard': cadences that are not flagged as good are left out
# of the downloaded data.
lc_collection = res.download_all(quality_bitmask='hard')

# Stitch all downloaded light curves together into a single one. A failure is
# reported instead of being silently ignored, because every later cell expects
# `lc` to be one stitched light curve.
try:
    lc = lc_collection.stitch()
except Exception as err:
    warnings.warn(
        "Could not stitch the downloaded light curves ({0!r}); "
        "`lc` is left as the un-stitched download result.".format(err)
    )
    lc = lc_collection

lc



KeplerLightCurve(ID: 5792202)

# Check the size of the data array

If the array is over 50,000 elements, the following processes will take a long time. If too long, the processes also might fail due to running out of memory (dependent on how much memory your PC has).

In [8]:
len(lc.time)

37478

If the size of the array is too large, you should bin the data to reduce its size.

Binning will combine datapoints to make a smaller dataset.

For example, 

T = [t1, t2, t3, t4, t5, t6, ...]

binning T by 2 would combine the elements in T like so,

T_binned_by_2 = [mean(t1, t2), mean(t3, t4), mean(t5, t6), ...]

such that the binned array has a length of len(T) / 2

In [9]:
if 0: #if too big
    lc = lc.bin(binsize=5)

# Filter NaN's out of the dataset

In [9]:
lc = lc.remove_nans()

time, flux, flux_err = lc.time, lc.flux, lc.flux_err

New array length after NaN removal (and after binning, if binning was enabled)

In [10]:
len(time)

37447

# Plot the original lightcurve data

In [11]:
# Quick look at the light curve before normalization.
plt.figure(figsize=(10, 5))
plt.plot(time, flux, '.k')
plt.grid(True)
plt.xlabel('time [day]')
plt.ylabel('flux [ppt]')
# Show last so the axis labels are part of the figure on every backend;
# non-blocking so execution continues with the next cell.
plt.show(block=False)

<IPython.core.display.Javascript object>

# Normalize the light curve

1st plot shows the results of the iterative standard deviation filter process

2nd plot shows the fit

3rd plot shows the result of flux normalization


This process will likely require you to edit the "sds" and/or "ss_smo" variables. The sds variable can be a list or a numpy 1D array. It contains the standard deviation multiplier for each iteration of the filter. The length of "sds" sets the number of iterations the filter runs. Typically, you want to start with a large multiplier and slowly reduce it. How high and how low the multiplier should be depends on the data. Here are some general rules for choosing the values in sds:

* If your data has very large outliers, you need to start with a very large multiplier. If you don't, you risk filtering out large amounts of good data, which is catastrophic.
* If your data has very few outliers, you can start with a low multiplier to speed up the process. This is optional and, in general, causes no harm.
* You need to CLOSELY look at your filtered data (plot 1) and make sure you are filtering out as much transit data as possible, even if you begin to cut into a small amount of good data! If you see that the filter did not get rid of all transit datapoints, you need to set a lower sds final multiplier
* Along the length of your lightcurve you should not be filtering out large amounts of data. If you are, you need to increase the final multiplier value

Typically, the optimal values in sds will look similar to this (quick decrease in high values, then slow at the final smaller values):

sds = [5, 4, 3, 2.8, 2.5, 2.5]

You can manually set sds, or use np.linspace to automatically set the values.

In [12]:
# Standard-deviation multipliers for the iterative outlier filter: one filter
# pass per entry, so len(sds) is the number of passes.
# Start with a large multiplier (removes only the huge outliers) and step down
# to a small one (also removes the transit points before the baseline fit).
# If too much good data is rejected, raise the final value (e.g. 1.5 -> 1.9).
sds = np.linspace(7, 1.5, 5) 

flux_norm, ferr_norm = normalize_region(time, 
                                        flux,          
                                        flux_err,      
                                        sds,           # Standard deviation multiplier for each iteration of the filter
                                        plot_q=True,   # Show filter and fit plots
                                        # Alternative smoothing level tried earlier: ss_smo=0.99
                                        ss_smo=0.5    # Smoothing spline smooth level
                                       )

<IPython.core.display.Javascript object>



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Replace the flux and flux error data with the normalized versions we just created

In [14]:
lc.flux = flux_norm
lc.flux_err = ferr_norm

# Plot the normalized data

You should stop here and closely look at your normalized data to make sure there are no problematic regions where, for example:

* normalization was not done properly (seen as long-term curvature), or 
* transits have been removed due to overfitting with a spline that was too relaxed (hard to tell, but it can be seen if you compare to the original data).

In [15]:
plt.figure(figsize=(10, 5))
plt.plot(time, flux_norm, '.k')
plt.show()

<IPython.core.display.Javascript object>

# The box least squares (BLS) function definition

In [16]:
def get_per(t, f, per_bounds=(1, 100), zoom_time_wdith=1.0):
    """Estimate the transit period and epoch with box least squares (BLS).

    Non-finite samples are dropped, a BLS periodogram is evaluated on 10000
    log-spaced trial periods with a fixed trial duration of 0.1 (same units
    as ``t``), and the highest peak is taken as the single transit candidate.
    A two-panel figure is drawn: the periodogram on the left and the light
    curve folded on the candidate period on the right.

    Based on https://exoplanet.dfm.io/en/stable/tutorials/together/

    Parameters
    ----------
    t : array of times.
    f : array of fluxes, same length as ``t``.
    per_bounds : pair ``(shortest, longest)`` trial period to search.
    zoom_time_wdith : half-width of the folded plot around the transit, in
        the units of ``t``.

    Returns
    -------
    (period, t0) : period and reference transit time of the highest BLS peak.
    """
    from astropy.timeseries import BoxLeastSquares

    finite = np.isfinite(t + f)
    t = t[finite]
    f = f[finite]

    period_grid = np.exp(np.linspace(np.log(per_bounds[0]), np.log(per_bounds[1]), 10000))

    # Only one candidate is searched for, so no in-transit masking is needed.
    bls_power = BoxLeastSquares(t, f).power(period_grid, 0.1, oversample=20)

    # The highest peak is the planet candidate.
    best = np.argmax(bls_power.power)
    period = bls_power.period[best]
    t0 = bls_power.transit_time[best]

    fig, (ax_pgram, ax_fold) = plt.subplots(1, 2, figsize=(10, 5))

    # Left panel: the periodogram with the selected period highlighted.
    ax_pgram.axvline(np.log10(period), color="C1", lw=5, alpha=0.8)
    ax_pgram.plot(np.log10(bls_power.period), bls_power.power, "k")
    ax_pgram.annotate(
        "period = {0:.4f} d".format(period),
        (0, 1),
        xycoords="axes fraction",
        xytext=(5, -5),
        textcoords="offset points",
        va="top",
        ha="left",
        fontsize=12,
    )
    ax_pgram.set_ylabel("bls power")
    ax_pgram.set_yticks([])
    ax_pgram.set_xlim(np.log10(period_grid.min()), np.log10(period_grid.max()))
    ax_pgram.set_xlabel("log10(period)")

    # Right panel: data folded on the period, centred on the transit.
    x_fold = (t - t0 + 0.5 * period) % period - 0.5 * period
    near_transit = np.abs(x_fold) < zoom_time_wdith
    ax_fold.plot(x_fold[near_transit], f[near_transit], ".k")

    # Overplot the phase-binned light curve (mean flux per bin).
    bins = np.linspace(-zoom_time_wdith, zoom_time_wdith, int(32 / 0.4 * zoom_time_wdith))
    counts, _ = np.histogram(x_fold, bins)
    flux_sums, _ = np.histogram(x_fold, bins, weights=f)
    # Empty bins are identified by their count, so the division is safe
    # whatever the flux values are; they plot as 0.
    counts[counts == 0] = 1
    ax_fold.plot(0.5 * (bins[1:] + bins[:-1]), flux_sums / counts, color="C1")

    ax_fold.set_xlim(-zoom_time_wdith, zoom_time_wdith)
    ax_fold.set_ylabel("relative flux [ppt]")
    ax_fold.set_xlabel("time since transit")

    fig.subplots_adjust(hspace=0.02)

    return period, t0

# Use box least squares (BLS) method to find the period and t0 for the transit

In [17]:
# p: period of the strongest BLS peak; t0: reference transit time of that peak.
p, t0 = get_per(time, flux_norm, 
                per_bounds=[5, 1500], # Give a lower and upper bound period for the transit search
                zoom_time_wdith=1.0   # Days shown to the left and right of the transit in the right-hand (folded) subplot
               )

<IPython.core.display.Javascript object>

# You can apply binning now if you kept the extra (unbinned) data only for finding the period and t0 with the BLS routine

It is unlikely that you need to apply binning here

In [18]:
if 0:
    
    lc = lc.bin(binsize=10)

# Function definitions for transit fitting which is solved by the Markov chain Monte Carlo (MCMC) method

https://en.wikipedia.org/wiki/Markov_chain_Monte_Carlo

In [19]:
import exoplanet as xo
import pymc3 as pm

def create_mcmc_model(t, f, ferr, pm, ig):
    """Build the single-planet transit model and optimize its starting point.

    Parameters
    ----------
    t, f, ferr : times, fluxes and flux uncertainties of the light curve.
    pm : ignored -- the name is rebound by the local ``import pymc3 as pm``
        below, so whatever the caller passes is never used.
    ig : dict of initial guesses / prior settings. Keys read here: 'cont',
        'cont_sd', 't0', 't0_sd', 'P', 'P_sd', 'limb_dark_u', 'limb_dark_b',
        'r_ratio_sv', 'r_ratio_lb', 'r_ratio_ub', 'r_star'.
        'm_star' is not read; the stellar mass is fixed to 1.0.

    Returns
    -------
    (model, pm, map_soln, sim) : the PyMC3 model, the pymc3 module, the
        result of ``xo.optimize`` and the model light curve returned by
        ``xo.eval_in_model`` before optimization.
    """
    
    # NOTE: shadows the ``pm`` parameter.
    import pymc3 as pm

    with pm.Model() as model:

        # The baseline flux (what most of the data is centered around; is it 1.0001 instead of 1.000?)
        # Normal prior centred on ig['cont']
        mean = pm.Normal("mean", mu=ig['cont'], sd=ig['cont_sd'])

        # The time of a reference transit for each planet
        t0 = pm.Normal("t0", mu=ig['t0'], sd=ig['t0_sd'], shape=1)

        # The log period; also tracking the period itself.
        # ig['P_sd'] is therefore a width in log(period), not in days.
        logP = pm.Normal("logP", mu=np.log(ig['P']), sd=ig['P_sd'], shape=1)
        
        period = pm.Deterministic("period", pm.math.exp(logP))

        # The Kipping (2013) parameterization for quadratic limb darkening parameters
        u = xo.distributions.QuadLimbDark("u", testval=np.array([ig['limb_dark_u']]))

        # Uniform prior between the lower/upper bounds, started at ig['r_ratio_sv']
        r = pm.Uniform("r", lower=ig['r_ratio_lb'], upper=ig['r_ratio_ub'], shape=1, testval=np.array([ig['r_ratio_sv']]))
        
        # Impact parameter; despite its name, ig['limb_dark_b'] is its starting value.
        # NOTE(review): `r` is used both as `ror` here and as `r` in
        # get_light_curve below -- confirm the intended units once r_star != 1.
        b = xo.distributions.ImpactParameter("b", ror=r, shape=1, testval=ig['limb_dark_b'])
        
        
        # Stellar radius: wide normal prior (sd=10) centred on ig['r_star']
        rs = pm.Normal("r_star", mu=ig['r_star'], sd=10.0, shape=1)
        # Disabled alternative: also sample the stellar mass
        #ms = pm.Normal("m_star", mu=ig['m_star'], sd=10.0, shape=1)

        # Set up a Keplerian orbit for the planets (stellar mass fixed to 1.0)
        #orbit = xo.orbits.KeplerianOrbit(period=period, t0=t0, b=b, r_star=rs, m_star=ms)
        orbit = xo.orbits.KeplerianOrbit(period=period, t0=t0, b=b, r_star=rs, m_star=1.0)

        # Compute the model light curve using starry
        light_curves = xo.LimbDarkLightCurve(u).get_light_curve(orbit=orbit, r=r, t=t)
        
        # Sum over the last axis and add the baseline
        light_curve = pm.math.sum(light_curves, axis=-1) + mean

        # Here we track the value of the model light curve for plotting purposes
        pm.Deterministic("light_curves", light_curves)

        # Evaluate the model light curve numerically (presumably at the
        # model's starting point -- confirm against the exoplanet docs).
        # No noise is added, so this is not a simulated dataset.
        sim = xo.eval_in_model(light_curve)
        #sim += yerr * np.random.randn(len(sim))

        # The likelihood function assuming known Gaussian uncertainty
        pm.Normal("obs", mu=light_curve, sd=ferr, observed=f)

        # Fit for the maximum a posteriori parameters given the observed
        # dataset, starting from the model's test point
        #input_kwargs = {'tol':1}

        map_soln = xo.optimize(start=model.test_point)# optional: , **input_kwargs)

            
    return model, pm, map_soln, sim


def plot_mcmc_model(t, f, ferr, pm, ig, plts):
    """Plot the transit model evaluated from the initial guesses in ``ig``.

    Builds the same model as ``create_mcmc_model`` (without the likelihood
    and without optimizing), evaluates its light curve and draws the
    requested figures so the starting values can be tuned by eye.

    Parameters
    ----------
    t, f, ferr : times, fluxes and flux uncertainties of the light curve.
    pm : ignored -- rebound by the local ``import pymc3 as pm`` below.
    ig : dict of initial guesses (same keys as for ``create_mcmc_model``).
    plts : container of plot ids; 1 = full time series with the model,
        2 = data folded on ig['P'][0] and ig['t0'] with the model.

    Returns
    -------
    None
    """
    
    # NOTE: shadows the ``pm`` parameter.
    import pymc3 as pm
    
    with pm.Model() as model:

        # The baseline flux
        mean = pm.Normal("mean", mu=ig['cont'], sd=ig['cont_sd'])

        # The time of a reference transit for each planet
        t0 = pm.Normal("t0", mu=ig['t0'], sd=ig['t0_sd'], shape=1)

        # The log period; also tracking the period itself
        logP = pm.Normal("logP", mu=np.log(ig['P']), sd=ig['P_sd'], shape=1)
        
        period = pm.Deterministic("period", pm.math.exp(logP))

        # The Kipping (2013) parameterization for quadratic limb darkening parameters
        u = xo.distributions.QuadLimbDark("u", testval=np.array([ig['limb_dark_u']]))

        r = pm.Uniform("r", lower=ig['r_ratio_lb'], 
                       upper=ig['r_ratio_ub'], 
                       shape=1, 
                       testval=np.array([ig['r_ratio_sv']]))
        
        # Impact parameter; despite its name, ig['limb_dark_b'] is its starting value
        b = xo.distributions.ImpactParameter("b", ror=r, shape=1, testval=ig['limb_dark_b'])
        
        
        # Stellar radius: wide normal prior (sd=10) centred on ig['r_star']
        rs = pm.Normal("r_star", mu=ig['r_star'], sd=10.0, shape=1)
        # Disabled alternative: also sample the stellar mass
        #ms = pm.Normal("m_star", mu=ig['m_star'], sd=10.0, shape=1)

        # Set up a Keplerian orbit for the planets (stellar mass fixed to 1.0)
        #orbit = xo.orbits.KeplerianOrbit(period=period, t0=t0, b=b, r_star=rs, m_star=ms)
        orbit = xo.orbits.KeplerianOrbit(period=period, t0=t0, b=b, r_star=rs, m_star=1.0)

        # Compute the model light curve using starry
        light_curves = xo.LimbDarkLightCurve(u).get_light_curve(orbit=orbit, r=r, t=t)
        
        light_curve = pm.math.sum(light_curves, axis=-1) + mean

        # Here we track the value of the model light curve for plotting purposes
        pm.Deterministic("light_curves", light_curves)

        # Evaluate the model light curve numerically (presumably at the
        # model's starting point -- confirm against the exoplanet docs)
        sim = xo.eval_in_model(light_curve)
        
        # Plot 1: whole time series with the model on top.
        # NOTE: the title says "map model", but no optimization is run in this function.
        if 1 in plts:
            plt.figure(figsize=(5, 5))
            plt.plot(t, f, ".k", ms=4, label="data")
            plt.plot(t, sim, lw=1, label="companion")
            plt.xlim(t.min(), t.max())
            plt.ylabel("relative flux")
            plt.xlabel("time [days]")
            plt.legend(fontsize=10)
            plt.title("map model")
        
        
        # Plot 2: data folded on the guessed period, centred on the guessed t0
        if 2 in plts:
            plt.figure(figsize=(5, 5))
            p = ig['P'][0]
            # ig['t0'] is a one-element list; it broadcasts against the array t
            x_fold = (t - ig['t0'] + 0.5 * p) % p - 0.5 * p
            plt.errorbar(x_fold, f, yerr=ferr, fmt=".k", zorder=-1000)

            # Plot the folded model, sorted in phase and limited to +/- 1.6 around the transit
            inds = np.argsort(x_fold)
            inds = inds[np.abs(x_fold)[inds] < 1.6]
            plt.plot(x_fold[inds], sim[inds], color="m", label="model")
            plt.legend(fontsize=10, loc=4)
            # Alternative view of the full phase range: plt.xlim(-0.5 * p, 0.5 * p)
            plt.xlabel("time since transit [days]")
            plt.ylabel("relative flux")
            plt.xlim(-1.6, 1.6)

# View our solved period and t0 to make sure they make sense

In [20]:
p, t0

(31.32850053822185, 28.138432450729894)

In [21]:
type(p), type(t0)

(numpy.float64, numpy.float64)

In [22]:
type(lc.time[0]), type(lc.flux[0]), type(lc.flux_err[0])

(numpy.float64, numpy.float64, numpy.float64)

# Solving with MCMC requires us to give good starting guesses for the transit fit

Below are all of the values that a proper transit fit requires. The solver will try to solve for these, but since there are so many variables, it needs reasonable starting guesses from us; otherwise it will fail to solve properly. You should play with the cell below, changing the variables as needed until the fit matches the data closely.

Below are definitions for the variables and a tip on what alteration of that variable will do. The format is as follows:

* Variable Name - definition
    * What changing that variable will look like
    * Additional info

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

* 'cont' - baseline level. 
    * The average flux
    * Typically 1 since normalization sets it to 1.
    
    
* 'cont_sd' - Continuum standard deviation level. A solver parameter. 
    * You dont really need to touch this.
    
    
* 't0' - t0 [day]. Was solved in the BLS routine. Use the solved t0 here and add any corrections you need to it
    * Move the fit to the left or right
    
    
* 't0_sd' - t0 standard deviation level. A solver parameter. 
    * You dont really need to touch this.
    
    
* 'P' - orbital period [day]. Was solved in the BLS routine. Use the solved period here.
    * Changing this will change the quality of your light curve fold data. 
    * You probably shouldnt touch this since the solved period from the BLS routine is pretty robust when done correctly
    
    
* 'P_sd' - orbital period standard deviation level (applied to the log of the period in the model). A solver parameter. 
    * You dont really need to touch this.
    
    
* 'limb_dark_u' - Limb darkening parameter (u)
    * Changes the curvature at the edges and center of the transit fit
    * Lower values give lower curvature
    
    
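* 'limb_dark_b' - Impact parameter (b). Despite the key name, this is not a limb-darkening term: the model uses it as the starting value of the impact parameter
    * Changes the transit width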
    
    
* 'r_ratio_sv' - radius ratio (r_p / r_sun)
    * Changes the transit depth
    
    
* 'r_ratio_lb' - radius ratio lower bound
    * The solver wont try radius ratio values lower than this
    
    
* 'r_ratio_ub' - radius ratio upper bound
    * The solver wont try radius ratio values higher than this

In [23]:
# Active set of starting guesses (the `if 1:` / `if 0:` switches select which
# dictionary is used).
if 1:
    ig = {'cont': 1.0,        # Continuum (baseline flux) level
          'cont_sd': 1.0,     # Prior standard deviation of the continuum level
          't0':[t0 + 0.00],   # t0 [day]; add a manual correction to the BLS value here
          't0_sd':1.0,        # Prior standard deviation of t0
          'P':[p],            # period [day] from the BLS search
          'P_sd':1.0,         # Prior standard deviation of log(period) (the model samples log P)
          'limb_dark_u':0.195,# Limb-darkening start value - edge curvature 
          'limb_dark_b':0.7,  # Start value of the impact parameter b (not a limb-darkening term) - transit width
          'r_ratio_sv':0.070, # radius ratio (r_p / r_sun) - depth of transit
          'r_ratio_lb':0.001, # radius ratio lower bound
          'r_ratio_ub':1.0,   # radius ratio upper bound
          'r_star':1.0,       # radius of primary star (centre of its prior)
          'm_star':1.0}       # mass of primary star (currently not read by the model, which fixes m_star=1.0)

    
if 0:
    ig = {'cont': 1.0,        # Continuum level
          'cont_sd': 1.0,     # Continuum standard deviation level
          't0':[t0 + 0.00],   # t0 [day]
          't0_sd':1.0,        # t0 standard deviation level
          'P':[p],            # period [day]
          'P_sd':1.0,         # period standard deviation level
          'limb_dark_u':0.595,# LD param - edge curvature 
          'limb_dark_b':0.01,  # LD param - transit width
          'r_ratio_sv':0.058, # radius ratio (r_p / r_sun) - depth of transit
          'r_ratio_lb':0.001, # radius ratio lower bound
          'r_ratio_ub':1.0,   # radius ratio upper bound
          'r_star':2.2,       # radius of primary star
          'm_star':1.01}       # mass of primary star
    
    

if 0: # K-470
    ig = {'cont': 1.0,        # Continuum level
          'cont_sd': 1.0,     # Continuum standard deviation level
          't0':[t0 + 0.00],   # t0 [day]
          't0_sd':1.0,        # t0 standard deviation level
          'P':[p],            # period [day]
          'P_sd':1.0,         # period standard deviation level
          'limb_dark_u':0.595,# LD param - edge curvature 
          'limb_dark_b':0.0001,  # LD param - transit width
          'r_ratio_sv':0.098, # radius ratio (r_p / r_sun) - depth of transit
          'r_ratio_lb':0.001, # radius ratio lower bound
          'r_ratio_ub':1.0,   # radius ratio upper bound
          'r_star':1.2,       # radius of primary star
          'm_star':1.01}       # mass of primary star
    
plot_mcmc_model(lc.time, lc.flux, lc.flux_err, pm, ig, plts=[2])

  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])


<IPython.core.display.Javascript object>

# Create a model of our transit and do an initial maximum a posteriori (MAP) optimization of the parameters to prepare it for the MCMC solver

In [24]:
model, pm, map_soln, f_sim = create_mcmc_model(lc.time, lc.flux, lc.flux_err, pm, ig)

  rval = inputs[0].__getitem__(inputs[1:])
optimizing logp for variables: [r_star, b, r, u, logP, t0, mean]
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
  rval = inputs[0].__getitem__(inputs[1:])
269it [00:01, 187.27it/s, logp=2.100866e+05]
message: Desired error not necessarily achieved due to precision loss.
logp: 209372.05783148805 -> 210086.63426539078


# Plot definitions

In [25]:
def plot_pre_mcmc(t, f, ferr, map_soln):
    """Plot the optimized (pre-MCMC) solution for the single companion.

    Draws two figures: the full time series (data shifted down by 1) with the
    model light curve, and the data folded on the solved period and t0 with
    the model (plus baseline) within +/- 0.6 of the transit. The solved
    period and t0 are also printed.

    Parameters
    ----------
    t, f, ferr : times, fluxes and flux uncertainties of the light curve.
    map_soln : mapping with the keys "light_curves", "period", "t0" and
        "mean", as returned by the optimizer.
    """
    letter = "a"   # only one companion is modelled
    col = 0        # its column in map_soln["light_curves"]
    window = 0.6   # half-width of the folded view

    # ---- Figure 1: full time series -------------------------------------
    plt.figure(figsize=(7, 5))
    plt.plot(t, f-1, ".k", ms=4, label="data")
    plt.plot(t, map_soln["light_curves"][:, col], lw=1,
             label="planet {0}".format(letter))
    plt.xlim(t.min(), t.max())
    plt.ylabel("relative flux")
    plt.xlabel("time [days]")
    plt.legend(fontsize=10)
    plt.title("map model")

    # ---- Figure 2: folded transit ---------------------------------------
    plt.figure(figsize=(7, 5))

    p = np.median(map_soln["period"])
    t0 = np.median(map_soln["t0"])
    print(p, t0)

    # Median level of the model light curve, removed from the data.
    offset = np.median(map_soln["light_curves"][:, col])

    # Fold so that the transit sits at zero.
    half_p = 0.5 * p
    x_fold = (t - t0 + half_p) % p - half_p
    plt.errorbar(x_fold, f - offset, yerr=ferr, fmt=".k", label="data", zorder=-1000)

    # Model in phase order, restricted to the window around the transit.
    order = np.argsort(x_fold)
    inds = order[np.abs(x_fold[order]) < window]
    pred = map_soln["light_curves"][inds, col] + map_soln["mean"]
    plt.plot(x_fold[inds], pred, color="C1", label="model")

    # Period annotation in the lower-left corner.
    solved_period = map_soln["period"][col]
    txt = "period = {0:.4f} +/- {1:.4f} d".format(np.mean(solved_period),
                                                  np.std(solved_period))
    plt.annotate(txt,
                 (0, 0),
                 xycoords="axes fraction",
                 xytext=(5, 5),
                 textcoords="offset points",
                 ha="left",
                 va="bottom",
                 fontsize=12)

    plt.legend(fontsize=10, loc=4)
    plt.xlabel("time since transit [days]")
    plt.ylabel("relative flux")
    plt.title("planet {0}".format(letter))
    plt.xlim(-window, window)

# See the initial (MAP) optimizer results before solving with MCMC

If the fit failed here (looks bad), you need to try to give better starting value estimates

In [26]:
plot_pre_mcmc(lc.time, lc.flux, lc.flux_err, map_soln)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

31.330506683061547 28.10832315135518


# Solve the transit with the MCMC solver!

In [None]:
# Seed numpy's global generator before sampling.
# NOTE(review): the chains run as separate processes (see the sampler output),
# so this seed may not make the draws reproducible; passing random_seed= to
# pm.sample is presumably the reliable way -- confirm.
np.random.seed(42)

with model:
    # 4 chains, each with 3000 tuning steps followed by 3000 kept draws,
    # all started from the optimized solution.
    trace = pm.sample(tune=3000,
                      draws=3000,
                      start=map_soln,
                      chains=4,
                      step=xo.get_dense_nuts_step(target_accept=0.9))

  rval = inputs[0].__getitem__(inputs[1:])
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [r_star, b, r, u, logP, t0, mean]
  rval = inputs[0].__getitem__(inputs[1:])
Sampling 4 chains, 12 divergences: 100%|██████████| 24000/24000 [04:49<00:00, 82.85draws/s] 
  rval = inputs[0].__getitem__(inputs[1:])


# View the results of the solver

We can see the "mean" (the posterior mean, i.e. our best estimate) and the "sd" (uncertainty) of the values we solved

In [262]:
mcmc_summary = pm.summary(trace, var_names=['r_star', "period", "t0", "r", "b", "u", "mean"])

mcmc_summary

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_mean,ess_sd,ess_bulk,ess_tail,r_hat
r_star[0],1.474,0.014,1.447,1.501,0.0,0.0,6032.0,6032.0,6174.0,5510.0,1.0
period[0],24.669,0.0,24.669,24.669,0.0,0.0,16808.0,16203.0,16235.0,9143.0,1.0
t0[0],9.519,0.0,9.518,9.519,0.0,0.0,15338.0,15338.0,15358.0,9146.0,1.0
r[0],0.12,0.002,0.117,0.123,0.0,0.0,5935.0,5935.0,6042.0,5359.0,1.0
b[0],0.573,0.014,0.547,0.598,0.0,0.0,5756.0,5756.0,6049.0,5317.0,1.0
u[0],0.285,0.055,0.178,0.386,0.001,0.0,9399.0,9399.0,9349.0,7743.0,1.0
u[1],0.288,0.09,0.121,0.457,0.001,0.001,8533.0,7869.0,8532.0,7670.0,1.0
mean,1.0,0.0,1.0,1.0,0.0,0.0,16527.0,16440.0,16437.0,8402.0,1.0


# View the probability distributions for the solved parameters

These plots show the probability distribution of each parameter, and they show the likelihood correlations between each pair of parameters

In [263]:
import corner

# Corner plot 1: joint and marginal distributions of period and radius only.
samples = pm.trace_to_dataframe(trace, varnames=["period", "r"])
# Disabled: overplot reference values evaluated at the model test point
#truth = np.concatenate(xo.eval_in_model([period, r], model.test_point, model=model))

figure = corner.corner(samples, labels=["period 1", "radius 1"])

figure.set_dpi(70)

    
# Corner plot 2: the same for all sampled parameters except limb darkening.
samples = pm.trace_to_dataframe(trace, varnames=['r_star', 'period', 't0', "r", 'b', 'mean'])
# Disabled: overplot reference values evaluated at the model test point
#truth = np.concatenate(xo.eval_in_model([period, r], model.test_point, model=model))

figure = corner.corner(samples, labels=['r_star', 'period 1', 't0', "radius 1", 'b', 'mean'])

# Optional fixed figure size: figure.set_size_inches(8, 8)
figure.set_dpi(70)

# Each 2-D panel shows the joint distribution of two parameters; its centre marks the most likely values.
# A roughly circular contour means the two parameters are essentially uncorrelated;
# an elongated, tilted contour would indicate a correlation between them.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Define a function to plot the highest likelihood parameters as a fit on our data

In [264]:
def plot_mcmc_fit(t, f, ferr, trace):
    """Plot the folded data with the median posterior model of the companion.

    The data are folded on the posterior-median period and t0; the model is
    the per-time median over all samples of light curve plus baseline, shown
    within +/- 0.6 of the transit. The figure is annotated with the mean and
    standard deviation of the sampled period.

    Parameters
    ----------
    t, f, ferr : times, fluxes and flux uncertainties of the light curve.
    trace : sampler output indexable by "period", "t0", "light_curves" and
        "mean".
    """
    letter = "a"   # only one companion is modelled
    col = 0        # its index along the planet axis
    window = 0.6   # half-width of the folded view

    plt.figure(figsize=(7, 5))

    # Posterior medians define the fold.
    period_samples = trace["period"][:, col]
    p = np.median(period_samples)
    t0 = np.median(trace["t0"][:, col])

    # Median level of the sampled light curves, removed from the data.
    offset = np.median(trace["light_curves"][:, :, 0])

    half_p = 0.5 * p
    x_fold = (t - t0 + half_p) % p - half_p
    plt.errorbar(x_fold, f - offset, yerr=ferr, fmt=".k", label="data", zorder=-1000)

    # Median model in phase order, restricted to the window around the transit.
    order = np.argsort(x_fold)
    inds = order[np.abs(x_fold[order]) < window]
    model_samples = trace["light_curves"][:, inds, col] + trace["mean"][:, None]
    plt.plot(x_fold[inds], np.median(model_samples, axis=0), color="C1", label="model")

    # Period annotation in the lower-left corner.
    txt = "period = {0:.4f} +/- {1:.4f} d".format(
        np.mean(period_samples), np.std(period_samples)
    )
    plt.annotate(
        txt,
        (0, 0),
        xycoords="axes fraction",
        xytext=(5, 5),
        textcoords="offset points",
        ha="left",
        va="bottom",
        fontsize=12,
    )

    plt.legend(fontsize=10, loc=4)
    plt.xlabel("time since transit [days]")
    plt.ylabel("relative flux")
    plt.title("planet {0}".format(letter))
    plt.xlim(-window, window)

# Plot the best fit for our data

In [265]:
plot_mcmc_fit(lc.time, lc.flux, lc.flux_err, trace)

#plt.ylim(None, 1+1e-3)

<IPython.core.display.Javascript object>