# Scrapbook

This is a deconstruction of the initial setup for our emcee gp optimizer. 

Let's start by importing all the functions and packages we might need.

In [1]:
%matplotlib notebook 
#restart the kernel if switching from inline to notebook
import matplotlib.pyplot as plt
plt.style.use('seaborn-white')
import corner

import numpy as np
import scipy.stats
import pandas as pd

import emcee
import george

import gp_sandbox as cgp
from subsample import subsample

Now we can get our asteroid time and data info from our text files. We will then be able to sample it using Daniela's subsample method.

In [2]:
asteroid = 1388
txt = '../data/'+str(asteroid)+'_lc_49627_to_49787.txt'

data = pd.read_csv(txt, delimiter=' ',
                 header=None, names=['time','flux'], dtype={'time':float, 'flux':float})

days, delay = 5, 2

# convert days to points
span = 2880 * days
start_pt = 2880 * delay

time = np.array(data.time[start_pt:span+start_pt])
flux = np.array(data.flux[start_pt:span+start_pt])

f_err = np.ones_like(flux) * np.std(flux)/10.0

tsample, fsample, flux_err = subsample(time, flux, flux_err=f_err, npoints=100, kind="random")

Let's check and make sure the flux amplitude looks reasonable.

In [3]:
best_log_amp = np.log(fsample.var())
np.exp(best_log_amp)

0.010171214528505116

Now we can set up what we think our parameter values might be. We can use the log_amp if it seems reasonable.

In [4]:
params = [1, best_log_amp, 1, -1.38]
print(params)

[1, -4.5881936533867842, 1, -1.38]


In [5]:
mean, log_amp, gamma, log_period = params

amp = np.exp(log_amp)
kernel = amp * george.kernels.ExpSine2Kernel(gamma = gamma, log_period = log_period)
gp = george.GP(kernel)#, fit_mean=True, mean=mean)
gp.compute(tsample, flux_err)
print(gp.parameter_names)
print(gp.parameter_vector)


gp.get_parameter_dict()

('mean:value', 'white_noise:value', 'kernel:k1:log_constant', 'kernel:k2:gamma', 'kernel:k2:log_period')
[  0.         -27.40787756  -4.58819365   1.          -1.38      ]


OrderedDict([('kernel:k1:log_constant', -4.5881936533867842),
             ('kernel:k2:gamma', 1.0),
             ('kernel:k2:log_period', -1.3799999999999999)])

In [6]:
# compute kernel with given time and error
gp.compute(tsample, flux_err)
gp.get_parameter_dict()

OrderedDict([('kernel:k1:log_constant', -4.5881936533867842),
             ('kernel:k2:gamma', 1.0),
             ('kernel:k2:log_period', -1.3799999999999999)])

In [41]:
x0 = gp.get_parameter_vector()
type(x0)
x0

array([-4.58819365,  1.        , -1.38      ])

In [42]:
bnds = ([-10.,30.],[0.,100.],[-3.178,0.])

In [45]:
from scipy.optimize import minimize

def neg_ln_like(p):
    gp.set_parameter_vector(p)
    try:
        negloglike =  -gp.log_likelihood(fsample)
        return negloglike
    # throw exception if you run into an error
    except scipy.linalg.LinAlgError:
        return np.inf

def grad_neg_ln_like(p):
    gp.set_parameter_vector(p)
    try:
        grad_loglike =  -gp.grad_log_likelihood(fsample)
        return grad_loglike
    # throw exception if you run into an error
    except scipy.linalg.LinAlgError:
        return np.inf

result = minimize(fun=neg_ln_like, x0=x0, method='L-BFGS-B', jac=grad_neg_ln_like)#, bounds=bnds)
print(result.hess_inv.todense())
print(result.x)
#gp.set_parameter_vector(result.x)

#if (print_result == True):
#    print(result)

[[  2.88868296e+05  -1.06690561e+05  -3.59463180e+02]
 [ -1.06690561e+05   3.94051114e+04   1.32764222e+02]
 [ -3.59463180e+02   1.32764222e+02   4.47311297e-01]]
[-1.01439405  0.545576   -1.38948455]


In [46]:
gp.set_parameter_vector(result.x)

In [48]:
p0 = np.random.multivariate_normal(mean=result.x, cov=result.hess_inv, size=100)

ValueError: cov must be 2 dimensional and square