In [1]:
import nbimporter

# Import main functionality
from GPDM_direct_fixedpoints import *

plotly.offline.init_notebook_mode(connected=True)

# Saving outputs and timing
import pickle, datetime, time

from joblib import Parallel, delayed
import multiprocessing
import itertools

num_cores = multiprocessing.cpu_count()

# %run GPDM_direct_fixedpoints.ipynb
# %run GPDM_Examples.ipynb

Importing Jupyter notebook from GPDM_direct_fixedpoints.ipynb



Matplotlib is building the font cache using fc-list. This may take a moment.



# Experimental setup

1. We first generate training and test data with the same parameters.

2. We then fit GP models with a fixed number of parameters, but vary the number of fixed points versus basic inducing points

3. We compare the performance of the GP models against one another, as well as against linear autoregression.

## 1. Generate training and test data

In [None]:
def bifurc_trans_func(x, slope):
    """Noise-free state transition of the 1-D bifurcation system.

    Evaluates ``slope*x - x**3`` elementwise, so ``x`` may be a scalar or a
    numpy array. Solving ``x = slope*x - x**3`` gives the fixed points of
    this map: ``x = 0`` always, plus ``x = +/-sqrt(slope - 1)`` when
    ``slope > 1``.

    Args:
        x: current state(s).
        slope: coefficient of the linear term.

    Returns:
        The next state(s) before any process noise is added.
    """
    linear_part = slope * x
    cubic_part = x ** 3
    return linear_part - cubic_part

In [None]:
def bifurc_draw_trial(T, mu_0_0, Sigma_0_0, slope, maxval,Sigma_eps, Sigma_nu, **kwargs):
    """Simulate one trial of the noisy 1-D bifurcation system.

    The initial state is drawn as ``mu_0_0 + sqrt(Sigma_0_0)*randn``. Each of
    the T steps then applies ``bifurc_trans_func`` to the previous state and
    adds process noise with variance ``Sigma_eps``; the observation is the
    state plus noise with variance ``Sigma_nu``.

    Randomness comes from the global ``np.random`` state, so seed it before
    calling for reproducible trials.

    Args:
        T: number of time steps to simulate.
        mu_0_0: mean of the initial state.
        Sigma_0_0: variance of the initial state.
        slope: linear coefficient passed to ``bifurc_trans_func``.
        maxval: accepted so the whole parameter dict can be splatted in;
            not used here.
        Sigma_eps: process noise variance.
        Sigma_nu: observation noise variance.
        **kwargs: ignored (lets callers pass extra dict entries such as
            ``rseed`` and ``Ny``).

    Returns:
        Tuple ``(x, y)`` of arrays with shape ``(1, T)``: latent states and
        noisy observations. The initial state itself is not stored.
    """
    process_std = np.sqrt(Sigma_eps)
    obs_std = np.sqrt(Sigma_nu)

    x = np.zeros((1, T))
    y = np.zeros((1, T))

    # Draw order matters for reproducibility under a fixed seed:
    # initial state first, then (process noise, observation noise) per step.
    prev_state = mu_0_0 + np.sqrt(Sigma_0_0)*np.random.randn(1)
    for t in range(T):
        x[:, t] = bifurc_trans_func(prev_state, slope) + process_std*np.random.randn(1)
        y[:, t] = x[:, t] + obs_std*np.random.randn(1)
        prev_state = x[:, t]

    return (x, y)

In [None]:
# Ground-truth settings of the simulated bifurcation system. The whole dict is
# splatted into bifurc_draw_trial, so the key names must match its parameters.
bifurc_params = OrderedDict([
    ("rseed", 1234),         # Happens to create a balanced training set in the first 8 trials in y
    ("Ny", 500),             # number of trials to simulate
    ("T", 20),               # time steps per trial
    ("slope", 1.15),         # linear coefficient of the transition function
    ("maxval", 1.0),         # not used by the active simulation code
    ("Sigma_eps", 0.2**2),   # process noise variance
    ("Sigma_nu", 0.05**2),   # observation noise variance
    ("mu_0_0", 0.),          # mean of the initial state
    ("Sigma_0_0", 1e-6),     # variance of the initial state
])

  
def bifurc_create_data(bifurc_params):
    """Simulate ``bifurc_params["Ny"]`` trials and stack them along a trial axis.

    The global numpy RNG is re-seeded with ``bifurc_params["rseed"]`` on every
    call, so identical parameters always reproduce the same data set (and any
    random state the caller had is overwritten).

    Args:
        bifurc_params: mapping that is splatted into ``bifurc_draw_trial``;
            it must also provide ``"rseed"`` and ``"Ny"``.

    Returns:
        ``[x, y]``: latent states and observations, each of shape
        ``(1, T, Ny)`` (state dimension x time x trial).
    """
    np.random.seed(bifurc_params["rseed"])

    # Trials must be drawn one after another: each consumes the shared RNG.
    trials = [bifurc_draw_trial(**bifurc_params) for _ in range(bifurc_params["Ny"])]

    x = np.concatenate([latent[:, :, None] for latent, _ in trials], axis=2)
    y = np.concatenate([observed[:, :, None] for _, observed in trials], axis=2)

    # Note: an earlier, disabled post-processing step swapped trials around so
    # that consecutive trials alternated between the positive and the negative
    # branch. It is not applied; trials are returned in the order drawn.
    return [x, y]

# Simulate the data set and take a first look at it.
[x, y] = bifurc_create_data(bifurc_params)

# One line trace per trial for the first 128 observed trajectories.
plots_by_run = []
for v in range(128):
    plots_by_run.append(
        plt_type.Scatter(x=np.squeeze(np.arange(bifurc_params["T"])),
                      y=np.squeeze(y[:,:,v]),
                      mode='lines')
    )


# Baseline error of the linear AR(1) predictor.
# NOTE(review): judging by the other call site in this notebook, the first
# argument is the data to predict and the second the data used for fitting
# (here trials 200+ vs. the first 50) -- confirm against pred_lin_AR1.
# Call form of print so the cell parses under both Python 2 and Python 3.
print(rmse(pred_lin_AR1(y[:,:,200:], y[:,:,:50], cutoff=None), y[:,:,200:]))


# True transition function on a grid, with a +/- 2 process-noise-std band and
# the identity line (crossings of the identity are the fixed points).
xstar = np.atleast_2d(np.arange(-2.5,2.5,0.05))
true_tr_vals = bifurc_trans_func(xstar, bifurc_params['slope'])

plt([plt_type.Scatter(x=np.squeeze(xstar),
                  y=np.squeeze(true_tr_vals), mode='markers', name = 'True trans. f.',
                  marker=dict(color='orange')),
 plt_type.Scatter(x=np.squeeze(xstar),
                  y=np.squeeze(true_tr_vals+2.*np.sqrt(bifurc_params["Sigma_eps"])), mode='markers', name = 'True trans. f.',
                  marker=dict(color='orange', size=2)),
 plt_type.Scatter(x=np.squeeze(xstar),
                  y=np.squeeze(true_tr_vals-2.*np.sqrt(bifurc_params["Sigma_eps"])), mode='markers', name = 'True trans. f.',
                  marker=dict(color='orange', size=2)),
plt_type.Scatter(x=np.squeeze(xstar),
                  y=np.squeeze(xstar), mode='markers', name = 'x=x',
                  marker=dict(color='blue'))
     ])

plt(plots_by_run)

In [None]:
def bifurc_callback_plot_external(pvec_partial, 
                                  opt_params, init_paramvec, transforms, dict_ind, dict_shape,
                                  bifurc_params, trainingData=None
                                 ):
    """Plot the learned GP transition function against the true one.

    Rebuilds the full parameter dictionary from the optimiser's partial
    vector, evaluates the GP posterior on a grid spanning the inducing point
    locations (padded by 0.5 on each side) and plots: posterior mean with a
    +/- 2 std band, the true transition function with a +/- 2 process-noise
    std band, the inducing point locations and values, and the fixed points
    (drawn on the identity line, larger marker = smaller ``Sigma_s``).

    Args:
        pvec_partial: values of the optimised entries of the parameter vector.
        opt_params: indices of those entries within ``init_paramvec``.
        init_paramvec: full parameter vector providing the non-optimised entries.
        transforms, dict_ind, dict_shape: bookkeeping forwarded to
            ``vec_to_params`` (as produced by ``params_to_vec``).
        bifurc_params: ground-truth settings; ``'slope'`` and ``'Sigma_eps'``
            are read here.
        trainingData: optional array indexed ``[dim, time, trial]``; if given,
            consecutive pairs ``(y_t, y_{t+1})`` are overlaid as black dots.

    Returns:
        None. The figure is shown through ``plt``.
    """
    paramvec = replace_params(pvec_partial, opt_params, init_paramvec)
    paramdict = vec_to_params(paramvec, dict_ind, dict_shape, transforms)

    # Unpack the usual parameters. This relies on the insertion order of
    # paramdict; list() makes the slice valid on Python 3 dict views as well.
    (Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u, lengthscales, kernel_variance, s, J)  = \
        list(paramdict.values())[:12]

    # Deliberate interactive debugging hook: NaN lengthscales mean the
    # optimisation has diverged.
    if np.any(np.isnan(lengthscales)):
        set_trace()

    # Deal with the extra possible parameters
    Sigma_s = paramdict['Sigma_s'] if 'Sigma_s' in paramdict.keys() else None
    Sigma_J = paramdict['Sigma_J'] if 'Sigma_J' in paramdict.keys() else None

    # Grid on which the transition function is evaluated
    xstar = np.atleast_2d(np.arange(np.min(z)-0.5,np.max(z)+0.5,0.05))

    L, targets, params = fp_get_static_K(eta=kernel_variance, lengthscales=lengthscales, z=z, u=u, s=s, J=J, 
                                         sig_eps=Sigma_eps, sig_u=Sigma_u, sig_s=Sigma_s, sig_J = Sigma_J)
    mu_star, sig_star, K_pred = fp_predict(xstar, L, targets, params)

    # Get true function values
    true_tr_vals = bifurc_trans_func(xstar, bifurc_params['slope'])

    # Previously Sigma_s.size was read unconditionally, which raised when
    # Sigma_s was absent (None), and equal variances produced NaN sizes.
    FP_SIZE = _fixed_point_marker_sizes(Sigma_s)

    gp_std = np.squeeze(np.sqrt(sig_star))
    true_std = np.sqrt(bifurc_params["Sigma_eps"])

    pltArray= [plt_type.Scatter(x=np.squeeze(xstar), y=np.squeeze(mu_star), mode='markers', name='GP post. mean',
                         marker=dict(color='blue')),
         plt_type.Scatter(x=np.squeeze(xstar), y=np.squeeze(mu_star)+2.*gp_std, mode='markers', 
                         marker=dict(size=2, color='blue')),
         plt_type.Scatter(x=np.squeeze(xstar), y=np.squeeze(mu_star)-2.*gp_std, mode='markers', 
                         marker=dict(size=2, color='blue')),      
         plt_type.Scatter(x=np.squeeze(xstar), 
                          y=np.squeeze(true_tr_vals), mode='markers', name = 'True trans. f.',
                          marker=dict(color='orange')),
         plt_type.Scatter(x=np.squeeze(xstar), 
                          y=np.squeeze(true_tr_vals+2.*true_std), mode='markers', name = 'True trans. f.',
                          marker=dict(color='orange', size=2)),
         plt_type.Scatter(x=np.squeeze(xstar), 
                          y=np.squeeze(true_tr_vals-2.*true_std), mode='markers', name = 'True trans. f.',
                          marker=dict(color='orange', size=2)),
         # Inducing point locations are drawn on a horizontal line at y=-2
         plt_type.Scatter(x=np.squeeze(z), y=np.squeeze(-2.0*np.ones_like(z)), mode='markers', marker=dict(size=10),
                         name = 'Ind point loc'),
         plt_type.Scatter(x=np.squeeze(z), y=np.squeeze(u), mode='markers', marker=dict(size=10),
                         name = 'Ind point val'),
         # Fixed points satisfy f(s)=s, so they are drawn on the identity line
         plt_type.Scatter(x=np.atleast_1d(np.squeeze(s)), y=np.atleast_1d(np.squeeze(s)), mode='markers', marker=dict(size=FP_SIZE),
                         name = 'Fixed point')
        ]

    if trainingData is not None:
        # Scatter of (value at t, value at t+1) over all trials
        trainingData_t = np.reshape(trainingData[:,1:,:],(1,-1))
        trainingData_t1 = np.reshape(trainingData[:,:-1,:],(1,-1))
        pltArray.append(plt_type.Scatter(x=np.squeeze(trainingData_t1), y=np.squeeze(trainingData_t),
                                        mode='markers', marker=dict(size=4, color='black'),
                                         name='Training data'
                                        ))

    plt(pltArray)


def _fixed_point_marker_sizes(Sigma_s, min_size=8., size_range=12., default_size=10.):
    """Map fixed-point variances to marker sizes in [min_size, min_size+size_range].

    Sizes grow with ``-log(Sigma_s)``, i.e. the smallest variance gets the
    largest marker. ``default_size`` is returned when there is nothing to
    rank: ``Sigma_s`` is None, has at most one entry, or all entries give the
    same (or a non-finite) spread.
    """
    if Sigma_s is None or Sigma_s.size <= 1:
        return default_size

    neg_log_var = -np.squeeze(np.log(Sigma_s))
    neg_log_var = neg_log_var - np.min(neg_log_var)
    spread = np.max(neg_log_var)
    if not spread > 0:
        # Equal variances would give 0/0 = NaN marker sizes.
        return default_size
    return (neg_log_var/spread)*size_range + min_size

## 2. Fitting GP models to varying slopes

In [None]:
# Base settings for the slope sweep. "slope" is only a placeholder here: the
# sweep sets its own value for every fit.
bifurc_params = OrderedDict([
    ("rseed", 1234),         # Happens to create a balanced training set in the first 8 trials in y
    ("Ny", 500),             # number of trials to simulate
    ("T", 20),               # time steps per trial
    ("slope", 0.25),         # linear coefficient of the transition function
    ("maxval", 1.0),         # not used by the active simulation code
    ("Sigma_eps", 0.2**2),   # process noise variance (alternative setting used before: 0.1**2)
    ("Sigma_nu", 0.05**2),   # observation noise variance
    ("mu_0_0", 0.),          # mean of the initial state
    ("Sigma_0_0", 1e-6),     # variance of the initial state
])

In [None]:
def fit_GP(bifurc_params, num_fixed_points, num_trials, batchnum, start_time, callback_plot=False, return_init=False):
    """Fit the fixed-point GP dynamics model to one batch of simulated trials.

    The data set is regenerated from ``bifurc_params``; trials
    ``[batchnum*num_trials, (batchnum+1)*num_trials)`` form the training set.
    Parameters are initialised with ``init_params``, the observation noise is
    fixed to its true value, and all parameters except ``C`` and ``Sigma_nu``
    are optimised with ``adamOptimize`` (minibatch gradients, full objective
    for reporting). After each outer iteration the results are pickled to
    ``Experiment_1d_bifurc_results/bifurc_1d_<start_time>_..._.pkl``.

    Args:
        bifurc_params: simulation settings (see ``bifurc_create_data``);
            ``'rseed'``, ``'Sigma_nu'`` and ``'slope'`` are also read here.
        num_fixed_points: number of fixed points in the model (``Ns``).
        num_trials: number of trials in the training batch.
        batchnum: index of the training batch within the simulated trials.
        start_time: string identifying the run; becomes part of the file name.
        callback_plot: if True, print progress and plot the current fit after
            each outer iteration.
        return_init: if True, skip the optimisation and return the initial
            state instead (nothing is written to disk).

    Returns:
        If ``return_init`` is True:
        ``[cur_pvec, opt_params, init_paramvec, transforms, dict_ind, dict_shape]``.
        Otherwise the list of optimiser results, one per outer iteration
        (the same list that is stored in the pickle).
    """
    import os  # local import: only needed to create the output directory

    # Set number of parameters
    D = 1                    # state dimension
    Ns = num_fixed_points    # number of fixed points
    Nz = 16                  # number of inducing points

    [x,y] = bifurc_create_data(bifurc_params)

    # Fix the training data
    batch = slice(batchnum*num_trials, batchnum*num_trials+num_trials)
    y_train = y[:,:,batch]
    x_train = x[:,:,batch]

    # Separate seed for the initialisation (bifurc_create_data has just
    # re-seeded the generator with 'rseed').
    np.random.seed(bifurc_params['rseed']*2)

    #######################################################
    # Initialise the parameters
    paramdict = init_params(y_train, D, Nz, Ns, grad_sigma=True)

    # Fix noise to true value
    paramdict['Sigma_nu'] = bifurc_params['Sigma_nu'] * np.ones_like(paramdict['Sigma_nu'])

    # Add transformations for certain parameters 
    # (Note that the parameter indices may be changed by transforms! (for cholesky repres of matrices, "SquareMatrix" type))
    transforms = OrderedDict()
    for par in ['Sigma_0_0', 'Sigma_u', 'Sigma_s', 'Sigma_J', 'lengthscales', 'Sigma_eps', 'Sigma_nu', 'kernel_variance']:
        transforms[par] = {'type': "Square"}

    # Create vectorised and transformed representation
    (init_paramvec, dict_ind, dict_shape) = params_to_vec(paramdict, transforms=transforms)

    #######################################################
    # Optimise only certain elements of paramvec: everything except C and
    # Sigma_nu, which keep their initial values.
    opt_params = np.arange(init_paramvec.shape[0])
    opt_params = np.delete(opt_params, np.hstack([dict_ind['C'], dict_ind['Sigma_nu']]))
    cur_pvec = init_paramvec[opt_params]

    #######################################################
    # Bounds: one unbounded (None, None) pair per optimised parameter. The
    # Adam optimiser below does not use them; they are kept because they are
    # part of the saved record (they were used by an earlier L-BFGS-B setup).
    bnds = tuple((None, None) for _ in opt_params)

    #######################################################
    # Priors
    priors = []

    # Add prior to ensure inducing points AND fixed points are smooth
    cur_prior = {}
    cur_prior['type'] = "InducingSmooth_and_DPP"
    cur_prior['metadata'] = {}
    def unpack_dict_tmp(pdict):
        kernelparams = {'lengthscales': pdict['lengthscales'], 'kernel_variance': pdict['kernel_variance']}
        # Return the parameters we want in the required format (joint smoothness of (z-u) and s).
        # A fixed point s acts as an extra inducing point located at s with value s.
        return (np.concatenate([pdict['u'], pdict['s']],axis=1), 
            np.concatenate([pdict['z'], pdict['s']],axis=1),
            np.concatenate([pdict['Sigma_u'], pdict['Sigma_s']]),
            kernelparams)
    cur_prior['metadata']['unpack_dict'] = unpack_dict_tmp
    cur_prior['metadata']['kernel_func'] = RBF
    cur_prior['metadata']['prior_weight_Smooth'] = 1e0
    cur_prior['metadata']['prior_weight_DPP'] = 1e0
    priors.append(cur_prior)

    # Note: variants with separate smoothness priors for the inducing points
    # and the fixed points, and a LogGamma prior on Sigma_s / Sigma_J, were
    # tried before and are not active.

    if return_init:
        return [cur_pvec, opt_params, init_paramvec, transforms, dict_ind, dict_shape]

    #######################################################
    # Prepare the optimisation: full-data objective for reporting, minibatch
    # gradient for the updates.
    f_objective = lambda pvec_partial: (time_full_iter(replace_params(pvec_partial, opt_params, init_paramvec), 
                                                y_train, dict_ind, dict_shape, 
                                                transforms=transforms,
                                                priors=priors)[0])
    f_minibatch = lambda pvec_partial: (minibatch_iter(replace_params(pvec_partial, opt_params, init_paramvec), 
                                                y_train, dict_ind, dict_shape, 
                                                transforms=transforms,
                                                priors=priors,
                                                minibatch_size=4)[0])
    f_minibatch_with_grad = grad(f_minibatch, argnum=0)

    # File name of the saved results.
    # NOTE: the slope label is np.int32(slope*100), which truncates. Products
    # such as 1.15*100 can land just below the integer in floating point, so
    # the label may read one less than expected (e.g. 114). This is left
    # unchanged on purpose: load_GP_fit builds the name the same way and
    # existing result files depend on it.
    expr_fname_params = "_varyslope_%0.3d" % (np.int32(bifurc_params["slope"]*100))
    save_fname_params = "_%0.2d_fix_%0.3d_trials_batch_%0.2d" % (num_fixed_points, num_trials, batchnum)
    save_fname = "Experiment_1d_bifurc_results/bifurc_1d_" + start_time + save_fname_params + expr_fname_params + ".pkl"

    # Make sure the output directory exists BEFORE the expensive optimisation,
    # so a missing folder cannot cost a finished run.
    save_dir = os.path.dirname(save_fname)
    if save_dir and not os.path.isdir(save_dir):
        try:
            os.makedirs(save_dir)
        except OSError:
            # A parallel worker may have created it in the meantime.
            if not os.path.isdir(save_dir):
                raise

    init_time = time.time()
    max_time = 6.0*3600 # Maximum iteration time in seconds, break if reached
    all_results = []

    # By iterating the optimiser within a for cycle, we can save all
    # intermediate results and set ending times. Currently one outer
    # iteration of 500 Adam steps is run; the time limit and the stopping
    # rule below are only checked between outer iterations.
    for it in range(1):
        np.random.seed(1234+it)
        result = adamOptimize(f_objective, f_minibatch_with_grad, cur_pvec,
                          options={'maxiter':500, 'disp':True})
        all_results.append(result)

        # Save the results (the file is rewritten after every outer iteration)
        with open(save_fname, 'wb') as f:
            pickle.dump([y_train, x_train, bifurc_params,
                         all_results, 
                         init_paramvec, dict_ind, dict_shape, opt_params, 
                         bnds, transforms], f)
        cur_pvec = result.x
        cur_time = time.time()

        if callback_plot:
            print([it, cur_time - init_time, result.fun])
            bifurc_callback_plot_external(cur_pvec, 
                                  opt_params, init_paramvec, transforms, dict_ind, dict_shape,
                                  bifurc_params
                                 )

        # Exit if maximum time is reached
        if ((cur_time - init_time) > max_time):
            print(["Maximum iteration time reached at iter", it])
            break

        # Stop when the objective improved by less than 1e-2 per trial
        if len(all_results)>=2:
            if (all_results[-1].fun - all_results[-2].fun) >= (-1e-2*num_trials):
                print(["Update did not improve objective function, stopping"])
                break

    return all_results

In [None]:
# bifurc_params['slope'] = 2.0
# all_results = fit_GP(bifurc_params,5,16,0,start_time=datetime.datetime.now().strftime("%Y%m%dT%H%M%S"), callback_plot=True)

In [None]:
# Identifier of this run; it becomes part of every result file name.
start_time = datetime.datetime.now().strftime("%Y%m%dT%H%M%S")

# Sweep settings. Noise level: eps=0.2^2
nbatches = 1
slope_set = np.array([0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.75, 2.0, 2.25])
ntrials_set = np.array([32])
nfix_set = np.array([5])

bifurc_params["Sigma_eps"] = 0.2**2

def fit_GP_varyslope(bifurc_params, slope, *args):
    """Run ``fit_GP`` with ``slope`` substituted into the simulation settings.

    Works on a shallow copy so the caller's dict is left untouched; mutating
    the shared dict is only safe when every job receives its own pickled
    copy, which depends on the joblib backend.

    Args:
        bifurc_params: base simulation settings.
        slope: slope value to use for this fit.
        *args: forwarded to ``fit_GP`` after the parameter dict.
    """
    params = bifurc_params.copy()
    params['slope'] = slope
    fit_GP(params, *args)


# One job per (batch, slope, ntrials, nfix). The trial and fixed-point counts
# come from the sets above rather than being repeated as literals; int()
# turns the numpy scalars into plain Python integers.
Parallel(n_jobs=num_cores, verbose=5)(
    delayed(fit_GP_varyslope)(bifurc_params, slope, int(nfix), int(ntrials), batchnum, start_time)
    for batchnum, slope, ntrials, nfix in itertools.product(
        range(nbatches), slope_set, ntrials_set, nfix_set)
)

In [None]:
# # Create second batches

# # Noise level: eps=0.2^2
# start_time='20180206T230638'
# nbatches = 2
# slope_set = np.array([0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.75, 2.0, 2.25])
# ntrials_set = np.array([32])
# nfix_set = np.array([5])

# bifurc_params["Sigma_eps"] = 0.2**2
# def fit_GP_varyslope(bifurc_params, slope, *args):
#     bifurc_params['slope'] = slope
#     fit_GP(bifurc_params, *args)
    

# Parallel(n_jobs=num_cores, verbose=5)(
#     delayed(fit_GP_varyslope)(bifurc_params, slope, 5, 32, batchnum, start_time) for batchnum, slope  in itertools.product(
#         range(1,nbatches), slope_set)
# )



# # Noise level: eps=0.1^2
# start_time='20180207T022517'
# nbatches = 2
# slope_set = np.array([0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.75, 2.0, 2.25])
# ntrials_set = np.array([32])
# nfix_set = np.array([5])
# bifurc_params["Sigma_eps"] = 0.1**2

# def fit_GP_varyslope(bifurc_params, slope, *args):
#     bifurc_params['slope'] = slope
#     fit_GP(bifurc_params, *args)
    

# Parallel(n_jobs=num_cores, verbose=5)(
#     delayed(fit_GP_varyslope)(bifurc_params, slope, 5, 32, batchnum, start_time) for batchnum, slope  in itertools.product(
#         range(1,nbatches), slope_set)
# )

In [None]:
# [cur_pvec, opt_params, init_paramvec, transforms, dict_ind, dict_shape] = (
#     fit_GP(4,6,3,start_time=datetime.datetime.now().strftime("%Y%m%dT%H%M%S"), return_init=True))

# bifurc_callback_plot_external(cur_pvec, 
#                                   opt_params, init_paramvec, transforms, dict_ind, dict_shape,
#                                   bifurc_params
#                                  )

In [None]:
# start_time = datetime.datetime.now().strftime("%Y%m%dT%H%M%S")

# nbatches = 6
# ntrials_set = np.array([2, 4, 8, 16, 32])
# nfix_set = np.array([0,1,2,3,4,5])

# Parallel(n_jobs=num_cores, verbose=5)(
#     delayed(fit_GP)(nfix, ntrials, batchnum, start_time) for batchnum, ntrials, nfix  in itertools.product(range(nbatches), ntrials_set, nfix_set,)
# )

## 3. Load and compare fits

In [None]:
# Noise level: eps=0.2^2, joint prior
start_time='20180208T082207'
nbatches = 2
slope_set = np.array([0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.75, 2.0, 2.25])
ntrials_set = np.array([32])
nfix_set = np.array([5])



# # Noise level: eps=0.2^2
# start_time='20180206T230638'
# nbatches = 2
# slope_set = np.array([0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.75, 2.0, 2.25])
# ntrials_set = np.array([32])
# nfix_set = np.array([5])


# # Noise level: eps=0.1^2
# start_time='20180207T022517'
# nbatches = 2
# slope_set = np.array([0.5, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 1.75, 2.0, 2.25])
# ntrials_set = np.array([32])
# nfix_set = np.array([5])


# start_time='20180206T170518'
# nbatches = 1
# slope_set = np.array([0.25, 0.5, 0.75, 1.0, 1.05, 1.1, 1.15, 1.2, 1.25, 1.3, 1.35, 1.4, 1.45, 1.5, 2.0, 2.25])
# ntrials_set = np.array([16])
# nfix_set = np.array([5])

# # SGD - Slope: 2.0
# start_time = '20180205T170604'
# nbatches = 6
# ntrials_set = np.array([2, 4, 8, 16, 32])
# nfix_set = np.array([0,1,2,3,4,5])

# # SGD - Slope: 2.0
# start_time = '20180205T105514'
# nbatches = 6
# ntrials_set = np.array([4, 8, 16, 32])
# nfix_set = np.array([0,1,2,3,4,5])

In [None]:
# Load the saved files
# Load the saved files
def load_GP_fit(slope_ind, ntrials_ind, nfix_ind, batchnum_ind, slope, ntrials, nfix, batchnum, start_time):
    """Load one saved fit, or return ``[]`` if it cannot be loaded.

    The file name is rebuilt from the run identifier and the fit settings in
    the same way it was composed when the fit was saved.

    Args:
        slope_ind, ntrials_ind, nfix_ind, batchnum_ind: grid positions of the
            fit; accepted for the caller's convenience, not used here.
        slope: slope of the simulated system (enters the file name as
            ``np.int32(slope*100)``, i.e. truncated, not rounded).
        ntrials: number of training trials of the fit.
        nfix: number of fixed points of the fit.
        batchnum: training batch index of the fit.
        start_time: run identifier string.

    Returns:
        The unpickled content of the result file, or an empty list when the
        file is missing or cannot be read (best-effort: callers test
        ``len(...) > 0``).
    """
    expr_fname_params = "_varyslope_%0.3d" % (np.int32(slope*100))
    load_fname_params = "_%0.2d_fix_%0.3d_trials_batch_%0.2d" % (nfix, ntrials, batchnum)
    load_fname = "Experiment_1d_bifurc_results/bifurc_1d_" + start_time + load_fname_params + expr_fname_params + ".pkl"

    try:
        # Pickles are written in binary mode and must be read in binary mode;
        # the context manager closes the handle even if unpickling fails.
        # NOTE: pickle can execute arbitrary code -- only load result files
        # produced by this project.
        with open(load_fname, 'rb') as f:
            return pickle.load(f)
    except Exception:
        # Missing or unreadable file. Unlike a bare except, this lets
        # KeyboardInterrupt / SystemExit through.
        return []

# Load every (slope, ntrials, nfix, batch) combination in parallel. The result
# is a flat list ordered with slope varying slowest and batch fastest; entries
# that could not be loaded are empty lists.
GP_fit_saves = Parallel(n_jobs=num_cores)(
    delayed(load_GP_fit)(slope_ind, ntrials_ind, nfix_ind, batchnum, slope, ntrials, nfix, batchnum, start_time)
    for (slope_ind, slope), (ntrials_ind, ntrials), (nfix_ind, nfix), batchnum in itertools.product(
        enumerate(slope_set), enumerate(ntrials_set), enumerate(nfix_set), range(nbatches)
    )
)

In [None]:
def nestedlist_to_array(nested_list, max_level = 0):
    """Convert the outer levels of a nested list into a numpy object array.

    Levels ``0..max_level`` of nesting become array dimensions; whatever sits
    below is stored unchanged as the array elements. Dimension sizes are
    taken from the first element at each level, so the nesting is expected to
    be regular down to ``max_level``.

    Args:
        nested_list: list (possibly of lists). A non-list input yields a 0-d
            object array holding it.
        max_level: deepest nesting level (0-based) to turn into a dimension.

    Returns:
        ``numpy.ndarray`` with ``dtype=object``. An empty list at some level
        gives a dimension of size 0 (previously this raised ``IndexError``).
    """
    array_dims = []
    cur_nested_list = nested_list
    level = 0
    while (level <= max_level) and isinstance(cur_nested_list, list):
        array_dims.append(len(cur_nested_list))
        if len(cur_nested_list) == 0:
            # Nothing to descend into; the array is empty anyway.
            break
        cur_nested_list = cur_nested_list[0]
        level += 1

    out = np.empty(tuple(array_dims), dtype=object)

    # Walk every index combination and copy the element found there.
    for inds in itertools.product(*[range(n) for n in array_dims]):
        element = nested_list
        for i in inds:
            element = element[i]
        out[inds] = element

    return out

In [None]:
# Arrange the flat list of loaded fits on the experiment grid:
# axes are (slope, ntrials, nfix, batch).
fit_grid_shape = (len(slope_set), len(ntrials_set), len(nfix_set), nbatches)
GP_fit_saves_rs = nestedlist_to_array(GP_fit_saves, max_level=0).reshape(fit_grid_shape)

In [None]:
def fp_dynamic_threshold(curFit):
    """Score how well each learned fixed point agrees with the fitted GP.

    The GP posterior (built with the process noise set to zero) is evaluated
    at the fixed point locations ``s``. For every fixed point ``i`` the score
    is ``exp(-nll_i)``, where ``nll_i`` is the first value returned by
    ``update_t_t`` for the posterior at ``s_i``, an identity observation
    matrix, observation noise ``1e-4`` and observation ``s_i``.
    NOTE(review): presumably that value is the negative log-likelihood of
    observing ``f(s_i) = s_i``, which would make the score a likelihood
    rather than a p-value in the strict sense -- confirm against
    ``update_t_t``.

    Args:
        curFit: saved fit record
            ``[y_train, x_train, bifurc_params, all_results, init_paramvec,
            dict_ind, dict_shape, opt_params, bnds, transforms]``.

    Returns:
        Array with the shape of ``Sigma_s`` holding one score per fixed point.

    Raises:
        ValueError: if the fitted parameters contain no ``'Sigma_s'`` entry.
    """
    [y_train, x_train, bifurc_params,
     all_results, 
     init_paramvec, dict_ind, dict_shape, opt_params, 
     bnds, transforms] = curFit

    # Parameters after the last optimisation round
    paramvec = replace_params(all_results[-1].x, opt_params, init_paramvec)
    paramdict = vec_to_params(paramvec, dict_ind, dict_shape, transforms)

    # Unpack the usual parameters. This relies on the insertion order of
    # paramdict; list() makes the slice valid on Python 3 dict views as well.
    (Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u, lengthscales, kernel_variance, s, J)  = \
        list(paramdict.values())[:12]

    # Deliberate interactive debugging hook for diverged fits
    if np.any(np.isnan(lengthscales)):
        set_trace()

    # Deal with the extra possible parameters
    Sigma_s = paramdict['Sigma_s'] if 'Sigma_s' in paramdict.keys() else None
    Sigma_J = paramdict['Sigma_J'] if 'Sigma_J' in paramdict.keys() else None
    if Sigma_s is None:
        raise ValueError("fp_dynamic_threshold requires a fit with a 'Sigma_s' parameter")

    D = s.shape[0]
    p_vals = np.zeros(Sigma_s.shape)

    # Posterior of the transition function at the fixed points, with the
    # process noise switched off (sig_eps = 0).
    L, targets, params = fp_get_static_K(eta=kernel_variance, lengthscales=lengthscales, z=z, u=u, s=s, J=J, 
                                         sig_eps=0.*Sigma_eps, sig_u=Sigma_u, sig_s=Sigma_s, sig_J = Sigma_J)
    mu_star, sig_star, K_pred = fp_predict(s, L, targets, params)

    for i in range(s.shape[1]):
        # A Mahalanobis-distance based two-sided p-value used to be computed
        # here and was overwritten by the value below on the very next
        # statement; only the value that was actually returned is kept.
        # Cavity-based and KL-divergence based scores were tried before too.
        cur_nll_term, mu_t1_t1, Sigma_t1_t1 = update_t_t(mu_star[:,[i]], sig_star[i,:,:], np.eye(D), 1e-4*np.ones((D,1)), s[:,[i]])
        p_vals[i,:] = np.exp(-1.*cur_nll_term)

    return p_vals

In [None]:
# Spot check: fixed-point scores for the fit at slope index 19 (the last entry
# of the 20-element slope_set), first ntrials / nfix setting, first batch.
fp_dynamic_threshold(GP_fit_saves_rs[19,0,0,0])

In [None]:
# Inspect the learned 'J' parameter of a single fit (slope index 17, first
# ntrials / nfix setting, first batch).
# NOTE: the unpacking rebinds notebook-level names -- including bifurc_params,
# which afterwards holds the settings stored with this fit.
(y_train, x_train, bifurc_params,
 all_results,
 init_paramvec, dict_ind, dict_shape, opt_params,
 bnds, transforms) = GP_fit_saves_rs[17, 0, 0, 0]

# Rebuild the parameter dictionary from the last optimisation result
paramvec = replace_params(all_results[-1].x, opt_params, init_paramvec)
paramdict = vec_to_params(paramvec, dict_ind, dict_shape, transforms)

paramdict['J']

In [None]:


# Bifurcation diagram of the learned fixed points (first ntrials / nfix
# setting, first batch): one column of markers per slope.
#   plt_all_bifurc: fixed point location vs. slope, marker size = score
#   plt_all_conf:   fixed point score vs. slope (with a little x-jitter)
# NOTE: the unpacking below rebinds notebook-level names (bifurc_params,
# y_train, ...) on every iteration.
plt_all_bifurc = []
plt_all_conf = []

# Colour index 0 ('red'): |J| <= 1, index 1 ('blue'): |J| > 1.
# NOTE(review): presumably J is the slope of the transition function at the
# fixed point, so red would mark stable and blue unstable fixed points --
# confirm. 'black' is not used at present.
color_arr = np.array(['red', 'blue', 'black'])

for slope_ind, (slope, curFit) in enumerate(zip(slope_set, GP_fit_saves_rs[:,0,0,0])):
    if len(curFit) == 0:
        # Result file could not be loaded (load_GP_fit returns []): skip it
        # instead of failing on the unpacking below.
        continue

    [y_train, x_train, bifurc_params,
     all_results, 
     init_paramvec, dict_ind, dict_shape, opt_params, 
     bnds, transforms] = curFit

    paramvec = replace_params(all_results[-1].x, opt_params, init_paramvec)
    paramdict = vec_to_params(paramvec, dict_ind, dict_shape, transforms)

    # Relying on "how much this fixed points affects posterior"
    fp_p_vals = fp_dynamic_threshold(curFit)

    color_choice = np.int32(np.squeeze(np.abs(paramdict['J'])>1.))

    # Marker sizes between 4 and 20, scaled by the score within this fit.
    fp_size = fp_p_vals - np.min(fp_p_vals)
    fp_range = np.max(fp_size)
    if fp_range > 0:
        fp_size = fp_size/fp_range
    else:
        # All scores equal: avoid 0/0 and use a uniform mid-range size.
        fp_size = 0.5*np.ones_like(fp_size)
    fp_size = np.squeeze(fp_size*16+4)

    n_fix = paramdict['Sigma_s'].shape[0]

    plt_all_bifurc.append(plt_type.Scatter(
        x=slope*np.ones((n_fix,)),
            y=np.squeeze(paramdict['s']),
            mode='markers', marker=dict(symbol='.', color=color_arr[color_choice], size=fp_size)
        ))

    plt_all_conf.append(plt_type.Scatter(
            x=slope*np.ones((n_fix,))+5e-3*np.random.randn(n_fix,),
            y=np.squeeze(fp_p_vals),
            mode='markers', marker=dict(symbol='x', color=color_arr[color_choice])
            )   
          )


plt(plt_all_bifurc)
plt(plt_all_conf)

In [None]:
# plt_all = []
# for (slope_ind, slope, curFit) in itertools.izip(range(len(slope_set)), slope_set, GP_fit_saves):
#     [y_train, x_train, bifurc_params,
#      all_results, 
#      init_paramvec, dict_ind, dict_shape, opt_params, 
#      bnds, transforms] = curFit
    
#     paramvec = replace_params(all_results[-1].x, opt_params, init_paramvec)
#     paramdict = vec_to_params(paramvec, dict_ind, dict_shape, transforms)
    
#     fp_sort = np.squeeze(np.argsort(paramdict['s']).T)
    
# #     print [slope, slope_ind]
# #     print np.concatenate([paramdict['s'].T[fp_sort], paramdict['Sigma_s'][fp_sort], np.squeeze(paramdict['J'])[:,None][fp_sort]], axis=1)
# #     print 
    
#     color_arr = np.array(['red', 'blue', 'black'])
#     color_choice = np.int32(np.squeeze(paramdict['J'])>1)
#     color_choice[np.squeeze(np.log(paramdict['Sigma_s']) >-0.5)] = 2
    
#     plt_all.append(plt_type.Scatter(
#             x=slope*np.ones((paramdict['Sigma_s'].shape[0],)),
#             y=np.squeeze(np.log(paramdict['Sigma_s'])),
#             mode='markers', marker=dict(symbol='x', color=color_arr[color_choice])
#             )   
#           )

# plt(plt_all)

In [None]:
# Peek at the raw loaded record at flat index 3; an empty list means that
# result file could not be loaded.
GP_fit_saves[3]

In [None]:
# Which fixed points have log-variance above -1? `paramdict` is whatever the
# most recently executed cell left behind, so the answer depends on execution
# order.
np.log(paramdict['Sigma_s']) >-1.

In [None]:
# Show the slope values of the loaded sweep (index -> slope lookup).
slope_set

# Old analysis

In [None]:
# Create predictions for each test set trial
def test_GP_pred(y_test, GPfit, cutoff=None):
    if len(GPfit)>0:
        # Sort results into variables to work with
        [y_train, x_train, bifurc_params,
                     all_results, 
                     init_paramvec, dict_ind, dict_shape, opt_params, 
                     bnds, transforms] = GPfit                                   

        [x, y] = bifurc_create_data(bifurc_params)
        # Establish test dataset
        y_test = y[:,:,350:]
        x_test = x[:,:,350:]
        
        
        # Get GP predictions on test data
        return pred_GP(y_test, 
                       replace_params(all_results[-1].x, opt_params, init_paramvec), 
                       transforms, dict_ind, dict_shape, cutoff = cutoff)
    else:
        return []


# Load or make and save predictions
force_redo = 1  # truthy: always recompute and overwrite any cached predictions
cutoff = None
preds_fname = "Experiment_1d_bifurc_results/bifurc_1d_" + start_time + "_predictions.pkl"

GP_predictions = None
if not force_redo:
    try:
        # NOTE: pickle.load can execute arbitrary code; only load cache files
        # that were written by this notebook.
        with open(preds_fname, 'rb') as preds_file:
            [GP_predictions, ntrials_set, nfix_set, nbatches] = pickle.load(preds_file)
    except (IOError, OSError, EOFError, ValueError, pickle.UnpicklingError):
        # Missing, unreadable or malformed cache file: recompute below.
        GP_predictions = None

if GP_predictions is None:
    # One prediction job per saved fit, spread over all available cores.
    GP_predictions = Parallel(n_jobs=num_cores)(
        delayed(test_GP_pred)(y_test, GPfit, cutoff=cutoff)
        for GPfit in GP_fit_saves
    )
    with open(preds_fname, 'wb') as preds_file:
        pickle.dump([GP_predictions, ntrials_set, nfix_set, nbatches], preds_file)


In [None]:
# Get errors based on predictions
axisError = (0,1,2)

# Linear AR(1) baseline error per saved fit; an empty fit is scored as +inf.
AR_RMSE = []
for GPfit in GP_fit_saves:
    if len(GPfit)>0:
        # GPfit[0] is the y_train entry of the saved fit
        AR_RMSE.append(rmse(pred_lin_AR1(y_test, GPfit[0], cutoff=cutoff), y_test, axis=axisError))
    else:
        AR_RMSE.append(np.inf)

# Axes summed for the NLL: axisError[1:] shifted down by one, i.e. (0, 1).
nll_axes = tuple(np.array(axisError)[1:]-1)

# GP errors per prediction; an empty prediction is scored as +inf.
GP_NLL = []
GP_RMSE = []
for GPpred in GP_predictions:
    if len(GPpred)>0:
        GP_NLL.append(np.sum(GPpred[5], axis=nll_axes))
        GP_RMSE.append(rmse(GPpred[7], y_test, axis=axisError))
    else:
        GP_NLL.append(np.inf)
        GP_RMSE.append(np.inf)


# Arrange the flat result lists on an (ntrials, nfix, batch) grid.
results_shape = (len(ntrials_set), len(nfix_set), nbatches)
AR_RMSE = np.reshape(np.array(AR_RMSE), results_shape)
GP_RMSE = np.reshape(np.array(GP_RMSE), results_shape)
GP_NLL = np.reshape(np.array(GP_NLL), results_shape)


In [None]:
# Median over batches (axis 2) of each error measure.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
np.set_printoptions(precision=2)
print("AR_RMSE")
print(np.median(AR_RMSE, axis=2))
print("-------------")
print("GP_RMSE")
print(np.median(GP_RMSE, axis=2))
print("-------------")
print("GP_NLL")
print(np.median(GP_NLL, axis=2))

In [None]:
# Plot negative log likelihoods for each options
def plot_nll_fixpoints(cur_nll_ntrial, nfix_set, nbatches):
    """Build one marker-only scatter trace per entry of ``nfix_set``.

    Parameters
    ----------
    cur_nll_ntrial : array-like
        Indexed as ``cur_nll_ntrial[nfix_ind, :]``; row ``nfix_ind`` holds the
        values plotted for ``nfix_set[nfix_ind]``.
    nfix_set : iterable of numbers
        Values used for the x position and for the trace name/legend group.
    nbatches : any
        Unused; kept for interface compatibility.

    Returns
    -------
    list
        One ``plt_type.Scatter`` per element of ``nfix_set`` (empty list if
        ``nfix_set`` is empty).
    """
    cur_plts = []
    # enumerate replaces itertools.izip(range(len(...)), ...), which is Python 2 only
    for nfix_ind, nfix in enumerate(nfix_set):
        nll_vals = np.squeeze(cur_nll_ntrial[nfix_ind,:])
        label = "nfix=%d" % nfix
        cur_plts.append(plt_type.Scatter(
            # all points of one trace share the same x position (the nfix value)
            x=nfix*np.ones_like(nll_vals),
            y=nll_vals,
            mode='markers',
            marker=dict(color=plotly.colors.DEFAULT_PLOTLY_COLORS[nfix_ind]),
            name=label,
            legendgroup=label))
    return cur_plts
    
    
from plotly import tools as plt_tools

def plot_nll(cur_nll, ntrials_set, nfix_set, nbatches):
    """Show one subplot per entry of ``ntrials_set`` in a single shared-y row.

    Parameters
    ----------
    cur_nll : array-like
        Indexed as ``cur_nll[ntrials_ind, :, :]``; each slice is passed to
        ``plot_nll_fixpoints``.
    ntrials_set : sequence
        One subplot column per element; also passed as the subplot titles.
    nfix_set, nbatches
        Forwarded to ``plot_nll_fixpoints``.

    Returns
    -------
    The figure created by ``plt_tools.make_subplots``, after it has been
    displayed with ``plt``.
    """
    fig_subplts = plt_tools.make_subplots(rows=1, cols=len(ntrials_set), subplot_titles=ntrials_set, shared_yaxes=True)
    # enumerate replaces itertools.izip(range(len(...)), ...), which is Python 2 only
    for ntrials_ind, _ntrials in enumerate(ntrials_set):
        col_traces = plot_nll_fixpoints(cur_nll[ntrials_ind,:,:], nfix_set, nbatches)
        for trace in col_traces:
            fig_subplts.append_trace(trace,1,ntrials_ind+1)
            # Only the first column's traces keep their legend entries.
            if ntrials_ind>0:
                fig_subplts['data'][-1]['showlegend']=False
    plt(fig_subplts)
    return fig_subplts

# Display the GP_NLL scatter subplots; plot_nll also returns the figure, kept in `a`.
a = plot_nll(GP_NLL, ntrials_set, nfix_set, nbatches)

In [None]:
# Best (smallest) GP_NLL over batches (axis 2), and the batch index achieving it.
# The two outputs previously carried the same "GP_NLL" label.
np.set_printoptions(precision=2)
print("-------------")
print("GP_NLL (min over batches)")
print(np.min(GP_NLL, axis=2))
print("-------------")
print("GP_NLL (argmin over batches)")
print(np.argmin(GP_NLL, axis=2))

In [None]:
# Plot a single fit
# Arrange the flat list of saved fits on a (slope, ntrials, nfix, batch) grid.
GP_fit_saves_rs = np.reshape(nestedlist_to_array(GP_fit_saves, max_level=0), 
                             (len(slope_set), len(ntrials_set), len(nfix_set), nbatches))

# Grid position of the fit to inspect
slope_ind = 17
nfix_ind = 0
ntrials_ind = 0
batchnum = 0

[y_train, x_train, bifurc_params,
 all_results, 
 init_paramvec, dict_ind, dict_shape, opt_params, 
 bnds, transforms] = GP_fit_saves_rs[slope_ind, ntrials_ind, nfix_ind, batchnum]

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(slope_set[slope_ind])

# init_paramvec[dict_ind['Sigma_J']] = 0.0
# opt_params = opt_params[opt_params<np.min(dict_ind['Sigma_J'])]

# Plot using the initial values of the optimised parameters, with the training data
bifurc_callback_plot_external(init_paramvec[opt_params], 
        opt_params, init_paramvec, transforms, dict_ind, dict_shape,
        bifurc_params, trainingData=y_train)

# for res in all_results[:-1]:
#     bifurc_callback_plot_external(res.x, 
#             opt_params, init_paramvec, transforms, dict_ind, dict_shape,
#             bifurc_params)

# Plot column 1 against column 0 of the last result's recorded `objs`
last_objs = np.array(all_results[-1].objs)
plt([plt_type.Scatter(x=last_objs[:,0], y = last_objs[:,1])])
# for theta_ind in range(len(all_results[0].theta_hist)):
#     if (not np.mod(theta_ind, 30)) and (theta_ind<200):
#         print np.array(all_results[0].objs)[np.array(all_results[0].objs)[:,0]==theta_ind, :]
#         bifurc_callback_plot_external(all_results[0].theta_hist[theta_ind], 
#             opt_params, init_paramvec, transforms, dict_ind, dict_shape,
#             bifurc_params)
    
    
# theta_no_u = replace_params(all_results[-1].x, opt_params, init_paramvec)
# theta_no_u[dict_ind['Sigma_u']] = 1e9*theta_no_u[dict_ind['Sigma_u']]

# bifurc_callback_plot_external(theta_no_u[opt_params], 
#         opt_params, init_paramvec, transforms, dict_ind, dict_shape,
#         bifurc_params)

#all_results[-1].x = all_results[-1].theta_hist[142]

# Plot using the final optimisation result
bifurc_callback_plot_external(all_results[-1].x,
        opt_params, init_paramvec, transforms, dict_ind, dict_shape,
        bifurc_params)

#

paramvec = replace_params(all_results[-1].x, opt_params, init_paramvec)
paramdict = vec_to_params(paramvec, dict_ind, dict_shape, transforms)

# Unpack the usual parameters
# NOTE(review): this positional unpack relies on paramdict.values() yielding
# exactly these 14 entries in this order -- confirm paramdict is ordered.
(Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u, lengthscales, kernel_variance, s, J, Sigma_s, Sigma_J)  = \
    paramdict.values()
    
# One row per entry: columns are s, Sigma_s and J
print(np.concatenate([s.T, Sigma_s, np.squeeze(J)[:,None]], axis=1))

In [None]:
# Scratch inspection: display the parameter dictionary built by vec_to_params.
paramdict

In [None]:
# Scratch inspection: display Sigma_J as unpacked from paramdict.
Sigma_J

In [None]:
# Scratch inspection: boolean mask of rows in the first result's `objs` whose
# first column equals 50 (presumably an iteration index -- TODO confirm).
np.array(all_results[0].objs)[:,0]==50

In [None]:
# Scratch check of the `not np.mod(i, k)` idiom: 200 is a multiple of 50, so this is True.
not np.mod(200, 50)

In [None]:
# Scratch inspection: the valid indices into the first result's theta_hist.
range(len(all_results[0].theta_hist))

In [None]:
# Scratch inspection: the last result's recorded `objs`, shown as an array.
np.array(all_results[-1].objs)

In [None]:
# Rebuild the full parameter vector from the last optimisation result and
# convert it to the parameter dictionary.
paramvec = replace_params(all_results[-1].x, opt_params, init_paramvec)
paramdict = vec_to_params(paramvec, dict_ind, dict_shape, transforms)

# Unpack the usual parameters
# NOTE(review): relies on paramdict.values() yielding these 14 entries in this
# exact order -- confirm paramdict is an ordered mapping.
(Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u, lengthscales, kernel_variance, s, J, Sigma_s, Sigma_J)  = \
    paramdict.values()
    
# Show more digits from here on, then display the dictionary as the cell output.
np.set_printoptions(precision=8)
paramdict

In [None]:
# Scratch inspection: number of entries stored in all_results.
len(all_results)

In [None]:
# Scratch inspection: the initial parameter vector converted by vec_to_params.
vec_to_params(init_paramvec, dict_ind, dict_shape, transforms)

In [None]:
# Print the inducing-point and fixed-point related parameters at 8-digit precision.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
np.set_printoptions(precision=8)
print(z)
print(u)
print(Sigma_u)
print(s)
print(Sigma_s)
print(Sigma_J)

In [None]:
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(lengthscales)

In [None]:
# Add priors (to span at least the bounds)
priors = []

# Add prior to ensure inducing points are smooth.
# The prior is given every index of the parameter vector; its metadata
# starts empty and is filled in afterwards.
cur_prior = {
    'func': create_prior(prior_distribution="InducingSmooth"),
    'inds': range(len(init_paramvec)),
    'metadata': {},
}
def unpack_x_tmp(x):
    """Pull the pieces of a parameter vector that are registered as the prior's ``unpack_x`` hook.

    Returns a 4-tuple:
      * ``u`` and ``s`` concatenated along axis 1,
      * ``z`` and ``s`` concatenated along axis 1,
      * ``Sigma_u`` and ``Sigma_s`` concatenated along the default axis,
      * a dict with keys ``'lengthscales'`` and ``'kernel_variance'``.
    """
    # Unpack all the usual parameters (underscored names are not needed here).
    # NOTE(review): vec_to_params is called without `transforms` and its result
    # is unpacked positionally, while other cells pass `transforms` and unpack
    # `.values()` -- confirm this matches the current vec_to_params API.
    (_Sigma_eps, _mu_0_0, _Sigma_0_0, _C, _Sigma_nu,
     z, u, Sigma_u,
     lengthscales, kernel_variance,
     s, _J, Sigma_s, _Sigma_J) = vec_to_params(x, dict_ind, dict_shape)

    # Return the ones we want in the required format
    u_and_s = np.concatenate([u, s], axis=1)
    z_and_s = np.concatenate([z, s], axis=1)
    Sigma_u_and_s = np.concatenate([Sigma_u, Sigma_s])
    kernelparams = dict(lengthscales=lengthscales, kernel_variance=kernel_variance)
    return (u_and_s, z_and_s, Sigma_u_and_s, kernelparams)
# Attach the unpacking hook and kernel to the prior, then register it.
cur_prior['metadata'].update(unpack_x=unpack_x_tmp, kernel_func=RBF)
priors.append(cur_prior)


def tmp_func(pvec_partial):
    """Objective as a function of the optimised parameters only.

    Inserts ``pvec_partial`` into the full parameter vector via
    ``replace_params`` and returns element 0 of ``time_full_iter``'s result.
    """
    full_paramvec = replace_params(pvec_partial, opt_params, init_paramvec)
    return time_full_iter(full_paramvec,
                          y_train, dict_ind, dict_shape,
                          log_transformed=log_transformed,
                          priors=priors)[0]


In [None]:
# Build the full parameter vector from the result at index 1 of all_results ...
paramvec = replace_params(all_results[1].x, opt_params, init_paramvec)
#paramvec = log_transform_inv(paramvec, log_transformed)
# ... then keep only the optimised entries, which is what tmp_func expects.
paramvec = paramvec[opt_params]
# Gradient of tmp_func at that point; the next cell reads it back through `a`.
a = grad(tmp_func)(paramvec)

In [None]:
# Insert `a` into a full-length parameter vector. If `a` is still the gradient
# computed from grad(tmp_func), the names unpacked below hold gradient
# components rather than parameter values.
paramvec = replace_params(a, opt_params, init_paramvec)
#paramvec = log_transform_inv(paramvec, log_transformed)

# Unpack the usual parameters
# NOTE(review): vec_to_params is called without `transforms` here and its
# result is unpacked positionally (no `.values()`), unlike other cells --
# confirm this matches the current vec_to_params API.
param_tuple = vec_to_params(paramvec, dict_ind, dict_shape)
(Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u, lengthscales, kernel_variance, s, J, Sigma_s, Sigma_J)  = \
    param_tuple

In [None]:
# Print the current z, u, Sigma_u, s and Sigma_s at 8-digit precision.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
np.set_printoptions(precision=8)
print(z)
print(u)
print(Sigma_u)
print(s)
print(Sigma_s)

In [None]:
# Print the current z, u and Sigma_u at 8-digit precision.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
np.set_printoptions(precision=8)
print(z)
print(u)
print(Sigma_u)

In [None]:
# Print the current s and Sigma_s.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(s)
print(Sigma_s)

In [None]:
# Scratch inspection: the initial parameter vector converted by vec_to_params,
# with `transforms` passed by keyword.
vec_to_params(init_paramvec, dict_ind, dict_shape, transforms=transforms)