In [1]:
import nbimporter

# Import main functionality
from GPDM_direct_fixedpoints import *

plotly.offline.init_notebook_mode(connected=True)

# Saving outputs and timing
import pickle, datetime, time

from joblib import Parallel, delayed
import multiprocessing
import itertools

num_cores = multiprocessing.cpu_count()

# %run GPDM_direct_fixedpoints.ipynb
# %run GPDM_Examples.ipynb

Importing Jupyter notebook from GPDM_direct_fixedpoints.ipynb



Matplotlib is building the font cache using fc-list. This may take a moment.



# Experimental setup

1. We first generate training and test data with the same parameters.

2. We then fit GP models with a fixed number of parameters, but vary the number of fixed points versus basic inducing points

3. We compare the performance of the GP models against one another, as well as against a linear autoregression baseline.

## 1. Generate training and test data

In [2]:
def tanh_draw_trial(T, mu_0_0, Sigma_0_0, slope, maxval,Sigma_eps, Sigma_nu, **kwargs):
    """Simulate one trial of the noisy one-dimensional tanh system.

    Latent state:   x_t = maxval * tanh(slope * x_{t-1}) + eps_t,  eps_t ~ N(0, Sigma_eps)
    Observation:    y_t = x_t + nu_t,                              nu_t  ~ N(0, Sigma_nu)
    The state preceding the first step is drawn from N(mu_0_0, Sigma_0_0).

    Any additional keyword arguments are accepted and ignored, so a full
    parameter dict can be splatted into the call.

    Returns:
        (x, y): latent and observed trajectories, each of shape (1, T).
    """
    latent = np.zeros((1, T))
    observed = np.zeros((1, T))

    # Draw order matters for seeded reproducibility: initial state first, then
    # for every time step the process noise followed by the observation noise.
    state = mu_0_0 + np.sqrt(Sigma_0_0)*np.random.randn(1)
    for step in range(T):
        latent[:, step] = (np.tanh(slope*np.atleast_1d(state))*maxval
                           + np.sqrt(Sigma_eps)*np.random.randn(1))
        observed[:, step] = latent[:, step] + np.sqrt(Sigma_nu)*np.random.randn(1)
        state = latent[:, step]

    return (latent, observed)

In [8]:
# Ground-truth parameters of the simulated system. The whole dict is splatted
# into tanh_draw_trial, which ignores the keys it does not use (rseed, Ny).
tanh_params = OrderedDict([
    ("rseed", 1234),          # Happens to create a balanced training set in the first 8 trials in y
    ("Ny", 500),              # number of simulated trials
    ("T", 50),                # time steps per trial
    ("slope", 1.7),
    ('maxval', 1.0),
    ("Sigma_eps", 0.5**2),    # process noise variance
    ("Sigma_nu", 0.05**2),    # observation noise variance
    ("mu_0_0", 0.),
    ("Sigma_0_0", 1e-6),
])

# Generate trials and collect them into [D x T x N] arrays
np.random.seed(tanh_params["rseed"])
all_trials_x = []
all_trials_y = []
for n in range(tanh_params["Ny"]):
    x_trial, y_trial = tanh_draw_trial(**tanh_params)
    # Append a trailing trial axis so the trials can be stacked along axis 2
    all_trials_x.append(x_trial[:, :, np.newaxis])
    all_trials_y.append(y_trial[:, :, np.newaxis])

x = np.concatenate(all_trials_x, axis=2)
y = np.concatenate(all_trials_y, axis=2)

# # Swap around trials to ensure balanced training sets
# if tanh_params["slope"]*tanh_params['maxval'] > 1.1:
#     cur_trial = 0
#     cur_trial_type = 1.
#     while cur_trial < 0.8*tanh_params["Ny"]:
#         # If current trial is not correct type, switch it with one that is
#         if not (np.mean(y[:,(tanh_params["T"]-4):,cur_trial]) * cur_trial_type > 0.2*tanh_params['maxval']):
#             switch_trial = cur_trial+1
#             while (not (np.mean(y[:,(tanh_params["T"]-4):,switch_trial]) * cur_trial_type > 0.2*tanh_params['maxval'])):
#                 switch_trial += 2

#             tmp = np.copy(y[:,:,switch_trial])
#             y[:,:,switch_trial] = np.copy(y[:,:,cur_trial])
#             y[:,:,cur_trial] = tmp

#             tmp = np.copy(x[:,:,switch_trial])
#             x[:,:,switch_trial] = np.copy(x[:,:,cur_trial])
#             x[:,:,cur_trial] = tmp

#         #print np.mean(y[:,(tanh_params["T"]-4):,cur_trial]) * cur_trial_type
#         cur_trial += 1
#         cur_trial_type *= -1.

# One line trace of the observations y per trial, for the first 128 trials
plots_by_run = [
    plt_type.Scatter(x=np.squeeze(np.arange(tanh_params["T"])),
                     y=np.squeeze(y[:,:,v]),
                     mode='lines')
    for v in range(128)
]

# RMSE of the linear AR(1) baseline on trials 200 onwards.
# NOTE(review): the second argument (first 50 trials) is presumably the data the
# AR(1) model is fitted on, matching pred_lin_AR1(y_test, y_train, ...) used later — confirm.
# Written as a call so the cell prints identically on Python 2 and parses on Python 3.
print(rmse(pred_lin_AR1(y[:,:,200:], y[:,:,:50], cutoff=None), y[:,:,200:]))


# True transition function on a grid, with a +/- one process-noise-sd band
xstar = np.atleast_2d(np.arange(-2.5,2.5,0.05))
true_tr_vals = np.tanh(tanh_params['slope']*xstar)*tanh_params['maxval']

#set_trace()

plt([plt_type.Scatter(x=np.squeeze(xstar),
                      y=np.squeeze(true_tr_vals), mode='markers', name = 'True trans. f.',
                      marker=dict(color='orange')),
     plt_type.Scatter(x=np.squeeze(xstar),
                      y=np.squeeze(true_tr_vals+np.sqrt(tanh_params["Sigma_eps"])), mode='markers', name = 'True trans. f.',
                      marker=dict(color='orange', size=2)),
     plt_type.Scatter(x=np.squeeze(xstar),
                      y=np.squeeze(true_tr_vals-np.sqrt(tanh_params["Sigma_eps"])), mode='markers', name = 'True trans. f.',
                      marker=dict(color='orange', size=2)),
     # Identity line: its crossings with the transition function are the fixed points
     plt_type.Scatter(x=np.squeeze(xstar),
                      y=np.squeeze(xstar), mode='markers', name = 'x=x',
                      marker=dict(color='blue'))
     ])

plt(plots_by_run)

0.552187089825


In [9]:
def _fixed_point_marker_sizes(Sigma_s, default_size=10., min_size=8., max_size=20.):
    """Map fixed-point variances to marker sizes (smaller variance -> larger marker).

    Returns `default_size` when there is nothing to rank: Sigma_s is missing,
    has at most one element, or all its entries are equal (zero spread, which
    would otherwise produce a 0/0 division).
    """
    if Sigma_s is None or np.size(Sigma_s) <= 1:
        return default_size
    sizes = -np.squeeze(np.log(Sigma_s))
    sizes = sizes - np.min(sizes)
    spread = np.max(sizes)
    if not spread > 0:
        return default_size
    return (sizes/spread)*(max_size - min_size) + min_size


def tanh_callback_plot_external(pvec_partial, 
                                  opt_params, init_paramvec, transforms, dict_ind, dict_shape,
                                  tanh_params
                                 ):
    """Plot the GP transition function for a parameter vector against the true tanh map.

    Args:
        pvec_partial: values of the optimised subset of the parameter vector.
        opt_params: indices of the optimised entries within the full vector.
        init_paramvec: full parameter vector supplying the non-optimised entries.
        transforms, dict_ind, dict_shape: bookkeeping passed to vec_to_params to
            rebuild the parameter dict from the vector.
        tanh_params: ground-truth simulation parameters (reads 'slope', 'maxval'
            and 'Sigma_eps').

    The figure shows the GP posterior mean with a +/- one sd band, the true
    transition function with a +/- one process-noise-sd band, the inducing point
    locations and values, and the fixed points (marker size reflects Sigma_s).
    """
    paramvec = replace_params(pvec_partial, opt_params, init_paramvec)
    paramdict = vec_to_params(paramvec, dict_ind, dict_shape, transforms)

    # Unpack the usual parameters; this relies on the dict's insertion order.
    # list(...) keeps the slice valid on Python 3, where values() is a view.
    (Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u, lengthscales, kernel_variance, s, J)  = \
        list(paramdict.values())[:12]

    # Debugging aid: drop into the debugger if the optimiser produced NaN lengthscales
    if np.any(np.isnan(lengthscales)):
        set_trace()

    # Deal with the extra possible parameters (absent -> None)
    Sigma_s = paramdict['Sigma_s'] if 'Sigma_s' in paramdict else None
    Sigma_J = paramdict['Sigma_J'] if 'Sigma_J' in paramdict else None

    # Evaluate the transition function on a grid slightly wider than the inducing locations
    xstar = np.atleast_2d(np.arange(np.min(z)-0.5,np.max(z)+0.5,0.05))

    L, targets, params = fp_get_static_K(eta=kernel_variance, lengthscales=lengthscales, z=z, u=u, s=s, J=J, 
                                         sig_eps=Sigma_eps, sig_u=Sigma_u, sig_s=Sigma_s, sig_J = Sigma_J)
    mu_star, sig_star, K_pred = fp_predict(xstar, L, targets, params)

    # Get true function values
    true_tr_vals = np.tanh(tanh_params['slope']*xstar)*tanh_params['maxval']

    # Map Sigma_s values to the range 8-20. Previously Sigma_s.size was read
    # even when Sigma_s was None, and equal variances gave NaN sizes.
    FP_SIZE = _fixed_point_marker_sizes(Sigma_s)

    post_mean = np.squeeze(mu_star)
    post_sd = np.squeeze(np.sqrt(sig_star))
    noise_sd = np.sqrt(tanh_params["Sigma_eps"])

    plt([plt_type.Scatter(x=np.squeeze(xstar), y=post_mean, mode='markers', name='GP post. mean',
                         marker=dict(color='blue')),
         plt_type.Scatter(x=np.squeeze(xstar), y=post_mean+post_sd, mode='markers', 
                         marker=dict(size=2, color='blue')),
         plt_type.Scatter(x=np.squeeze(xstar), y=post_mean-post_sd, mode='markers', 
                         marker=dict(size=2, color='blue')),      
         plt_type.Scatter(x=np.squeeze(xstar), 
                          y=np.squeeze(true_tr_vals), mode='markers', name = 'True trans. f.',
                          marker=dict(color='orange')),
         plt_type.Scatter(x=np.squeeze(xstar), 
                          y=np.squeeze(true_tr_vals+noise_sd), mode='markers', name = 'True trans. f.',
                          marker=dict(color='orange', size=2)),
         plt_type.Scatter(x=np.squeeze(xstar), 
                          y=np.squeeze(true_tr_vals-noise_sd), mode='markers', name = 'True trans. f.',
                          marker=dict(color='orange', size=2)),
         # Inducing locations are drawn on a rug at y = -2
         plt_type.Scatter(x=np.squeeze(z), y=np.squeeze(-2.0*np.ones_like(z)), mode='markers', marker=dict(size=10),
                         name = 'Ind point loc'),
         plt_type.Scatter(x=np.squeeze(z), y=np.squeeze(u), mode='markers', marker=dict(size=10),
                         name = 'Ind point val'),
         # Fixed points lie on the identity line by definition: plotted at (s, s)
         plt_type.Scatter(x=np.atleast_1d(np.squeeze(s)), y=np.atleast_1d(np.squeeze(s)), mode='markers', marker=dict(size=FP_SIZE),
                         name = 'Fixed point')
        ])

## 2. Fitting GP models

In [10]:
def fit_GP(num_fixed_points, num_trials, batchnum, start_time, callback_plot=False, return_init=False):
    """Fit one GP dynamics model to a batch of training trials and pickle the result.

    Reads the module-level arrays `x`, `y` and the dict `tanh_params`.

    Args:
        num_fixed_points: number of fixed points Ns; the number of basic
            inducing points is 16 - Ns.
        num_trials: number of training trials in the batch.
        batchnum: batch index; trials [batchnum*num_trials, (batchnum+1)*num_trials)
            are used for training.
        start_time: timestamp string embedded in the output file name.
        callback_plot: if True, print progress and plot the fitted transition
            function after each optimisation round.
        return_init: if True, skip optimisation and return the initial state.

    Returns:
        If `return_init`: the list
        [cur_pvec, opt_params, init_paramvec, transforms, dict_ind, dict_shape].
        Otherwise None; results are written to
        "Experiment_1d_tanh_results/tanh_1d_<start_time>_<NN>_fix_<NNN>_trials_batch_<NN>.pkl".
    """
    import os  # local import: only needed to prepare the output directory

    # Set number of parameters
    D = 1
    Ns = num_fixed_points
    Nz = 16 - Ns

    # Fix the training data
    first_trial = batchnum*num_trials
    y_train = y[:,:,first_trial:(first_trial+num_trials)]
    x_train = x[:,:,first_trial:(first_trial+num_trials)]

    np.random.seed(tanh_params['rseed']*2)

    #######################################################
    # Initialise the parameters
    paramdict = init_params(y_train, D, Nz, Ns)

    # Fix noise to true value
    paramdict['Sigma_nu'] = tanh_params['Sigma_nu'] * np.ones_like(paramdict['Sigma_nu'])

#     # Re-initialise / add some more parameters
#     paramdict['Sigma_s'] = 1e-2*np.ones((Ns,1))
#     paramdict['Sigma_J'] = 1e-2*np.ones((Ns*D,1))
    
    # Add transformations for certain parameters 
    # (Note that the parameter indices may be changed by transforms! (for cholesky repres of matrices, "SquareMatrix" type))
    transforms = OrderedDict()
    for par in ['Sigma_0_0', 'Sigma_u', 'Sigma_s', 'Sigma_J', 'lengthscales', 'Sigma_eps', 'Sigma_nu', 'kernel_variance']:
        transforms[par] = {'type': "Square"}
    
    # Create vectorised and transformed representation
    (init_paramvec, dict_ind, dict_shape) = params_to_vec(paramdict, transforms=transforms)
 
    #######################################################
    # Optimise only certain elements of paramvec (messy with indices)
    opt_params = np.arange(init_paramvec.shape[0])
    # opt_params = np.delete(opt_params, np.hstack([dict_ind['C'], dict_ind['Sigma_nu'], dict_ind['J']])) # All except the ones listed here
    opt_params = np.delete(opt_params, np.hstack([dict_ind['C'], dict_ind['Sigma_nu']])) # All except the ones listed here
    cur_pvec = init_paramvec[opt_params]

    #######################################################
    # Add bounds for parameters (currently unbounded; only stored with the
    # results, and used by the commented-out L-BFGS-B alternative below)
    bnds = list(((None, None),) * init_paramvec.shape[0])
    # cur_dim = 0
    # cur_z = 0
    # cur_tot = 0
    # z_mins = np.min(z, axis=1)
    # z_maxs = np.max(z, axis=1)
    # for i in np.concatenate([dict_ind['z'], dict_ind['s']]): # Note the idiotic python reshape order for setting bounds per dim
    #     z_min = z_mins[cur_dim]
    #     z_max = z_maxs[cur_dim]
    #     bnds[i] = (z_min-0.05*(z_max-z_min), z_max+0.05*(z_max-z_min))
    #     cur_z = cur_z+1
    #     if cur_tot < D*Nz:
    #         cur_z = np.mod(cur_z, Nz)
    #     else:
    #         cur_z = np.mod(cur_z, Ns)
    #     cur_tot = cur_tot+1
    #     if cur_z==0:
    #         cur_dim = cur_dim+1
    #     if cur_tot==D*Nz:
    #         cur_dim = 0
    # for i in np.concatenate([dict_ind['J']]):
    #     bnds[i] = (-1., 1.)
    bnds = tuple(bnds[i] for i in opt_params)

    #######################################################
    # Add priors (to span at least the bounds)
    def make_smooth_dpp_prior(unpack_dict):
        # Both priors share everything except how they read the parameter dict
        return {
            'type': "InducingSmooth_and_DPP",
            'metadata': {
                'unpack_dict': unpack_dict,
                'kernel_func': RBF,
                'prior_weight_Smooth': 1e0,
                'prior_weight_DPP': 1e0,
            },
        }

    def unpack_inducing_points(pdict):
        kernelparams = {'lengthscales': pdict['lengthscales'], 'kernel_variance': pdict['kernel_variance']}
        # Inducing values u at locations z, with their variances
        return (pdict['u'], 
                pdict['z'],
                pdict['Sigma_u'],
                kernelparams)

    def unpack_fixed_points(pdict):
        kernelparams = {'lengthscales': pdict['lengthscales'], 'kernel_variance': pdict['kernel_variance']}
        # A fixed point s is both the location and the function value there
        return (pdict['s'], 
                pdict['s'],
                pdict['Sigma_s'],
                kernelparams)

    priors = [
        make_smooth_dpp_prior(unpack_inducing_points),  # prior to ensure inducing points are smooth
        make_smooth_dpp_prior(unpack_fixed_points),     # prior to ensure fixed points are smooth
    ]

    # # Add a strong prior to learn actual fixed points
    # logGamma_prior = create_prior("LogGamma", [2., 0.5, -6.])
    # for i in np.concatenate([dict_ind['Sigma_s'], dict_ind['Sigma_J']]):
    #     prior_funcs[i] = logGamma_prior

    # tmp_x = np.logspace(-6.0,2,100)    
    # plt(plt_type.Figure(data=[plt_type.Scatter(x=tmp_x, y=np.exp(-logGamma_prior(tmp_x)))], layout=plt_type.Layout(xaxis=dict(type= "log"))))
    # plt(plt_type.Figure(data=[plt_type.Scatter(x=tmp_x, y=logGamma_prior(tmp_x))], layout=plt_type.Layout(xaxis=dict(type= "log"))))

    if return_init:
        return [cur_pvec, opt_params, init_paramvec, transforms, dict_ind, dict_shape]
    
    #######################################################
#     # Prepare the optimisation
#     tmp_func = lambda pvec_partial: (time_full_iter(replace_params(pvec_partial, opt_params, init_paramvec), 
#                                                 y_train, dict_ind, dict_shape, 
#                                                 transforms=transforms,
#                                                 priors=priors)[0])
#     objective_with_grad = value_and_grad(tmp_func, argnum=0)

    # Prepare the optimisation: full-data objective for monitoring,
    # minibatch objective (2 trials per batch) for the gradient steps
    f_objective = lambda pvec_partial: (time_full_iter(replace_params(pvec_partial, opt_params, init_paramvec), 
                                                y_train, dict_ind, dict_shape, 
                                                transforms=transforms,
                                                priors=priors)[0])
    f_minibatch = lambda pvec_partial: (minibatch_iter(replace_params(pvec_partial, opt_params, init_paramvec), 
                                                y_train, dict_ind, dict_shape, 
                                                transforms=transforms,
                                                priors=priors,
                                                minibatch_size=2)[0])
    f_minibatch_with_grad = grad(f_minibatch, argnum=0)

    # By iterating minimize within a for cycle, we can save all intermediate results and set ending times
    #expr_fname_params = "_slope_%0.3d_max_%0.3d_" % (np.int32(tanh_params["slope"]*100), np.int32(tanh_params['maxval']*100))
    save_fname_params = "_%0.2d_fix_%0.3d_trials_batch_%0.2d" % (num_fixed_points, num_trials, batchnum)
    save_fname = "Experiment_1d_tanh_results/tanh_1d_" + start_time + save_fname_params + ".pkl"

    # Make sure the output directory exists before spending hours optimising.
    # Several parallel workers may race to create it, so an "already exists"
    # failure is fine; anything else (e.g. permissions) is a real error.
    save_dir = os.path.dirname(save_fname)
    try:
        os.makedirs(save_dir)
    except OSError:
        if not os.path.isdir(save_dir):
            raise

    init_time = time.time()
    max_time = 6.0*3600 # Maximum iteration time in seconds, break if reached
    all_results = []

    for it in range(1):
        np.random.seed(1234+it)
        result = adamOptimize(f_objective, f_minibatch_with_grad, cur_pvec,
                          options={'maxiter':300, 'disp':True})
#     for it in range(50):
#         result = scipy.optimize.minimize(objective_with_grad, cur_pvec, jac=True, method='L-BFGS-B', bounds=bnds, callback=None,
#                               options={'maxiter':50, 'disp':True})
        all_results.append(result)

        # Save the results (rewritten after every round so partial progress survives)
        with open(save_fname, 'wb') as f:
            pickle.dump([y_train, x_train, tanh_params,
                         all_results, 
                         init_paramvec, dict_ind, dict_shape, opt_params, 
                         bnds, transforms], f)
        cur_pvec = result.x
        cur_time = time.time()
        
        if callback_plot:
            print([it, cur_time - init_time, result.fun])
            tanh_callback_plot_external(cur_pvec, 
                                  opt_params, init_paramvec, transforms, dict_ind, dict_shape,
                                  tanh_params
                                 )

        # Exit if maximum time is reached
        if ((cur_time - init_time) > max_time):
            print(["Maximum iteration time reached at iter", it])
            break

        # Stop when the last round improved the objective by less than 1e-2 per trial
        if len(all_results)>=2:
            if (all_results[-1].fun - all_results[-2].fun) >= (-1e-2*num_trials):
                print(["Update did not improve objective function, stopping"])
                break
                
#     return all_results

In [None]:
# all_results = fit_GP(4,6,0,start_time=datetime.datetime.now().strftime("%Y%m%dT%H%M%S"), callback_plot=True)

In [None]:
# [cur_pvec, opt_params, init_paramvec, transforms, dict_ind, dict_shape] = (
#     fit_GP(4,6,3,start_time=datetime.datetime.now().strftime("%Y%m%dT%H%M%S"), return_init=True))

# tanh_callback_plot_external(cur_pvec, 
#                                   opt_params, init_paramvec, transforms, dict_ind, dict_shape,
#                                   tanh_params
#                                  )

In [11]:
# Timestamp shared by every result file written by this sweep
start_time = datetime.datetime.now().strftime("%Y%m%dT%H%M%S")

# Grid: batches x training-set sizes x numbers of fixed points
nbatches = 4
ntrials_set = np.array([4, 8, 16])
nfix_set = np.array([0,2,3,4])

# One fit_GP job per grid cell; each job writes its own pickle file.
# The nested loops enumerate the grid with batchnum outermost and nfix innermost.
Parallel(n_jobs=num_cores, verbose=5)(
    delayed(fit_GP)(nfix, ntrials, batchnum, start_time)
    for batchnum in range(nbatches)
    for ntrials in ntrials_set
    for nfix in nfix_set
)

[Parallel(n_jobs=24)]: Done  11 out of  48 | elapsed: 66.3min remaining: 223.1min
[Parallel(n_jobs=24)]: Done  21 out of  48 | elapsed: 81.3min remaining: 104.5min
[Parallel(n_jobs=24)]: Done  31 out of  48 | elapsed: 92.4min remaining: 50.6min
[Parallel(n_jobs=24)]: Done  41 out of  48 | elapsed: 121.2min remaining: 20.7min
[Parallel(n_jobs=24)]: Done  48 out of  48 | elapsed: 133.4min finished


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

## 3. Load and compare fits

In [None]:
# Inspect one saved fit. Pickles are binary: read in 'rb' mode and close the handle.
# NOTE(review): this file name follows an older naming scheme (with slope/max in
# the name) than the one fit_GP currently writes — confirm the file still exists.
with open("Experiment_1d_tanh_results/tanh_1d__slope_200_max_100_20180202T225247_00_fix_002_trials_batch_02.pkl", 'rb') as f:
    tmp = pickle.load(f)

In [None]:
# Fourth element of the loaded save. In the list fit_GP currently pickles this is
# `all_results`. NOTE(review): the loaded file comes from an older run — confirm the layout matches.
tmp[3]

In [13]:
# SGD run
# Select which saved sweep to analyse. These values overwrite the ones set in
# the fitting cell; they are used below to build the result file names and to
# reshape the metrics into (ntrials, nfix, batch) arrays, so they must match the
# sweep that actually wrote the files.
start_time = '20180204T203148'
nbatches = 4
ntrials_set = np.array([4, 8, 16])
nfix_set = np.array([0,2,3,4])


# # # L-BFGS-B run
# start_time = '20180203T162940'
# nbatches = 6
# ntrials_set = np.array([2, 4, 8, 16])
# nfix_set = np.array([0,2,3, 4])


# # SGD run
# start_time = '20180203T132534'
# nbatches = 1
# ntrials_set = np.array([2, 4, 8, 16])
# nfix_set = np.array([0,2,3,4])


# start_time = '20180202T230541'
# nbatches = 4 
# ntrials_set = np.array([2, 4, 8, 16])
# nfix_set = np.array([0,1,2,3,4,5])

In [14]:
# Establish test dataset: trials 350 onwards are held out for evaluation
test_trials = slice(350, None)
y_test = y[:, :, test_trials]
x_test = x[:, :, test_trials]

In [15]:
# Load the saved files
def load_GP_fit(ntrials_ind, nfix_ind, batchnum_ind, ntrials, nfix, batchnum, start_time):
    """Load the pickled result of one fit_GP run, or [] if it cannot be loaded.

    Args:
        ntrials_ind, nfix_ind, batchnum_ind: grid indices of the run (unused
            here; kept so callers can pass them positionally).
        ntrials, nfix, batchnum: grid values used to build the file name.
        start_time: timestamp string of the sweep that wrote the file.

    Returns:
        The unpickled object (the list saved by fit_GP), or an empty list when
        the file is missing or unreadable, so a partially finished sweep can
        still be analysed.
    """
    load_fname_params = "_%0.2d_fix_%0.3d_trials_batch_%0.2d" % (nfix, ntrials, batchnum)
    load_fname = "Experiment_1d_tanh_results/tanh_1d_" + start_time + load_fname_params + ".pkl"

    try:
        # Pickles are binary data: 'rb' is required on Python 3 and on Windows.
        # The with-block closes the handle even if unpickling fails.
        # NOTE: pickle can execute arbitrary code; only load files written by fit_GP.
        with open(load_fname, 'rb') as f:
            results_file = pickle.load(f)
    except Exception:
        # Best effort, but let KeyboardInterrupt/SystemExit propagate
        results_file = []

    return results_file

# Load every saved fit of the sweep. The grid is enumerated in row-major
# (ntrials, nfix, batch) order, which is the order the reshapes below rely on.
# Builtin zip replaces itertools.izip, which does not exist on Python 3; the
# grid is tiny, so materialising it on Python 2 is harmless.
GP_fit_saves = Parallel(n_jobs=num_cores)(
    delayed(load_GP_fit)(ntrials_ind, nfix_ind, batchnum_ind, ntrials, nfix, batchnum, start_time)
    for (ntrials_ind, nfix_ind, batchnum_ind), (ntrials, nfix, batchnum) in zip(
        itertools.product(range(len(ntrials_set)), range(len(nfix_set)), range(nbatches)),
        itertools.product(ntrials_set, nfix_set, range(nbatches))
    )
)

In [16]:
# Create predictions for each test set trial
def test_GP_pred(y_test, GPfit, cutoff=None):
    if len(GPfit)>0:
        # Sort results into variables to work with
        [y_train, x_train, tanh_params,
                     all_results, 
                     init_paramvec, dict_ind, dict_shape, opt_params, 
                     bnds, transforms] = GPfit                                   


        # Get GP predictions on test data
        return pred_GP(y_test, 
                       replace_params(all_results[-1].x, opt_params, init_paramvec), 
                       transforms, dict_ind, dict_shape, cutoff = cutoff)
    else:
        return []


# Load or make and save predictions
cutoff = None
preds_fname = "Experiment_1d_tanh_results/tanh_1d_" + start_time + "_predictions.pkl"
try:
    # Cached predictions also restore the grid they were computed on.
    # with-blocks make sure the file handles are closed (and the dump flushed).
    with open(preds_fname, 'rb') as f:
        [GP_predictions, ntrials_set, nfix_set, nbatches] = pickle.load(f)
except (OSError, IOError):
    # No cache yet: predict for every loaded fit and store the result
    GP_predictions = Parallel(n_jobs=num_cores)(
        delayed(test_GP_pred)(y_test, GPfit, cutoff=cutoff)
        for GPfit in GP_fit_saves
    )
    with open(preds_fname, 'wb') as f:
        pickle.dump([GP_predictions, ntrials_set, nfix_set, nbatches], f)


In [17]:
# Get errors based on predictions
axisError = (0,1,2)

# Linear AR(1) baseline, fitted on each run's own training data (GPfit[0] is y_train).
# Missing fits get an infinite error. np.inf is used because the np.infty alias
# was removed in NumPy 2.0.
AR_RMSE = []
for GPfit in GP_fit_saves:
    if len(GPfit)>0:
        AR_RMSE.append(rmse(pred_lin_AR1(y_test, GPfit[0], cutoff=cutoff), y_test, axis=axisError))
    else:
        AR_RMSE.append(np.inf)

# NOTE(review): elements 5 and 7 of pred_GP's output are used here as the
# negative log likelihood terms and the predictions — confirm against pred_GP.
GP_NLL = []
GP_RMSE = []
for GPpred in GP_predictions:
    if len(GPpred)>0:
        GP_NLL.append(np.sum(GPpred[5], axis=tuple(np.array(axisError)[1:]-1)))
        GP_RMSE.append(rmse(GPpred[7], y_test, axis=axisError))
    else:
        GP_NLL.append(np.inf)
        GP_RMSE.append(np.inf)

# Back to the (ntrials, nfix, batch) grid in which the fits were loaded
grid_shape = (len(ntrials_set), len(nfix_set), nbatches)
AR_RMSE = np.reshape(np.array(AR_RMSE), grid_shape)
GP_RMSE = np.reshape(np.array(GP_RMSE), grid_shape)
GP_NLL = np.reshape(np.array(GP_NLL), grid_shape)


In [18]:
# Median over batches of each metric; rows = training-set size, columns = number of fixed points.
# Single-argument print(...) calls print identically on Python 2 and parse on Python 3.
np.set_printoptions(precision=2)
print("AR_RMSE")
print(np.array(np.median(AR_RMSE, axis=2)))
print("-------------")
print("GP_RMSE")
print(np.array(np.median(GP_RMSE, axis=2)))
print("-------------")
print("GP_NLL")
print(np.array(np.median(GP_NLL, axis=2)))

AR_RMSE
[[ 0.55  0.55  0.55  0.55]
 [ 0.55  0.55  0.55  0.55]
 [ 0.55  0.55  0.55  0.55]]
-------------
GP_RMSE
[[ 0.52  0.51  0.51  0.52]
 [ 0.51  0.51  0.51  0.51]
 [ 0.51  0.51  0.51  0.51]]
-------------
GP_NLL
[[ 5709.27  5651.68  5642.99  5677.45]
 [ 5605.81  5594.    5587.5   5589.52]
 [ 5569.92  5563.36  5560.86  5573.71]]


In [19]:
# Plot negative log likelihoods for each options
def plot_nll_fixpoints(cur_nll_ntrial, nfix_set, nbatches):
    """Build one scatter trace per number of fixed points.

    Args:
        cur_nll_ntrial: array indexed [nfix_ind, batch] with the metric values
            for a single training-set size.
        nfix_set: the numbers of fixed points, one per row of cur_nll_ntrial.
        nbatches: unused; kept for interface compatibility.

    Returns:
        List of plotly Scatter traces; each puts all batches of one nfix value
        at x = nfix, with a shared legend group per nfix.
    """
    palette = plotly.colors.DEFAULT_PLOTLY_COLORS
    cur_plts = []
    # enumerate replaces the Python 2-only itertools.izip(range(len(...)), ...)
    for nfix_ind, nfix in enumerate(nfix_set):
        batch_vals = np.squeeze(cur_nll_ntrial[nfix_ind,:])
        cur_plts.append(plt_type.Scatter(x=nfix*np.ones_like(batch_vals), 
                                         y=batch_vals, 
                                         mode='markers', 
                                         # wrap around so long nfix_set lists cannot index past the palette
                                         marker = dict(color=palette[nfix_ind % len(palette)]),
                                         name = "nfix=%d" % nfix, 
                                         legendgroup="nfix=%d" % nfix))
    return cur_plts
    
    
from plotly import tools as plt_tools

def plot_nll(cur_nll, ntrials_set, nfix_set, nbatches):
    """Plot a metric against the number of fixed points, one subplot per training-set size.

    Args:
        cur_nll: array indexed [ntrials_ind, nfix_ind, batch].
        ntrials_set: training-set sizes (used as subplot titles).
        nfix_set: numbers of fixed points (x positions within each subplot).
        nbatches: forwarded to plot_nll_fixpoints.

    Returns:
        The plotly figure, which is also displayed.
    """
    fig_subplts = plt_tools.make_subplots(rows=1, cols=len(ntrials_set), subplot_titles=ntrials_set, shared_yaxes=True)
    # enumerate replaces the Python 2-only itertools.izip(range(len(...)), ...)
    for ntrials_ind, ntrials in enumerate(ntrials_set):
        traces = plot_nll_fixpoints(cur_nll[ntrials_ind,:,:], nfix_set, nbatches)
        for trace in traces:
            fig_subplts.append_trace(trace,1,ntrials_ind+1)
            # Show each legend entry only once (for the first subplot)
            if ntrials_ind>0:
                fig_subplts['data'][-1]['showlegend']=False
    plt(fig_subplts)
    return fig_subplts

# Test-set negative log likelihood per fit; the returned figure is kept in `a`
a = plot_nll(GP_NLL, ntrials_set, nfix_set, nbatches)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y1 ]  [ (1,3) x3,y1 ]



In [20]:
# Best (minimum) NLL over batches, followed by the index of the batch achieving it.
# Single-argument print(...) calls print identically on Python 2 and parse on Python 3.
np.set_printoptions(precision=2)
print("-------------")
print("GP_NLL")
print(np.array(np.min(GP_NLL, axis=2)))
print("-------------")
print("GP_NLL")
print(np.array(np.argmin(GP_NLL, axis=2)))

-------------
GP_NLL
[[ 5616.04  5591.83  5594.16  5592.86]
 [ 5583.29  5554.26  5575.46  5556.87]
 [ 5554.76  5550.32  5531.37  5545.45]]
-------------
GP_NLL
[[2 2 2 2]
 [2 1 1 1]
 [1 3 3 3]]


In [21]:
def nestedlist_to_array(nested_list, max_level = 0):
    """Convert the outer levels of a nested list into an object array.

    The first `max_level + 1` nesting levels become array dimensions (sizes are
    taken from the first element at each level); anything nested deeper is
    stored as-is in the cells. This avoids numpy trying to broadcast the inner
    lists, which np.array(nested_list) would do.

    Args:
        nested_list: (possibly nested) list. A non-list yields a 0-d array.
        max_level: deepest nesting level (0-based) that is turned into an axis.

    Returns:
        numpy array with dtype=object. An empty list at any converted level
        yields an empty array instead of raising IndexError.
    """
    array_dims = []
    probe = nested_list
    level = 0
    while (level <= max_level) and isinstance(probe, list):
        array_dims.append(len(probe))
        if not probe:
            # Empty level: there is no first element to descend into
            break
        probe = probe[0]
        level += 1

    out = np.empty(tuple(array_dims), dtype=object)

    # Walk every index tuple and store the element found there
    for inds in itertools.product(*[range(dim) for dim in array_dims]):
        item = nested_list
        for i in inds:
            item = item[i]
        out[inds] = item

    return out

In [23]:
# Plot a single fit
# Arrange the loaded saves on the (ntrials, nfix, batch) grid; max_level=0 keeps
# each save (itself a list) as one cell of the object array.
GP_fit_saves_rs = np.reshape(nestedlist_to_array(GP_fit_saves, max_level=0), 
                             (len(ntrials_set), len(nfix_set), nbatches))

nfix_ind = 3
ntrials_ind = 1
batchnum = 0

# Single-argument print(...) calls print identically on Python 2 and parse on Python 3
print(GP_RMSE[ntrials_ind, nfix_ind, batchnum])
print(GP_NLL[ntrials_ind, nfix_ind, batchnum])

# NOTE: this overwrites the notebook-level tanh_params, y_train, etc. with the
# values stored alongside the selected fit.
[y_train, x_train, tanh_params,
 all_results, 
 init_paramvec, dict_ind, dict_shape, opt_params, 
 bnds, transforms] = GP_fit_saves_rs[ntrials_ind, nfix_ind, batchnum]

print(len(init_paramvec))

# Transition function at initialisation ...
tanh_callback_plot_external(init_paramvec[opt_params], 
        opt_params, init_paramvec, transforms, dict_ind, dict_shape,
        tanh_params)

# for res in all_results[:-1]:
#     tanh_callback_plot_external(res.x, 
#             opt_params, init_paramvec, transforms, dict_ind, dict_shape,
#             tanh_params)

# plt([plt_type.Scatter(x=np.array(all_results[-1].objs)[:,0], y = np.array(all_results[-1].objs)[:,1])])
# for theta_ind in range(len(all_results[0].theta_hist)):
#     if (not np.mod(theta_ind, 30)) and (theta_ind<200):
#         print np.array(all_results[0].objs)[np.array(all_results[0].objs)[:,0]==theta_ind, :]
#         tanh_callback_plot_external(all_results[0].theta_hist[theta_ind], 
#             opt_params, init_paramvec, transforms, dict_ind, dict_shape,
#             tanh_params)
    
# ... and after optimisation
tanh_callback_plot_external(all_results[-1].x, 
        opt_params, init_paramvec, transforms, dict_ind, dict_shape,
        tanh_params)

#

paramvec = replace_params(all_results[-1].x, opt_params, init_paramvec)
paramdict = vec_to_params(paramvec, dict_ind, dict_shape, transforms)

# Unpack the usual parameters (relies on the dict holding exactly these 14 entries in this order)
(Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u, lengthscales, kernel_variance, s, J, Sigma_s, Sigma_J)  = \
    paramdict.values()
    
# Fitted fixed points (first column) next to their variances (second column)
print(np.concatenate([s.T, Sigma_s], axis=1))

0.510563472058
5590.9814168
59


[[ 0.06  0.01]
 [-0.94  0.27]
 [ 1.    0.13]
 [-0.55  0.17]]


In [None]:
# Scratch check: boolean mask of the rows of the recorded objective log whose
# first column equals 50.
# NOTE(review): presumably the first column is the iteration index — confirm against adamOptimize.
np.array(all_results[0].objs)[:,0]==50

In [None]:
# Scratch check of the "every k-th iteration" test: np.mod(200, 50) is 0, so this is True
not np.mod(200, 50)

In [None]:
# Scratch check: index range over the stored parameter history of the first result
range(len(all_results[0].theta_hist))

In [None]:
# Scratch check: the recorded objective log of the last result, as an array
np.array(all_results[-1].objs)

In [None]:
# Rebuild the full parameter dict from the last optimisation result
paramvec = replace_params(all_results[-1].x, opt_params, init_paramvec)
paramdict = vec_to_params(paramvec, dict_ind, dict_shape, transforms)

# Unpack the usual parameters
# (relies on the dict holding exactly these 14 entries in this order)
(Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u, lengthscales, kernel_variance, s, J, Sigma_s, Sigma_J)  = \
    paramdict.values()
    
# Show the fitted parameters with more digits than the summary tables above
np.set_printoptions(precision=8)
paramdict

In [None]:
# Number of optimisation rounds stored for the selected fit
len(all_results)

In [None]:
# Parameter dict at initialisation, for comparison with the fitted values
vec_to_params(init_paramvec, dict_ind, dict_shape, transforms)

In [None]:
# Dump the inducing-point and fixed-point parameters currently unpacked in the notebook.
# Single-argument print(...) calls print identically on Python 2 and parse on Python 3.
np.set_printoptions(precision=8)
print(z)
print(u)
print(Sigma_u)
print(s)
print(Sigma_s)
print(Sigma_J)

In [None]:
# print(...) form works on both Python 2 and Python 3
print(lengthscales)

In [None]:
# NOTE(review): this scratch cell appears to target an older version of the
# prior interface than fit_GP uses ('func'/'inds'/'unpack_x' here versus
# 'type'/'unpack_dict' there), and `log_transformed` is not defined anywhere in
# this notebook — confirm it still runs before relying on it.

# Add priors (to span at least the bounds)
priors = []

# Add prior to ensure inducing points are smooth
cur_prior = {}
cur_prior['func'] = create_prior(prior_distribution="InducingSmooth")
cur_prior['inds'] = range(len(init_paramvec))
cur_prior['metadata'] = {}
def unpack_x_tmp(x):
    """Unpack a full parameter vector into the tuple the smoothness prior consumes.

    Returns (values, locations, variances, kernelparams), where inducing points
    and fixed points are concatenated; a fixed point s serves as both its own
    location and its own value.
    """
    # Unpack all the usual parameters
    # NOTE(review): vec_to_params is unpacked as a 14-tuple here, whereas the
    # rest of the notebook treats its result as a dict — confirm.
    param_tuple = vec_to_params(x, dict_ind, dict_shape)
    (Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, 
     z, u, Sigma_u, 
     lengthscales, kernel_variance, s, J, Sigma_s, Sigma_J)  = \
        param_tuple

    kernelparams = {'lengthscales': lengthscales, 'kernel_variance': kernel_variance}
    # Return the ones we want in the required format
    return (np.concatenate([u, s],axis=1), 
            np.concatenate([z, s],axis=1),
            np.concatenate([Sigma_u, Sigma_s]),
            kernelparams)
cur_prior['metadata']['unpack_x'] = unpack_x_tmp
cur_prior['metadata']['kernel_func'] = RBF
priors.append(cur_prior)

# Objective as a function of the optimised parameter subset only
tmp_func = lambda pvec_partial: (time_full_iter(replace_params(pvec_partial, opt_params, init_paramvec), 
                                                y_train, dict_ind, dict_shape, 
                                                log_transformed=log_transformed,
                                                priors=priors)[0])


In [None]:
# Gradient of the scratch objective at a stored optimisation result.
# NOTE(review): all_results[1] needs at least two stored rounds, but fit_GP
# currently runs a single round (range(1)) — confirm which run this targets.
paramvec = replace_params(all_results[1].x, opt_params, init_paramvec)
#paramvec = log_transform_inv(paramvec, log_transformed)
# Back to the optimised subset, which is what tmp_func expects
paramvec = paramvec[opt_params]
a = grad(tmp_func)(paramvec)

In [None]:
# Scatter the gradient `a` into a full-length vector and unpack it per
# parameter, so the names below hold gradient entries rather than parameter values.
paramvec = replace_params(a, opt_params, init_paramvec)
#paramvec = log_transform_inv(paramvec, log_transformed)

# Unpack the usual parameters
# NOTE(review): vec_to_params is unpacked as a 14-tuple here, whereas the rest
# of the notebook treats its result as a dict — confirm.
param_tuple = vec_to_params(paramvec, dict_ind, dict_shape)
(Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u, lengthscales, kernel_variance, s, J, Sigma_s, Sigma_J)  = \
    param_tuple

In [None]:
# Single-argument print(...) calls print identically on Python 2 and parse on Python 3
np.set_printoptions(precision=8)
print(z)
print(u)
print(Sigma_u)
print(s)
print(Sigma_s)

In [None]:
# Inducing-point entries only; print(...) works on both Python 2 and Python 3
np.set_printoptions(precision=8)
print(z)
print(u)
print(Sigma_u)

In [None]:
# Fixed-point entries only; print(...) works on both Python 2 and Python 3
print(s)
print(Sigma_s)

In [None]:
# Parameter dict at initialisation, with the transforms passed by keyword
vec_to_params(init_paramvec, dict_ind, dict_shape, transforms=transforms)