In [1]:
# nbimporter allows other notebooks to be imported as modules (the cell output
# reports "Importing Jupyter notebook from ..." for each one).
import nbimporter

# Import 1d well examples
# NOTE(review): this star import is presumably where np, plotly, plt, plt_type,
# OrderedDict, scipy, pickle, time and the kwells_* / init_params helpers used in
# the cells below come from -- confirm against GPDM_Examples.ipynb.
from GPDM_Examples import *

# Initialise plotly's offline notebook mode before any figure is drawn.
plotly.offline.init_notebook_mode(connected=True)

from joblib import Parallel, delayed
import multiprocessing
import itertools

# Number of CPUs reported by the OS. NOTE(review): not referenced by the cells
# visible here (the Parallel call uses a fixed n_jobs).
num_cores = multiprocessing.cpu_count()

# Disabled alternative: execute the helper notebooks with %run instead of importing them.
# %run GPDM_direct_fixedpoints.ipynb
# %run GPDM_Examples.ipynb

Importing Jupyter notebook from GPDM_Examples.ipynb
Importing Jupyter notebook from GPDM_direct_fixedpoints.ipynb



Matplotlib is building the font cache using fc-list. This may take a moment.



In [2]:
# Settings for the 1d two-well example. The whole mapping is forwarded as keyword
# arguments to kwells_draw_trial / kwells_true_tr_fnc, so key names must stay as is.
# Insertion order is preserved by building the OrderedDict from an ordered list of pairs.
kwells_params = OrderedDict([
    ("rseed", 1234),  # Happens to create a balanced training set in the first 8 trials in y
    ("Ny", 500),      # number of trials drawn by the generation loop
    ("T", 20),        # used as the length of the time axis when plotting trials
    ("slope", -0.9),
    ("Sigma_eps", 5e-2),
    ("well_locs", np.array([1.0, -1.0])),
    ("well_width", 3),
    ("Sigma_nu", 1e-3),
    ("mu_0_0", 0.),
    ("Sigma_0_0", 1e-4),
])


# Draw Ny trials from a fixed seed, then join them along a new trailing (trial)
# axis so that x and y end up as [D x T x N] arrays.
np.random.seed(kwells_params["rseed"])


def _cube(value):
    """Cubic transform handed to kwells_draw_trial as transform_func."""
    return value**3


# The draws happen strictly in trial order, so the random stream is consumed
# exactly as in a plain for-loop.
drawn_trials = [kwells_draw_trial(transform_func=_cube, **kwells_params)
                for n in range(kwells_params["Ny"])]

# Give every per-trial array a singleton third axis so they can be concatenated on it
all_trials_x = [x_n[:, :, np.newaxis] for (x_n, y_n) in drawn_trials]
all_trials_y = [y_n[:, :, np.newaxis] for (x_n, y_n) in drawn_trials]

x = np.concatenate(all_trials_x, axis=2)
y = np.concatenate(all_trials_y, axis=2)

# Swap around trials to ensure balanced training sets: the leading 40% of the
# trials are rearranged in place so that their types alternate +1, -1, +1, ...
def _trial_has_type(trial_idx, type_sign):
    """True when the mean of y from time index T-4 onward, times type_sign, exceeds 0.2."""
    late_mean = np.mean(y[:, (kwells_params["T"] - 4):, trial_idx])
    return late_mean * type_sign > 0.2


cur_trial = 0
cur_trial_type = 1.
while cur_trial < 0.4 * kwells_params["Ny"]:
    if not _trial_has_type(cur_trial, cur_trial_type):
        # Search for a trial of the wanted type at cur_trial+1, cur_trial+3, ...
        # NOTE: the search advances in steps of two and has no upper bound, so it
        # ends in an IndexError if no candidate on that stride matches.
        switch_trial = cur_trial + 1
        while not _trial_has_type(switch_trial, cur_trial_type):
            switch_trial += 2

        # Exchange the two trials in y and in x. The fancy-indexed right-hand side
        # is materialised as a copy before assignment, so no temporary is needed.
        swap_dst = [cur_trial, switch_trial]
        swap_src = [switch_trial, cur_trial]
        y[:, :, swap_dst] = y[:, :, swap_src]
        x[:, :, swap_dst] = x[:, :, swap_src]

    # Move on to the next slot, which must hold the opposite type
    cur_trial += 1
    cur_trial_type *= -1.

# One line trace per trial for the first 20 trials of x
time_axis = np.squeeze(np.arange(kwells_params["T"]))
plots_by_run = [
    plt_type.Scatter(x=time_axis,
                     y=np.squeeze(x[:, :, v]),
                     mode='lines')
    for v in range(20)
]

# Reference error printed for comparison with the fits below.
# NOTE(review): presumably a linear AR(1) predictor fitted on the first 50 trials
# and scored on trials 200 onward -- confirm the argument order of pred_lin_AR1.
# Written as a call with a single argument so it prints the same on Python 2 and 3.
print(rmse(pred_lin_AR1(y[:, :, 200:], y[:, :, :50], cutoff=None), y[:, :, 200:]))

# Evaluate the true transition function on a regular grid
xstar = np.atleast_2d(np.arange(-2.5, 2.5, 0.05))
true_tr_vals = kwells_true_tr_fnc(xstar, **kwells_params)

# Offset used for the two thin band traces around the true function
eps_std = np.sqrt(kwells_params["Sigma_eps"])

# Each trace gets its own legend label so the band traces can be told apart
# from the function itself.
plt([
    plt_type.Scatter(x=np.squeeze(xstar),
                     y=np.squeeze(true_tr_vals), mode='markers',
                     name='True trans. f.',
                     marker=dict(color='orange')),
    plt_type.Scatter(x=np.squeeze(xstar),
                     y=np.squeeze(true_tr_vals + eps_std), mode='markers',
                     name='True trans. f. + sqrt(Sigma_eps)',
                     marker=dict(color='orange', size=2)),
    plt_type.Scatter(x=np.squeeze(xstar),
                     y=np.squeeze(true_tr_vals - eps_std), mode='markers',
                     name='True trans. f. - sqrt(Sigma_eps)',
                     marker=dict(color='orange', size=2)),
])

plt(plots_by_run)

0.365501842329


In [6]:
def fit_GP(num_fixed_points, num_trials, batchnum, start_time,
           max_time=4.5*3600, results_dir="Experiment_1d_wells_results"):
    """Fit the model to one batch of trials, checkpointing after every optimiser batch.

    The training data are taken from the notebook-level arrays ``x`` and ``y``:
    trials ``batchnum*num_trials`` up to ``(batchnum+1)*num_trials`` along the
    third axis. Parameters are initialised with ``init_params``, packed into a
    flat vector, and optimised with L-BFGS-B in up to 100 batches of at most 10
    iterations each. After every batch the accumulated results are pickled.

    Parameters
    ----------
    num_fixed_points : int
        Number of fixed points ``Ns``; the number of inducing points is
        ``Nz = 16 - Ns``.
    num_trials : int
        Number of trials in the training batch. Also scales the
        minimum-improvement stopping threshold (``1e-2 * num_trials``).
    batchnum : int
        Index of the batch of ``num_trials`` trials to train on.
    start_time : str
        Tag inserted into the checkpoint file name.
    max_time : float, optional
        Wall-clock budget in seconds, measured from the start of optimisation;
        checked after each batch. Defaults to 4.5 hours.
    results_dir : str, optional
        Directory for the checkpoint pickle. It is created if missing.

    Returns
    -------
    None
        Results are only written to disk.

    Side effects
    ------------
    Reseeds numpy's global RNG with ``kwells_params['rseed'] * 2`` and
    overwrites the checkpoint file after every optimiser batch.
    """
    # Local import: the checkpoint-directory handling needs os, which this
    # notebook does not import explicitly.
    import os

    # Create the output directory up front, so a missing directory fails here and
    # not after the first (expensive) optimiser batch.
    if not os.path.isdir(results_dir):
        try:
            os.makedirs(results_dir)
        except OSError:
            # Another parallel worker may have created it between the check and
            # the call; only re-raise if it still does not exist.
            if not os.path.isdir(results_dir):
                raise

    # Set number of parameters
    D = 1
    Ns = num_fixed_points
    Nz = 16 - Ns

    # Fix the training data
    batch_start = batchnum*num_trials
    batch_stop = batch_start + num_trials
    y_train = y[:, :, batch_start:batch_stop]
    x_train = x[:, :, batch_start:batch_stop]

    # Seed differently from data generation so initialisation is reproducible
    np.random.seed(kwells_params['rseed']*2)
    (Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u, lengthscales, kernel_variance, s, J) = \
        init_params(y_train, D, Nz, Ns)

    # Fix noise to true value
    Sigma_nu = kwells_params['Sigma_nu'] * np.ones_like(Sigma_nu)

    # Override the initial variances of the inducing values and fixed-point terms
    Sigma_u = 1e-2*np.ones((Nz, 1))
    Sigma_s = 1e-3*np.ones((Ns, 1))
    Sigma_J = 1e-3*np.ones((Ns*D, 1))

    (init_paramvec, dict_ind, dict_shape) = params_to_vec(Sigma_eps, mu_0_0, Sigma_0_0, C, Sigma_nu, z, u, Sigma_u,
                                                          lengthscales, kernel_variance, s, J, Sigma_s=Sigma_s, Sigma_J=Sigma_J)

    # Transform certain elements of the parameter vector to optimise in log space
    log_transformed = np.concatenate([dict_ind['Sigma_0_0'], dict_ind['Sigma_u'],
                                      dict_ind['Sigma_s'], dict_ind['Sigma_J'],
                                      dict_ind['lengthscales'], dict_ind['Sigma_eps'],
                                      dict_ind['Sigma_nu'],
                                      dict_ind['kernel_variance']
                                     ])

    init_paramvec = log_transform(init_paramvec, log_transformed)

    # Optimise all elements of paramvec except C, Sigma_nu and J, which stay at
    # their initial values
    opt_params = np.arange(init_paramvec.shape[0])
    opt_params = np.delete(opt_params, np.hstack([dict_ind['C'], dict_ind['Sigma_nu'], dict_ind['J']]))
    cur_pvec = init_paramvec[opt_params]

    # Add bounds for parameters (entries without an explicit bound stay unbounded)
    bnds = [(None, None)] * init_paramvec.shape[0]

    # Variance-like parameters: fixed absolute range, expressed in log space when
    # the entry is optimised in log space
    var_lb, var_ub = 1e-6, 1e2
    for i in np.concatenate([dict_ind['Sigma_0_0'], dict_ind['Sigma_nu'], dict_ind['Sigma_eps'],
                             dict_ind['Sigma_u'], dict_ind['Sigma_s'], dict_ind['Sigma_J']]):
        if i in log_transformed:
            bnds[i] = (np.log(var_lb), np.log(var_ub))
        else:
            bnds[i] = (var_lb, var_ub)

    # Kernel hyperparameters: within a factor of 0.3 .. 3 of their initial value
    for i in np.concatenate([dict_ind['lengthscales'], dict_ind['kernel_variance']]):
        if i in log_transformed:
            bnds[i] = (init_paramvec[i] + np.log(0.3), init_paramvec[i] + np.log(3.0))
        else:
            bnds[i] = (init_paramvec[i]*0.3, init_paramvec[i]*3.)

    # Keep only the bounds of the entries that are actually optimised
    bnds = tuple(bnds[i] for i in opt_params)

    # No priors are placed on any parameter
    prior_funcs = [None] * init_paramvec.shape[0]

    def objective(pvec_partial):
        """Objective value for the optimised subset; the first output of time_full_iter."""
        full_pvec = replace_params(pvec_partial, opt_params, init_paramvec)
        return time_full_iter(full_pvec,
                              y_train, dict_ind, dict_shape,
                              log_transformed=log_transformed,
                              prior_funcs=prior_funcs)[0]

    objective_with_grad = value_and_grad(objective, argnum=0)

    # By iterating minimize within a for cycle, we can save all intermediate results and set ending times
    save_fname_params = "_%0.2d_fix_%0.3d_trials_batch_%0.2d" % (num_fixed_points, num_trials, batchnum)
    save_fname = os.path.join(results_dir, "well_1d_k2_" + start_time + save_fname_params + ".pkl")
    init_time = time.time()
    all_results = []

    for it in range(100):
        result = scipy.optimize.minimize(objective_with_grad, cur_pvec, jac=True, method='L-BFGS-B', bounds=bnds, callback=None,
                                         options={'maxiter': 10, 'disp': True})
        all_results.append(result)

        # Save the results (the file is rewritten in full after every batch)
        with open(save_fname, 'wb') as f:
            pickle.dump([y_train, x_train, kwells_params,
                         all_results,
                         init_paramvec, dict_ind, dict_shape, opt_params,
                         bnds, log_transformed], f)

        # Warm-start the next batch from the current optimum
        cur_pvec = result.x
        cur_time = time.time()

        # Exit if maximum time is reached
        if (cur_time - init_time) > max_time:
            print(["Maximum iteration time reached at iter", it])
            break

        # Exit when the objective decreased by less than 1e-2 per trial since the
        # previous batch
        if len(all_results) >= 2:
            if (all_results[-1].fun - all_results[-2].fun) >= (-1e-2*num_trials):
                print(["Update did not improve objective function, stopping"])
                break

In [None]:
# Experiment grid: one fit per (number of trials, number of fixed points, batch)
start_time = '20180105T145529'  # tag embedded in every checkpoint file name
nbatches = 1
ntrials_set = np.array([128])
nfix_set = np.array([0,1,2,3,4])

# Lazily describe one fit_GP call per grid point; note that the grid order is
# (ntrials, nfix, batch) while fit_GP takes (nfix, ntrials, batch, start_time).
fit_jobs = (
    delayed(fit_GP)(nfix, ntrials, batchnum, start_time)
    for ntrials, nfix, batchnum in itertools.product(ntrials_set, nfix_set, range(nbatches))
)

# Run the fits on up to 7 worker processes
Parallel(n_jobs=7)(fit_jobs)