In [10]:
import scipy.io
import GPy
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.model_selection import train_test_split

In [11]:
def load_data(personalize=False):
    """Load the train/test .mat files and split the test file into test and pool sets.

    The rows of the test file are divided in half with ``train_test_split``
    (``test_size=0.5``, ``random_state=42``). The subject-number column is
    attached to the feature matrix as its last column before the split so that
    it is shuffled together with the rows, and is detached again afterwards.

    Parameters
    ----------
    personalize : bool
        When true, the ``pf_tr`` / ``pf_te`` columns are appended to the
        ``f_tr`` / ``f_te`` feature matrices.

    Returns
    -------
    tuple
        ``(train_x, train_y, test_x, test_y, pool_x, pool_y,
        train_sub_num, test_sub_num, pool_sub_num)``
    """
    def _detach_last_column(matrix):
        # Returns (matrix without its last column, last column as an (n, 1) array).
        return matrix[:, :-1].copy(), matrix[:, -1:]

    train_mat = scipy.io.loadmat("parkinsonDatTr_correct.mat")
    test_mat = scipy.io.loadmat("parkinsonDatTe_correct.mat")

    train_x, train_y = train_mat["f_tr"], train_mat["lab_tr"]
    train_sub_num, train_extra = train_mat["p_tr"], train_mat["pf_tr"]

    held_out_x, held_out_y = test_mat["f_te"], test_mat["lab_te"]
    held_out_sub_num, held_out_extra = test_mat["p_te"], test_mat["pf_te"]

    if personalize:
        train_x = np.hstack((train_x, train_extra))
        held_out_x = np.hstack((held_out_x, held_out_extra))

    # Carry the subject numbers through the shuffle as a trailing column.
    held_out_x = np.hstack((held_out_x, held_out_sub_num))
    test_part, pool_part, test_y, pool_y = train_test_split(
        held_out_x, held_out_y, test_size=0.5, random_state=42
    )

    test_x, test_sub_num = _detach_last_column(test_part)
    pool_x, pool_sub_num = _detach_last_column(pool_part)

    return (train_x, train_y, test_x, test_y, pool_x, pool_y,
            train_sub_num, test_sub_num, pool_sub_num)

In [12]:
def train(train_x, train_y, model=None):
    """Fit a GP regression model, or refit an existing one on new data.

    Parameters
    ----------
    train_x : array with one row per sample; its column count sets the RBF
        kernel's input dimension when a new model is built.
    train_y : array of targets (copied when a new model is built).
    model : existing model to reuse, or ``None`` to build a fresh
        ``GPy.models.GPRegression`` with an RBF kernel. A reused model gets
        its data replaced via ``set_XY`` and is then optimised again.

    Returns
    -------
    The optimised model (the same object that was passed in, if any).
    """
    # Identity check: `== None` would dispatch to any custom __eq__ on the model.
    if model is None:
        # The kernel is only needed for a fresh model, so build it here
        # rather than on every call.
        kernel = GPy.kern.RBF(train_x.shape[1])
        model = GPy.models.GPRegression(train_x, train_y.copy(), kernel=kernel)
    else:
        model.set_XY(train_x, train_y)
    # messages=1 is passed through to the optimiser (progress reporting).
    model.optimize(messages=1)
    return model

In [13]:
def predict_gp(model, test_x):
    """Run ``model.predict`` on ``test_x`` and unpack what the notebook needs.

    Returns
    -------
    value : first column of the predicted means, shaped ``(n_rows, 1)``.
    std : element-wise square root of the second array returned by ``predict``.
    model_variance : ``model.parameters[0].variance[0]``
        (presumably the kernel's variance hyperparameter - confirm against the
        model built in ``train``).
    """
    outputs = model.predict(test_x)
    mean = outputs[0]
    variance = outputs[1]

    n_rows = test_x.shape[0]
    value = mean[:, 0].reshape(n_rows, 1)
    std = variance ** 0.5

    first_parameter = model.parameters[0]
    model_variance = first_parameter.variance[0]
    return value, std, model_variance

In [14]:
def find_MSE(y, y_pred):
    """Return ``(mean, std)`` of the squared differences between column 0 of
    ``y`` and column 0 of ``y_pred``."""
    residual = y[:, 0] - y_pred[:, 0]
    squared_error = residual ** 2
    return squared_error.mean(), squared_error.std()

In [15]:
def get_individual_data(test_x, test_y, test_sub_num):
    """Group feature rows and label rows by subject number.

    Parameters
    ----------
    test_x : 2-D array; row ``i`` is the feature vector of sample ``i``.
    test_y : array whose row ``i`` is the label of sample ``i``.
    test_sub_num : 2-D array; ``test_sub_num[i][0]`` is the subject of sample ``i``.

    Returns
    -------
    (sub_data, sub_labels) : two plain dicts keyed by subject number, each
        mapping to a list of rows in their original order. Both are empty when
        ``test_x`` has no rows.
    """
    sub_data = {}
    sub_labels = {}
    for i in range(test_x.shape[0]):
        subject_num = test_sub_num[i][0]
        # setdefault replaces the old try / bare-except membership test, which
        # also caught unrelated errors (including KeyboardInterrupt) and then
        # overwrote the subject's list.
        sub_data.setdefault(subject_num, []).append(test_x[i])
        sub_labels.setdefault(subject_num, []).append(test_y[i])
    return sub_data, sub_labels

In [27]:
# Load the train / test / pool splits with the extra `pf_*` feature columns appended.
(train_x, train_y,
 test_x, test_y,
 pool_x, pool_y,
 train_sub_num, test_sub_num, pool_sub_num) = load_data(personalize=True)

In [28]:
# Baseline fit: no existing model is handed over, so train() builds a new one.
model = train(train_x, train_y, model=None)
prediction, std, model_variance = predict_gp(model=model, test_x=test_x)

KeyboardInterrupt: 

In [29]:
## Evaluation helper (used for the baseline and after every active-learning refit)
def get_results(model, train_x, train_y, test_x, test_y, sub_num=None):
    """Compute squared-error statistics overall and per subject.

    Parameters
    ----------
    model : fitted model accepted by ``predict_gp``.
    train_x, train_y : unused; kept so existing call sites keep working.
    test_x, test_y : evaluation features and labels.
    sub_num : optional ``(n, 1)`` array of subject numbers aligned with
        ``test_x``. When omitted, the notebook-level ``test_sub_num`` is used,
        which is what this function always did before the parameter existed.

    Returns
    -------
    dict mapping ``"overall"`` and each subject number to the
    ``(mean, std)`` pair returned by ``find_MSE``.
    """
    if sub_num is None:
        # Backward-compatible fallback to the global the function used to read.
        sub_num = test_sub_num

    results = {}

    prediction, _, _ = predict_gp(model, test_x)
    # Argument order follows find_MSE(y, y_pred); the error is squared, so the
    # numbers are the same as with the arguments swapped.
    results["overall"] = find_MSE(test_y, prediction)

    subject_data, subject_labels = get_individual_data(test_x, test_y, sub_num)
    for key in subject_data:
        data_x = np.array(subject_data[key])
        data_y = np.array(subject_labels[key])
        prediction, _, _ = predict_gp(model, data_x)
        results[key] = find_MSE(data_y, prediction)
    return results

In [30]:
# Baseline: error of the model fitted on the original training set only.
final_results_baseline = get_results(model, train_x, train_y, test_x, test_y)
# Single-argument call form prints the same text on Python 2 and also runs on Python 3.
print(final_results_baseline)

{33.0: (0.000890335667206219, 0.002436647304971899), 34.0: (0.007679801554636552, 0.0045951500942287811), 35.0: (0.010549841094458216, 0.0075962314545742326), 36.0: (0.0023760675393333913, 0.004311179808224954), 37.0: (0.022743534721514642, 0.016240356721905099), 38.0: (0.0047498446509856053, 0.010511841914811717), 39.0: (0.021074744422499979, 0.015444221565963149), 40.0: (0.00022332423491178518, 0.00050907797694900046), 41.0: (0.0014250721333938169, 0.0019512902411784586), 42.0: (0.0066364457855910214, 0.0056183698474859146), 'overall': (0.0076960736621498405, 0.011381051962419664)}


In [31]:
### AL1: move the pool points with the largest predictive variance into the training set
final_results_al1 = {}
num_points_to_query = pool_x.shape[0]
num_points_before_update = 100
i = 0
while i < num_points_to_query:
    # Refit on the current training set (reusing the existing `model` object)
    # and record the test error *before* this batch of pool points is added.
    model = train(train_x, train_y, model)
    final_results = get_results(model, train_x, train_y, test_x, test_y)
    final_results_al1[i] = final_results

    # The model is not refit inside a batch, so predicting on the pool once
    # per batch is enough; the old loop repeated this prediction for every
    # single pick.
    pool_prediction, pool_std, model_variance = predict_gp(model, pool_x)

    # A stable sort on the negated variance yields the same pick order as
    # repeatedly taking argmax and deleting that row (ties go to the earliest
    # row). "mergesort" is numpy's stable sort.
    ranked = np.argsort(-(pool_std ** 2).ravel(), kind="mergesort")
    # The slice is naturally shorter than a full batch when the pool runs out.
    chosen = ranked[:num_points_before_update]

    # Append the chosen rows to the training set in pick order ...
    train_x = np.concatenate((train_x, pool_x[chosen]), axis=0)
    train_y = np.concatenate((train_y, pool_y[chosen]), axis=0)
    train_sub_num = np.concatenate((train_sub_num, pool_sub_num[chosen]), axis=0)

    # ... and drop them from the pool.
    pool_x = np.delete(pool_x, chosen, axis=0)
    pool_y = np.delete(pool_y, chosen, axis=0)
    pool_sub_num = np.delete(pool_sub_num, chosen, axis=0)

    i = i + len(chosen)
    print("number of points done: " + str(i))
print(final_results_al1)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    18s19  0003  -1.367909e+04   7.152713e+03 
 01m00s19  0010  -1.368226e+04   4.125685e-05 
Runtime:  01m00s19
Optimization status: Converged

number of points done: 100
Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    26s13  0004  -1.382946e+04   6.191818e+03 
 01m04s87  0010  -1.383243e+04   6.408711e-07 
Runtime:  01m04s87
Optimization status: Converged

number of points done: 200
Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    47s65  0007  -1.414410e+04   6.545606e-01 
 01m21s56  0012  -1.414410e+04   1.851685e-04 
Runtime:  01m21s56
Optimization status: Converged

number of points done: 300
Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    28s93  0004  -1.422899e+04   1.054345e+03 
 01m33s26  0013  -1.422988e+04   2.163938e-04 
Runtime:  01m33s26
Opt

In [22]:
# Single-argument call form prints the same text on Python 2 and also runs on Python 3.
print(final_results_al1)

{0: {33.0: (0.0058619216508665386, 0.0084417428450034356), 34.0: (0.012085142797873739, 0.0065378620313843623), 35.0: (0.024351584730638604, 0.007906475158701921), 36.0: (0.033699880215304137, 0.014543067739237149), 37.0: (0.052762320281151853, 0.01015576491309873), 38.0: (0.0042529043194775582, 0.0095485415493390662), 39.0: (0.026657332545918393, 0.017093948391813649), 40.0: (0.026798063476144152, 0.029032874239757273), 41.0: (0.015982368523160723, 0.016279524316108178), 42.0: (0.0078234203398659108, 0.0061133679079156652), 'overall': (0.020271945093568711, 0.019813133849467066)}, 100: {33.0: (0.0018732788054652157, 0.0029712442549556404), 34.0: (0.01225721955654136, 0.0064860194914040555), 35.0: (0.023861006134451239, 0.0080177568292656137), 36.0: (0.0022350863350023447, 0.0042791238256854889), 37.0: (0.046388273497455397, 0.019445222998890007), 38.0: (0.0042096280159692624, 0.0095202530549496431), 39.0: (0.023980180135600493, 0.014853549378364509), 40.0: (0.027663955414234723, 0.031

In [32]:
# Reload fresh splits: the AL1 cell rebinds train_* and pool_* as it moves
# pool rows into the training set, so the next strategy must start over.
(train_x, train_y,
 test_x, test_y,
 pool_x, pool_y,
 train_sub_num, test_sub_num, pool_sub_num) = load_data(personalize=True)

In [None]:
### AL2: move the pool points with the smallest mean / sqrt(variance + model_variance) score
final_results_al2 = {}
num_points_to_query = pool_x.shape[0]
num_points_before_update = 100
i = 0
while i < num_points_to_query:
    # Refit on the current training set (reusing the existing `model` object)
    # and record the test error *before* this batch of pool points is added.
    model = train(train_x, train_y, model)
    final_results = get_results(model, train_x, train_y, test_x, test_y)
    final_results_al2[i] = final_results

    # The model is not refit inside a batch, so predicting on the pool once
    # per batch is enough; the old loop repeated this prediction (and printed
    # a stray blank line) for every single pick.
    pool_prediction, pool_std, model_variance = predict_gp(model, pool_x)

    # predict_gp already returns the mean as an (n, 1) column, so no reshape
    # is needed before dividing.
    score = pool_prediction / (((pool_std ** 2) + model_variance) ** 0.5)

    # A stable ascending sort yields the same pick order as repeatedly taking
    # argmin and deleting that row (ties go to the earliest row).
    # "mergesort" is numpy's stable sort.
    ranked = np.argsort(score.ravel(), kind="mergesort")
    # The slice is naturally shorter than a full batch when the pool runs out.
    chosen = ranked[:num_points_before_update]

    # Append the chosen rows to the training set in pick order ...
    train_x = np.concatenate((train_x, pool_x[chosen]), axis=0)
    train_y = np.concatenate((train_y, pool_y[chosen]), axis=0)
    train_sub_num = np.concatenate((train_sub_num, pool_sub_num[chosen]), axis=0)

    # ... and drop them from the pool.
    pool_x = np.delete(pool_x, chosen, axis=0)
    pool_y = np.delete(pool_y, chosen, axis=0)
    pool_sub_num = np.delete(pool_sub_num, chosen, axis=0)

    i = i + len(chosen)
    print("number of points done: " + str(i))
print(final_results_al2)

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    17s95  0003  -1.361301e+04   1.998111e+05 
    41s82  0007  -1.368172e+04   1.407338e+03 