In [43]:
import scipy.io
import GPy
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.model_selection import train_test_split

In [44]:
def load_data(personalize=False,
              train_path="parkinsonDatTr_correct.mat",
              test_path="parkinsonDatTe_correct.mat",
              pool_fraction=0.5,
              random_state=42):
    """Load the train/test .mat files and split the test file into test and pool halves.

    Parameters
    ----------
    personalize : bool
        When True, the "pf_tr" / "pf_te" columns are appended to the
        feature matrices read from "f_tr" / "f_te".
    train_path, test_path : str
        Paths of the .mat files. The defaults are the file names this
        notebook originally hard-coded.
    pool_fraction : float
        Forwarded to ``train_test_split`` as ``test_size``; the share of the
        test file that is set aside as the query pool.
    random_state : int
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(train_x, train_y, test_x, test_y, pool_x, pool_y,
        train_sub_num, test_sub_num, pool_sub_num)``
    """
    train_mat = scipy.io.loadmat(train_path)
    test_mat = scipy.io.loadmat(test_path)

    train_x = train_mat["f_tr"]
    train_y = train_mat["lab_tr"]
    train_sub_num = train_mat["p_tr"]
    train_per_feat = train_mat["pf_tr"]

    test_x = test_mat["f_te"]
    test_y = test_mat["lab_te"]
    test_sub_num = test_mat["p_te"]
    test_per_feat = test_mat["pf_te"]

    if personalize:
        train_x = np.concatenate((train_x, train_per_feat), axis=1)
        test_x = np.concatenate((test_x, test_per_feat), axis=1)

    # Carry the "p_te" column along as the last feature column so it is
    # shuffled together with its row during the split.
    test_x = np.concatenate((test_x, test_sub_num), axis=1)

    test_x, pool_x, test_y, pool_y = train_test_split(
        test_x, test_y, test_size=pool_fraction, random_state=random_state
    )

    # Peel the carried column back off each half.
    test_sub_num = test_x[:, -1].reshape(test_x.shape[0], 1)
    test_x = np.delete(test_x, test_x.shape[1] - 1, axis=1)

    pool_sub_num = pool_x[:, -1].reshape(pool_x.shape[0], 1)
    pool_x = np.delete(pool_x, pool_x.shape[1] - 1, axis=1)

    return (train_x, train_y, test_x, test_y, pool_x, pool_y,
            train_sub_num, test_sub_num, pool_sub_num)

In [45]:
def train(train_x, train_y, model=None):
    """Fit a GPy GP regression model, or refit an existing one on new data.

    Parameters
    ----------
    train_x : array
        Training inputs; ``train_x.shape[1]`` is used as the RBF kernel's
        input dimension when a new model is created.
    train_y : array
        Training targets.
    model : object or None
        When None, a new ``GPy.models.GPRegression`` with an RBF kernel is
        built. Otherwise the given model's data is replaced through
        ``model.set_XY`` and the same object is re-optimized.

    Returns
    -------
    The optimized model (the same object that was passed in, if any).
    """
    # Identity check: `== None` would dispatch to the model's own __eq__.
    if model is None:
        # The kernel is only needed for a brand-new model.
        kernel = GPy.kern.RBF(train_x.shape[1])
        model = GPy.models.GPRegression(train_x, train_y.copy(), kernel=kernel)
    else:
        model.set_XY(train_x, train_y)
    model.optimize(messages=1)
    return model

In [46]:
def predict_gp(model, test_x):
    """Predict with ``model`` and return the mean column, std and a variance parameter.

    Returns
    -------
    value : array of shape ``(test_x.shape[0], 1)``
        First column of the first item returned by ``model.predict``.
    std : array
        Element-wise square root of the second item returned by
        ``model.predict``.
    model_variance
        ``model.parameters[0].variance[0]`` -- presumably the kernel
        variance of the model built in ``train``; confirm against GPy's
        parameter ordering.
    """
    outputs = model.predict(test_x)
    predicted_mean = outputs[0]
    predicted_var = outputs[1]

    n_rows = test_x.shape[0]
    value = predicted_mean[:, 0].reshape(n_rows, 1)
    std = predicted_var ** 0.5

    first_param = model.parameters[0]
    model_variance = first_param.variance[0]
    return value, std, model_variance

In [47]:
def find_MSE(y, y_pred):
    """Return the mean and standard deviation of the squared errors.

    Only the first column of each 2-D input is compared.
    """
    squared_error = (y[:, 0] - y_pred[:, 0]) ** 2
    return squared_error.mean(), squared_error.std()

In [48]:
def get_individual_data(test_x, test_y, test_sub_num):
    """Group feature rows and label rows by the value in ``test_sub_num``.

    Parameters
    ----------
    test_x : array
        One feature row per sample.
    test_y : array
        One label row per sample, aligned with ``test_x``.
    test_sub_num : array
        ``test_sub_num[i][0]`` is the grouping key for sample ``i``.

    Returns
    -------
    (sub_data, sub_labels) : tuple of dict
        Each dict maps a key to the list of rows that carry it, in the
        order they appear in the input. Both dicts are empty when
        ``test_x`` has no rows.
    """
    sub_data = {}
    sub_labels = {}
    for i in range(test_x.shape[0]):
        subject_num = test_sub_num[i][0]
        # setdefault replaces the bare try/except, which would also have
        # swallowed unrelated errors (e.g. KeyboardInterrupt).
        sub_data.setdefault(subject_num, []).append(test_x[i])
        sub_labels.setdefault(subject_num, []).append(test_y[i])
    return sub_data, sub_labels

In [49]:
# Load the train / test / pool arrays without the extra "pf_*" feature columns.
(
    train_x, train_y,
    test_x, test_y,
    pool_x, pool_y,
    train_sub_num, test_sub_num, pool_sub_num,
) = load_data(personalize=False)

In [None]:
# Fit the initial model on the training arrays (no existing model to refit).
model = train(train_x, train_y, model=None)
# Predict on the test arrays with the freshly fitted model.
prediction, std, model_variance = predict_gp(model=model, test_x=test_x)

In [35]:
##find baseline
def get_results(model, train_x, train_y, test_x, test_y, sub_num=None):
    """Compute squared-error statistics on the test set, overall and per group.

    Parameters
    ----------
    model
        Fitted model accepted by ``predict_gp``.
    train_x, train_y
        Unused; kept so existing calls keep working.
    test_x, test_y : array
        Evaluation inputs and targets.
    sub_num : array or None
        Per-row grouping keys aligned with ``test_x`` (read as
        ``sub_num[i][0]``). When None, the notebook-level ``test_sub_num``
        is used, which is what this function always read before the
        parameter existed.

    Returns
    -------
    dict
        ``"overall"`` maps to ``(mean, std)`` of the squared errors over all
        rows; every distinct grouping key maps to the same pair computed on
        that group's rows only.
    """
    if sub_num is None:
        # Backward-compatible fallback to the notebook global.
        sub_num = test_sub_num

    final_results_baseline = {}

    prediction, _, _ = predict_gp(model, test_x)
    # find_MSE is symmetric in its arguments; pass them in signature order.
    mean, std = find_MSE(test_y, prediction)
    final_results_baseline["overall"] = (mean, std)

    subject_data, subject_labels = get_individual_data(test_x, test_y, sub_num)
    for key, rows in subject_data.items():
        data_x = np.array(rows)
        data_y = np.array(subject_labels[key])
        prediction, _, _ = predict_gp(model, data_x)
        mean, std = find_MSE(data_y, prediction)
        final_results_baseline[key] = (mean, std)
    return final_results_baseline

In [36]:
# Baseline metrics: an "overall" entry plus one entry per grouping key in the test split.
final_results_baseline = get_results(
    model=model,
    train_x=train_x,
    train_y=train_y,
    test_x=test_x,
    test_y=test_y,
)

In [25]:
###AL1
# Active-learning strategy 1: move the pool point with the largest predictive
# standard deviation into the training set, refit, and record test metrics.
# NOTE: this cell reassigns train_x / train_y / train_sub_num and
# pool_x / pool_y / pool_sub_num, so re-running it continues from the
# already-modified arrays rather than from the originally loaded data.
final_results_al1 = {}
num_points_to_query = 1
num_points_before_update = 1
i = 0
while i < num_points_to_query and pool_x.shape[0] > 0:
    # Refit on the current training set; results are keyed by the number of
    # points queried so far.
    model = train(train_x, train_y, model)
    final_results = get_results(model, train_x, train_y, test_x, test_y)
    final_results_al1[i] = final_results
    j = 0

    # Query up to num_points_before_update points before the next refit, but
    # never exceed the total budget and never index into an empty pool
    # (np.argmax raises on an empty array).
    while (j < num_points_before_update
           and i < num_points_to_query
           and pool_x.shape[0] > 0):
        pool_prediction, pool_std, model_variance = predict_gp(model, pool_x)

        max_variance_index = np.argmax(pool_std)

        # Keep the selected rows 2-D so they can be stacked onto the training arrays.
        max_variance_x = pool_x[max_variance_index, :].reshape(1, train_x.shape[1])
        max_variance_y = pool_y[max_variance_index, :].reshape(1, train_y.shape[1])
        max_variance_sub_num = pool_sub_num[max_variance_index, :].reshape(1, pool_sub_num.shape[1])

        # Remove the queried point from the pool ...
        pool_x = np.delete(pool_x, (max_variance_index), axis=0)
        pool_y = np.delete(pool_y, (max_variance_index), axis=0)
        pool_sub_num = np.delete(pool_sub_num, (max_variance_index), axis=0)

        # ... and append it to the training data.
        train_x = np.concatenate((train_x, max_variance_x), axis=0)
        train_y = np.concatenate((train_y, max_variance_y), axis=0)
        train_sub_num = np.concatenate((train_sub_num, max_variance_sub_num), axis=0)

        j = j + 1
        i = i + 1
        


(9, 16)
(9, 2)
(4396, 1)
(10, 16)
(10, 2)
(4397, 1)


In [42]:
###AL2
# Active-learning strategy 2: move the pool point with the smallest ratio
# predicted_mean / sqrt(predictive_variance + model_variance) into the
# training set, refit, and record test metrics.
# NOTE: this cell starts from whatever train_x / pool_x / model the notebook
# currently holds and reassigns them; reload the data and refit the baseline
# first if AL2 is meant to start from the same state as another strategy.
final_results_al2 = {}
num_points_to_query = 1
num_points_before_update = 1
i = 0
while i < num_points_to_query and pool_x.shape[0] > 0:
    # Refit on the current training set; results are keyed by the number of
    # points queried so far.
    model = train(train_x, train_y, model)
    final_results = get_results(model, train_x, train_y, test_x, test_y)
    final_results_al2[i] = final_results
    j = 0

    # Query up to num_points_before_update points before the next refit, but
    # never exceed the total budget and never index into an empty pool
    # (np.argmin raises on an empty array).
    while (j < num_points_before_update
           and i < num_points_to_query
           and pool_x.shape[0] > 0):
        pool_prediction, pool_std, model_variance = predict_gp(model, pool_x)

        pool_mean = pool_prediction[:, 0].reshape(pool_prediction.shape[0], 1)
        # predict_gp returns a standard deviation, so square it before adding
        # it to a variance. The previous expression added std + variance,
        # which mixes units.
        # NOTE(review): this changes which point is selected; confirm it
        # matches the intended acquisition rule.
        acquisition = pool_mean / ((pool_std ** 2 + model_variance) ** 0.5)
        # Name kept for compatibility with the AL1 cell; here it is the index
        # of the minimum acquisition value, not of the maximum variance.
        max_variance_index = np.argmin(acquisition)

        # Keep the selected rows 2-D so they can be stacked onto the training arrays.
        max_variance_x = pool_x[max_variance_index, :].reshape(1, train_x.shape[1])
        max_variance_y = pool_y[max_variance_index, :].reshape(1, train_y.shape[1])
        max_variance_sub_num = pool_sub_num[max_variance_index, :].reshape(1, pool_sub_num.shape[1])

        # Remove the queried point from the pool ...
        pool_x = np.delete(pool_x, (max_variance_index), axis=0)
        pool_y = np.delete(pool_y, (max_variance_index), axis=0)
        pool_sub_num = np.delete(pool_sub_num, (max_variance_index), axis=0)

        # ... and append it to the training data.
        train_x = np.concatenate((train_x, max_variance_x), axis=0)
        train_y = np.concatenate((train_y, max_variance_y), axis=0)
        train_sub_num = np.concatenate((train_sub_num, max_variance_sub_num), axis=0)

        j = j + 1
        i = i + 1


