In [1]:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C ,WhiteKernel as Wht,Matern as matk
from sklearn.gaussian_process.kernels import RationalQuadratic as expker
from sklearn.metrics import mean_squared_error as MSError

In [2]:
import os
from os.path import join as oj
import pandas as pd
import numpy as np
from scipy.stats import sem
from collections import defaultdict

from utils.utils import cwd, set_up_plotting

plt = set_up_plotting()

In [3]:
from material_utils import process_data, kernel

In [4]:
Xdata, Ydata, ndata, ninputmap = process_data()

Xmax and  Xmin:  [  2.58 999.6 ] [  2.1 684.3]
Xmean  and Xstd:  [  2.35 852.84] [  0.19575495 113.88574274]
Original Training and Y : (177, 12) (177,)
Transpose Training and Y :  (12, 177) (177,)
Original Training and Y : (177, 12) (177,)
Total training and Test Data:  106 71


In [5]:
Xdata.shape, Ydata.shape, ndata

((177, 12), (177,), 177)

In [6]:
setting = 'material'
local_dir = 'results'
with cwd(oj(local_dir, setting)):
    result_datas = []
    for file in os.listdir():
        if file.endswith('.npz'):
            result_datas.append(np.load(file, allow_pickle=True))

## GP regression

In [7]:
def posterior_predictive(X_train, y_train, X_test, sigma_y=1e-8):
    K = kernel(X_train, X_train) + np.square(sigma_y) * np.eye(len(X_train))
    K_s = kernel(X_train, X_test)
    K_inv = np.linalg.inv(K + sigma_y * np.eye(len(K)))

    mu_s = K_s.T @ K_inv @ y_train
    return mu_s

def get_mse(Xdata, Ydata, X_train, X_test):

    y_train = []
    for x_i, y_i in zip(Xdata, Ydata):
        for x_ob in X_train:
            if (x_i == x_ob).all():
                y_train.append(y_i)
    y_train = np.asarray(y_train)

    
    y_test = []
    for x_i, y_i in zip(Xdata, Ydata):
        for x_ob in X_test:
            if (x_i == x_ob).all():
                y_test.append(y_i)
    y_test = np.asarray(y_test)
    
    predictive_mean = posterior_predictive(X_train, y_train, X_test, 1e-1)
    
    return np.mean((y_test - predictive_mean)**2)

In [8]:
mse_results = defaultdict(list)
n = 3
n_trials = 5
for trial_i in range(n_trials):
    obs = result_datas[trial_i]['obs'].item()
    Ts = result_datas[trial_i]['Ts']
    for collab_type, collab_obs in obs.items():
        mse = 0
        mses = []
        for i in range(n):
            if 'indiv' in collab_type:    
                mse_i = get_mse(Xdata, Ydata, collab_obs[i], Ts[i])
            else:
                mse_i = get_mse(Xdata, Ydata, collab_obs, Ts[i]) 

            mses.append(mse_i)
            mse += mse_i / n
        
        mse_results[collab_type+'-avg-mses'].append(mse)        
        mse_results[collab_type+'-mses'].append(mses)


data_df = defaultdict(list)
for collab_type, mse_list in mse_results.items():
    baseline = collab_type.replace('-avg-mses', '').replace('-mses', '').replace('_obs','').replace('indiv_greedy', 'ind')
    if baseline not in data_df['Baselines']:
        data_df['Baselines'].append(baseline)
    if '-avg-mses' in collab_type:
        avg = np.mean(mse_list)
        se = sem(mse_list)
        data_df['Avg MSE'].append(avg)
        data_df['Stderr'].append(se)
    else:
        stds = np.std(mse_list, axis=1)
        
        mean_std_mse = np.mean(stds)
        se_std_mse = sem(stds)
        data_df['Std MSE'].append(mean_std_mse)
        data_df['Stderr Std'].append(se_std_mse)

material_regression_df = pd.DataFrame(data=data_df)
material_regression_df

Unnamed: 0,Baselines,Avg MSE,Stderr,Std MSE,Stderr Std
0,greedy_1,0.222323,0.021101,0.030723,0.004569
1,greedy_2,0.262299,0.021892,0.053762,0.00876
2,greedy_3,0.225551,0.026504,0.027,0.006593
3,greedy_4,0.172505,0.023747,0.034815,0.011747
4,greedy_sum,0.219118,0.012654,0.038695,0.008453
5,dynamic_beta,0.254988,0.035777,0.045795,0.014497
6,joint,0.209274,0.025947,0.040436,0.004301
7,rand,0.20136,0.017956,0.043836,0.004232
8,entropy,0.212001,0.016309,0.028284,0.005745
9,ind,0.227731,0.000748,0.062736,0.000406


In [9]:
with cwd(oj(local_dir, setting)):
    material_regression_df.to_latex('material_regression.tex', index=False)
    material_regression_df.to_csv('material_regression.csv', index=False)