In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, RationalQuadratic, ExpSineSquared, DotProduct

from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

In [3]:
sim = np.load("./hm_feedback_sim.npy")
bio = np.load("./hm_feedback_bio.npy")

sim.shape, bio.shape

((48, 4), (80, 4))

In [38]:
# create pair (x1, x2, y)
# x1 & x2 are parameters, y=1 if x1 is preferred over x2

def calc_theta_features(x1, x2, y, mode=0):

    # default: [k1, m1, n1, k2, m2, n2]
    if mode==0:
        feats = [sim[i][0], sim[i][1], sim[i][2], sim[j][0], sim[j][1], sim[j][2], y]

    # add k1-k2, m1-m2, n1-n2, k1/k2, m1/m2, n1/n2
    elif mode==1:
        feats = [
            sim[i][0], sim[i][1], sim[i][2], 
            sim[j][0], sim[j][1], sim[j][2], 
            sim[i][0]-sim[j][0], 
            sim[i][1]-sim[j][1], 
            sim[i][2]-sim[j][2], 
            sim[i][0]/sim[j][0], 
            sim[i][1]/sim[j][1], 
            sim[i][2]/sim[j][2], 
            y
        ]

    else:
        print("wrong mode!")

    return feats

def create_pref(data, mode=0):
    pairs = []
    for i in range(len(data)):
        for j in range(len(data)):
            y = 1 if data[i][-1] > data[j][-1] else 0
            pairs.append(calc_theta_features(data[i][:3], data[j][:3], y, mode=mode))
    pairs = np.array(pairs)
    print(pairs.shape)
    return pairs

In [39]:
sim_data0 = create_pref(sim, mode=0)
bio_data0 = create_pref(bio, mode=0)
sim_data1 = create_pref(sim, mode=1)
bio_data1 = create_pref(bio, mode=1)

(2304, 7)
(6400, 7)
(2304, 13)
(6400, 13)


# Tree

In [40]:
def cross_val_rf(ne_list, md_list, mss_list, X, y, cv=5):

    best = [0.0, 0.0]
    best_param = []

    for ne in ne_list:
        for md in md_list:
            for mss in mss_list:
                    
                    rf = RandomForestClassifier(n_estimators=ne, max_depth=md, min_samples_split=mss)

                    scores = cross_val_score(rf, X, y, cv=cv)
                    if scores.mean() > best[0]:
                        best = [scores.mean(), scores.std()]
                        best_param = [ne, md, mss]
                        print("best param: ", ne, md, mss)
                        print("best acc: ", best[0], best[1])

In [32]:
X, y = sim_data0[:, :-1], sim_data0[:, -1]

cross_val_rf(ne_list=[50,100,200], md_list=[8,12,16,32,None], mss_list=[2,5,10,20], X=X, y=y, cv=5)

best param:  50 8 2
best acc:  0.6141497689333207 0.0005338111855135441


In [41]:
X, y = sim_data1[:, :-1], sim_data1[:, -1]

cross_val_rf(ne_list=[50,100,200], md_list=[8,12,16,32,None], mss_list=[2,5,10,20], X=X, y=y, cv=5)

best param:  50 8 2
best acc:  0.6141497689333207 0.0005338111855135441


In [34]:
X, y = bio_data0[:, :-1], bio_data0[:, -1]

cross_val_rf(ne_list=[50,100,200], md_list=[8,12,16,32,None], mss_list=[2,5,10,20], X=X, y=y, cv=5)

best param:  50 8 2
best acc:  0.6884375 0.0003125000000000267


In [42]:
X, y = bio_data1[:, :-1], bio_data1[:, -1]

cross_val_rf(ne_list=[50,100,200], md_list=[8,12,16,32,None], mss_list=[2,5,10,20], X=X, y=y, cv=5)

best param:  50 8 2
best acc:  0.6884375 0.0003125000000000267


# GP

In [50]:
def cross_val_gp(kernel_list, X, y, cv=5):

    best = [0.0, 0.0]
    best_param = []

    for kernel in kernel_list:
                
        gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)

        scores = cross_val_score(gpc, X, y, cv=cv)
        if scores.mean() > best[0]:
            best = [scores.mean(), scores.std()]
            print("best kernel: ", kernel)
            print("best acc: ", best[0], best[1])

In [53]:
kernel_list = [
    1.0 * RBF(1.0), 
    1.0 * Matern(1.0), 
    1.0 * RationalQuadratic(1.0), 
    1.0 * ExpSineSquared(1.0), 
    1.0 * DotProduct(1.0)
]

In [54]:
X, y = sim_data0[:, :-1], sim_data0[:, -1]

cross_val_gp(kernel_list=kernel_list, X=X, y=y, cv=5)

best kernel:  1**2 * RBF(length_scale=1)
best acc:  0.6141497689333207 0.0005338111855135441


In [55]:
X, y = sim_data1[:, :-1], sim_data1[:, -1]

cross_val_gp(kernel_list=kernel_list, X=X, y=y, cv=5)

best kernel:  1**2 * RBF(length_scale=1)
best acc:  0.6141497689333207 0.0005338111855135441


In [56]:
X, y = bio_data0[:, :-1], bio_data0[:, -1]

cross_val_gp(kernel_list=kernel_list, X=X, y=y, cv=5)

best kernel:  1**2 * RBF(length_scale=1)
best acc:  0.6884375 0.0003125000000000267


In [None]:
X, y = bio_data1[:, :-1], bio_data1[:, -1]

cross_val_gp(kernel_list=kernel_list, X=X, y=y, cv=5)

# KAN