In [1]:
import numpy as np
from sklearn.neighbors import KDTree
import matplotlib.pyplot as plt

In [2]:
def load_dataset(name):
    """Load a tab-separated dataset from the ``Datasets`` directory.

    Every non-blank line of the file is one sample whose first column is the
    class label and whose remaining columns are the features. Blank lines
    (for example a trailing empty line at the end of the file) are skipped;
    previously they produced a ragged row list and made the array
    conversion fail.

    Parameters
    ----------
    name : str
        File name, resolved relative to ``Datasets/``.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is a float32 array holding every column but
        the first, and ``y`` is an integer column vector of shape
        ``(n_samples, 1)`` containing ``+1`` where the stored label equals 1
        and ``-1`` everywhere else.
    """
    path = "Datasets/{}".format(name)
    rows = []
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Skip empty lines so every parsed row has the same width.
                continue
            rows.append(line.split("\t"))
    D = np.array(rows, dtype=np.float32)
    labels = D[:, 0].reshape(-1, 1)
    # One-vs-rest encoding: class 1 -> +1, any other class -> -1.
    y = np.where(labels == 1, 1, -1)
    return (D[:, 1:], y)

In [3]:
def make_adj(X, k=3):
    """Build a symmetric nearest-neighbour graph over the rows of ``X``.

    A KD-tree is queried for the ``k`` nearest neighbours of every point
    among the points themselves. Each returned neighbour that is not the
    point itself is linked to it with an undirected edge of weight 1.

    Parameters
    ----------
    X : 2-D array
        One row per point.
    k : int, optional
        Number of neighbours requested from the KD-tree for each point
        (default 3, the value that used to be hard-coded). The query is run
        on the same points the tree was built from, so the point itself is
        normally one of the ``k`` results. ``k`` is clamped to the number of
        points so that very small inputs do not make the query fail.

    Returns
    -------
    tuple
        ``(W, D, L)``: the 0/1 adjacency matrix, the diagonal degree matrix
        (row sums of ``W``) and the graph Laplacian ``D - W``.
    """
    n = X.shape[0]
    W = np.zeros((n, n))
    idx = KDTree(X).query(X, k=min(k, n), return_distance=False)
    # Flatten the (n, k) neighbour table into parallel edge-endpoint lists.
    rows = np.repeat(np.arange(n), idx.shape[1])
    cols = idx.ravel()
    # Drop self-matches so the diagonal of W stays zero.
    off_diag = rows != cols
    rows, cols = rows[off_diag], cols[off_diag]
    # Write both directions to keep W symmetric.
    W[rows, cols] = 1
    W[cols, rows] = 1
    D = np.diag(W.sum(axis=1))
    return W, D, (D - W)

def harmonic(X, y, l):
    """Predict labels for the unlabelled points by harmonic interpolation.

    The rows of ``X`` and ``y`` are reordered so that the points listed in
    ``l`` come first, followed by all remaining points in ascending index
    order. A neighbour graph is built on the reordered points with
    ``make_adj`` and the unlabelled block is scored as
    ``pinv(L_uu) @ W_ul @ y_l``; the sign of that score is the prediction.

    Parameters
    ----------
    X : 2-D array
        One row per point.
    y : 2-D array
        One row per point (indexed as ``y[idx, :]``).
    l : sequence of int
        Row indices whose labels are treated as known.

    Returns
    -------
    tuple
        ``(y_reordered, predictions)``, both in labelled-first order. The
        first ``len(l)`` rows of ``predictions`` are the known labels
        copied through unchanged.
    """
    n_labelled = len(l)

    # Everything not listed in l is unlabelled.
    is_unlabelled = np.ones(y.shape[0], dtype=bool)
    is_unlabelled[l] = False
    unlabelled = np.flatnonzero(is_unlabelled)

    X_sorted = np.vstack([X[l, :], X[unlabelled, :]])
    y_sorted = np.vstack([y[l, :], y[unlabelled, :]])

    W, _, L = make_adj(X_sorted)
    L_uu_pinv = np.linalg.pinv(L[n_labelled:, n_labelled:])
    W_ul = W[n_labelled:, :n_labelled]
    y_known = y_sorted[:n_labelled]
    scores = L_uu_pinv @ W_ul @ y_known
    return y_sorted, np.vstack([y_known, np.sign(scores)])

def harmonic_kernel(X, y, l):
    """Predict labels for every point with the Laplacian-kernel interpolant.

    The pseudo-inverse of the graph Laplacian built by ``make_adj`` is used
    as a kernel. Coefficients ``alpha`` are fitted on the labelled points by
    solving ``K alpha = y_l`` through ``pinv``, where ``K`` is the kernel
    restricted to the labelled indices. The prediction for each point is the
    sign of the resulting kernel expansion.

    Parameters
    ----------
    X : 2-D array
        One row per point.
    y : array
        Labels, one row per point.
    l : sequence of int
        Row indices whose labels are treated as known.

    Returns
    -------
    tuple
        ``(y, predictions)`` where ``predictions`` is a column vector with
        one entry per row of ``X`` (labelled points included), in the
        original row order.
    """
    _, _, L = make_adj(X)
    L_inv = np.linalg.pinv(L)
    l_idx = np.asarray(l, dtype=np.intp)

    # Kernel restricted to the labelled points (rows and columns in l).
    K = L_inv[np.ix_(l_idx, l_idx)]
    alpha = np.linalg.pinv(K) @ y[l_idx]

    # Scatter the coefficients into a row vector over all points. alpha is
    # flattened first so that plain scalars are written; assigning a
    # one-element array into a scalar slot is deprecated in recent NumPy.
    ae = np.zeros((1, L.shape[0]))
    ae[0, l_idx] = np.ravel(alpha)
    return y, np.sign(ae @ L_inv).T

def sample_labels(y, num):
    """Draw ``num`` entries from ``y`` at random.

    Sampling goes through ``np.random.choice`` with its default settings,
    so entries are drawn with replacement from the global NumPy random
    stream. The result is a 1-D array of length ``num``.
    """
    draw_shape = (num,)
    picked = np.random.choice(y, size=draw_shape)
    return picked

def emp_err(gt, pred):
    """Return the fraction of entries where ``pred`` differs from ``gt``.

    The number of element-wise mismatches is divided by ``len(gt)``, i.e.
    by the size of the first axis of the ground truth.
    """
    mismatch = gt != pred
    n_wrong = mismatch.astype(int).sum()
    return n_wrong / len(gt)

def laplacian_introp(X, y, sample_idx):
    """Empirical error of ``harmonic`` on the unlabelled points only.

    ``harmonic`` returns its outputs with the ``len(sample_idx)`` labelled
    points first, so those leading rows are dropped before scoring.
    """
    truth, predicted = harmonic(X, y, sample_idx)
    n_labelled = len(sample_idx)
    return emp_err(truth[n_labelled:], predicted[n_labelled:])

def laplacian_kernel_introp(X, y, sample_idx):
    """Empirical error of ``harmonic_kernel`` over all points.

    NOTE(review): the error here is taken over every row, labelled points
    included, whereas ``laplacian_introp`` scores only the unlabelled rows.
    Confirm that this difference is intended before comparing the two.
    """
    truth, predicted = harmonic_kernel(X, y, sample_idx)
    error = emp_err(truth, predicted)
    return error
    

In [4]:
def run_protocol(ds_size=(50, 100, 200, 400), l_size=(1, 2, 4, 8, 16),
                 n_trials=20, seed=None):
    """Run the labelled-sample experiment for both interpolation methods.

    For every dataset size and every number of labelled points per class,
    ``n_trials`` random labelled sets are drawn (without replacement, the
    same number from each class) and the empirical errors of
    ``laplacian_introp`` and ``laplacian_kernel_introp`` are recorded.

    Parameters
    ----------
    ds_size : sequence of int, optional
        Dataset sizes; size ``sz`` is loaded from ``dtrain13_<sz>.dat``.
        The sampling assumes that rows ``0 .. sz-1`` belong to one class and
        rows ``sz .. 2*sz-1`` to the other -- TODO confirm against the data
        files.
    l_size : sequence of int, optional
        Numbers of labelled points drawn per class.
    n_trials : int, optional
        Number of random repetitions per setting (default 20).
    seed : int or None, optional
        When given, a private ``RandomState`` seeded with this value is used
        so that the run is reproducible. With ``None`` (the default) the
        global ``np.random`` stream is used, exactly as before.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(ds_size), len(l_size), 2, 2)``. Axis 2 selects
        the statistic (0 = mean, 1 = standard deviation) and axis 3 selects
        the method (0 = Laplacian interpolation, 1 = kernel interpolation).
    """
    # Both objects expose the same choice(a, size, replace=...) interface.
    rng = np.random if seed is None else np.random.RandomState(seed)
    tables = np.zeros((len(ds_size), len(l_size), 2, 2))
    for i, sz in enumerate(ds_size):
        X, y = load_dataset("dtrain13_{}.dat".format(sz))
        clss_1 = np.arange(0, sz, 1, int)
        clss_2 = np.arange(sz, sz * 2, 1, int)
        for j, l in enumerate(l_size):
            errLI = []
            errKLI = []
            for _ in range(n_trials):
                sampled_clss_1 = rng.choice(clss_1, (l,), replace=False)
                sampled_clss_2 = rng.choice(clss_2, (l,), replace=False)
                L_cal = np.hstack([sampled_clss_1, sampled_clss_2])
                errLI.append(laplacian_introp(X, y, L_cal))
                errKLI.append(laplacian_kernel_introp(X, y, L_cal))
            tables[i, j, 0, 0] = np.mean(errLI)
            tables[i, j, 1, 0] = np.std(errLI)
            tables[i, j, 0, 1] = np.mean(errKLI)
            tables[i, j, 1, 1] = np.std(errKLI)
            print("size={}, l={}: eli={}, ekli={}".format(sz, l, tables[i, j, :, 0], tables[i, j, :, 1]))
    return tables

In [5]:
# Run the whole experiment grid; run_protocol prints one progress line per
# (dataset size, labelled-count) setting and returns the mean/std error array.
tables = run_protocol()

size=50, l=1: eli=[0.19846939 0.10508099], ekli=[0.0455     0.01243986]
size=50, l=2: eli=[0.13802083 0.10210857], ekli=[0.0695     0.06681878]
size=50, l=4: eli=[0.08532609 0.08892308], ekli=[0.0495     0.02479415]
size=50, l=8: eli=[0.05833333 0.02377974], ekli=[0.044      0.01854724]
size=50, l=16: eli=[0.04485294 0.0182939 ], ekli=[0.0275     0.01042833]
size=100, l=1: eli=[0.09393939 0.09796418], ekli=[0.10675    0.17077965]
size=100, l=2: eli=[0.06147959 0.01856996], ekli=[0.0665     0.01406236]
size=100, l=4: eli=[0.07526042 0.04132264], ekli=[0.06675    0.02903769]
size=100, l=8: eli=[0.05163043 0.0149333 ], ekli=[0.04725    0.01427191]
size=100, l=16: eli=[0.04464286 0.02795085], ekli=[0.0375    0.0252735]
size=200, l=1: eli=[0.06871859 0.11585959], ekli=[0.024375   0.02630203]
size=200, l=2: eli=[0.03396465 0.03435156], ekli=[0.022625   0.01521666]
size=200, l=4: eli=[0.02793367 0.01529496], ekli=[0.019375   0.00921531]
size=200, l=8: eli=[0.02317708 0.01449705], ekli=[0.0203

In [150]:
# Mean error of the Laplacian-interpolation method as a 2-D table: one row
# per dataset size, one column per labelled-count. run_protocol stores
# results as tables[size, l, statistic, method], so both trailing axes have
# to be fixed (statistic 0 = mean, method 0 = Laplacian interpolation).
tables[:, :, 0, 0]

array([[0.23469388, 0.15104167, 0.09836957, 0.05654762, 0.05220588],
       [0.19545455, 0.06862245, 0.0765625 , 0.06657609, 0.03630952],
       [0.07248744, 0.04267677, 0.02295918, 0.02083333, 0.01970109],
       [0.1127193 , 0.01991206, 0.01559343, 0.01619898, 0.0140625 ]])

In [151]:
# Mean error of the kernel-interpolation method as a 2-D table: one row per
# dataset size, one column per labelled-count. run_protocol stores results
# as tables[size, l, statistic, method], so both trailing axes have to be
# fixed (statistic 0 = mean, method 1 = kernel interpolation).
tables[:, :, 0, 1]

array([[0.078    , 0.074    , 0.058    , 0.0435   , 0.031    ],
       [0.17275  , 0.06125  , 0.064    , 0.0525   , 0.0305   ],
       [0.016    , 0.017125 , 0.0165   , 0.0185   , 0.017    ],
       [0.01     , 0.014375 , 0.0086875, 0.0100625, 0.00925  ]])

In [8]:
def produce_latex_table(tbl, ds_size=(50, 100, 200, 400), l_size=(1, 2, 4, 8, 16)):
    """Print a results table as LaTeX tabular rows.

    The first printed line is a header listing the labelled-counts. Each
    following line starts with a dataset size and contains one inline-math
    "mean plus-or-minus std" cell per column, formatted with two decimals.

    Parameters
    ----------
    tbl : 3-D array
        ``tbl[i, j, 0]`` is the mean and ``tbl[i, j, 1]`` the standard
        deviation for dataset size ``ds_size[i]`` and labelled-count column
        ``j``.
    ds_size : sequence of int, optional
        Row labels; needs at least ``tbl.shape[0]`` entries.
    l_size : sequence of int, optional
        Column labels printed in the header row.
    """
    header = "".join(" &  {}".format(l) for l in l_size)
    print(header + " \\\\")
    # Raw string: the backslashes belong to the LaTeX markup and must not be
    # read as Python escape sequences.
    cell = r"\(%.2f \pm %.2f\)"
    for i in range(tbl.shape[0]):
        s = [str(ds_size[i])]
        s.extend(cell % (tbl[i, j, 0], tbl[i, j, 1]) for j in range(tbl.shape[1]))
        print(" & ".join(s), " \\\\")


In [9]:
# LaTeX rows for method index 0 on the last axis (Laplacian interpolation).
produce_latex_table(tables[..., 0])

 &  1 &  2 &  4 &  8 &  16 \\
50 & \(0.20 \pm 0.11\) & \(0.14 \pm 0.10\) & \(0.09 \pm 0.09\) & \(0.06 \pm 0.02\) & \(0.04 \pm 0.02\)  \\
100 & \(0.09 \pm 0.10\) & \(0.06 \pm 0.02\) & \(0.08 \pm 0.04\) & \(0.05 \pm 0.01\) & \(0.04 \pm 0.03\)  \\
200 & \(0.07 \pm 0.12\) & \(0.03 \pm 0.03\) & \(0.03 \pm 0.02\) & \(0.02 \pm 0.01\) & \(0.02 \pm 0.01\)  \\
400 & \(0.06 \pm 0.11\) & \(0.01 \pm 0.00\) & \(0.02 \pm 0.01\) & \(0.02 \pm 0.00\) & \(0.01 \pm 0.01\)  \\


In [10]:
# LaTeX rows for method index 1 on the last axis (kernel interpolation).
produce_latex_table(tables[..., 1])

 &  1 &  2 &  4 &  8 &  16 \\
50 & \(0.05 \pm 0.01\) & \(0.07 \pm 0.07\) & \(0.05 \pm 0.02\) & \(0.04 \pm 0.02\) & \(0.03 \pm 0.01\)  \\
100 & \(0.11 \pm 0.17\) & \(0.07 \pm 0.01\) & \(0.07 \pm 0.03\) & \(0.05 \pm 0.01\) & \(0.04 \pm 0.03\)  \\
200 & \(0.02 \pm 0.03\) & \(0.02 \pm 0.02\) & \(0.02 \pm 0.01\) & \(0.02 \pm 0.01\) & \(0.02 \pm 0.01\)  \\
400 & \(0.01 \pm 0.00\) & \(0.01 \pm 0.00\) & \(0.01 \pm 0.00\) & \(0.01 \pm 0.00\) & \(0.01 \pm 0.00\)  \\
