This supervised variant of NPE adds a regularization term to the loss function that encourages data points of the same class to be embedded closer to the same point.

https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6889368

In [22]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import matplotlib.pyplot as plt


In [5]:
DATA_DIR = "./data/News/"

X_train = np.load(f"{DATA_DIR}train_embeddings.npy")
y_train = np.load(f"{DATA_DIR}train_labels.npy")
X_test = np.load(f"{DATA_DIR}test_embeddings.npy")
y_test = np.load(f"{DATA_DIR}test_labels.npy")

In [19]:
def p1solver(data, x, neighbors):
    Z = data[neighbors].T
    Z = Z - np.repeat(x.T, len(neighbors), axis=0).reshape(data.shape[1], len(neighbors))
    C = Z.T @ Z

    #conditioning C
    C = C + (np.eye(C.shape[0]) * 1e-5)
    
    w = np.linalg.solve(C, np.ones(len(neighbors)))
    w_final = np.zeros(len(data))
    w_final[neighbors] = w
    return w_final / np.sum(w_final)

def knn(k, data, test):
    return np.argsort(np.sum(data**2, axis=1) - 2 * test.dot(data.T), axis=0)[1:k+1]

def SNPE(X, y, n_neighbors, beta, num_classes):
    '''
    X: n x m data matrix
    y: m x 1 labels vector
    n_neighbors: # neighbors to use for constructing KNN graph
    beta: hyperparam for importance of label information regularization term
    num_classes: # of different labels

    returns: A, a m x num_classes matrix. Take X @ A to be the embedding of X in num_classes dimensions.
    '''

    W = []
    for i in range((len(X))): 
        neighbors = knn(n_neighbors, X, X[i])
        W.append(p1solver(X, X[i], neighbors))
    W = np.asarray(W)
    I = np.eye(X.shape[0])
    M = (I - W).T @ (I - W)

    H = np.zeros((num_classes, len(y)))
    H[y, np.arange(len(y))] = 1

    return np.linalg.inv(X.T @ M @ X + beta * X.T @ X).T @ X.T @ H.T


In [29]:
betas = [1, 3, 5, 7, 9, 11]
accs = []
for beta in betas:
    A = SNPE(X_train, y_train, n_neighbors=100, beta=beta, num_classes=20)
    X_test_embed = X_test @ A
    classifier = KNeighborsClassifier(n_neighbors=20)
    classifier.fit(X_test_embed, y_test)

    y_pred = classifier.predict(X_test_embed)
    acc = np.mean(y_test == y_pred)
    accs.append(acc)
    print(f'KNN on SNPE embeddings classification accuracy, beta = {beta}: {acc}')


KNN on SNPE embeddings classification accuracy, beta = 1: 0.7455273698264352
KNN on SNPE embeddings classification accuracy, beta = 3: 0.7420560747663552
KNN on SNPE embeddings classification accuracy, beta = 5: 0.7429906542056075
KNN on SNPE embeddings classification accuracy, beta = 7: 0.7447263017356476
KNN on SNPE embeddings classification accuracy, beta = 9: 0.7432576769025367
KNN on SNPE embeddings classification accuracy, beta = 11: 0.7425901201602136
