
Papers used: 

https://web.eecs.umich.edu/~jjcorso/t/555pdf/lleintro.pdf for LLE and regularization

https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=37e16d10354563e6165ef1269166d3ac75b92b82 for NPE

https://www.sciencedirect.com/science/article/pii/S0925231223005763#s0025 for BERT

In [3]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [10]:
# Directory containing the saved .npy arrays for the News dataset.
path = "./data/News/"

# Load the three arrays in one pass: sample embeddings, their labels, and the
# embeddings of the label texts (names taken from the file names).
emb, label, label_emb = (
    np.load(path + file_name)
    for file_name in (
        "train_embeddings.npy",
        "train_labels.npy",
        "label_text_embeddings.npy",
    )
)

In [62]:
def classifier(X_emb, y_emb, similarity_func):
    """Pick, for every sample, the label with the highest similarity score.

    Parameters
    ----------
    X_emb : array-like
        Sample embeddings, forwarded as the first argument of
        ``similarity_func``.
    y_emb : array-like
        Label embeddings, forwarded as the second argument of
        ``similarity_func``.
    similarity_func : callable
        ``similarity_func(X_emb, y_emb)`` must return a 2-D score array in
        which larger means more similar (expected layout: one row per
        sample, one column per label).

    Returns
    -------
    numpy.ndarray
        For each row of the score array, the column index of its maximum.
    """
    scores = similarity_func(X_emb, y_emb)
    # Best-scoring column per row.
    return np.argmax(scores, axis=1)

def accuracy(preds, labels):
    """Return the fraction of predictions that equal their label.

    Both inputs are converted to NumPy arrays and flattened before the
    element-wise comparison. This makes plain Python lists work (a bare
    ``list == list`` yields a single bool, i.e. an all-or-nothing score) and
    stops a ``(n,)`` vs ``(n, 1)`` pair from silently broadcasting into an
    ``(n, n)`` comparison.

    Parameters
    ----------
    preds : array-like
        Predicted class indices.
    labels : array-like
        Ground-truth class indices; must hold as many elements as ``preds``.

    Returns
    -------
    numpy.floating
        Mean of the element-wise equality mask (``nan`` for empty inputs).

    Raises
    ------
    ValueError
        If ``preds`` and ``labels`` do not contain the same number of
        elements.
    """
    preds_flat = np.asarray(preds).ravel()
    labels_flat = np.asarray(labels).ravel()
    if preds_flat.size != labels_flat.size:
        raise ValueError(
            "preds and labels must have the same number of elements, got "
            + str(preds_flat.size) + " and " + str(labels_flat.size)
        )
    return np.mean(preds_flat == labels_flat)

def euclidian_metric(x_i, x_j):
    """Euclidean (L2) distance between ``x_i`` and ``x_j``.

    The difference ``x_i - x_j`` follows normal NumPy broadcasting and the
    squared differences are summed over the last axis, so batches of vectors
    give one distance per vector.
    """
    delta = x_i - x_j
    squared_distance = np.sum(delta * delta, axis=-1)
    return np.sqrt(squared_distance)

In [63]:
# Zero-shot baseline: assign every sample to the label whose text embedding
# has the highest cosine similarity, then report the fraction that is correct.
preds = classifier(X_emb=emb, y_emb=label_emb, similarity_func=cosine_similarity)

print("Zero-shot accuracy, base: ", accuracy(preds, label), sep="")

(11221, 20)
Zero-shot accuracy, base: 0.5421085464753587


In [26]:
# Numerical rank of the sample-embedding matrix `emb`.
# NOTE(review): presumably a sanity check that the features are linearly
# independent, which NPEsolver needs for X @ X.T to be non-singular — confirm.
rank = np.linalg.matrix_rank(emb)
print(rank)

768


In [82]:
def p1solver(data, x, neighbors, k, reg=1e-5):
    """Solve for the reconstruction weights of one point from its neighbours.

    Builds the local Gram matrix of the neighbours centred on ``x``, adds
    ``reg`` to its diagonal so the system stays solvable, solves ``C w = 1``
    and rescales the solution so the weights sum to one.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array with one point per row.
    x : numpy.ndarray
        The point to reconstruct; it is flattened to 1-D before use and must
        have ``data.shape[1]`` elements.
    neighbors : sequence of int
        Row indices into ``data`` of the points used for the reconstruction.
    k : int
        Unused. Kept only so existing callers keep working.
    reg : float, optional
        Value added to the diagonal of the Gram matrix before solving.
        Defaults to ``1e-5``, the constant that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``len(data)``: zero everywhere except at
        ``neighbors``, where it holds the weights (summing to 1).
    """
    # Neighbour offsets from x, one per row; broadcasting subtracts x from
    # every neighbour without materialising repeated copies of it.
    Z = data[neighbors] - np.ravel(x)
    C = Z @ Z.T

    # Conditioning C: with more neighbours than feature dimensions (or
    # collinear neighbours) the Gram matrix is singular without this term.
    C = C + reg * np.eye(C.shape[0])

    w = np.linalg.solve(C, np.ones(C.shape[0]))

    # Scatter the neighbour weights into a full-length row.
    w_final = np.zeros(len(data))
    w_final[neighbors] = w
    return w_final / np.sum(w_final)

def knn(k, data, test):
    """Return the indices of ``k`` rows of ``data`` closest to ``test``.

    Rows are ranked by ``||row||^2 - 2 * test . row``. This differs from the
    squared Euclidean distance only by the constant ``||test||^2``, so the
    ordering is the same.

    The first entry of the ranking is always discarded, and the next ``k``
    are returned. This assumes ``test`` is itself a row of ``data`` and
    therefore ranks first; for a query that is not in ``data`` (or when exact
    duplicates exist) the discarded entry may be a genuine neighbour.

    Parameters
    ----------
    k : int
        Number of neighbour indices to return.
    data : numpy.ndarray
        2-D array with one point per row.
    test : numpy.ndarray
        1-D query point.

    Returns
    -------
    numpy.ndarray
        Up to ``k`` row indices, nearest first.
    """
    row_sq_norms = np.sum(data**2, axis=1)
    cross_terms = test.dot(data.T)
    ranking = np.argsort(row_sq_norms - 2 * cross_terms, axis=0)
    # Skip position 0 (expected to be the query itself).
    return ranking[1:k+1]

def NPEsolver(X, W, k):
    """Compute the NPE projection directions.

    Solves the generalized symmetric eigenproblem

        X M X^T a = lambda X X^T a,   M = (I - W)^T (I - W)

    and returns the eigenvectors belonging to the ``k`` smallest eigenvalues.

    The problem is solved by Cholesky whitening (``X X^T = L L^T``) so that
    ``numpy.linalg.eigh`` is applied to a genuinely symmetric matrix.
    ``inv(X X^T) @ (X M X^T)`` is not symmetric in general, and ``eigh``
    only reads one triangle of its input, so calling it on that product
    returns eigenvectors of a different matrix.

    Parameters
    ----------
    X : numpy.ndarray
        Data matrix of shape ``(n_features, n_samples)`` (one sample per
        column).
    W : numpy.ndarray
        ``(n_samples, n_samples)`` reconstruction-weight matrix; row ``i``
        holds the weights that rebuild sample ``i`` from the others.
    k : int
        Number of projection directions to return.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_features, min(k, n_features))``. Columns are the
        generalized eigenvectors ordered by ascending eigenvalue, scaled so
        that ``a^T X X^T a = 1``. No eigenvector is discarded: unlike LLE,
        the NPE problem has no trivial constant solution to drop.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``X @ X.T`` is not positive definite (e.g. linearly dependent
        features).
    """
    samples = X.T                                   # (n_samples, n_features)

    # (I - W) X^T: residual of each sample against its weighted neighbours.
    # Using it directly gives X M X^T = R^T R without forming the
    # n_samples x n_samples matrix M, and the result is exactly symmetric.
    residuals = samples - W @ samples
    S = residuals.T @ residuals                     # X M X^T
    B = X @ X.T                                     # X X^T

    # Whitening: with B = L L^T, the symmetric matrix L^-1 S L^-T has the
    # same eigenvalues as the generalized problem.
    L = np.linalg.cholesky(B)
    S_white = np.linalg.solve(L, np.linalg.solve(L, S).T)
    S_white = (S_white + S_white.T) / 2             # remove round-off asymmetry

    evalues, whitened_vectors = np.linalg.eigh(S_white)   # ascending order

    # Map back to the original coordinates: a = L^-T u.
    evectors = np.linalg.solve(L.T, whitened_vectors)
    return evectors[:, :k]

def NPE(X, k, n_neighbors):
    """Run the NPE pipeline on ``X`` and return the result of ``NPEsolver``.

    For every row of ``X`` the ``n_neighbors`` neighbour indices are obtained
    from ``knn`` and the row's reconstruction weights from ``p1solver``. The
    weight rows are stacked into one square matrix, which is handed to
    ``NPEsolver`` together with ``X.T`` (features along the rows).

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    k : int
        Forwarded to ``p1solver`` and ``NPEsolver``.
    n_neighbors : int
        Number of neighbours requested from ``knn`` for each sample.

    Returns
    -------
    numpy.ndarray
        Whatever ``NPEsolver(X.T, W, k)`` returns.
    """
    weight_rows = [
        p1solver(X, point, knn(n_neighbors, X, point), k)
        for point in X
    ]
    return NPEsolver(X.T, np.asarray(weight_rows), k)

In [83]:
# Settings for the NPE run (previously inline magic numbers).
NPE_N_COMPONENTS = 768   # value passed as `k` to NPE
NPE_N_NEIGHBORS = 100    # neighbours used per sample

w = NPE(emb, NPE_N_COMPONENTS, NPE_N_NEIGHBORS)

# Project samples and label texts with the same matrix.
# `A @ w` is the direct form of the former `(w.T @ A.T).T`.
npe_emb = emb @ w
npe_label = label_emb @ w