In [14]:
import numpy as np
import scipy as sp
import pandas as pd
import os
from learn_multiClassNovelty_knfst import learn_multiclass_novelty_knfst
from sklearn import datasets, metrics

In [47]:
def null(a, rtol=1e-5):
    u, s, v = np.linalg.svd(a)
    rank = (s > rtol*s[0]).sum()
    return v[rank:].T.copy()


def calculate_knfst(K, labels):
    '''
    Calculates projection matrix of KNFST
    '''
    classes = np.unique(labels)
    if len(classes) < 2:
        raise Exception("KNFST requires 2 or more classes")
    n, m = K.shape
    if n != m:
        raise Exception("Kernel matrix must be quadratic")
        
    centered_k = center_kernel_matrix(K)
    basis_values, basis_vecs = np.linalg.eig(centered_k)
    
    #basis_values = np.diag(basis_values)
    

    basis_vecs = basis_vecs[:,basis_values > 1e-12]
    basis_values = basis_values[basis_values > 1e-12]
    
    basis_values = np.diag(1/np.sqrt(basis_values))
    basis_vecs  = basis_vecs.dot(basis_values)
    print(basis_vecs.shape)
    print(basis_values.shape)        
    L = np.zeros([n,n])
    for cls in classes:
        L[labels==cls, labels==cls] = 1/np.sum(labels==cls)
    M = np.ones([m,m])/m
#   #print(np.eye(m,m)-M)
    H = (np.eye(m,m)-M).dot(basis_vecs).T.dot(K).dot(np.eye(m,m)-L)
    #H = H.dot(basis_vecs)
    #H = H.T.dot(K).dot(np.eye(np.size(K))-L)
    
    T = H.dot(H.T)
    
    eigenvecs = null(T)
#     if eigenvecs.shape[1] < 1:
#         eigenvals, eigenvecs = np.linalg.eig(T)
        #eigenvals = np.diag(eigenvals)
        
    proj = (np.eye(m,m)-M).dot(basis_vecs).dot(eigenvecs)
    return proj
        
        
def center_kernel_matrix(kernel):
    '''
    Centers the data in the feature space only using the kernel matrix
    '''
    n = np.shape(kernel)[0]
    column_means = np.mean(kernel, 0)
    matrix_mean = np.mean(kernel)
    centered = kernel
    
    for idx in range(n):
        centered[idx, :] = centered[idx, :] - column_means
        centered[:, idx] = centered[:, idx] - column_means
        
    centered += matrix_mean
    return centered

In [111]:
def learn_multiclass_novelty_knfst(K, labels):
    '''
    Calculate multi-class KNFST model for multi-class novelty detection
    
    INPUT
      K: NxN kernel matrix containing similarities of n training samples
      labels: Nx1 column vector containing multi-class labels of N training samples

    OUTPUT
      proj: Projection of KNFST
      target_points: The projections of training data into the null space
    '''

    classes = np.unique(labels)
    proj = calculate_knfst(K, labels)
    target_points = np.zeros((len(classes), proj.shape[0]))
    for idx, cl in enumerate(classes):
        k_cl = K[labels==cl,:]
        target_points[idx, :] = np.mean(k_cl.dot(proj),1)
    print(target_points)
    return proj, target_points

In [112]:
PATH = r"../Datasets/bow_1000_dense/"
SAMPLE_PATH = r"../Datasets/Sample/"

train = datasets.load_files(SAMPLE_PATH)
train.data = np.vstack([np.fromstring(txt, sep='\t') for txt in train.data])

In [113]:
data = train.data
target = train.target

In [114]:
def hik(x, y):
    return np.minimum(x, y).sum()

k_hik = metrics.pairwise_kernels(data, metric=hik)
proj, target_points = learn_multiclass_novelty_knfst(k_hik, target)

(473, 473)
(473, 473)


ValueError: could not broadcast input array from shape (98) into shape (473)

In [106]:
proj.shape

(473, 1)

In [101]:
target_points

array([[ -5.96605824e-17],
       [ -7.27733936e-17],
       [ -2.40741243e-16],
       [  3.87855756e-16]])