In [161]:
import numpy as np
import matplotlib.pyplot as plt
import pylab
import scipy.io as sio
import scipy
import math
from sklearn import neighbors
#%matplotlib inline

In [268]:
def knn_score(train_data, train_label, test_data, test_label):
    knn = neighbors.KNeighborsClassifier(n_neighbors = 1)  
    knn.fit(train_data, train_label)
    s = knn.score(test_data, test_label)
    return s

def knn_verification(train_data, train_label, test_data, test_label):
    knn = neighbors.KNeighborsClassifier(n_neighbors = 1)  
    knn.fit(train_data, train_label)
    pre = knn.predict(test_data)
    print(pre[0], test_label)
    return pre[0]==test_label

In [230]:
def PCA(data,dimensions):
    '''
    data is the original data set，rows are samples of data,columns are the features
    '''
    [N, D] = data.shape
    # making data zero-means
    average = np.mean(data,0)
    data = np.mat(data-average)
    
    #covariance
    covariance = np.dot(data.T, data)
    
    #eigenvalues
    eig_var, eig_vec = np.linalg.eig(covariance)
    
    
    #from the numpy doc, the eig_var may not be ordered.
    sort_eig = np.argsort(-eig_var)
    #return the index that make a sorted array

    #so we got the sorted eig_var
    sort_eig = sort_eig[:dimensions]
    principal_vec = np.mat(eig_vec[:,sort_eig])
    low_data = np.dot(data, principal_vec).astype(float)
    return principal_vec, low_data, average


In [231]:
def LDE(data, label, k, k_, d):
    '''
    data is the original data. m*n(m=samples, n=dimensions)
    '''
    N = data.shape[0]
    c = len(np.unique(label))

    # W_opt.T = W_fld.T * W_pca.T
    # W_pca
    # Get the principal components of PCA with N-c largest eigenvalues
    W_pca, data_pca, average= PCA(data, N-c)
   
    Dim = data_pca.shape[1]
    
    # step 1
    # find the nearest neighbors (k). Here we get the distance from (x-y)^2
    # distance N*N
   
    data_tmp = np.sum(np.multiply(data_pca, data_pca), axis=1)
    distance=np.mat(data_tmp + data_tmp.T - 2*data_pca*data_pca.T)
    
    # the all neibors N
    neibors = np.argsort(distance,axis=1)
    
    
    # step 2
    # weights
    
    W=np.zeros([N,N])
    D=np.zeros([N,N])
    
    W_=np.zeros([N,N])
    D_=np.zeros([N,N])
    
    for i in range(N):
        K=0
        K_=0
        for j in neibors[i].A[0]:
            if (label[j]==label[i]):
                if(K<k):
                    W[i, j] = math.exp(-distance[i, j]/5)
                    W[j, i] = math.exp(-distance[i, j]/5) 
                    K+=1
            else:
                if(K_<k):
                    W_[i, j] = math.exp(-distance[i, j]/5)
                    W_[j, i] = math.exp(-distance[i, j]/5) 
                    K_+=1
    
    for i in range(N):
        for j in range(N):
            D[i, i] +=W[i, j] 
            D_[i, i] +=W_[i, j]
    
    # Step 3
    # mappings 
    
    L=D-W
    L_=D_-W_
    eigenValues, eigenVectors=scipy.linalg.eig(np.dot(np.dot(data_pca.T, L_),data_pca), np.dot(np.dot(data_pca.T, L), data_pca))
    eigenValuesSort=np.argsort(-eigenValues)
    W_lde = eigenVectors[:, eigenValuesSort[0:d]]
    mapping = np.dot(W_pca, W_lde)
    
    print("LDE done")
    return mapping

In [351]:
def two_D_LDE(data, label, k, k_, n_1, n_2, l_1, l_2):
    '''
    data is the original data. m*n(m=samples, n=dimensions)
    '''
    N = data.shape[0]
    Dim = data.shape[1]
    
    # step 1
    # find the nearest neighbors (k). Here we get the distance from (x-y)^2
    # distance N*N
   
    data_tmp = np.sum(np.multiply(data, data), axis=1)
    distance=np.mat(data_tmp + data_tmp.T - 2*data*data.T)
    
    # the all neibors N
    neibors = np.argsort(distance,axis=1)
    
    
    # step 2
    # weights
    
    W=np.zeros([N,N])
    D=np.zeros([N,N])

    W_=np.zeros([N,N])
    D_=np.zeros([N,N])
    for i in range(N):
        K=0
        K_=0
        for j in neibors[i].A[0]:
            if (label[j]==label[i]):
                if(K<k):
                    W[i, j] = math.exp(-distance[i, j]/5)
                    W[j, i] = math.exp(-distance[i, j]/5) 
                    K+=1
            else:
                if(K_<k):
                    W_[i, j] = math.exp(-distance[i, j]/5)
                    W_[j, i] = math.exp(-distance[i, j]/5) 
                    K_+=1
    
    
    # Step 3
    # mappings 
    
    # initial L
    L = np.ones((n_1, l_1))
    
    for i in range(N):
        A_i = data[i, :].reshape((n_1, n_2))
        for j in range(N):
            A_j = data[j, :].reshape((n_1, n_2))
            R_L = W_[i, j]*np.dot(np.dot(np.dot((A_i-A_j).T, L), L.T), (A_i-A_j))
            R_R = W[i, j]*np.dot(np.dot(np.dot((A_i-A_j).T, L), L.T), (A_i-A_j))
        eigenValues, eigenVectors=scipy.linalg.eig(R_L, R_R)
        eigenValuesSort=np.argsort(-eigenValues)
        R=np.mat(eigenVectors[:,eigenValuesSort[0:l_2]])

    for i in range(N):
        A_i = data[i, :].reshape((n_1, n_2))
        for j in range(N):
            A_j = data[j, :].reshape((n_1, n_2))
            L_L = W_[i, j]*np.dot(np.dot(np.dot((A_i-A_j).T, L), L.T), (A_i-A_j))
            L_R = W[i, j]*np.dot(np.dot(np.dot((A_i-A_j).T, L), L.T), (A_i-A_j))

    eigenValues, eigenVectors=scipy.linalg.eig(L_L, L_R)
    eigenValuesSort=np.argsort(-eigenValues)
    L=np.mat(eigenVectors[:,eigenValuesSort[0:l_1]])
        
    return L, R

In [352]:
def Kernel_LDE(data, label, k, k_, d):
    '''
    data is the original data. m*n(m=samples, n=dimensions)
    '''
    data=np.matrix(data)
    N = data.shape[0]
   
    
    # step 1
    # find the nearest neighbors (k). Here we get the distance from (x-y)^2
    # distance N*N
   
    data_tmp = np.sum(np.multiply(data, data), axis=1)
    distance=np.mat(data_tmp + data_tmp.T - 2*data*data.T)
    
    # the all neibors N
    neibors = np.argsort(distance,axis=1)
    
    
    # step 2
    # weights
    
    W=np.zeros([N,N])
    D=np.zeros([N,N])
    
    W_=np.zeros([N,N])
    D_=np.zeros([N,N])
    
    for i in range(N):
        K=0
        K_=0
        for j in neibors[i].A[0]:
            if (label[j]==label[i]):
                if(K<k):
                    W[i, j] = math.exp(-distance[i, j]/5)
                    W[j, i] = W[i, j]
                    K+=1
            else:
                if(K_<k):
                    W_[i, j] = math.exp(-distance[i, j]/5)
                    W_[j, i] =  W_[i, j] 
                    K_+=1
    
    for i in range(N):
        for j in range(N):
            D[i, i] +=W[i, j] 
            D_[i, i] +=W_[i, j]
    Km=np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            Km[i, j] = math.exp(-(np.linalg.norm(data[j]-data[i],ord=2))/2)
    
    
    # Step 3
    # mappings 
    
    L=D-W
    L_=D_-W_
    ei_L=np.dot(np.dot(Km, L_), Km)
    ei_R=np.dot(np.dot(Km, L), Km)
    eigenValues, eigenVectors=scipy.linalg.eig(ei_L, ei_R)
    eigenValuesSort=np.argsort(-eigenValues)
    mapping = eigenVectors[:, eigenValuesSort[0:d]]
    
    return mapping, Km

In [353]:
data=sio.loadmat("orl_faces/32_32/ORL_32x32.mat")
face = data['fea']/255
face = np.squeeze(face)
label = data['gnd']
label = np.squeeze(label)

In [None]:
def orl_face_testing():

    s=0.0
    for i in range(50):
        matfn = 'orl_faces/32_32/6Train/'+str(i+1)+'.mat'
        index = sio.loadmat(matfn)
        testIdx = index['testIdx'] -1    
        trainIdx = index['trainIdx'] -1
        ## kernel
#         trainface = np.squeeze(face[trainIdx])
#         trainlabel = np.squeeze(label[trainIdx])
#         testface = np.squeeze(face[testIdx])
#         testlabel = np.squeeze(label[testIdx])
       
#         mapping, K_train= Kernel_LDE(trainface, trainlabel, 4, 3, 40)

#         K_test = np.zeros((trainface.shape[0], testface.shape[0]))
#         for i in range(trainface.shape[0]):
#             for j in range(testface.shape[0]):
#                 K_test[i, j]=math.exp(-(np.linalg.norm(trainface[i]-testface[j],ord=2))/2)
        
#         traindata = np.dot(mapping.T, K_train)
#         testdata = np.dot(mapping.T, K_test)
        
        
### 2D-LDE        
        tmp, face_pca, tmp = PCA(np.squeeze(face), 225)
        
        trainface = np.squeeze(face_pca[trainIdx])
        trainlabel = np.squeeze(label[trainIdx])
        testface = np.squeeze(face_pca[testIdx])
        testlabel = np.squeeze(label[testIdx])

       
        L, R= two_D_LDE(trainface, trainlabel, 7, 4, 15, 15, 8, 8)
        
        traindata = []
        testdata = []
        for i in range(trainface.shape[0]):
            traindata.append(np.dot(np.dot(L.T, trainface[i].reshape(15, 15)), R).A[0])
        for i in range(testface.shape[0]):
            testdata.append(np.dot(np.dot(L.T, testface[i].reshape(15, 15)), R).A[0])
    
        tmp=knn_score(traindata, trainlabel, testdata, testlabel)
        s+=tmp
        print(tmp)
    print('s', s/50)
orl_face_testing()



0.81875
0.825
0.8375
0.8375
0.825
0.8375
0.8
0.85


In [None]:
def Leaving_one_out():
    s=0.0
    
    for i in range(400):
        index = [i for i in range(400)]
        index.remove(i) 
        trainface = np.squeeze(face[index])
        trainlabel = np.squeeze(label[index])
        testface = np.squeeze(face[i]).reshape((1, 1024))
        testlabel = label[i]
        
        mapping = LDE(trainface, trainlabel, 7, 4, 27)
        traindata = np.dot(trainface, mapping)
        testdata = np.dot(testface, mapping)
        
        if(knn_verification(trainface, trainlabel, testface, testlabel)==False):
            s+=1
        print(i, s)
    return s/400

s = Leaving_one_out()
print("score" ,s)