In [5]:
from scipy.io import loadmat
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
import time
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import metric_learn
from numpy import linalg as calc_eigen
from sklearn.preprocessing import normalize as normalize_vectors
import pandas as pd
import pylab as pl
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn import preprocessing
from scipy.spatial.distance import cdist
from scipy.spatial.distance import pdist
from scipy.optimize import fmin_slsqp
from scipy.optimize import minimize
import math

In [6]:
""" 
camId : which camera was used to get the shot (1 or 2)
filelist: names of the images (with format x_label_camId_index.png)
labels: class of the image (which person's image is it?)
query_idx: indexes of test set
gallery_idx: indexes of test set used for kNN 
train_idx: indexes of training and validation set
"""   
train_idxs = loadmat('cuhk03_new_protocol_config_labeled.mat')

In [7]:
import json
with open('PR_data/feature_data.json', 'r') as f:
    features = json.load(f)
features_np = np.array(features) #list of features converted to an array 
features_np.shape

(14096, 2048)

In [8]:
gallery_indices = train_idxs['gallery_idx'].flatten() - 1
gallery_images = features_np[gallery_indices]
gallery_labels = train_idxs['labels'][gallery_indices]

query_indices = train_idxs['query_idx'].flatten() - 1
query_images = features_np[query_indices]
query_labels = train_idxs['labels'][query_indices]

training_indices = train_idxs['train_idx'].flatten() - 1
training_images = features_np[training_indices]
training_labels = train_idxs['labels'][training_indices]

In [9]:
def calculateAverageFace(X):
    """
    takes the training data set as input 
    and returns the average face
    """
    average_face= np.mean(X, axis=0)
    #plt.figure()
    #plt.title("Average Face")
    #plt.imshow(np.resize(average_face, (46, 56)).T, cmap='gray')
    return average_face

def normalize(X, AvgFace):
    """
    takes the training data set and average face as input 
    and returns the normalized training data set

    """
    Q = np.empty((len(X[:,0]),len(X[0, :])))
    for index, face in enumerate(X):
        Q[index] = face - AvgFace
    return Q.T

def calculateCovarianceMatrix(Q):
    return np.matmul(Q,Q.T)/len(Q[0,:])

def calculateLowDimCovarianceMatrix(Q):
    return np.matmul(Q.T,Q)/len(Q[0,:])

def calculateEigenValuesAndVectors(S):
    """
    Takes covariance matrix as input
    and returns eigen values and vectors
    """
    v, w =  calc_eigen.eigh(S)
    #eigen_vectors[:,i] --> eigen_values[i]
    #eigen vector corresponding to eigen value

    #flips left to right... ascending to descending
    v = np.flip(v, axis=0) #turn ascending into descending
    w = np.flip(w, axis=1) #turn ascending into descending
    return v, w

def calculateWeights(Q, U):
    """
    takes input the normalized input and the eigen space
    outputs the weights of the normalized input
    """
    N = len(Q[0,:])
    W = np.empty((N,len(U[0, :])))
    for index, image in enumerate(Q.T):
        W[index] = np.matmul(image, U)
    return W

def calculateWeights2(Q, U):
    """
    alternative method to calculate weights
    """
    N = len(Q[0,:])
    W2 = np.empty((N,len(U[0, :])))
    W2 = np.matmul(Q.T, top_eigen_vectors)
    return W2

def printImage(face, title, saved_file):
    """
    takes input as the face you want to print, the title of the image, and the location of the file
    you would like to save the image to. 
    """
    plt.figure()
    plt.imshow(np.resize(face, (46, 56)).T, cmap='gray')
    plt.title(title)
    plt.axis('off')
    plt.savefig(saved_file, bbox_inches='tight')   # save the figure to file
    plt.show()
    plt.close() 
    
def plotEigenValueGraph(v, points):
    """
    plots eigen values against inrementing the number of eigen values used
    in descending order
    """
    y_points = [value for value in v[:points]]
    x_points = [i for i in range(points)]
    plt.xlabel('Number of Eigen Values')
    plt.ylabel('Eigen Values')
    plt.title('Plot of Eigen Values in Descending Order')
    plt.plot(x_points, y_points)
    plt.savefig('eigenvalues.png', bbox_inches='tight')   # save the figure to file
    plt.show()
    plt.close() 

In [10]:
def PCA(X, M):
    AvgFace = calculateAverageFace(X)
    A = normalize(X, AvgFace)
    S = calculateCovarianceMatrix(A)
    eigen_values, eigen_vectors = calculateEigenValuesAndVectors(S)
    x_points = 1000
    #plotEigenValueGraph(eigen_values, x_points)
    variance_captured = np.sum(eigen_values[:M])*100/np.sum(eigen_values)
    print("Variance Captured by top ", x_points, " features is ", variance_captured, ".")
    top_eigen_vectors = eigen_vectors[:, :M]
    reduced_dim_X =  calculateWeights(A, top_eigen_vectors)
    return reduced_dim_X, top_eigen_vectors, AvgFace

In [11]:
M = 100
reduced_training_images, eigen_space, AvgFace = PCA(training_images, M)

Variance Captured by top  1000  features is  92.92050565369442 .


In [12]:
reduced_gallery_images = calculateWeights(normalize(gallery_images, AvgFace), eigen_space)
reduced_query_images = calculateWeights(normalize(query_images, AvgFace), eigen_space)

### METHOD 1: Optimization of A

In [23]:
# function to maximize
def objective_function1(A):
    sum_dist = 0
    for index, image in enumerate(reduced_training_images):
        for index2, image2 in enumerate(reduced_training_images):
            if training_labels[index] != training_labels[index2]:
                sum_dist += math.sqrt(cdist(image.reshape(-1,1), image2.reshape(-1,1), 'mahalanobis', VI=A)) 
    return - sum_dist

#constraint
def constraint1(A):
    sum_dist = 0
    for index, image in enumerate(reduced_training_images):
        for index2, image2 in enumerate(reduced_training_images):
             if training_labels[index] == training_labels[index2]:
                    sum_dist += cdist(image.reshape(-1,1), image2.reshape(-1,1), 'mahalanobis', VI=A)
    return (1 - sum_dist)    

In [26]:
objective_function1(A0)

ValueError: XA must be a 2-dimensional array.

In [16]:
# Baseline A
A0 = np.linalg.inv(np.cov(np.transpose(reduced_training_images)))

In [25]:
optimization_learning = minimize(objective_function1, A0, method = 'CG', constraints = ({'type': 'ineq', 'fun': constraint1}))

ValueError: XA must be a 2-dimensional array.