In [1]:
import numpy as np
from scipy import misc
import os
import time

In [2]:
# Householder
def make_householder(a):
    u = a + np.copysign(np.linalg.norm(a), a[0])
    v = a / u[0]
    v[0] = 1
    H = np.eye(a.shape[0])
    beta = 2 / (np.dot(v, v.transpose()))
    vtv = np.dot(np.matrix(v).transpose(), np.matrix(v))
    H -= np.dot(beta, vtv)
    return H

def qrDecomposition(A):
    m, n = A.shape
    Q = np.eye(m)
    for i in range(n - (m == n)):
        H = np.eye(m)
        H[i:, i:] = make_householder(A[i:, i])
        Q = np.dot(Q, H)
        A = np.dot(H, A)
    return Q, A

def qr(toCompute, maxIter = 100):
    A = []
    Q = np.eye(toCompute.shape[0])
    A.append(None)
    A.append(toCompute)
    for k in range(maxIter):
        A[0] = A[1]
        q, R = qrDecomposition(A[0])
        A[1] = np.dot(R, q)
        Q = Q.dot(q)
    return np.diagonal(A[1]), Q

In [3]:
def concatMatrix(mtxLst):
    mtx = np.array(())
    flatLst = []
    for m in mtxLst:
        flatLst.append(m.flatten())
    mtx = np.vstack(flatLst)
    return mtx.transpose()

def extractEigenvecOnVal(eigval, eigvec, threshold = 1):
    delIdx = np.where(eigval < threshold)[0]
    return np.delete(eigvec, delIdx, axis=1)

def reconstructVector(M, eigvec):
    eigvecT = eigvec.transpose()
    szeNewM = (np.shape(eigvec)[1], np.shape(M)[0])
    newmatrix = np.empty(szeNewM)
    for idx, vec in enumerate(eigvecT):
        newvec = np.dot(M, vec.transpose())
        newmatrix[idx] = newvec
    return newmatrix.transpose()

def computeCostMulti(X, y, theta):
    H = np.dot(X, theta)
    diff = H.transpose() - y
    diff = np.power(diff, 2)
    sdiff = np.sum(diff, axis=1)
    cost = sdiff / (2. * (np.shape(y)[0]))
    return cost.item(0)

def gradDescent(X, y, theta, alpha, numIter = None):
    if numIter is None:
        return gradDescentConvergence(X, y, theta, alpha)
    return gradDescentIteration(X, y, theta, alpha, numIter)

def gradDescentIteration(X, y, theta, alpha, numIter):
    m = np.shape(y)[0]
    for i in range(numIter):
        H = np.dot(X, theta)
        diff = H.transpose() - y
        sigma = np.dot(X.transpose(), diff.transpose()) / m
        theta = theta - alpha * sigma
    print "Last Iteration Cost: ", computeCostMulti(X, y, theta)
    return theta

def gradDescentConvergence(X, y, theta, alpha):
    m = np.shape(y)[0]
    i = 0
    cost = computeCostMulti(X, y, theta)
    costp = cost + 1
    diff = costp - cost
    while (diff > 1e-100):
        H = np.dot(X, theta)
        diff = H.transpose() - y
        sigma = np.dot(X.transpose(), diff.transpose()) / m
        theta = theta - alpha * sigma
        costp = cost
        cost = computeCostMulti(X, y, theta)
        diff = costp - cost
        i = i + 1
    print "Convergence Cost (", i + 1,  "iteration ): ", computeCostMulti(X, y, theta)
    return theta

In [23]:
def train(mtxLst):
    thetas = list()
    M = concatMatrix(mtxLst)
    Mmean = M.mean(axis=1)
    M -= Mmean[:, np.newaxis]
    Mtld = np.dot(M.transpose(), M)
    n = np.shape(Mtld)[1]
    #eigenval, eigenvec = qr(Mtld, 400)
    eigenval, eigenvec = np.linalg.eig(Mtld)
    eigenvec = extractEigenvecOnVal(eigenval, eigenvec, 1)
    eigenvec = reconstructVector(M, eigenvec)
    print "eigenvec shape: ", np.shape(eigenvec)
    thetas = np.dot(M.transpose(), eigenvec)
    return (Mmean, eigenvec, thetas)

In [24]:
learRate = 1e-11 # learnRate pour le gradient descent
maxIteration = 10 # nombre d'iteration pour la gradient descent
img_extension = ".pgm"
trainPath = "../faceset/sample/train/" # le nom des images doivent etre formatés tel que : [Identifiant]_[numerotation].[img_extension]
validPath = "../faceset/sample/valid/" # le nom des images doivent etre formatés tel que : [Identifiant]_[numerotation].[img_extension]

In [25]:
usleep = lambda x: time.sleep(x/1000000.0)

def loadmatrixs(path):
    matrixs = []
    sze = len(os.listdir(path))
    for i, filename in enumerate(os.listdir(path)):
        if not filename.endswith(img_extension):
            continue
        img = misc.imread(path + filename)
        matrixs.append((filename, img))
        usleep(250)
    return matrixs

In [26]:
def submit(mtx, mean, eigenvec):
    mtxflat = mtx.flatten()
    mtxflat = np.vstack(list(mtxflat))
    mtxflat -= mean[:, np.newaxis]
    mtxflat = mtxflat.transpose()[0]
    theta = np.dot()
    return theta

def compare(thetaSubmit, thetas):
    minIdx = 0
    minVal = np.absolute(np.sum(thetas[0] - thetaSubmit))
    for idx, theta in enumerate(thetas):
        val = np.absolute(np.sum(np.absolute(theta - thetaSubmit)))
        if val < minVal:
            minIdx = idx
            minVal = val
    return minIdx

def compareAvgGap(thetaSubmit, thetas):
    minIdx = 0
    minVal = (np.absolute(thetas[0] - thetaSubmit)).mean()
    for idx, theta in enumerate(thetas):
        val = (np.absolute(theta - thetaSubmit)).mean()
        if val < minVal:
            minIdx = idx
            minVal = val
    return minIdx

In [27]:
# C'est la que tout commence
trainD = loadmatrixs(trainPath)
validD = loadmatrixs(validPath)
# loadmatrixs renvoie une liste de tuple tel que : (nom_image, matrice_associée)

In [28]:
# on separe le nom des images des matrices pour l'entrainement
ftrainD, mtrainD = zip(*trainD)

In [29]:
# on effectue l'entrainement et on recupere: l'image moyenne, 
# les vecteurs propres utilisé pour l'entrainement, 
# les poids associé à chaque couple image/vecteurs propres
mean, eigenvec, thetas = train(mtrainD)

eigenvec shape:  (10304, 18)


In [30]:
# Pas tellement utile mais bon..c'est sympa !
print "Mean: ", mean
print "eigenvec: \n", eigenvec
print "Thetas shape: ", np.shape(thetas)

Mean:  [ 76.75     77.75     77.5     ...,  59.59375  58.84375  59.125  ]
eigenvec: 
[[ -7.25629058e+02  -3.16869079e+01   1.34010557e+01 ...,   2.60953592e+01
    7.13153521e+01   2.41923249e+01]
 [ -7.25641654e+02  -3.85560812e+01   1.54007289e+01 ...,   2.34859114e+01
    7.38651324e+01   2.12914895e+01]
 [ -6.84930213e+02   3.21835826e+01  -6.64149191e-01 ...,   7.89611079e+01
    7.49144776e+01   5.48245773e+01]
 ..., 
 [ -1.09136106e+03  -4.67480366e+01   8.59837855e+01 ...,  -4.17692459e+01
    1.31419709e+02   8.73033005e+00]
 [ -1.04820893e+03   4.42953148e+01   5.16429657e+01 ...,  -2.94441227e+01
    1.37878848e+02   1.50189444e+01]
 [ -1.08887510e+03  -4.93968567e+01   9.08631570e+01 ...,  -4.23480545e+01
    1.39530295e+02   1.95504597e+01]]
Thetas shape:  (32, 18)


In [31]:
# On boucle sur la liste de tuples de validation
count = 0
for idx, data in enumerate(validD):
    success = False
    filename, mtx = data
    # on effectue la gradient descent sur chaque image de validation avec les vecteurs propres precedement calculé, 
    # et on recupere les poids associé
    thetaSubmit = submit(mtx, mean, eigenvec)
    print "thetaS shape: ", np.shape(thetaSubmit)
    # on recherche l'index de l'ensemble de poids issue de l'entrainement, ce rapprochant le plus des poids qui viennent
    # d'etre calculé
    matchIdx = compareAvgGap(thetaSubmit, thetas)
    # grace a l'index que l'on viens de recuperer, on compare le nom de l'image de validation
    # et le nom de l'image matché
    if filename[:filename.rfind("_")] == ftrainD[matchIdx][:ftrainD[matchIdx].rfind("_")]:
        success = True
        count = count + 1
    if success is True:
        print filename, " --> ", ftrainD[matchIdx], "[X]"
    else:
        print filename, " --> ", ftrainD[matchIdx], "[ ]"

Last Iteration Cost:  2115928351.97
thetaS shape:  (18, 1)
s2_0001.pgm  -->  s1_0003.pgm [ ]
Last Iteration Cost:  2107188754.56
thetaS shape:  (18, 1)
s4_0002.pgm  -->  s1_0003.pgm [ ]
Last Iteration Cost:  2117866612.33
thetaS shape:  (18, 1)
s2_0002.pgm  -->  s1_0003.pgm [ ]
Last Iteration Cost:  2193357461.22
thetaS shape:  (18, 1)
s3_0002.pgm  -->  s1_0003.pgm [ ]
Last Iteration Cost:  2064378418.46
thetaS shape:  (18, 1)
s1_0002.pgm  -->  s1_0003.pgm [X]
Last Iteration Cost:  2025098417.7
thetaS shape:  (18, 1)
s1_0001.pgm  -->  s1_0003.pgm [X]
Last Iteration Cost:  2094906468.21
thetaS shape:  (18, 1)
s4_0001.pgm  -->  s1_0003.pgm [ ]
Last Iteration Cost:  2210542876.04
thetaS shape:  (18, 1)
s3_0001.pgm  -->  s1_0003.pgm [ ]


In [27]:
# bah la ca affiche le taux de reussite
print count, " / ", idx + 1, "===>", count / (idx + 1.) * 100, "%"

18  /  18 ===> 100.0 %
