# Faces clustering

In [1]:
# For Kmeans algorithm. See PGM otherwise for handcrafted class
from sklearn.cluster import KMeans
# For loading matlab matrix file
from scipy.io import loadmat
# Hungarian algorithm
from scipy.optimize import linear_sum_assignment

#from scipy.sparse.linalg import eigs, eigsh
import numpy as np

# scipy.stats.itemfreq was deprecated in SciPy 1.0 and removed in SciPy 1.3,
# so a plain import breaks this cell on recent installations. Keep the name
# available through an equivalent NumPy-based fallback.
try:
    from scipy.stats import itemfreq
except ImportError:
    def itemfreq(a):
        """Return a 2-D array of the unique items of `a` and their counts.

        Column 0 holds the sorted unique values of the flattened input,
        column 1 the number of occurrences of each value.
        """
        items, counts = np.unique(a, return_counts=True)
        return np.array([items, counts]).T

In [2]:
# Plotting library and IPython session configuration
import matplotlib.pyplot as plt
# Render figures inline in the notebook output
%matplotlib inline
# Automatically reload edited modules before executing each cell
%load_ext autoreload
%autoreload 2
# Display floating-point results with five decimals
%precision %.5f
# Typeset figure text with LaTeX (needs a working LaTeX installation)
plt.rc('text', usetex=True)
# Use a serif font family, with Palatino as the serif face
plt.rc('font',**{'family':'serif','serif':['Palatino']})

In [3]:
#Algorithm and error
from error_evaluation import *
from Ksubspace import *
from spectral_clustering import *
from SSC import *

## Globals

In [4]:
# Directory holding the input data files; the trailing slash matters because
# file names are appended by plain string concatenation.
DATA_DIR = "data/"

## Data

In [5]:
# Load the Extended Yale B MATLAB file from DATA_DIR
YaleB = loadmat(DATA_DIR + 'ExtendedYaleB.mat')
# Data matrix cast to 64-bit integers; samples are indexed along axis 1
# (the data-set construction cell selects columns with a label mask)
data = YaleB['EYALEB_DATA'].astype(np.int64)
# Labels flattened to 1-D and shifted down by one
# NOTE(review): presumably converts 1-based labels to 0-based -- confirm
ground_truth = YaleB['EYALEB_LABEL'].reshape(-1) - 1

In [6]:
# Construct data sets of individuals
keys = [2, 10, 20, 30, 38]
data_set = {key : {"data" : data[:, ground_truth < key],
                   "labels" : ground_truth[:(ground_truth < key).sum()]
                  } for key in keys}

## Results

In [7]:
# Keys of data_set as an array, in ascending order
I = np.array(sorted(data_set))
I

array([ 2, 10, 20, 30, 38])

In [8]:
# Number of labelled samples in each data set, keyed like data_set
print("---- Length of datasets : ----")
length = {i: len(data_set[i]["labels"]) for i in I}
# Report the sizes in ascending key order
for i in I:
    print("For individuals 1-{} : {}".format(i, length[i]))

---- Length of datasets : ----
For individuals 1-2 : 128
For individuals 1-10 : 640
For individuals 1-20 : 1262
For individuals 1-30 : 1902
For individuals 1-38 : 2414


### SSC

In [9]:
# Container for SSC results; the following cell adds one entry per data-set key
SSC_res = {}

In [10]:
# Key of the data set to cluster (index into data_set)
i = 2
# Create an empty result slot for this key
SSC_res[i] = {}

In [24]:
# Parameters handed to SSC: tau first, then mu2
tau, mu2 = 1, 500
# Run SSC on the selected data set; the data-set key i is passed as second argument
temp = SSC(data_set[i]["data"], i, tau, mu2)

0.517671607527 10.9135313364 5449.81486309
0.619406448363 0.889580182128 5285.47996977
0.550763526123 0.775340485232 5147.58627333
0.486153480878 0.673277844903 5031.68068628
0.431626700986 0.59262188185 4932.54895581
0.385796899416 0.525937452648 4846.53775865
0.34698531598 0.469772549771 4770.93211175
0.314071255339 0.422153123849 4703.66116115
0.28607047824 0.381671674623 4643.12518346
0.262121267093 0.347123386332 4588.07866414
0.241577161914 0.317555952613 4537.5481521
0.223923347767 0.292188619498 4490.76751326
0.208691672471 0.270356323257 4447.12973546
0.195493743094 0.251489838041 4406.15065115
0.184020278875 0.235125625665 4367.44161793
0.173994694248 0.220877647664 4330.68846235
0.165207154243 0.208420352027 4295.63511685
0.157466826873 0.19748317943 4262.07148335
0.150616354708 0.187838912384 4229.82389399
0.14452335388 0.179292425464 4198.7476529
0.139080349395 0.171685409893 4168.72143721
0.134196179961 0.164885650414 4139.64276287
0.129793109022 0.158779185441 4111.42445

0.0358877328244 0.0389364573503 2148.00069743
0.0357374033472 0.0387652182183 2141.5350153
0.0355882028335 0.0385953327724 2135.10228674
0.0354401167472 0.0384267841498 2128.70222751
0.0352931318235 0.0382595563661 2122.33455703
0.0351472372589 0.0380936336754 2115.99899832
0.0350024192979 0.0379289999806 2109.69527795
0.0348586656713 0.0377656399165 2103.42312599
0.0347159641862 0.0376035382622 2097.18227592
0.0345743028456 0.0374426800469 2090.97246462
0.0344336698337 0.0372830505348 2084.79343227
0.0342940535134 0.0371246352219 2078.6449223
0.0341554424196 0.036967419828 2072.52668137
0.0340178252623 0.0368113902981 2066.43845929
0.0338811909174 0.0366565327931 2060.38000897
0.0337455284246 0.0365028336854 2054.35108638
0.0336108269848 0.0363502795569 2048.35145049
0.0334770770829 0.0361988571872 2042.38086321
0.0333442644237 0.0360485531169 2036.4390894
0.0332123834166 0.0358993559643 2030.52589674
0.0330814217732 0.0357512520176 2024.64105574
0.0329513693027 0.035604228762 2018.78

0.0195630499884 0.0207494909555 1340.88161924
0.0195079577679 0.0206894900567 1337.76325594
0.0194530997742 0.0206297529338 1334.65533753
0.0193984745116 0.0205702778376 1331.5578134
0.019344080497 0.020511063034 1328.47063331
0.019289916261 0.0204521068056 1325.39374737
0.0192359804282 0.0203934074501 1322.32710604
0.019182271591 0.0203349632846 1319.27066012
0.0191287877617 0.0202767723997 1316.22436076
0.019075527929 0.0202188336186 1313.18815946
0.019022491659 0.0201611456763 1310.16200803
0.018969676257 0.0201037059893 1307.14585864
0.018917080598 0.0200465131873 1304.13966378
0.0188647033786 0.0199895657367 1301.14337626
0.018812543272 0.0199328620847 1298.15694923
0.0187605989515 0.0198764006819 1295.18033614
0.0187088690982 0.0198201799891 1292.21349079
0.0186573523998 0.0197641984768 1289.25636725
0.0186060475557 0.019708454629 1286.30891995
0.0185549532719 0.0196529469393 1283.37110359
0.0185040682697 0.0195976739186 1280.44287321
0.0184533912806 0.0195426340902 1277.52418411

In [25]:
# Show element 1 of the SSC result; the evaluation cell compares it with the
# reference labels
temp[1]

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [26]:
# Compare the SSC output with the reference labels of the selected data set
evaluate_error(temp[1], data_set[i]["labels"])

Erreur : 0.2578125


0.25781

## K-subspaces

In [27]:
# Key of the data set used for the K-subspaces run (index into data_set)
i = 2

In [31]:
# Run K-subspaces on the selected data set. The third argument is an (i, 1)
# float column filled with 9.
# NOTE(review): presumably the dimension of each subspace -- confirm in Ksubspace
R = ksubspaces(data_set[i]["data"], i, np.full((i, 1), 9.0))

mu Error :  6507.08498485 U error 190.546804864
mu Error :  2724.19617314 U error 6.01341720582
mu Error :  155.71167996 U error 1.14011719948
mu Error :  87.6661972342 U error 2.11586612934
mu Error :  80.4098881654 U error 2.88126567462
mu Error :  0.0 U error 0.0
mu Error :  6486.93355909 U error 190.56150918
mu Error :  2732.38039432 U error 5.97921621548
mu Error :  74.3457901545 U error 6.00861626458
mu Error :  0.0 U error 0.0
mu Error :  6466.69297246 U error 190.551805774
mu Error :  2950.25924356 U error 5.9938688402
mu Error :  92.0946129171 U error 6.34839845115
mu Error :  70.6932718974 U error 8.23549124941
mu Error :  0.0 U error 0.0
[5172.9683801322117, 5174.1247704498146, 5233.9147317507695]


In [35]:
# Compare R[0][1] with the reference labels of the selected data set
# NOTE(review): presumably R[0] is the retained run and [1] its labels -- confirm in Ksubspace
evaluate_error(R[0][1], data_set[i]["labels"])

Erreur : 0.40625


0.40625