# Faces clustering

In [1]:
# For Kmeans algorithm. See PGM otherwise for handcrafted class
from sklearn.cluster import KMeans
# For loading matlab matrix file
from scipy.io import loadmat
# Hungarian algorithm
from scipy.optimize import linear_sum_assignment

#from scipy.sparse.linalg import eigs, eigsh
from scipy.stats import itemfreq
import numpy as np

In [2]:
# Plotting library and notebook-wide IPython configuration
import matplotlib.pyplot as plt
# Render figures inline in the notebook output
%matplotlib inline
# Reload edited project modules automatically before each cell is executed
%load_ext autoreload
%autoreload 2
# Display floating-point results with 5 decimal places
%precision %.5f
# Provides the %lprun magic used in the K-subspaces section
%load_ext line_profiler
# Typeset figure text with LaTeX (needs a working LaTeX installation) using a serif Palatino font
plt.rc('text', usetex=True)
plt.rc('font',**{'family':'serif','serif':['Palatino']})

In [3]:
#Algorithm and error
from error_evaluation import *
from spectral_clustering import *
from SSC import *
from ksubspaces import *

## Globals

In [4]:
# Relative path prefix of the data files; it is concatenated directly with the
# file name when loading, so the trailing slash is required
DATA_DIR = "data/"

## Data

In [5]:
# Read the Extended Yale B MATLAB file into a dict of arrays
YaleB = loadmat(DATA_DIR + 'ExtendedYaleB.mat')
# Copy of the data matrix converted to 64-bit integers
# (presumably to avoid overflow of a narrower stored integer type — TODO confirm)
data = np.array(YaleB['EYALEB_DATA'], dtype=np.int64)
# Labels flattened to 1-D and shifted down by one
# (presumably from 1-based labels in the file to 0-based — TODO confirm)
ground_truth = np.ravel(YaleB['EYALEB_LABEL']) - 1

In [6]:
# Build nested data sets: for each `key`, keep the samples whose (0-based)
# label is strictly below `key`, i.e. the first `key` individuals.
keys = [2, 10, 20, 30, 38]
# The same boolean mask selects both the data columns and the labels, so the
# two stay aligned even if the label vector is not sorted. (Taking a prefix of
# `ground_truth` for the labels is only correct when the labels are sorted.)
data_set = {key: {"data": data[:, ground_truth < key],
                  "labels": ground_truth[ground_truth < key]
                 } for key in keys}

## Results

In [11]:
# Keys of data_set (numbers of individuals) as an array, in ascending order
I = np.array(sorted(data_set.keys()))
I

array([ 2, 10, 20, 30, 38])

In [12]:
# Number of samples in each data set, stored per key and printed
length = {}
print ("---- Length of datasets : ----")
for i in I:
    # Store first, then report the stored value
    length[i] = len(data_set[i]["labels"])
    print ("For individuals 1-{} : {}". format(i, length[i]))

---- Length of datasets : ----
For individuals 1-2 : 128
For individuals 1-10 : 640
For individuals 1-20 : 1262
For individuals 1-30 : 1902
For individuals 1-38 : 2414


### SSC

In [13]:
# Container for SSC results; entries are added per data-set key
SSC_res = {}

In [14]:
# Data set under study: the one built for the first 2 individuals (a key of `data_set`)
i = 2
# Empty result slot for this data set
# NOTE(review): no visible cell stores anything in this slot afterwards — confirm it is still needed
SSC_res[i] = {}

In [19]:
# SSC parameters tau and mu2 (their exact role is defined in the SSC module — TODO confirm)
tau, mu2 = 1, 500
# Run SSC on the selected data set; `i` is passed as second argument
# (presumably the number of clusters) and verbose mode prints the error trace
temp = SSC(data_set[i]["data"], i, tau, mu2, verbose = True)

Current error: 4.76687e+04
Current error: 4.55434e+04
Current error: 4.38262e+04
Current error: 4.23540e+04
Current error: 4.11160e+04
Current error: 4.00554e+04
Current error: 3.91288e+04
Current error: 3.83210e+04
Current error: 3.76072e+04
Current error: 3.69652e+04
Current error: 3.63867e+04
Current error: 3.58609e+04
Current error: 3.53771e+04
Current error: 3.49278e+04
Current error: 3.45113e+04
Current error: 3.41206e+04
Current error: 3.37520e+04
Current error: 3.34031e+04
Current error: 3.30719e+04
Current error: 3.27554e+04
Current error: 3.24525e+04
Current error: 3.21610e+04
Current error: 3.18805e+04
Current error: 3.16099e+04
Current error: 3.13480e+04
Current error: 3.10943e+04
Current error: 3.08482e+04
Current error: 3.06091e+04
Current error: 3.03765e+04
Current error: 3.01495e+04
Current error: 2.99278e+04
Current error: 2.97113e+04
Current error: 2.94996e+04
Current error: 2.92924e+04
Current error: 2.90896e+04
Current error: 2.88907e+04
Current error: 2.86957e+04
C

Current error: 9.93016e+03
Current error: 9.90356e+03
Current error: 9.87709e+03
Current error: 9.85075e+03
Current error: 9.82454e+03
Current error: 9.79845e+03
Current error: 9.77250e+03
Current error: 9.74667e+03
Current error: 9.72097e+03
Current error: 9.69539e+03
Current error: 9.66994e+03
Current error: 9.64461e+03
Current error: 9.61941e+03
Current error: 9.59433e+03
Current error: 9.56937e+03
Current error: 9.54452e+03
Current error: 9.51980e+03
Current error: 9.49520e+03
Current error: 9.47072e+03
Current error: 9.44636e+03
Current error: 9.42211e+03
Current error: 9.39798e+03
Current error: 9.37397e+03
Current error: 9.35007e+03
Current error: 9.32628e+03
Current error: 9.30261e+03
Current error: 9.27905e+03
Current error: 9.25561e+03
Current error: 9.23227e+03
Current error: 9.20905e+03
Current error: 9.18593e+03
Current error: 9.16293e+03
Current error: 9.14003e+03
Current error: 9.11725e+03
Current error: 9.09457e+03
Current error: 9.07200e+03
Current error: 9.04953e+03
C

In [20]:
# Second element of the SSC result; the saved output is a vector of 0/1 integers, one per sample
temp[1]

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [21]:
# Compare the SSC assignment with the ground-truth labels
# (the displayed value is presumably the misclassification rate — TODO confirm)
evaluate_error(temp[1], data_set[i]["labels"])

0.25781

## K-subspaces

In [7]:
# Work on the data set built for the first 2 individuals
i = 2

In [40]:
# %load_ext line_profiler

In [86]:
# Line-profile ksubspaces on the selected data set (the result is not stored).
# Arguments after the data: i, an (i, 1) float array filled with 3, and 5
# (presumably cluster count, subspace dimensions and number of replicates — TODO confirm).
# NOTE(review): other cells pass the third argument as the list [3]*i — confirm which form ksubspaces expects
%lprun -f ksubspaces ksubspaces(data_set[i]["data"], i, 3 * np.ones((i,1)), 5)

mu Error :  4965.92247221 U error 109.984822701
mu Error :  3416.44224158 U error 3.46077090084
mu Error :  307.777767676 U error 4.0163008098
mu Error :  304.357016789 U error 2.20612830512
mu Error :  162.811973321 U error 2.83835783643
mu Error :  140.015325782 U error 0.458270445847
*** KeyboardInterrupt exception caught in code being profiled.

In [35]:
# NOTE(review): `R` is not assigned in any visible cell, so this cell depends on
# state created elsewhere — confirm where `R` comes from before re-running
evaluate_error(R[0][1], data_set[i]["labels"])

Erreur : 0.40625


0.40625

In [129]:
# Line-profile ksubspaces and assign its result to R1.
# NOTE(review): the third argument is a float (i, 1) array here but the list [3]*i in other cells — confirm which form ksubspaces expects
%lprun -f ksubspaces R1 = ksubspaces(data_set[i]["data"], i, 3 * np.ones((i,1)), 5)

Current error for 0 replicate : 35916.58420562846
Current error for 0 replicate : 26698.848022677324
Current error for 0 replicate : 26441.8308791306
Current error for 0 replicate : 25856.319083505685
Current error for 0 replicate : 25049.05950143832
Current error for 0 replicate : 24847.248807715587
Current error for 0 replicate : 24712.4252953117
Current error for 0 replicate : 24641.183511786687
Current error for 0 replicate : 24564.026926368617
Current error for 0 replicate : 24555.920715016822
Current error for 0 replicate : 24549.537951168495
Current error for 0 replicate : 24543.27529708304
Current error for 1 replicate : 40581.32391876835
Current error for 1 replicate : 26085.419198076906
Current error for 1 replicate : 25590.15080985783
Current error for 1 replicate : 25263.057740098335
Current error for 1 replicate : 25002.85132470961
Current error for 1 replicate : 24892.63046599432
Current error for 1 replicate : 24846.75213457641
Current error for 1 replicate : 24791.96428

In [144]:
# Scratch computation: 2016 x 128, presumably the number of entries of the
# 2-individual data matrix (2016 rows, 128 samples) — TODO confirm
2016 * 128

258048

In [23]:
# Score the first entry of R1[0] against the ground-truth labels.
# NOTE(review): the saved output is a NameError — R1 must be computed in the
# current kernel session before this cell is run
evaluate_error(R1[0][0], data_set[i]["labels"])

NameError: name 'R1' is not defined

In [38]:
# Line-profile ksubspaces with progress output and assign its result to R2;
# the third argument is passed as the plain list [3]*i
%lprun -f ksubspaces R2 = ksubspaces(data_set[i]["data"], i, [3]*i, 5, verbose = True)

Current error for 0 replicate : 6.05300e+05
Current error for 0 replicate : 4.33617e+05
Current error for 0 replicate : 4.18402e+05
Current error for 0 replicate : 4.17176e+05
Current error for 1 replicate : 6.61762e+05
Current error for 1 replicate : 5.84580e+05
Current error for 1 replicate : 5.49499e+05
Current error for 1 replicate : 5.27064e+05
Current error for 1 replicate : 5.23844e+05
Current error for 1 replicate : 5.23723e+05
Current error for 2 replicate : 1.02318e+06
Current error for 2 replicate : 7.01338e+05
Current error for 2 replicate : 6.72369e+05
Current error for 2 replicate : 6.54380e+05
Current error for 2 replicate : 6.29650e+05
Current error for 2 replicate : 6.22200e+05
Current error for 2 replicate : 6.20309e+05
Current error for 2 replicate : 6.19636e+05
Current error for 3 replicate : 6.83086e+05
Current error for 3 replicate : 6.60175e+05
Current error for 3 replicate : 6.54371e+05
Current error for 3 replicate : 6.53917e+05
Current error for 4 replicate : 

In [34]:
# Score the first entry of R2[0] against the ground-truth labels
evaluate_error(R2[0][0], data_set[i]["labels"])

0.49219

In [None]:
# Run ksubspaces on every 10th column of the data matrix only, with 1 as last argument
# (presumably a quick single-replicate run on a subsample — TODO confirm)
R1 = ksubspaces(data_set[i]["data"][:, ::10], i, 3 * np.ones((i,1)), 1)

In [33]:
# Run ksubspaces on the full selected data set with 10 as last argument and
# progress output; the result is kept in R2
R2 = ksubspaces(data_set[i]["data"], i, [3] * i, 10, verbose = True)

Current error for 0 replicate : 3.52319e+04
Current error for 0 replicate : 2.46997e+04
Current error for 0 replicate : 2.46117e+04
Current error for 0 replicate : 2.45358e+04
Current error for 0 replicate : 2.45253e+04
Current error for 0 replicate : 2.45195e+04
Current error for 1 replicate : 3.51137e+04
Current error for 1 replicate : 2.57066e+04
Current error for 1 replicate : 2.47695e+04
Current error for 1 replicate : 2.45418e+04
Current error for 1 replicate : 2.45295e+04
Current error for 2 replicate : 2.98564e+04
Current error for 2 replicate : 2.46553e+04
Current error for 2 replicate : 2.46220e+04
Current error for 2 replicate : 2.45864e+04
Current error for 2 replicate : 2.45575e+04
Current error for 2 replicate : 2.45474e+04
Current error for 2 replicate : 2.45389e+04
Current error for 3 replicate : 3.51969e+04
Current error for 3 replicate : 2.50902e+04
Current error for 3 replicate : 2.47624e+04
Current error for 3 replicate : 2.46734e+04
Current error for 3 replicate : 

In [15]:
# Inspect the second entry of R2[0]; the saved output is a vector of 0/1 values, one per sample
R2[0][1]

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [32]:
# Index of the largest entry along axis 0 of R2[0], i.e. one index per column
R2[0].argmax(0)

array([1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [20]:
# Shape of the selected data matrix after keeping every 10th column
data_set[i]["data"][:, ::10].shape

(2016, 242)