In [514]:
# Import all useful libraries
import pandas as pd
import numpy as np
from scipy import linalg
from sklearn.neighbors import kneighbors_graph
from scipy import sparse

In [515]:
# Making array for column names
names_fou =np.array(list(range(1,77)))
names_fac =np.array(list(range(1,217)))
names_kar =np.array(list(range(1,65)))
names_pix =np.array(list(range(1,241)))
names_zer =np.array(list(range(1,48)))
names_mor =np.array(list(range(1,7)))

In [516]:
# Reading all files
fourier = pd.read_csv('mfeat-fou',names = names_fou, delim_whitespace=True)
profile_corr = pd.read_csv('mfeat-fac',names = names_fac, delim_whitespace=True)
karhunen = pd.read_csv('mfeat-kar',names = names_kar, delim_whitespace=True)
pixels = pd.read_csv('mfeat-pix',names = names_pix, delim_whitespace=True)
zernike = pd.read_csv('mfeat-zer',names = names_zer, delim_whitespace=True)
morpho = pd.read_csv('mfeat-mor',names = names_mor, delim_whitespace=True)

In [517]:
# Calculating connectivity matrix using knn and neighbours are chosen by hyperparameter tuning
connectivity_fou = kneighbors_graph(fourier, n_neighbors=57, mode='connectivity')
connectivity_pro = kneighbors_graph(profile_corr, n_neighbors=57, mode='connectivity')
connectivity_kar = kneighbors_graph(karhunen, n_neighbors=57, mode='connectivity')
connectivity_pix = kneighbors_graph(pixels, n_neighbors=57, mode='connectivity')
connectivity_zer = kneighbors_graph(zernike, n_neighbors=57, mode='connectivity')
connectivity_mor = kneighbors_graph(morpho, n_neighbors=57, mode='connectivity')


In [519]:
# Calculating Adjacency matrix
Adj_fou = (1/2)*(connectivity_fou + connectivity_fou.T) 
Adj_pro = (1/2)*(connectivity_pro + connectivity_pro.T) 
Adj_kar = (1/2)*(connectivity_kar + connectivity_kar.T) 
Adj_pix = (1/2)*(connectivity_pix + connectivity_pix.T) 
Adj_zer = (1/2)*(connectivity_zer + connectivity_zer.T)  
Adj_mor = (1/2)*(connectivity_mor + connectivity_mor.T)

In [522]:
# Calculating normalized laplacian  
lap_fou = sparse.csgraph.laplacian(csgraph=Adj_fou, normed=True)
lap_pro = sparse.csgraph.laplacian(csgraph=Adj_pro, normed=True)
lap_kar = sparse.csgraph.laplacian(csgraph=Adj_kar, normed=True)
lap_pix = sparse.csgraph.laplacian(csgraph=Adj_pix, normed=True)
lap_zer = sparse.csgraph.laplacian(csgraph=Adj_zer, normed=True)
lap_mor = sparse.csgraph.laplacian(csgraph=Adj_mor, normed=True)
lap_fou = lap_fou.toarray()
lap_pro = lap_pro.toarray()
lap_kar = lap_kar.toarray()
lap_pix = lap_pix.toarray()
lap_zer = lap_zer.toarray()
lap_mor = lap_mor.toarray()

In [523]:
# Calculating eigenvalues and eigenvectors of laplacian matrix
fou_values,fou_vectors = linalg.eig(lap_fou)
pro_values,pro_vectors = linalg.eig(lap_pro)
kar_values,kar_vectors = linalg.eig(lap_kar)
pix_values,pix_vectors = linalg.eig(lap_pix)
zer_values,zer_vectors = linalg.eig(lap_zer)
mor_values,mor_vectors = linalg.eig(lap_mor)

In [525]:
# Sorting and making pairs of eigen values and eigen vectors
eigenvals_sorted_fou = fou_values[np.argsort(fou_values)]
eigenvecs_sorted_fou = fou_vectors[np.argsort(fou_values)]
eigenvals_sorted_pro = pro_values[np.argsort(pro_values)]
eigenvecs_sorted_pro = pro_vectors[np.argsort(pro_values)]
eigenvals_sorted_kar = kar_values[np.argsort(kar_values)]
eigenvecs_sorted_kar = kar_vectors[np.argsort(kar_values)]
eigenvals_sorted_pix = pix_values[np.argsort(pix_values)]
eigenvecs_sorted_pix = pix_vectors[np.argsort(pix_values)]
eigenvals_sorted_zer = zer_values[np.argsort(zer_values)]
eigenvecs_sorted_zer = zer_vectors[np.argsort(zer_values)]
eigenvals_sorted_mor = mor_values[np.argsort(mor_values)]
eigenvecs_sorted_mor = mor_vectors[np.argsort(mor_values)]
fou_eig_pairs = [(np.real(eigenvals_sorted_fou[i]), np.real(eigenvecs_sorted_fou[i])) for i in range(len(fou_values))]
pro_eig_pairs = [(np.real(eigenvals_sorted_pro[i]), np.real(eigenvecs_sorted_pro[i])) for i in range(len(pro_values))]
kar_eig_pairs = [(np.real(eigenvals_sorted_kar[i]), np.real(eigenvecs_sorted_kar[i])) for i in range(len(kar_values))]
pix_eig_pairs = [(np.real(eigenvals_sorted_pix[i]), np.real(eigenvecs_sorted_pix[i])) for i in range(len(pix_values))]
zer_eig_pairs = [(np.real(eigenvals_sorted_zer[i]), np.real(eigenvecs_sorted_zer[i])) for i in range(len(zer_values))]
mor_eig_pairs = [(np.real(eigenvals_sorted_mor[i]), np.real(eigenvecs_sorted_mor[i])) for i in range(len(mor_values))]

In [548]:
# Converting array to dataframe and taking only first 10 vectors
fou_transformed = pd.DataFrame(np.real(eigenvecs_sorted_fou[:, 1:11]))
pro_transformed = pd.DataFrame(np.real(eigenvecs_sorted_pro[:, 1:11]))
kar_transformed = pd.DataFrame(np.real(eigenvecs_sorted_kar[:, 1:11]))
pix_transformed = pd.DataFrame(np.real(eigenvecs_sorted_pix[:, 1:11]))
zer_transformed = pd.DataFrame(np.real(eigenvecs_sorted_zer[:, 1:11]))
mor_transformed = pd.DataFrame(np.real(eigenvecs_sorted_mor[:, 1:11]))

In [550]:
# Importing library for k-means
from sklearn.cluster import KMeans

In [551]:
# Making objects for k-means
kmeans_fou = KMeans(n_clusters=10)
kmeans_pro = KMeans(n_clusters=10)
kmeans_kar = KMeans(n_clusters=10)
kmeans_pix = KMeans(n_clusters=10)
kmeans_zer = KMeans(n_clusters=10)
kmeans_mor = KMeans(n_clusters=10)

In [552]:
# Fitting data to k-means object
kmeans_fou.fit(fou_transformed)
kmeans_pro.fit(pro_transformed)
kmeans_kar.fit(kar_transformed)
kmeans_pix.fit(pix_transformed)
kmeans_zer.fit(zer_transformed)
kmeans_mor.fit(mor_transformed)

KMeans(algorithm='auto', copy_x=True, init='random', max_iter=300,
       n_clusters=10, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

In [553]:
# Taking labels formed by k-means
labels_fou = kmeans_fou.labels_
labels_pro = kmeans_pro.labels_
labels_kar = kmeans_kar.labels_
labels_pix = kmeans_pix.labels_
labels_zer = kmeans_zer.labels_
labels_mor = kmeans_mor.labels_

In [554]:
# True labels taken from data description
labels_true = np.repeat(np.arange(0,10),200)

In [555]:
# Import library for ARI value calculation
from sklearn.metrics import adjusted_rand_score

In [557]:
# Printing ARI values
print("In SC, ARI for mfeat-fou is {}".format(adjusted_rand_score(labels_true, labels_fou)))
print("In SC, ARI for mfeat-pro is {}".format(adjusted_rand_score(labels_true, labels_pro)))
print("In SC, ARI for mfeat-kar is {}".format(adjusted_rand_score(labels_true, labels_kar)))
print("In SC, ARI for mfeat-pix is {}".format(adjusted_rand_score(labels_true, labels_pix)))
print("In SC, ARI for mfeat-zer is {}".format(adjusted_rand_score(labels_true, labels_zer)))
print("In SC, ARI for mfeat-mor is {}".format(adjusted_rand_score(labels_true, labels_mor)))

In SC, ARI for mfeat-fou is 0.2337370619939308
In SC, ARI for mfeat-pro is 0.20025879214990658
In SC, ARI for mfeat-kar is 0.3374651506140877
In SC, ARI for mfeat-pix is 0.28049657334235634
In SC, ARI for mfeat-zer is 0.22964150272955774
In SC, ARI for mfeat-mor is 0.11486486079330274
