In [1]:
# Import basic libraries
import pandas as pd
import numpy as np

In [2]:
# Making array for column names
names_fou =np.array(list(range(1,77)))
names_fac =np.array(list(range(1,217)))
names_kar =np.array(list(range(1,65)))
names_pix =np.array(list(range(1,241)))
names_zer =np.array(list(range(1,48)))
names_mor =np.array(list(range(1,7)))

In [3]:
# Read all the files
fourier = pd.read_csv('mfeat-fou',names = names_fou, delim_whitespace=True)
profile_corr = pd.read_csv('mfeat-fac',names = names_fac, delim_whitespace=True)
karhunen = pd.read_csv('mfeat-kar',names = names_kar, delim_whitespace=True)
pixels = pd.read_csv('mfeat-pix',names = names_pix, delim_whitespace=True)
zernike = pd.read_csv('mfeat-zer',names = names_zer, delim_whitespace=True)
morpho = pd.read_csv('mfeat-mor',names = names_mor, delim_whitespace=True)

In [6]:
# Standardizing the data 
fourier_std= (fourier - fourier.mean(axis=0))/(fourier.std(axis=0))
profile_corr_std = (profile_corr - profile_corr.mean(axis=0))/(profile_corr.std(axis=0))
karhunen_std = (karhunen - karhunen.mean(axis=0))/(karhunen.std(axis=0))
pixels_std = (pixels - pixels.mean(axis=0))/(pixels.std(axis=0))
zernike_std = (zernike - zernike.mean(axis=0))/(zernike.std(axis=0))
morpho_std = (morpho - morpho.mean(axis=0))/(morpho.std(axis=0))

In [7]:
# Calculating covariance matrix
fou_cov = fourier_std.cov()
pro_cov = profile_corr_std.cov()
kar_cov = karhunen_std.cov()
pix_cov = pixels_std.cov()
zer_cov = zernike_std.cov()
mor_cov = morpho_std.cov()

In [8]:
# Calculating eigen values and eigen vectors of covariance matrix
fou_values,fou_vectors = np.linalg.eig(fou_cov)
pro_values,pro_vectors = np.linalg.eig(pro_cov)
kar_values,kar_vectors = np.linalg.eig(kar_cov)
pix_values,pix_vectors = np.linalg.eig(pix_cov)
zer_values,zer_vectors = np.linalg.eig(zer_cov)
mor_values,mor_vectors = np.linalg.eig(mor_cov)

In [9]:
# Making a list of (eigenvalue, eigenvector) tuples
fou_eig_pairs = [(np.abs(fou_values[i]), fou_vectors[i]) for i in range(len(fou_values))]
pro_eig_pairs = [(np.abs(pro_values[i]), pro_vectors[i]) for i in range(len(pro_values))]
kar_eig_pairs = [(np.abs(kar_values[i]), kar_vectors[i]) for i in range(len(kar_values))]
pix_eig_pairs = [(np.abs(pix_values[i]), pix_vectors[i]) for i in range(len(pix_values))]
zer_eig_pairs = [(np.abs(zer_values[i]), zer_vectors[i]) for i in range(len(zer_values))]
mor_eig_pairs = [(np.abs(mor_values[i]), mor_vectors[i]) for i in range(len(mor_values))]

# Sorting the (eigenvalue, eigenvector) tuples from high to low
fou_eig_pairs.sort(key=lambda x: x[0], reverse=True)
pro_eig_pairs.sort(key=lambda x: x[0], reverse=True)
kar_eig_pairs.sort(key=lambda x: x[0], reverse=True)
pix_eig_pairs.sort(key=lambda x: x[0], reverse=True)
zer_eig_pairs.sort(key=lambda x: x[0], reverse=True)
mor_eig_pairs.sort(key=lambda x: x[0], reverse=True)


In [10]:
# Reshaping and choosing only first 10 vectors 
fou_matrix_w = fou_eig_pairs[0][1].reshape(1,76)
pro_matrix_w = pro_eig_pairs[0][1].reshape(1,216)
kar_matrix_w = kar_eig_pairs[0][1].reshape(1,64)
pix_matrix_w = pix_eig_pairs[0][1].reshape(1,240)
zer_matrix_w = zer_eig_pairs[0][1].reshape(1,47)
mor_matrix_w = mor_eig_pairs[0][1].reshape(1,6)

for i in range(1,10):
    fou_matrix_w = np.vstack((fou_matrix_w,fou_eig_pairs[i][1].reshape(1,76)))
    pro_matrix_w = np.vstack((pro_matrix_w,pro_eig_pairs[i][1].reshape(1,216)))
    kar_matrix_w = np.vstack((kar_matrix_w,kar_eig_pairs[i][1].reshape(1,64)))
    pix_matrix_w = np.vstack((pix_matrix_w,pix_eig_pairs[i][1].reshape(1,240)))
    zer_matrix_w = np.vstack((zer_matrix_w,zer_eig_pairs[i][1].reshape(1,47)))
for i in range(1,6):    
    mor_matrix_w = np.vstack((mor_matrix_w,mor_eig_pairs[i][1].reshape(1,6)))

In [25]:
# Transformation of data
fou_transformed = np.real(fourier.dot(fou_matrix_w.T))
pro_transformed = np.real(profile_corr.dot(pro_matrix_w.T))
kar_transformed = np.real(karhunen.dot(kar_matrix_w.T))
pix_transformed = np.real(pixels.dot(pix_matrix_w.T))
zer_transformed = np.real(zernike.dot(zer_matrix_w.T))
mor_transformed = np.real(morpho.dot(mor_matrix_w.T))

In [26]:
# Import library for k-means
from sklearn.cluster import KMeans

In [27]:
# Making object for k-means
kmeans_fou = KMeans(n_clusters=10)
kmeans_pro = KMeans(n_clusters=10)
kmeans_kar = KMeans(n_clusters=10)
kmeans_pix = KMeans(n_clusters=10)
kmeans_zer = KMeans(n_clusters=10)
kmeans_mor = KMeans(n_clusters=10)

In [28]:
# Fitting data to k-means object
kmeans_fou.fit(fou_transformed)
kmeans_pro.fit(pro_transformed)
kmeans_kar.fit(kar_transformed)
kmeans_pix.fit(pix_transformed)
kmeans_zer.fit(zer_transformed)
kmeans_mor.fit(mor_transformed)

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=10, n_init=10, n_jobs=None, precompute_distances='auto',
       random_state=None, tol=0.0001, verbose=0)

In [29]:
# Labels assigned by k-means to each data point
labels_fou = kmeans_fou.labels_
labels_pro = kmeans_pro.labels_
labels_kar = kmeans_kar.labels_
labels_pix = kmeans_pix.labels_
labels_zer = kmeans_zer.labels_
labels_mor = kmeans_mor.labels_

In [30]:
# True labels as mentioned in data description
labels_true = np.repeat(np.arange(0,10),200)

In [31]:
# Import library to calculate ARI value
from sklearn.metrics import adjusted_rand_score

In [32]:
# Printing all values
print("In PCA, ARI for mfeat-fou is {}".format(adjusted_rand_score(labels_true, labels_fou)))
print("In PCA, ARI for mfeat-pro is {}".format(adjusted_rand_score(labels_true, labels_pro)))
print("In PCA, ARI for mfeat-kar is {}".format(adjusted_rand_score(labels_true, labels_kar)))
print("In PCA, ARI for mfeat-pix is {}".format(adjusted_rand_score(labels_true, labels_pix)))
print("In PCA, ARI for mfeat-zer is {}".format(adjusted_rand_score(labels_true, labels_zer)))
print("In PCA, ARI for mfeat-mor is {}".format(adjusted_rand_score(labels_true, labels_mor)))

In PCA, ARI for mfeat-fou is 0.3808427615472246
In PCA, ARI for mfeat-pro is 0.37196679300981667
In PCA, ARI for mfeat-kar is 0.3698393500158109
In PCA, ARI for mfeat-pix is 0.22569910237712898
In PCA, ARI for mfeat-zer is 0.2114436519991947
In PCA, ARI for mfeat-mor is 0.3137473896944475
