## Importing
This cell is only concerned with importing the libraries and methods needed for implementing spectral clustering.

In [199]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.cluster import KMeans
from sklearn import metrics

## Reading data
To run the algorithm, we read the data in 2 multidimensional arrays, one for training of size 19*8*48*125*45, and one for evaluation of size 19*8*12*125*45.

In [200]:
aps_eval = []
temp = []
for i in range(1, 20):
    p_eval = []
    for j in range(1, 9):
        s_eval = []
        for k in range (49, 61):
            temp = []
            path = "data\\a"
            path += f'0{i}' if i < 10 else f'{i}'
            path += f'\\p{j}\\s'
            path += f'{k}.txt'
            file = open(path, "r")
            for l in range(125):
                temp.append(np.array(file.readline().split(','), dtype=float))
            s_eval.append(np.array(temp))
        p_eval.append(np.array(np.array(s_eval)))
    aps_eval.append(np.array(p_eval))
aps_eval = np.array(aps_eval)

In [201]:
eval_points_means = []
for a in range(19):
    for p in range(8):
        for s in range(12):
            eval_points_means.append(np.mean(aps_eval[a][p][s], axis=0))
eval_points_means = np.array(eval_points_means)
print(len(eval_points_means))
print(len(eval_points_means[0]))

1824
45


In [202]:
eval_points_flattened = []
for a in range(19):
    for p in range(8):
        for s in range(12):
            eval_points_flattened.append([])
            for r in range(125):
                for n in range(45):
                    eval_points_flattened[a*96+p*12+s].append(aps_eval[a][p][s][r][n])
eval_points_flattened = PCA(n_components=0.85).fit_transform(eval_points_flattened)
print(len(eval_points_flattened))
print(len(eval_points_flattened[0]))

1824
136


In [203]:
sim_mat_means = rbf_kernel(eval_points_means, eval_points_means, 0.00001)
for i in range(len(sim_mat_means)):
    x = np.sum(sim_mat_means[i]) - 1
    sim_mat_means[i] /= -x
    sim_mat_means[i][i] = 1.0

sim_mat_flattened = rbf_kernel(eval_points_flattened, eval_points_flattened, 0.00001)
for i in range(len(sim_mat_flattened)):
    x = np.sum(sim_mat_flattened[i]) - 1
    sim_mat_flattened[i] /= -x
    sim_mat_flattened[i][i] = 1.0


In [204]:
eigen_values, eigen_vectors = np.linalg.eig(sim_mat_means)
idx = np.real(eigen_values).argsort()[::-1]
eigen_vectors = np.real(np.array(eigen_vectors[idx, :]).transpose())
data_means = []
for l, eigen_vector in zip(range(19), eigen_vectors):
    if len(data_means) == 0:
        data_means.append(eigen_vector)
    else:
        data_means = np.append(data_means, [eigen_vector], axis=0)
data_means = np.array(data_means).transpose()

eigen_values, eigen_vectors = np.linalg.eig(sim_mat_flattened)
idx = np.real(eigen_values).argsort()[::-1]
eigen_vectors = np.real(np.array(eigen_vectors[idx, :]).transpose())
data_flattened = []
for l, eigen_vector in zip(range(19), eigen_vectors):
    if len(data_flattened) == 0:
        data_flattened.append(eigen_vector)
    else:
        data_flattened = np.append(data_flattened, [eigen_vector], axis=0)
data_flattened = np.array(data_flattened).transpose()

In [205]:
for i in range(len(data_means)):
    vec_sum = np.linalg.norm(data_means[i])
    data_means[i] /= vec_sum

for i in range(len(data_flattened)):
    vec_sum = np.linalg.norm(data_flattened[i])
    data_flattened[i] /= vec_sum

In [207]:
kmeans = KMeans(n_clusters=19)
kmeans.fit(data_means)
print(metrics.silhouette_score(data_means, kmeans.labels_))
labels_file = open("labels.txt", "w")
for i in range(19):
    for j in range(8):
        for q in range(12):
            labels_file.write(str(kmeans.labels_[i*96+j*12+q]))
            labels_file.write(" ")
        labels_file.write('\n')
    labels_file.write('\n')
labels_file.close()
kmeans = KMeans(n_clusters=19)
kmeans.fit(data_flattened)
print(metrics.silhouette_score(data_flattened, kmeans.labels_))
labels_file = open("labels1.txt", "w")
for i in range(19):
    for j in range(8):
        for q in range(12):
            labels_file.write(str(kmeans.labels_[i*96+j*12+q]))
            labels_file.write(" ")
        labels_file.write('\n')
    labels_file.write('\n')
labels_file.close()

0.20045943118572188
0.26340144637892726
