In [46]:
import torch
import torch.nn.functional as F
import pandas as pd
import numpy as np

In [41]:
def k_means(data, num_clusters, max_iters=100):
    """For data we'll use eigenvalues of the Laplacian of a graph"""
    centroids = data[:num_clusters, :]
    for _ in range(max_iters):
        distances = torch.cdist(data, centroids, p=2)
        cluster_assignments = torch.argmin(distances, dim=1)
        new_centroids = torch.stack([data[cluster_assignments == i].mean(0) for i in range(num_clusters)])
        if torch.equal(new_centroids, centroids):
            break
        centroids = new_centroids
        
    return centroids, cluster_assignments

In [43]:
def spectral_clustering(adjacency_matrix, num_clusters=2):
    # Laplacian:
    degree_matrix = torch.diag(torch.sum(adjacency_matrix, dim=1))
    laplacian_matrix = degree_matrix - adjacency_matrix

    # Eigenvalue Decomposition
    eigenvalues, eigenvectors = torch.linalg.eigh(laplacian_matrix)
    eigenvectors = eigenvectors[:, 1:num_clusters+1]  # Use the first num_clusters eigenvectors
    eigenvectors = F.normalize(eigenvectors, p=2, dim=1)

    # K-Means for clustering
    centroids, cluster_assignments = k_means(eigenvectors, num_clusters)

    return cluster_assignments

## Testing spectral clustering with sparse matrix

In [11]:
# Example usage:
adjacency_matrix = torch.tensor([
    [0., 1., 1., 0., 0., 0., 0., 0., 1., 1.],
    [1., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
    [1., 1., 0., 0., 0., 0., 0., 0., 0., 0],
    [0., 0., 0., 0., 1., 1., 0., 0., 0., 0],
    [0., 0., 0., 1., 0., 1., 0., 0., 0., 0],
    [0., 0., 0., 1., 1., 0., 1., 1., 0., 0],
    [0., 0., 0., 0., 0., 1., 0., 1., 0., 0],
    [0., 0., 0., 0., 0., 1., 1., 0., 0., 0],
    [1., 0., 0., 0., 0., 0., 0., 0., 0., 1],
    [1., 0., 0., 0., 0., 0., 0., 0., 1., 0]])
cluster_assignments = spectral_clustering(adjacency_matrix, 2)

print(cluster_assignments)

tensor([0, 1, 1, 1, 1, 0, 0, 0, 0, 0])


In [18]:
import numpy as np
A = np.array([
    [0, 1, 1, 0, 0, 0, 0, 0, 1, 1],
    [1, 0, 1, 0, 0, 0, 0, 0, 0, 0],
    [1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 1, 1, 0, 0, 0, 0],
    [0, 0, 0, 1, 0, 1, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0, 1, 1, 0, 0],
    [0, 0, 0, 0, 0, 1, 0, 1, 0, 0],
    [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
    [1, 0, 0, 0, 0, 0, 0, 0, 0, 1],
    [1, 0, 0, 0, 0, 0, 0, 0, 1, 0]])

D = np.diag(A.sum(axis=1))
L = D-A
vals, vecs = np.linalg.eig(A)

from sklearn.cluster import SpectralClustering

# Building the clustering model
spectral_model_rbf = SpectralClustering(n_clusters = 2, affinity='precomputed')
 
# Training the model and Storing the predicted cluster labels
labels_rbf = spectral_model_rbf.fit_predict(A)
print(labels_rbf)


[0 0 0 1 1 1 1 1 0 0]




## Testing with fully connected matrix

In [77]:
ska_data_file = "/home/duser/masters_project/data/ska_raw_dataset.txt"

#load the file into a numpy array
df = pd.read_csv(ska_data_file, sep=" ", header=None)
#convert the pandas dataframe to a numpy array
data = df.to_numpy()
#the last two element of each row is the x and y coordinate of the point

n = len(data)
dist_matrix = np.zeros((n,n))
for i in range(n):
    for j in range(n):
        x_i, y_i = data[i][-2], data[i][-1]
        x_j, y_j = data[j][-2], data[j][-1]
        euclidean_distance = (x_i - x_j)**2 + (y_i - y_j)**2
        euclidean_distance = euclidean_distance**0.5
        dist_matrix[i][j] = euclidean_distance

dist_matrix = torch.tensor(dist_matrix)
dist_matrix

tensor([[0.0000, 0.0033, 0.0011,  ..., 0.3451, 0.3455, 0.3455],
        [0.0033, 0.0000, 0.0022,  ..., 0.3421, 0.3424, 0.3425],
        [0.0011, 0.0022, 0.0000,  ..., 0.3440, 0.3443, 0.3444],
        ...,
        [0.3451, 0.3421, 0.3440,  ..., 0.0000, 0.0009, 0.0006],
        [0.3455, 0.3424, 0.3443,  ..., 0.0009, 0.0000, 0.0005],
        [0.3455, 0.3425, 0.3444,  ..., 0.0006, 0.0005, 0.0000]],
       dtype=torch.float64)

In [88]:
spectral_clustering(dist_matrix, 2)

tensor([1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1,
        1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,
        1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
        1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
        1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1,
        0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1,
        1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [94]:
from sklearn.cluster import SpectralClustering

# Building the clustering model
spectral_model_rbf = SpectralClustering(n_clusters = 2, n_init=1, n_components=1, affinity='precomputed')
 
# Training the model and Storing the predicted cluster labels
labels_rbf = spectral_model_rbf.fit_predict(dist_matrix)
print(labels_rbf)

[1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 1 1 0 0 0 0 1 0 1 0 1 1 0 0 1 1
 1 0 1 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0
 1 1 1 0 1 0 0 1 0 0 1 1 1 1 0 0 0 0 1 0 0 1 0 1 1 0 1 0 1 0 0 1 0 1 0 0 0
 0 0 0 0 1 1 0 1 1 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 1
 1 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 1 1 0 1 0 1 1
 0 0 1 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 0
 1 1 1 0 1 1 1 1 1 1 0 1 1 0 1 0 0 1 0 1 1 0 0 1 1 0 1 0 1 1 1 0 0 0 0 0 1
 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 0 1 1 0 1 0 1 0 1 0 0 1 1 1 0 1 0 1
 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 1 1 1 1 0
 1 0 0 1 1 1 0 0 1 0 0 1 1 0 1 1 0 1 0 0 1 0 0 0 0 0 0 1 1 1 0 1 1 1 1 0 1
 0 1 1 1 1 1 0 1 1 0 0 1 1 1 1 0 0 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0
 0 1 1 0 0 0 0 0 0 1 1 0 0 1 1 0 0 1 0 1 1 1 0 0 0 1 1 1 0 0 0 0 0 0 1 1 0
 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 1 1 1 0 1 1 1 1 0 1 1
 0 1 1 1 1 1 0 0 0 1 0 1 

In [24]:
import networkx as nx

# Create a graph
G = nx.Graph()
for i, weight in enumerate(graph.x):
    G.add_node(i, weight=weight)

# Add edges with attributes
for (i, j), attr in zip(graph.edge_index.T, graph.edge_attr):
    G.add_edge(i, j, weight=attr)


In [27]:
adjacency_matrix = nx.adjacency_matrix(G, weight='weight').toarray()

  return asarray(a).ndim


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (261632,) + inhomogeneous part.