In [1]:
import numpy as np
import pandas as pd
from numpy.random import randint
import matplotlib.pyplot as plt
from scipy.spatial import distance
from scipy.spatial.distance import cdist
from sklearn.cluster import SpectralClustering
from scipy import spatial

In [2]:
# Function to compute the stochastic matrix
def stochastic_matrix(data, sigma=1.0):
    """Build a row-stochastic matrix from a Gaussian similarity kernel.

    The similarity between samples i and j is ``exp(-d_ij**2 / sigma**2)``,
    where ``d_ij`` is their Euclidean distance. Each row of the similarity
    matrix is then divided by its sum so that every row adds up to 1.

    Parameters
    ----------
    data : array-like, one sample per row
        Anything accepted by ``scipy.spatial.distance.cdist``.
    sigma : float, default 1.0
        Kernel bandwidth; must be strictly positive.

    Returns
    -------
    numpy.ndarray
        Square matrix with one row/column per sample; rows sum to 1.

    Raises
    ------
    ValueError
        If ``sigma`` is not strictly positive.
    """
    if sigma <= 0:
        raise ValueError("sigma must be strictly positive")

    # Squared Euclidean distances directly; the previous expression
    # `-d * 2 / sigma * 2` evaluated to -4*d/sigma, not a Gaussian kernel.
    squared_dists = cdist(data, data, 'sqeuclidean')
    S = np.exp(-squared_dists / sigma ** 2)

    # Turn the similarity matrix into a stochastic matrix. The diagonal of S
    # is exp(0) == 1, so no row sum can be zero.
    row_sums = S.sum(axis=1)
    Stochastic_Matrix = S / row_sums[:, np.newaxis]

    return Stochastic_Matrix

In [3]:
# Function to compute the spectral clustering
def spectral_clustering(data, n_clusters, sigma=1.0, random_state=None):
    """Cluster ``data`` spectrally using the row-stochastic similarity matrix.

    Parameters
    ----------
    data : array-like, one sample per row
        Passed unchanged to ``stochastic_matrix``.
    n_clusters : int
        Number of clusters requested from ``SpectralClustering``.
    sigma : float, default 1.0
        Kernel bandwidth forwarded to ``stochastic_matrix``.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``SpectralClustering``; pass an int for repeatable labels.

    Returns
    -------
    The label array returned by ``SpectralClustering.fit_predict``.
    """
    # Compute the stochastic matrix
    transition = stochastic_matrix(data, sigma=sigma)

    # Row normalisation makes the matrix non-symmetric, whereas a precomputed
    # affinity is expected to be symmetric. Average with the transpose so the
    # estimator receives a valid affinity instead of fixing it up implicitly.
    affinity = 0.5 * (transition + transition.T)

    # Apply spectral clustering to the resulting data
    spectral = SpectralClustering(
        n_clusters=n_clusters,
        affinity='precomputed',
        random_state=random_state,
    )
    labels = spectral.fit_predict(affinity)

    return labels

In [4]:
# CSV file names of the datasets to process
datasets = [
    'Circles.csv',
    'Half_moons.csv',
    'ThreeGaussians.csv',
    'TwoGaussians.csv',
]

In [5]:
# Cluster every dataset with a nearest-neighbours affinity and show a scatter
# plot of the first two columns coloured by the predicted label.
for dataset in datasets:
    data = pd.read_csv(dataset)

    # Only the three-Gaussians file is split into 3 clusters; all others use 2.
    n_clusters = 3 if dataset == 'ThreeGaussians.csv' else 2
    spectral = SpectralClustering(n_clusters=n_clusters, affinity='nearest_neighbors')
    labels = spectral.fit_predict(data)

    points = data.values
    plt.figure()
    plt.scatter(points[:, 0], points[:, 1], c=labels)
    plt.title('Spectral clustering results for '+dataset)
    plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'Circles.csv'

In [6]:
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.cluster import SpectralClustering
from sklearn.neighbors import kneighbors_graph
from scipy.sparse import csgraph
import matplotlib.pyplot as plt
import pandas as pd

# Function to compute the Laplacian matrix
def compute_laplacian(data, n_neighbors=10):
    """Return the normalized graph Laplacian of a k-nearest-neighbours graph.

    The graph is built with ``kneighbors_graph`` (samples are not their own
    neighbours), made symmetric by averaging it with its transpose, and then
    handed to ``scipy.sparse.csgraph.laplacian`` with ``normed=True``.

    Parameters
    ----------
    data : array-like, one sample per row
        Input passed to ``kneighbors_graph``.
    n_neighbors : int, default 10
        Number of neighbours linked to each sample.

    Returns
    -------
    The matrix produced by ``csgraph.laplacian``.
    """
    knn_graph = kneighbors_graph(data, n_neighbors=n_neighbors, include_self=False)
    # k-NN relations are directed; averaging with the transpose symmetrises them.
    symmetric_graph = 0.5 * (knn_graph + knn_graph.T)
    return csgraph.laplacian(symmetric_graph, normed=True)

# List of datasets
datasets = ['Circles.csv', 'Half_moons.csv', 'ThreeGaussians.csv', 'TwoGaussians.csv']

# For each dataset: cluster it, plot the labelled points, then plot the
# eigenvectors of the normalized graph Laplacian with the smallest eigenvalues.
for dataset in datasets:
    data = pd.read_csv(dataset)
    n_clusters = 3 if dataset == 'ThreeGaussians.csv' else 2
    labels = SpectralClustering(n_clusters=n_clusters, affinity='nearest_neighbors').fit_predict(data)

    plt.figure()
    plt.scatter(data.values[:,0], data.values[:,1], c=labels)
    plt.title('Spectral clustering results for '+dataset)
    plt.show()

    # compute_laplacian symmetrises the k-NN graph, so its normalized
    # Laplacian is symmetric. eigh is the solver for symmetric matrices: it
    # returns real eigenvalues in ascending order with matching eigenvector
    # columns, whereas eig may return complex-typed, unordered results.
    # No separate argsort step is therefore needed.
    laplacian = compute_laplacian(data)
    eigenvals, eigenvcts = np.linalg.eigh(laplacian.toarray())

    # Plot the eigenvectors belonging to the n_clusters smallest eigenvalues
    for i in range(n_clusters):
        plt.figure()
        plt.scatter(range(eigenvcts.shape[0]), eigenvcts[:, i])
        # NOTE(review): the x-axis is fixed to sample indices 0..200; samples
        # beyond index 200 are not shown — confirm the datasets' sizes.
        plt.xlim([0, 200])
        plt.ylim([-2, 2])
        plt.title(f'Eigenvector {i+1} for {dataset}')
        plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'Circles.csv'