In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances
from sklearn.decomposition import PCA

In [2]:
# Generate example data (blobs dataset)
X, y = make_blobs(n_samples=300, centers=3, random_state=42)

In [3]:
# Create the Fuzzy C-Means model
n_clusters = 3
m = 2  # Fuzziness parameter (a value of 2 is commonly used)
error = 1e-6  # Tolerance to check for convergence
max_iter = 100  # Maximum number of iterations
random_state = 42

# Transform data to lower-dimensional space using PCA for improved performance (optional)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

In [4]:
def calculate_membership_degrees(X, centroids):
    distances = pairwise_distances(X, centroids)
    inv_distances = 1.0 / distances**(2 / (m - 1))
    membership_degrees = inv_distances / np.sum(inv_distances, axis=1)[:, np.newaxis]
    return membership_degrees

In [5]:
def update_cluster_centers(X, membership_degrees):
    centroids = np.dot(membership_degrees.T, X) / np.sum(membership_degrees, axis=0)[:, np.newaxis]
    return centroids

In [6]:
def fuzzy_cmeans(X, n_clusters, m, error, max_iter, random_state):
    np.random.seed(random_state)
    n_samples, n_features = X.shape
    
    # Randomly initialize the cluster centers
    centroids = X[np.random.choice(n_samples, n_clusters, replace=False)]
    
    for _ in range(max_iter):
        old_centroids = centroids.copy()
        
        # Calculate membership degrees for each data point
        membership_degrees = calculate_membership_degrees(X, centroids)
        
        # Update cluster centers based on membership degrees
        centroids = update_cluster_centers(X, membership_degrees)
        
        # Check for convergence
        if np.linalg.norm(centroids - old_centroids) < error:
            break
    
    return centroids, membership_degrees

In [7]:
# Fit the Fuzzy C-Means model to the data
centroids, membership_degrees = fuzzy_cmeans(X_pca, n_clusters, m, error, max_iter, random_state)

# Extract cluster assignments from membership degrees
clusters = np.argmax(membership_degrees, axis=1)

# Print the cluster centers (centroids)
print("Cluster Centers:")
print(centroids)

  inv_distances = 1.0 / distances**(2 / (m - 1))
  membership_degrees = inv_distances / np.sum(inv_distances, axis=1)[:, np.newaxis]


ValueError: Input contains NaN.

In [None]:
# Plot the clustering result
plt.figure(figsize=(8, 6))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=clusters, cmap='viridis', s=50)
plt.scatter(centroids[:, 0], centroids[:, 1], c='red', marker='X', s=200, label='Cluster Centers')
plt.title("Fuzzy C-Means Clustering Result")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.legend()
plt.show()
