<a href="https://colab.research.google.com/github/fjadidi2001/Machine_Learning_Journey/blob/main/C_means.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Load Libraries and Dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import skfuzzy as fuzz

# Load the Iris dataset
from sklearn.datasets import load_iris
# Fetch the Iris bunch and build one DataFrame: a column per feature name
# plus a 'target' column holding the class labels.
data = load_iris()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Step 2: EDA
# Structural overview of the frame. DataFrame.info() writes its report
# directly to stdout and returns None, so it is called on its own; wrapping
# it in print() would append a stray "None" line after the report.
print("Dataset Information:")
df.info()
print("\nSummary Statistics:")
print(df.describe())

# Check for missing values (count of nulls per column)
print("\nMissing Values:")
print(df.isnull().sum())

# Visualize the target distribution (number of rows per class label)
sns.countplot(x='target', data=df)
plt.title("Target Distribution")
plt.show()

# Pairplot of features, coloured by class, with KDE curves on the diagonal
sns.pairplot(df, hue='target', diag_kind='kde')
plt.show()

# Step 3: Feature Engineering
# Pull the label column out first, then keep every remaining column as the
# feature matrix.
y = df['target']
X = df.drop('target', axis=1)

# Standardize the features: fit the scaler on X, then apply it to the same X.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Step 4: PCA
# Project the scaled features onto two principal components so the data
# can be drawn on a plane.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Scatter the projected points, one series (and legend entry) per true class.
plt.figure(figsize=(8, 6))
for class_id in np.unique(y):
    in_class = y == class_id
    plt.scatter(
        X_pca[in_class, 0],
        X_pca[in_class, 1],
        label=f"Class {class_id}",
    )
plt.title("PCA of Iris Dataset")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.legend()
plt.show()

# Step 5: C-Means Clustering
# NOTE: `fuzz` is the `skfuzzy` module, distributed on PyPI as
# `scikit-fuzzy`; install it first (e.g. `pip install scikit-fuzzy`) or the
# import at the top of the file raises ModuleNotFoundError.

# Transpose PCA data for Fuzzy C-Means: cmeans takes the data as
# (features, samples), whereas X_pca is (samples, components).
X_pca_transposed = np.transpose(X_pca)

# Perform Fuzzy C-Means clustering.
# Only the cluster centres (`cntr`) and the final membership matrix (`u`)
# are used; the remaining five return values are discarded.
n_clusters = 3  # Number of clusters to form
cntr, u, _, _, _, _, _ = fuzz.cluster.cmeans(
    X_pca_transposed,
    c=n_clusters,
    m=2,              # Fuzziness parameter
    error=0.005,      # Convergence criteria
    maxiter=1000,     # Max iterations
    seed=42,          # Fixed seed: the initial partition is random, so
                      # without it cluster numbering changes between runs
)

# Assign clusters based on max membership: for every sample (column of `u`)
# pick the cluster index (row) with the highest membership degree.
cluster_labels = np.argmax(u, axis=0)

# Visualize Clustering Results
# One scatter series per hard cluster assignment (legend numbers clusters
# from 1), followed by the cluster centres drawn as large red X markers.
plt.figure(figsize=(8, 6))
for cluster_idx in range(n_clusters):
    members = cluster_labels == cluster_idx
    plt.scatter(
        X_pca[members, 0],
        X_pca[members, 1],
        label=f"Cluster {cluster_idx + 1}",
    )
plt.scatter(
    cntr[:, 0],
    cntr[:, 1],
    c='red',
    marker='X',
    s=200,
    label='Centroids',
)
plt.title("Fuzzy C-Means Clustering on PCA-Reduced Data")
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.legend()
plt.show()


ModuleNotFoundError: No module named 'skfuzzy'