In [12]:
import openpyxl
import random
import math
import matplotlib.pyplot as plt
import imageio
import numpy as np

class KMeans:
    """Plain-Python K-Means clustering that can record each iteration as a GIF.

    Points are sequences of numbers. Distances and means use every
    coordinate; the animation frames plot only the first two.
    """

    # Palette for cluster points; cycled when there are more clusters than
    # colours so any value of k can be plotted.
    _COLORS = ("red", "blue", "green", "purple", "orange", "brown", "pink", "gray")

    def __init__(self, k=3, max_iters=100):
        """Store the number of clusters ``k`` and the iteration cap."""
        self.k = k
        self.max_iters = max_iters
        self.centroids = []

    def fit(self, X, save_path="kmeans_animation.gif"):
        """Cluster ``X`` and optionally save an animation of the iterations.

        Args:
            X: Sequence of points (each a sequence of numbers).
            save_path: Where to write the GIF. Pass ``None`` to skip
                rendering entirely and only compute the clusters.

        Returns:
            Dict mapping each cluster index (``0 .. k-1``) to the list of
            points assigned to it in the last iteration that ran. If no
            iteration runs (``max_iters <= 0``) every list is empty.

        Raises:
            ValueError: From ``random.sample`` when ``k`` exceeds ``len(X)``.
        """
        # Randomly initialize K centroids from data points
        self.centroids = random.sample(X, self.k)
        animate = save_path is not None
        frames = []
        # Pre-bind so the return below is defined even when the loop body
        # never executes.
        clusters = {i: [] for i in range(self.k)}

        for iteration in range(self.max_iters):
            clusters = self._assign_points(X)

            # Save frame before updating centroids
            if animate:
                frames.append(self._plot_clusters(X, clusters, iteration))

            # Recalculate centroids; an empty cluster keeps its old centroid.
            new_centroids = [
                self._compute_mean(members) if members else self.centroids[i]
                for i, members in clusters.items()
            ]

            # Stop if centroids don't change
            if new_centroids == self.centroids:
                break
            self.centroids = new_centroids

        # Save GIF animation (skipped when no frame was rendered)
        if frames:
            imageio.mimsave(save_path, frames, duration=0.5)
        return clusters

    def _assign_points(self, X):
        """Group every point of ``X`` under the index of its nearest centroid."""
        clusters = {i: [] for i in range(self.k)}
        for point in X:
            distances = [
                self._euclidean_distance(point, centroid)
                for centroid in self.centroids
            ]
            # index(min(...)) resolves ties in favour of the lowest index.
            clusters[distances.index(min(distances))].append(point)
        return clusters

    def _compute_mean(self, points):
        """Return the coordinate-wise mean of a non-empty list of points."""
        num_points = len(points)
        dim = len(points[0])
        return [sum(p[i] for p in points) / num_points for i in range(dim)]

    def _euclidean_distance(self, point1, point2):
        """Return the Euclidean distance between two points."""
        return math.sqrt(sum((a - b) ** 2 for a, b in zip(point1, point2)))

    def _plot_clusters(self, X, clusters, iteration):
        """Render the current assignment and return it as an image array.

        ``X`` is accepted for signature compatibility but not used; the
        points are taken from ``clusters``.
        """
        fig, ax = plt.subplots(figsize=(8, 6))

        for cluster_index, points in clusters.items():
            color = self._COLORS[cluster_index % len(self._COLORS)]
            if points:
                # One scatter call per cluster instead of one per point.
                ax.scatter(
                    [p[0] for p in points],
                    [p[1] for p in points],
                    color=color,
                    alpha=0.6,
                )
            centroid = self.centroids[cluster_index]
            ax.scatter(centroid[0], centroid[1],
                       color="black", marker="X", s=200, edgecolors="white",
                       label=f"Centroid {cluster_index+1}")

        ax.set_title(f"K-Means Clustering (Iteration {iteration+1})")
        ax.set_xlabel("Feature 1")
        ax.set_ylabel("Feature 2")
        ax.legend()

        # Draw first so the canvas buffer holds the finished figure.
        fig.canvas.draw()
        image = np.array(fig.canvas.renderer.buffer_rgba())
        # Close the figure so one is not left open per iteration.
        plt.close(fig)
        return image


# Load dataset from Excel
file_path = "DataSetNew-GPS.xlsx"  # Change this to your actual file
SAMPLE_SIZE = 200  # Number of rows drawn at random for clustering

wb = openpyxl.load_workbook(file_path)
sheet = wb.active

# Read the rows as plain value tuples, keeping only those whose first two
# cells are numbers. A header or blank row would otherwise break the
# min/max arithmetic below whenever it happened to land in the sample.
all_rows = list(sheet.iter_rows(values_only=True))
data = [
    row
    for row in all_rows
    if len(row) >= 2 and all(isinstance(value, (int, float)) for value in row[:2])
]
skipped_rows = len(all_rows) - len(data)  # rows dropped as non-numeric
if not data:
    raise ValueError(f"No numeric rows found in the first two columns of {file_path}")

# Randomly select up to SAMPLE_SIZE rows without shuffling the whole list
data = random.sample(data, min(SAMPLE_SIZE, len(data)))

# Extract features (first 2 columns)
X = [list(row[:2]) for row in data]

# Min-Max Normalization
min_x1 = min(point[0] for point in X)
max_x1 = max(point[0] for point in X)
min_x2 = min(point[1] for point in X)
max_x2 = max(point[1] for point in X)

# A feature that is constant across the sample has zero range; dividing by 1
# instead maps it to 0.0 rather than raising ZeroDivisionError.
range_x1 = (max_x1 - min_x1) or 1
range_x2 = (max_x2 - min_x2) or 1

X = [[(p[0] - min_x1) / range_x1, (p[1] - min_x2) / range_x2] for p in X]

# Run K-Means clustering
kmeans = KMeans(k=2)  # Change k to adjust number of clusters
clusters = kmeans.fit(X, save_path="kmeans_clustering.gif")

print("K-Means Clustering GIF Saved Successfully!")


K-Means Clustering GIF Saved Successfully!
