# Installing Libraries (Python version >= 3.8)

In [None]:
import sys
# Show which interpreter is running and stop right away unless it is Python 3 with minor version 8 or later.
version = sys.version_info
print(version)
assert version[0] == 3 and version[1] >= 8

In [None]:
!python -m pip install numpy==1.23.5 pandas==1.5.3 scikit-learn==1.2.2 matplotlib==3.7.4

# Loading Dataset

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets

# Load the iris dataset through scikit-learn and wrap its feature matrix in a
# DataFrame whose columns are named after the features.
iris = datasets.load_iris()
iris_data = pd.DataFrame(data=iris["data"], columns=iris["feature_names"])
iris_data.head()

# Training Model

In [None]:
class KMeans:
    """Minimal k-means clustering: alternate nearest-centroid assignment and mean update."""

    def __init__(self, n_clusters: int, max_iter: int = 300):
        """Initialize KMeans model.

        :param n_clusters: Number of clusters
        :param max_iter: Maximum number of assignment/update iterations
        """
        self.n_clusters = n_clusters
        self.max_iter = max_iter

    def fit(self, X: np.ndarray) -> np.ndarray:
        """Train KMeans model.

        Initial centroids are picked at random through NumPy's global random
        state, so call ``np.random.seed`` beforehand for reproducible results.

        :param X: Training data of shape (n_samples, n_features)
        :return: Cluster label of every sample, shape (n_samples,), stored as floats
        :raises ValueError: If n_clusters is not between 1 and the number of samples
        """
        X = np.asarray(X)
        n_samples = X.shape[0]
        if not 1 <= self.n_clusters <= n_samples:
            raise ValueError(
                f"n_clusters must be between 1 and the number of samples "
                f"({n_samples}), got {self.n_clusters}"
            )

        # Pick distinct rows as starting centroids. Sampling with replacement
        # could start two clusters on the very same row, leaving one of them empty.
        seed_rows = np.random.choice(n_samples, self.n_clusters, replace=False)
        centroids = X[seed_rows].astype(float)

        # Labels are kept in a float array so callers always get the same dtype,
        # including when max_iter is 0 and no assignment step runs.
        cluster_labels = np.zeros(n_samples)

        for _ in range(self.max_iter):
            # Squared Euclidean distance from every sample to every centroid,
            # shape (n_samples, n_clusters), computed in one broadcast.
            offsets = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
            distances = np.sum(offsets ** 2, axis=2)
            # Assign each sample to its nearest centroid (ties go to the lowest index).
            cluster_labels = np.argmin(distances, axis=1).astype(float)

            # Work on a copy so the previous centroids stay intact for the
            # convergence comparison below.
            updated_centroids = centroids.copy()
            for c in range(self.n_clusters):
                members = X[cluster_labels == c]
                # An empty cluster keeps its previous centroid. Taking the mean
                # of zero rows would give NaN, and argmin would then send every
                # sample to that NaN centroid on the next pass.
                if members.shape[0] > 0:
                    updated_centroids[c] = members.mean(axis=0)

            # Once the centroids stop moving, further iterations cannot change
            # the assignment, so stop early.
            if np.array_equal(updated_centroids, centroids):
                break
            centroids = updated_centroids

        return cluster_labels

In [None]:
# Cluster on the original feature columns only. Selecting them by name keeps this
# cell safe to re-run after a "cluster_label" column has been added to iris_data;
# otherwise the stale labels would be fed back in as an extra feature.
cluster_labels = KMeans(4).fit(iris_data[iris.feature_names].values)
cluster_labels

# Visualizing Results

In [None]:
from matplotlib import pyplot as plt

# Store the labels alongside the features, then scatter the first two columns
# of the frame, colouring each point by its cluster label.
iris_data["cluster_label"] = cluster_labels
plt.scatter(
    x=iris_data.iloc[:, 0],
    y=iris_data.iloc[:, 1],
    c=iris_data["cluster_label"],
    cmap="winter",
)
plt.title("k-means clustering result (k=4)")