# **Experiment 8 - K-Means Clustering**


**Imports**


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

**Creating Dataset**


In [None]:
# Generate 1000 synthetic two-feature samples with a fixed seed.
# `data` receives the sample coordinates and `cluster` the generated labels.
data, cluster = make_blobs(n_features=2, n_samples=1000, random_state=23)

**Plotting Dataset**


In [None]:
# Scatter the raw samples: first feature on the x-axis, second on the y-axis.
# plt.scatter needs x and y separately; passing the 2-D array alone raises
# a TypeError for the missing `y` argument.
plt.figure(figsize=(5, 5))
plt.scatter(data[:, 0], data[:, 1])

**Initializing Random Centroids**


In [None]:
# Number of clusters to fit.
k = 3
# Maps cluster index -> {"center": coordinates, "points": assigned samples}.
clusters = {}
# Fixed seed so the random starting centroids are reproducible.
np.random.seed(23)

for i in range(k):
    # Draw one coordinate per feature, uniformly from [-2, 2).
    center = 2 * (2 * np.random.random((data.shape[1],)) - 1)
    # Build the entry inline so the `cluster` label array returned by
    # make_blobs is not overwritten by a per-cluster dict.
    clusters[i] = {
        "center": center,
        "points": [],
    }

**Plotting the Random Centroids**


In [None]:
# Draw the samples, then overlay each cluster's current center as a red star.
plt.figure(figsize=(5, 5))
xs, ys = data[:, 0], data[:, 1]
plt.scatter(xs, ys)
for entry in clusters.values():
    centroid = entry["center"]
    plt.scatter(centroid[0], centroid[1], marker="*", c="red")

**Function for Euclidean Distance**


In [None]:
def distance(point1, point2):
    """Return the Euclidean distance between ``point1`` and ``point2``.

    Both arguments must support element-wise subtraction (e.g. NumPy arrays
    of the same shape).
    """
    offset = point1 - point2
    squared_length = np.sum(np.square(offset))
    return np.sqrt(squared_length)

**Function to Assign Cluster**


In [None]:
def assign_cluster(data, clusters):
    """Append every sample to the "points" list of its nearest cluster.

    Args:
        data: 2-D array whose rows are the samples.
        clusters: Dict keyed by consecutive integers starting at 0; each
            value is a dict with a "center" array and a "points" list.

    Returns:
        The same ``clusters`` dict, mutated in place.
    """
    # Take the cluster count from the dict itself instead of the
    # notebook-level global ``k`` so the function works for any dict passed in.
    n_clusters = len(clusters)

    for sample in data:
        dist = [
            distance(sample, clusters[j]["center"]) for j in range(n_clusters)
        ]

        # argmin returns the first minimum, so ties go to the lowest index.
        nearest = int(np.argmin(dist))
        clusters[nearest]["points"].append(sample)

    return clusters

**Function to Update Cluster**


In [None]:
def update_cluster(data, clusters):
    """Move each cluster center to the mean of its assigned points.

    Args:
        data: Unused; kept so the call signature stays unchanged.
        clusters: Dict whose values are dicts with a "center" array and a
            "points" list.

    Returns:
        The same ``clusters`` dict, mutated in place. Clusters that had
        points get a new center and an emptied "points" list.
    """
    # Walk the dict itself instead of ``range(k)`` so the function does not
    # depend on the notebook-level global ``k``.
    for entry in clusters.values():
        points = np.array(entry["points"])

        # A cluster with no assigned points keeps its previous center;
        # the mean of an empty array would be NaN.
        if points.shape[0] > 0:
            entry["center"] = points.mean(axis=0)
            entry["points"] = []

    return clusters

**Predict the Clusters**


In [None]:
def predict_cluster(data, clusters):
    """Return the index of the nearest cluster center for every sample.

    Args:
        data: 2-D array whose rows are the samples.
        clusters: Dict keyed by consecutive integers starting at 0; each
            value is a dict with a "center" array.

    Returns:
        A list with one cluster index per row of ``data``.
    """
    # Take the cluster count from the dict itself instead of the
    # notebook-level global ``k``.
    n_clusters = len(clusters)
    predict = []

    for sample in data:
        dist = [
            distance(sample, clusters[j]["center"]) for j in range(n_clusters)
        ]
        # argmin returns the first minimum, so ties go to the lowest index.
        predict.append(np.argmin(dist))

    return predict

**Calling the Functions**


In [None]:
# K-means alternates assignment and centroid update until the centroids stop
# moving; a single pass leaves them wherever the first update put them.
# The iteration cap guards against a run that never settles.
max_iterations = 100
for _ in range(max_iterations):
    previous_centers = np.array([clusters[i]["center"] for i in clusters])
    clusters = assign_cluster(data, clusters)
    clusters = update_cluster(data, clusters)
    current_centers = np.array([clusters[i]["center"] for i in clusters])
    if np.allclose(previous_centers, current_centers):
        break

# Label every sample with the index of its nearest converged centroid.
predict = predict_cluster(data, clusters)

**Plot the Points as Clusters**


In [None]:
# Color each sample by its predicted cluster index, then overlay every
# cluster's current center as a red triangle.
plt.figure(figsize=(5, 5))
xs, ys = data[:, 0], data[:, 1]
plt.scatter(xs, ys, c=predict)
for entry in clusters.values():
    centroid = entry["center"]
    plt.scatter(centroid[0], centroid[1], marker="^", c="red")