In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.utils import check_array

In [2]:
def euclidean_distance(point, centroid):
    return np.sqrt(np.sum((point - centroid) ** 2))

In [3]:
# Loading an image (replace filename if you want):
image_path = 'giraffe.png'
image = cv2.imread(image_path)

# Reducing the size of the image, so that DBSCAN runs in a reasonable amount of time:
# small_image is 0.5x the size of the original. You may change this value.
image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)

height, width, _ = image.shape
pixel_data = image.reshape(-1, 3)

In [4]:
pixel_data.shape

(26800, 3)

In [5]:
#Kmeans parameters
k = 2
max_iterations = 100

### Test Kmeans

In [6]:
# data_points = np.random.rand(50,3) * 10
data_points = pixel_data
data_points.shape

(26800, 3)

In [7]:
test_labels = []

In [8]:
np.random.seed(42)
centroids = data_points[np.random.choice(data_points.shape[0],k,replace=False)]
labels = []

for iteration in range(max_iterations):
    # print(f"Iteration {iteration}, Centroids = {centroids}")
    clusters = [[] for _ in range(k)]
    test_labels = []
    for point in data_points:
        distances = [euclidean_distance(point, centroid) for centroid in centroids]
        cluster_index = np.argmin(distances)
        clusters[cluster_index].append(point)
        test_labels.append(cluster_index)
        # print(f'Point = {point}, distances = {distances}, cluster_index = {cluster_index}')
    # print(f'clusters = {clusters}')

    new_centroids = []
    for cluster in clusters:
        if cluster:
            new_centroid = np.mean(cluster, axis=0)
            new_centroids.append(new_centroid)
        else:
            new_centroids.append(centroids[len(new_centroids)])

    new_centroids = np.array(new_centroids)

    if np.allclose(centroids, new_centroids):
        print(f'Converged after {iteration + 1} iterations')
        break

    centroids = new_centroids
    labels = test_labels
    # print('-'*25)
    

Converged after 11 iterations


In [9]:
labels.count(1) + labels.count(0)

26800

In [10]:
for index in range(len(clusters)):
    print(f'Cluster with index {index} = {clusters[index]}, Total values = {len(clusters[index])}')

Cluster with index 0 = [array([185, 162, 124], dtype=uint8), array([187, 163, 127], dtype=uint8), array([189, 165, 130], dtype=uint8), array([190, 165, 131], dtype=uint8), array([191, 166, 132], dtype=uint8), array([191, 169, 134], dtype=uint8), array([192, 170, 135], dtype=uint8), array([193, 171, 137], dtype=uint8), array([194, 172, 138], dtype=uint8), array([196, 173, 139], dtype=uint8), array([195, 174, 143], dtype=uint8), array([196, 175, 144], dtype=uint8), array([198, 177, 146], dtype=uint8), array([198, 177, 146], dtype=uint8), array([199, 178, 147], dtype=uint8), array([200, 179, 148], dtype=uint8), array([200, 180, 149], dtype=uint8), array([201, 181, 150], dtype=uint8), array([202, 182, 151], dtype=uint8), array([202, 182, 151], dtype=uint8), array([204, 184, 153], dtype=uint8), array([204, 184, 153], dtype=uint8), array([204, 185, 155], dtype=uint8), array([204, 186, 157], dtype=uint8), array([205, 187, 158], dtype=uint8), array([206, 188, 159], dtype=uint8), array([206, 18

In [11]:
clusters = [np.array(cluster) for cluster in clusters]
clusters

[array([[185, 162, 124],
        [187, 163, 127],
        [189, 165, 130],
        ...,
        [ 74, 148, 213],
        [ 74, 148, 214],
        [ 88, 156, 210]], dtype=uint8),
 array([[84, 81, 73],
        [75, 78, 79],
        [74, 80, 80],
        ...,
        [ 4, 35, 39],
        [ 4, 36, 41],
        [ 5, 37, 41]], dtype=uint8)]

In [12]:
for index, cluster in enumerate(clusters):
    print(index, cluster)

0 [[185 162 124]
 [187 163 127]
 [189 165 130]
 ...
 [ 74 148 213]
 [ 74 148 214]
 [ 88 156 210]]
1 [[84 81 73]
 [75 78 79]
 [74 80 80]
 ...
 [ 4 35 39]
 [ 4 36 41]
 [ 5 37 41]]


In [13]:
labels = []
for point in data_points:
    for label, cluster in enumerate(clusters):
        if point in cluster:
            labels.append(label)

print(labels)

[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 

In [14]:
labels.__len__()

45942