In [1]:
import numpy as np
import cv2
import tqdm
import os
import sys

# Palette used to paint the clusters: row i is the color of cluster i.
# NOTE(review): these triples are written straight into OpenCV frames, so the
# channel order is presumably B, G, R (despite the name "GBR"), which would
# make [0, 0, 255] red -- confirm before renaming.
GBR = [[0, 0, 255],
       [0, 128, 255],
       [255, 0, 0],
       [128, 0, 128],
       [255, 0, 255]]

# path configuration: inputs are read from, and results written to, the
# directory the notebook was started in
project_root = os.path.abspath('.')
output_path = project_root
input_path = project_root
# exist_ok=True replaces the separate exists() check and its check-then-create race
os.makedirs(output_path, exist_ok=True)

In [2]:
def kmeans(data: np.ndarray, n_cl: int):
    """
        K-means (Lloyd's algorithm) on the rows of ``data``.

    The initial centers are ``n_cl`` distinct rows picked with
    ``np.random.choice``; seed NumPy's global RNG beforehand if reproducible
    results are needed. Assignment and center-update steps alternate until
    the labels stop changing.

    :param data:    2-D array of shape (n_samples, n_features)
    :param n_cl:    number of classes, 1 <= n_cl <= n_samples
    :return:        integer array of shape (n_samples,) holding the class
                    index (0 .. n_cl - 1) of every sample
    :raises ValueError: if ``n_cl`` is outside 1 .. n_samples
    """
    n_samples, channel = data.shape
    if not 1 <= n_cl <= n_samples:
        raise ValueError("n_cl must be between 1 and the number of samples (%d), got %d"
                         % (n_samples, n_cl))

    # Integer inputs (e.g. uint8 pixels) would wrap around in the subtraction
    # below and truncate the means, so work on a floating-point copy.
    if not np.issubdtype(data.dtype, np.floating):
        data = data.astype(np.float64)

    # Fancy indexing returns a copy, so the centers can be updated in place.
    centers = data[np.random.choice(n_samples, n_cl, replace=False)]
    labels = None

    while True:
        # Squared Euclidean distance of every sample to every center; the
        # square root is monotonic, so it is not needed for the argmin.
        diff = data[:, np.newaxis, :] - centers[np.newaxis, :, :]
        sq_distance = np.sum(diff ** 2, axis=2)
        new_labels = np.argmin(sq_distance, axis=1)

        if labels is not None and np.array_equal(new_labels, labels):
            return new_labels
        labels = new_labels

        for cat in range(n_cl):
            members = data[labels == cat]
            # A cluster can end up empty (e.g. duplicate initial centers on
            # flat image regions). Keep its previous center instead of
            # taking the mean of nothing, which would yield NaN.
            if len(members):
                centers[cat] = members.mean(axis=0)

In [3]:
def detect(video, n_cl=2):
    """
        Cluster the pixel colors of every frame of a video with k-means and
        save the color-coded label maps as a new video.

    The result is written to ``output_path`` as ``result_with_<n_cl>clz.mp4``.

    :param video:   path of the input video, handed to ``cv2.VideoCapture``
    :param n_cl:    number of classes per frame; every class index selects a
                    row of ``GBR``, so indices beyond the palette cannot be
                    colored
    :return:        None
    :raises IOError: if the input video cannot be opened
    """
    # load video, get number of frames and get shape of frame
    cap = cv2.VideoCapture(video)
    if not cap.isOpened():
        cap.release()
        raise IOError("cannot open video: %s" % video)
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

    # instantiate a video writer
    # NOTE(review): the output frame rate is derived from the frame COUNT
    # (count / 10), which makes the result last roughly ten seconds -- confirm
    # this is intended rather than the source rate (cv2.CAP_PROP_FPS).
    video_writer = cv2.VideoWriter(os.path.join(output_path, "result_with_%dclz.mp4" % n_cl),
                                   cv2.VideoWriter_fourcc(*'mp4v'),
                                   (n_frames / 10),
                                   size,
                                   isColor=True)

    # color lookup table: palette[k] is the color of class k
    palette = np.asarray(GBR, dtype=np.uint8)

    print("Begin clustering with %d classes:" % n_cl)
    bar = tqdm.tqdm(total=n_frames)  # progress bar
    try:
        ret, frame = cap.read()
        while ret:
            frame = np.float32(frame)
            h, w, c = frame.shape

            # k-means on the flattened (pixels, channels) view of the frame
            labels = kmeans(frame.reshape((h * w, c)), n_cl=n_cl)

            # give different cluster different colors: one vectorized table
            # lookup instead of a Python-level loop over every pixel
            labels = np.asarray(labels, dtype=np.intp)  # indexing needs integer labels
            new_frame = palette[labels].reshape((h, w, c))
            video_writer.write(new_frame)

            ret, frame = cap.read()
            bar.update()
    finally:
        # release resources even when clustering or writing fails midway
        bar.close()
        video_writer.release()
        cap.release()
        cv2.destroyAllWindows()


# run the clustering on the sample clip from the input directory, using three classes
video_sample = os.path.join(input_path, "road_video.MOV")
detect(video=video_sample, n_cl=3)


Begin clustering with 3 classes:


100%|██████████| 35/35 [03:18<00:00,  5.68s/it]


### Questions

1. What are the strengths of K-means; when does it perform well?

  - it's simple and efficient, especially for large datasets
  - it's versatile and can be used with a variety of different types of data.
  - it's guaranteed to converge, although possibly to a local optimum rather than the global one
  - it adapts easily to new samples

2. What are the weaknesses of K-means; when does it perform poorly?

  - the parameter `k` needs to be chosen manually
  - centroids can be dragged by outliers, or outliers might get their own cluster instead of being ignored
  - it scales poorly with the number of dimensions: in high-dimensional data, distances between points become less informative
  - k-means has trouble clustering data where clusters are of varying sizes and density

3. What makes K-means a good candidate for the clustering problem, if you have enough knowledge about the data?

  - when the data is well-defined and well-separated
  - k-means can be a good choice for its high performance when the dataset is relatively large
