# K-means


In [10]:
import numpy as np
import os
import cv2
import numpy as np
import image_processing as ip


#### Step 1: Clusterización


In [3]:
class Pixel:
    def __init__(self, rgb, x, y):
        self.rgb = rgb
        self.x = x
        self.y = y

In [13]:
class KMeans:
    def __init__(self, k_value=3, image_path=None):
        self.k = k_value
        self.image_path = image_path
        self.error = 100
        self.max_int = 1000000
        self.error_evolution = []

    """
    Función para calcular la disntacia euclidiana entre dos puntos.
    """
    def calculateEuclidieanDistance(self, point1, point2):
        return sum(((int(point1[c]) - int(point2[c])) ** 2) for c in range(self.channels))

    """
    Función para inicializar los centroides.
    """
    def initializeCentroids(self, image):
        centroids = [image[np.random.randint(0, self.rows)][np.random.randint(0, self.cols)]]
        for _ in range(self.k-1):
            candidate_rgb = []
            candidate_distance = 0
            for x in range(self.rows):
                for y in range(self.cols):
                    tmp_dist = self.max_int
                    rgb = []
                    for c in centroids:
                        distance = self.calculateEuclidieanDistance(
                            image[x][y], c)
                        if(distance < tmp_dist):
                            tmp_dist = distance
                            rgb = image[x][y]
                    if(candidate_distance < tmp_dist):
                        candidate_rgb = rgb
                        candidate_distance = tmp_dist
            centroids.append(candidate_rgb)
        self.centroids = centroids

    """
    La función train() calcula mediante un proceso iterativo los centroides óptimos (que minimizar el error) para el conjunto de datos.
    """
    def train(self, image):
        init_segments = {}
        counts = {}
        for center in self.centroids:
            counts[tuple(center)] = 0
            if self.channels == 1:
                init_segments[center] = 0
            elif self.channels == 3:
                init_segments[tuple(center)] = [0, 0, 0]
        for x in range(self.rows):
            for y in range(self.cols):
                dist = self.max_int
                center_point = None
                for _, center in enumerate(self.centroids):
                    temp_distance = self.calculateEuclidieanDistance(center, image[x][y])
                    if(temp_distance < dist):
                        dist = temp_distance
                        center_point = center
                init_segments[tuple(center_point)] = [
                    x+y for x, y in zip(init_segments[tuple(center_point)], image[x][y])]
                counts[tuple(center_point)] += 1

        for center in self.centroids:
            if(self.channels == 1):
                init_segments[tuple(center)] = init_segments[tuple(
                    center)][0]//counts[tuple(center)]
            else:
                for c in range(self.channels):
                    init_segments[tuple(center)][c] = init_segments[tuple(
                        center)][c]//counts[tuple(center)]

        last_centroids = self.centroids.copy()
        self.centroids = list(init_segments.values()).copy()

        error = sum(
            sum(abs(x - y) for x, y in zip(center, self.centroids[count]))
            for count, center in enumerate(last_centroids)
        )
        self.error = error
        self.error_evolution.append(error)

    """
    En la función clusterize() se le asigna a cada centroide su conjuntos de pixels correspondientes, utilizanod la función de calculo de la distancia euclidiana.
    Así mismo, se realiza la identificación del cluster correspondiente al supuerto tumor mediante el método de selección del cluster con menor número de pixeles. 
    """
    def clusterize(self, image, write_image_path, write_image_name):
        clusterized_data = {}
        for centroid in self.centroids:
            clusterized_data[str(centroid)] = []
        for x in range(self.rows):
            for y in range(self.cols):
                dist = self.max_int
                center = None
                for centroid in self.centroids:
                    temp = self.calculateEuclidieanDistance(image[x][y], centroid)
                    if(temp < dist):
                        center = centroid
                        dist = temp
                        clusterized_data[str(centroid)].append(
                            Pixel(center, x, y))
                image[x][y] = center

        image_temp = image
        # seleccionamos el cluster con menos pixeles considerando que podría tratarse del tumor
        min_centroid = self.centroids[0]
        for centroid in self.centroids:
            if len(clusterized_data[str(min_centroid)]) > len(clusterized_data[str(centroid)]):
                min_centroid = centroid

        for centroid in self.centroids:
            if min_centroid == centroid:
                for pixel in clusterized_data[str(centroid)]:
                    image_temp[pixel.x][pixel.y] = [255, 0, 0]

        cv2.imwrite(f'{write_image_path}{write_image_name}', image_temp)
        return clusterized_data[str(min_centroid)]

    """
    Función para ejecutar todo el kmeans. Incluye la inicialización random de los centroides y el llamado a la función de entrenamiento.
    """
    def execute(self, write_image_path, write_image_name, error_max=3):
        image = cv2.imread(self.image_path)
        self.rows, self.cols, self.channels = image.shape
        self.initializeCentroids(image)
        self.train(image)

        #def show_info(error, centroids): return print(
        #    "Error : {} px , Centroids : {}".format(error, centroids))  # [OPTIONAL] Show error <---------
        while(self.error > error_max):
            self.train(image)
            # show_info(self.error,self.centroids) ####### [OPTIONAL] Show error <---------
        print(self.error_evolution)
        return self.clusterize(image, write_image_path, write_image_name)


In [None]:
read_path = 'data/dataset/'
write_path = 'data/write_dataset/'

def execute_kmeans_for_i_images(break_iterator):
    imgs_with_tumor_cluster = {}
    for filename in os.listdir(read_path):
        if break_iterator == 0:
            break
        else:
            break_iterator -= 1
        abc = KMeans(k_value=4, image_path=f"{read_path}{filename}")
        tumor_cluster = abc.execute(write_path, filename)
        imgs_with_tumor_cluster[filename] = tumor_cluster
    return imgs_with_tumor_cluster

imgs_with_tumor_cluster = execute_kmeans_for_i_images(10)

#### Step 2: Calcular el número de pixeles dentro del poligono del archivo JSON


In [8]:
for img_with_tumor_cluster in imgs_with_tumor_cluster.items():
    ip.analyze(img_with_tumor_cluster[0], img_with_tumor_cluster[1])

El ratio de puntos para la imagen 189.jpg es: 1.00230729153745%
El ratio de puntos para la imagen 162.jpg es: 0.15229972586049345%
El ratio de puntos para la imagen 176.jpg es: 11.883235832890149%
El ratio de puntos para la imagen 348.jpg es: 0.0%
El ratio de puntos para la imagen 360.jpg es: 6.543654365436543%
El ratio de puntos para la imagen 374.jpg es: 16.407744186826832%
El ratio de puntos para la imagen 228.jpg es: 4.579480405107883%
El ratio de puntos para la imagen 214.jpg es: 12.208444050782404%
El ratio de puntos para la imagen 200.jpg es: 3.2653350332908344%
El ratio de puntos para la imagen 201.jpg es: 0.7971700463355089%


#### Experimentación


In [87]:
image_name = "107.jpg"

abc = KMeans(k_value=9, image_path=f"{read_path}{image_name}")
tumor_cluster = abc.execute(write_path, image_name, error_max=1)
ip.analyze(image_name, tumor_cluster)


[45, 27, 15, 18, 12, 15, 18, 21, 15, 12, 12, 12, 15, 18, 18, 9, 9, 9, 9, 6, 6, 3, 3, 3, 3, 0]
El ratio de puntos para la imagen 107.jpg es: 18.45672575599583%


#### Resultados

##### Mejor caso

In [5]:
import cv2
import numpy as np
from shapely.geometry import Polygon

x = [75, 71,
     67,
     71,
     73,
     77,
     100,
     106,
     114,
     123,
     133,
     158,
     164,
     167,
     166,
     157,
     144,
     126,
     111,
     112,
     105,
     92,
     79]
y = [89,
     99,
     111,
     126,
     134,
     144,
     152,
     163,
     164,
     161,
     165,
     154,
     135,
     119,
     103,
     84,
     72,
     67,
     68,
     81,
     86,
     80,
     84]
alpha = 0.5  # that's your transparency factor
path = 'data/write_dataset/107.jpg'

polygon_points = []
for i in range(len(x)):
     polygon_points.append((x[i], y[i]))

polygon = Polygon(polygon_points)
def int_coords(x): return np.array(x).round().astype(np.int32)


exterior = [int_coords(polygon.exterior.coords)]

image = cv2.imread(path)
overlay = image.copy()
cv2.fillPoly(overlay, exterior, color=(255, 255, 0))
cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image)
cv2.imwrite("data/write_dataset/107_with_real_tumor_on_clusterized.jpg", image)

True

##### Peor caso

In [17]:
import cv2
import numpy as np
from shapely.geometry import Polygon

x = [309,
                  284,
                  269,
                  269,
                  274,
                  286,
                  358,
                  390,
                  402,
                  365,
                  335,
                  321]
y = [158,
                  166,
                  184,
                  205,
                  235,
                  257,
                  273,
                  257,
                  232,
                  153,
                  140,
                  146]
alpha = 0.5  # that's your transparency factor
path = 'data/dataset/348.jpg'

polygon_points = []
for i in range(len(x)):
     polygon_points.append((x[i], y[i]))

polygon = Polygon(polygon_points)
def int_coords(x): return np.array(x).round().astype(np.int32)


exterior = [int_coords(polygon.exterior.coords)]

image = cv2.imread(path)
overlay = image.copy()
cv2.fillPoly(overlay, exterior, color=(255, 255, 0))
cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0, image)
cv2.imwrite("data/write_dataset/348_no_clusterized.jpg", image)

True