# Connected components (LITE)

Упрощённое решение, основанное на анализе компонент связности

In [None]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
from pathlib import Path

In [None]:
# Locations of the input images and of the reference masks, relative to the
# notebook's working directory.
data_path = Path("./data")
images_path = data_path.joinpath("competition/competition/")
masks_path = data_path.joinpath("masks/")

In [None]:
from utils import plot_with_mask
from utils_uint8 import *
from binarization import *

In [None]:
from collections import defaultdict

In [None]:
def get_mask(image: np.ndarray, initial_treshold: int = 40, min_component_area: int = 50) -> np.ndarray:
    """
    Build a binary mask from the bright connected components of an image.

    The image is restricted to the brain mask, denoised with a bilateral
    filter and thresholded. The thresholded mask is cleaned morphologically
    and split into connected components. Components that are large enough are
    kept only if their median intensity reaches a threshold chosen by
    ``find_best_treshold`` with ``Otsu_criterion``.

    Parameters
    ----------
    image : np.ndarray
        Input image. Callers in this notebook load it with
        ``cv2.IMREAD_GRAYSCALE``; presumably single-channel uint8 -- TODO confirm.
    initial_treshold : int
        Intensity threshold used for the initial pixel selection.
    min_component_area : int
        Connected components whose area does not exceed this value are
        discarded.

    Returns
    -------
    np.ndarray
        Mask with 255 on the selected components and 0 elsewhere.
    """

    # Brain extraction: every pixel is clipped to the brain-mask value
    # (presumably 0 outside the brain -- verify in get_brain_mask).
    brain_mask = get_brain_mask(image, use_bone_convex_hull=True)
    image = np.minimum(image, brain_mask)

    # Noise filtering.
    image = cv2.bilateralFilter(image, 9, 10, 15)

    # Initial mask: pixels brighter than the threshold.
    _, mask = cv2.threshold(image, initial_treshold, 255, cv2.THRESH_BINARY)

    # Morphological clean-up of the mask: erode (2), dilate (4), erode (2),
    # each with an elliptical structuring element.
    for morph_op, kernel_size in ((cv2.erode, 2), (cv2.dilate, 4), (cv2.erode, 2)):
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size, kernel_size))
        mask = morph_op(mask, kernel)

    # Dilation may have grown the mask past the brain; cut that part off.
    mask = np.minimum(mask, brain_mask)

    # Connected components of the cleaned mask.
    n_components, components_markers, stats, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)

    components_data = defaultdict(list)
    for index in range(1, n_components):  # Label 0 is skipped (background).
        component_area = stats[index, cv2.CC_STAT_AREA]

        # Drop components that are too small.
        if component_area <= min_component_area:
            continue

        # Features of this component. NOTE: the "mean" column actually holds
        # the median intensity; the key is kept as is.
        components_data["index"].append(index)
        components_data["area"].append(component_area)
        components_data["mean"].append(np.median(image[components_markers == index]))

    # Build the final mask from scratch.
    mask.fill(0)
    if components_data:
        components_data = pd.DataFrame(components_data)

        # Medians are capped at 55.0 for the threshold search only; the
        # comparison below uses the uncapped values.
        best_treshold = find_best_treshold(np.minimum(55.0, components_data["mean"]), Otsu_criterion)

        # Paint every component whose median reaches the threshold in one
        # pass, using the integer labels directly.
        is_outlier = components_data["mean"] >= best_treshold
        selected_labels = components_data.loc[is_outlier, "index"].to_numpy()
        mask[np.isin(components_markers, selected_labels)] = 255

    return mask

In [None]:
# Visual check of get_mask on 25 randomly drawn image numbers.
# Alternative index ranges kept from earlier runs:
# range(26600, 26725) and range(8000, 8025).
random_indices = np.random.randint(low=0, high=26725, size=25)

for index in random_indices:
    print(index)

    # Image files are named by their zero-padded six-digit number.
    name = f"{index:06d}.jpg"
    image_path = images_path.joinpath(name)
    image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)

    mask = get_mask(image)
    plot_with_mask(image, mask)

    plt.close()

In [None]:
# Intersection-over-union of the predicted mask against the reference mask
# for images 000000-000004.
ious = []

for index in range(5):
    name = f"{index:06d}.jpg"
    image_path = images_path / name
    mask_path = masks_path / name

    image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)
    # NOTE(review): the reference mask is read from a .jpg file; if it is
    # stored with lossy compression, faint non-zero artefacts would count as
    # foreground below -- confirm against the data.
    true_mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)

    mask = get_mask(image)
    plot_with_mask(image, mask)

    # A pixel belongs to the intersection when both masks are non-zero there
    # and to the union when at least one of them is.
    intersection = np.count_nonzero(np.minimum(mask, true_mask))
    union = np.count_nonzero(np.maximum(mask, true_mask))

    # When both masks are empty the union is 0 and the ratio is undefined;
    # two empty masks agree perfectly, so the score is defined as 1.0 instead
    # of raising ZeroDivisionError.
    intersection_over_union = intersection / union if union else 1.0
    ious.append(intersection_over_union)

In [None]:
# Mean IoU over the evaluated images. Dividing by len(ious) rather than a
# literal keeps the average correct if the number of evaluated images changes.
sum(ious) / len(ious)

In [None]:
def df_from_mask(name, mask):
    """
    Convert a mask into a one-column table with one row per pixel.

    Parameters
    ----------
    name
        Prefix of every row label.
    mask
        Array whose flattened values fill the ``value`` column.

    Returns
    -------
    pd.DataFrame
        Frame with a single int32 column ``value`` and an index named ``ID``.
        The label of flat position ``n`` is ``<name>_<row>_<col>`` with
        ``row = n // mask.shape[1]`` and ``col = n % mask.shape[1]``.
    """
    labels = [
        name + f"_{n // mask.shape[1]}_{n % mask.shape[1]}"
        for n in range(mask.size)
    ]
    pixel_ids = pd.Index(labels, name='ID')
    return pd.DataFrame({'value': mask.flatten()}, index=pixel_ids, dtype=np.int32)

In [None]:
from tqdm import tqdm

dataframes = []

# Hand-tuned exceptions for individual images.
min_area_overrides = {8001: 300}  # image number -> min_component_area to use
forced_empty = {8019}             # image numbers that get an all-zero mask

# Alternative range kept from an earlier run: range(26600, 26725).
for index in tqdm(range(8000, 8025)):
    name = f"{index:06d}"
    image_path = images_path / (name + ".jpg")
    image = cv2.imread(str(image_path), cv2.IMREAD_GRAYSCALE)

    if index in forced_empty:
        mask = np.zeros_like(image)
    elif index in min_area_overrides:
        mask = get_mask(image, min_component_area=min_area_overrides[index])
    else:
        mask = get_mask(image)

    # One per-pixel frame per image; all frames are concatenated after the
    # loop instead of growing a single frame on every iteration.
    dataframes.append(df_from_mask(name, mask.astype(bool)))

    # To inspect a mask visually, show it with plt.imshow(mask) here.

In [None]:
# Stack the per-image frames row-wise into one table and write it to CSV.
answer = pd.concat(dataframes, axis=0)
answer.to_csv(path_or_buf='connected_components.csv')