In [1]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader
from dataset import get_dataset


In [None]:
# Classification mode: use 2 for binary classification, 4 for multiclass classification.
num_classes = 4

# Name of the CSV (under metadata/) that the hair-density results are written to and
# later read back from; the multiclass and binary runs use separate files.
file_name = "hair_densities.csv" if num_classes == 4 else "binary_hair_densities.csv"

In [2]:
def show(img, gray=False):
    """Display a single image with the axes hidden.

    Args:
        img: Image data accepted by ``plt.imshow``.
        gray: When truthy, render with matplotlib's ``binary`` colormap;
            otherwise let ``imshow`` pick its default rendering.
    """
    plt.axis('off')
    if gray:
        plt.imshow(img, plt.cm.binary)
    else:
        plt.imshow(img)
    plt.show()



In [3]:
def hair_density(image, label, show_images=False):
    """Estimate the fraction of pixels in ``image`` that look like hair.

    The image is box-blurred, converted from RGB to gray scale, and passed
    through a black-hat morphological filter (17x17 cross-shaped kernel).
    Pixels whose black-hat response exceeds 10 are counted as hair.

    Args:
        image: RGB image array accepted by OpenCV (the caller passes uint8 data).
        label: Unused; kept so existing call sites keep working.
        show_images: When true and the density is at least 0.02, plot the
            original image, its gray-scale version and the detected-hair mask.

    Returns:
        The ratio of hair pixels to total pixels, as a float.
    """
    blur_image = cv2.blur(image, (10, 10))
    grayScale = cv2.cvtColor(blur_image, cv2.COLOR_RGB2GRAY)

    # MORPH_CROSS is the named constant for the shape value 1 used previously.
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (17, 17))
    blackhat = cv2.morphologyEx(grayScale, cv2.MORPH_BLACKHAT, kernel)
    _, threshold = cv2.threshold(blackhat, 10, 255, cv2.THRESH_BINARY)

    num = cv2.countNonZero(threshold)
    # Total pixel count is rows * columns. The previous shape[-1] * shape[1]
    # multiplied the width by itself and was only right for square images.
    size = threshold.shape[0] * threshold.shape[1]
    density = num / size

    if density >= 0.02 and show_images:
        figure, axes = plt.subplots(1, 3, figsize=(12, 3))
        axes[0].axis('off')
        axes[0].imshow(image)
        axes[0].set_title('Original Image')
        axes[1].axis('off')
        axes[1].imshow(grayScale, plt.cm.binary)
        axes[1].set_title('Gray Scale')
        axes[2].axis('off')
        axes[2].imshow(threshold)
        axes[2].set_title('Detected Hairs')
        plt.show()
    return density


In [None]:
# Score every image of the "test" split for hair coverage, report the images at or
# above the 0.02 density threshold, and save one row per image to metadata/<file_name>.
data_set = get_dataset("test", use_plain_transform=True, id_as_label=True, num_classes=num_classes)

# batch_size=1 so each iteration handles exactly one image, in dataset order.
data_loader = DataLoader(data_set, batch_size=1, shuffle=False, num_workers=0)

hair_densities = {"isic_id": [], "hair_density": [], "high_hair_density": []}
counter = 0  # number of high-density images found so far
# Per-class tally of high-density images: two classes for binary, four otherwise.
if num_classes == 2:
    label_count = {0: 0, 1: 0}
else:
    label_count = {0: 0, 1: 0, 2: 0, 3: 0}

for batch, labels in data_loader:
    # As indexed here, labels[0] carries the image id and labels[1] the class label.
    isic_id = labels[0][0]
    # presumably converts (C, H, W) to (H, W, C) for OpenCV — TODO confirm tensor layout
    img = batch[0].permute(1, 2, 0).numpy()
    # NOTE(review): assumes pixel values are already in the 0-255 range — confirm
    img = img.astype(np.uint8)
    # Only plot diagnostics until three high-density images have been found.
    density = hair_density(img, labels, show_images=counter < 3)
    is_high_density = density >= 0.02
    if is_high_density:
        label = labels[1][0].item()
        print(f"Image: {counter+1}")
        print(isic_id)
        print(label)
        print(density)
        counter = counter + 1
        label_count[label] = label_count[label] + 1

    hair_densities["isic_id"].append(isic_id)
    hair_densities["hair_density"].append(density)
    # Record the flag for every image. Previously only the 0 case was appended,
    # which left this column shorter than the others and made the DataFrame
    # construction below fail whenever a high-density image was found.
    hair_densities["high_hair_density"].append(1 if is_high_density else 0)

print(f"Found {counter} hair images")
print(label_count)

dataframe_hair_densities = pd.DataFrame(hair_densities)
print(dataframe_hair_densities)
dataframe_hair_densities.to_csv(f"metadata/{file_name}", index=False)


In [None]:
# Manual review pass for removing false positives from the automatic hair detection.
# Every image flagged with high_hair_density == 1 is shown one at a time.
# Type 'y' if hair is visible; any other input clears the flag for that image.
# The reviewed table is written to the current working directory (not metadata/).

df = pd.read_csv(f"metadata/{file_name}")
print(df.head())

data_set = get_dataset("test", use_plain_transform=True, id_as_label=True, num_classes=num_classes)
data_loader = DataLoader(data_set, batch_size=1, shuffle=False, num_workers=0)

# Per-class tally of manually confirmed images: two classes for binary, four otherwise.
label_count = {0: 0, 1: 0} if num_classes == 2 else {0: 0, 1: 0, 2: 0, 3: 0}

for batch, labels in data_loader:
    isic_id = labels[0][0]
    img = batch[0].permute(1, 2, 0).numpy().astype(np.uint8)
    label = labels[1][0].item()

    # Select this image's row once; .item() expects exactly one matching row.
    row_mask = df['isic_id'] == isic_id
    if df.loc[row_mask, 'high_hair_density'].item() != 1:
        continue

    plt.axis('off')
    plt.imshow(img)
    plt.show()

    confirm = input()
    if confirm == "y":
        label_count[label] += 1
    else:
        df.loc[row_mask, 'high_hair_density'] = 0

print(label_count)
manual_file_name = "hair_densities_manual.csv" if num_classes == 4 else "binary_hair_densities_manual.csv"
df.to_csv(manual_file_name, index=False)
