In [8]:
%load_ext autoreload
%autoreload 2

import cv2
import matplotlib.pyplot as plt
import numpy as np
from dataset import get_dataset
import pandas as pd
from torch.utils.data import DataLoader

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Number of target classes: use 2 for binary classification and 4 for
# multiclass classification.
num_classes = 4

# Output CSV name for the skin-tone table: the multiclass file when
# num_classes is 4, the binary file for any other value.
file_name = "skin_tones.csv" if num_classes == 4 else "binary_skin_tones.csv"


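## Skin BGR Bounds

Each bound is listed in OpenCV channel order (blue, green, red) and is applied to the blurred image with `cv2.inRange`. Only the blue and green channels are constrained; the red channel may take any value from 0 to 255.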

* Very Light (label `Type I` in `determine_skin_tone`)
```python
lo = np.array([180, 180, 0])
hi = np.array([255, 255, 255])
```
* Light (label `Type II` in `determine_skin_tone`)
```python
lo = np.array([150, 150, 0])
hi = np.array([200, 200, 255])
```
* Not Light (label `Type III` in `determine_skin_tone`)
```python
lo = np.array([80, 80, 0])
hi = np.array([150, 150, 255])
```


In [9]:
# Lower/upper bounds per skin-tone label, in OpenCV BGR channel order and in
# the order they are evaluated. Built once here instead of on every call.
_SKIN_TONE_BGR_BOUNDS = (
    ('Type I', np.array([180, 180, 0]), np.array([255, 255, 255])),
    ('Type II', np.array([150, 150, 0]), np.array([200, 200, 255])),
    ('Type III', np.array([80, 80, 0]), np.array([150, 150, 255])),
)


def determine_skin_tone(img, show=True, blur_ksize=(50, 50)):
    """Classify the dominant skin tone of an image by colour thresholding.

    The image is converted from RGB to BGR, box-blurred, and then tested
    against three fixed BGR ranges ('Type I', 'Type II', 'Type III'). The
    label whose range matches the most blurred pixels wins; on a tie the
    range evaluated first is kept.

    Args:
        img: 3-channel image array in RGB channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_RGB2BGR``). The visible caller
            supplies a ``uint8`` array.
        show: When true, plot the original image, the blurred image and the
            three masked results side by side.
        blur_ksize: ``(width, height)`` of the box-blur kernel handed to
            ``cv2.blur``. Defaults to the previously hard-coded ``(50, 50)``.

    Returns:
        'Type I', 'Type II' or 'Type III', or 'Other' when no pixel falls
        inside any of the ranges.
    """
    if show:
        figure, axes = plt.subplots(1, 5, figsize=(12, 3))
        axes[0].axis('off')
        axes[0].imshow(img)
        axes[0].set_title('Original Image')

    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    blurred = cv2.blur(bgr, blur_ksize)

    if show:
        axes[1].axis('off')
        axes[1].imshow(cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB))
        axes[1].set_title('Blurred Image')

    best_count = 0
    label = None

    for i, (tone, lo, hi) in enumerate(_SKIN_TONE_BGR_BOUNDS):
        mask = cv2.inRange(blurred, lo, hi)
        # Count matching pixels straight from the mask. Every range requires
        # blue and green >= 80, so a matching pixel can never be black; this
        # equals the old "mask the image, convert to gray, count non-zero"
        # result without the extra (and mislabelled RGB2GRAY) conversion.
        num = cv2.countNonZero(mask)
        if num > best_count:
            best_count = num
            label = tone
        if show:
            # The masked image is only needed for display.
            res = cv2.bitwise_and(blurred, blurred, mask=mask)
            axes[i + 2].axis('off')
            axes[i + 2].imshow(cv2.cvtColor(res, cv2.COLOR_BGR2RGB))
            axes[i + 2].set_title('Filter Type ' + str(i + 1))

    if show:
        plt.tight_layout()
        plt.show()
        # This function is called in a loop; release the figure explicitly so
        # figures do not pile up on backends that keep them alive after show().
        plt.close(figure)

    return label if label is not None else 'Other'


In [None]:
# Load the test split; with id_as_label=True each label is unpacked below as
# (image id, class label).
data_set = get_dataset(
    "test",
    use_plain_transform=True,
    id_as_label=True,
    num_classes=num_classes,
)

# One image per batch, in dataset order, no worker processes.
data_loader = DataLoader(data_set, batch_size=1, shuffle=False, num_workers=0)

# Column-wise accumulator that becomes the output table.
skin_tones = {"isic_id": [], "skin_tone": [], "class_label": []}
counter = 0
for batch, labels in data_loader:
    # Batch size is 1, so take the single entry of each label component.
    isic_id = labels[0][0]
    label = labels[1][0].item()

    # Move the first axis last (presumably channels-first -> channels-last)
    # and cast to uint8 for OpenCV.
    # NOTE(review): astype truncates; this assumes the plain transform yields
    # pixel values in 0-255 rather than 0-1 floats - confirm against get_dataset.
    img = batch[0].permute(1, 2, 0).numpy().astype(np.uint8)

    # Plot the intermediate filter results only for the first 21 images.
    skin_tone = determine_skin_tone(img, counter <= 20)

    skin_tones["isic_id"].append(isic_id)
    skin_tones["skin_tone"].append(skin_tone)
    skin_tones["class_label"].append(label)
    counter += 1

df_skin_tones = pd.DataFrame(skin_tones)

# Overall skin-tone distribution, then the class-label distribution within
# each skin-tone group.
print(df_skin_tones['skin_tone'].value_counts())
for tone in ("Type III", "Type I", "Type II", "Other"):
    print(tone)
    in_group = df_skin_tones['skin_tone'] == tone
    print(df_skin_tones.loc[in_group, 'class_label'].value_counts())

print(df_skin_tones)
df_skin_tones.to_csv(f"metadata/{file_name}", index=False)
