In [None]:
# re-implementation of skin tone labelling method from Detecting Melanoma Fairly: Skin Tone Detection and Debiasing for Skin Lesion Classification
# Peter J. Bevan & Amir Atapour-Abarghouei
# calculation of ITA value adapted from code: https://github.com/mattgroh/fitzpatrick17k/blob/main/ita_fitzpatrick_analysis.ipynb

In [None]:
import pandas as pd
import numpy as np
import cv2
from skimage import io, color
from io import BytesIO
import os
import requests
from PIL import Image
import math
import random

In [None]:
# Folder holding the downloaded images, given relative to the working directory.
# The two commented-out lines below mount Google Drive when running on Colab.
# NOTE(review): presumably this relative path only resolves once Drive is
# mounted under the working directory — confirm for the target environment.
#from google.colab import drive
#drive.mount('/content/drive')
data_path = r"drive/MyDrive/Edinburgh/MLP/MLPcoursework4/data"

In [None]:
# Load the raw Fitzpatrick17k metadata table. Later cells read its "md5hash"
# and "fitzpatrick_scale" columns (and "url" in the disabled download cell).
df = pd.read_csv("drive/MyDrive/Edinburgh/MLP/MLPcoursework4/fitzpatrick17k.csv")

In [None]:
# Download dataset (disabled: the loop below is wrapped in a triple-quoted
# string, so it is never executed; remove the quotes to re-run the download).
# When enabled it fetches each row's "url", resizes the image to 256x256 and
# saves it as <md5hash>.jpg in the data folder; only request errors are caught.
# NOTE(review): Image.ANTIALIAS was removed in Pillow 10 (Image.LANCZOS is the
# replacement) — confirm the installed Pillow version before re-enabling.
"""for img_no in range(len(df)):
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
        response = requests.get(df["url"][img_no], headers=headers)
        response.raise_for_status()

        # Use BytesIO for in-memory binary streams
        img = Image.open(BytesIO(response.content))

        # Resize the image
        img_resized = img.resize((256, 256), Image.ANTIALIAS)

        # Save the resized image
        img_resized.save(f"drive/MyDrive/Edinburgh/MLP/MLPcoursework4/data/{df['md5hash'][img_no]}.jpg", 'JPEG')
    except requests.exceptions.RequestException as e:
        print(f"Error downloading: {e}")
    if img_no % 500 == 0:
      print("Downloaded ", img_no + 1, " images")"""

In [None]:
# Build the cleaned, filtered CSV.
# Step 1: keep only rows that carry a Fitzpatrick skin tone label
# (a value of -1 marks an unlabelled image).
filtered_df = df.loc[df["fitzpatrick_scale"] != -1].copy()

# Step 2: drop rows whose image never made it to disk (failed download).
def does_exist(hash_val):
    """Return True when <data_path>/<hash_val>.jpg is an existing file."""
    return os.path.isfile(f"{data_path}/{hash_val}.jpg")
mask = filtered_df["md5hash"].apply(does_exist)
filtered_df = filtered_df[mask].reset_index(drop=True)

# Step 3: flag a seeded random 10% of the rows as validation (1); all other
# rows keep the value 0 and form the training split.
random.seed(0)
np.random.seed(0)
valid_frac = 0.1
total_num = len(filtered_df)
valid_num = int(total_num * valid_frac)
valid_indices = random.sample(range(total_num), valid_num)
df_row = np.zeros(total_num, dtype=int)
df_row[valid_indices] = 1
filtered_df["validation"] = df_row

# Step 4: persist the filtered table together with its split column.
filtered_df.to_csv(
    "drive/MyDrive/Edinburgh/MLP/MLPcoursework4/fitzpatrick17k_filtered.csv",
    index=False,
)

In [None]:
# Pre-load images and save them as NumPy arrays for quicker loading in future
# (disabled: the loop below is wrapped in a triple-quoted string, so it is
# never executed; remove the quotes to regenerate the array files).
"""for idx in range(len(filtered_df)):
    img = io.imread(data_path + f"/{filtered_df.at[idx, 'md5hash']}.jpg")
    np.save(f"drive/MyDrive/Edinburgh/MLP/MLPcoursework4/image_arrays/{filtered_df.at[idx, 'md5hash']}", np.array(img))
    if idx % 100 == 0:
        print("loaded ", idx, " out of ", len(filtered_df), " images")"""

In [None]:
# Reload the filtered/split table from disk. This rebinds `df`, so from here on
# `df` refers to the filtered rows (with the "validation" column), not the raw CSV.
df = pd.read_csv("drive/MyDrive/Edinburgh/MLP/MLPcoursework4/fitzpatrick17k_filtered.csv")
df.head()

In [None]:
# Count how many filtered images carry each manual Fitzpatrick label.
df_scale = df["fitzpatrick_scale"]
df_scale.value_counts()

In [None]:
# Display an image: read the first row's JPEG, print its array shape and show it.
# NOTE(review): skimage.io.imshow is deprecated in recent scikit-image
# releases — confirm against the installed version.
img = io.imread(data_path + f"/{df['md5hash'][0]}.jpg")
print(img.shape)
io.imshow(img)

In [None]:
# ITA functions from https://github.com/mattgroh/fitzpatrick17k/blob/main/ita_fitzpatrick_analysis.ipynb
def _trim_to_one_std(channel):
    """
    Masks a single CIELAB channel down to the values ITA() averages over.

    Inputs:
        channel - (np.ndarray) one channel of a CIELAB image

    Outputs:
        (np.ndarray) new array in which exact zeros and every value further
        than one standard deviation from the channel mean are replaced by NaN
    """
    # Exact zeros are excluded before the statistics are computed
    nonzero = np.where(channel != 0, channel, np.nan)
    std, mean = np.nanstd(nonzero), np.nanmean(nonzero)
    # NaN compares False on both sides, so already-masked entries stay NaN
    keep = (nonzero >= mean - std) & (nonzero <= mean + std)
    return np.where(keep, nonzero, np.nan)


def ITA(image):
    """
    Calculates the individual typology angle (ITA) for a given
    RGB image.

    The image is converted to CIELAB; the L and b channels are each reduced
    to their non-zero values within one standard deviation of the channel
    mean, and ITA = atan2(mean(L) - 50, mean(b)) expressed in degrees.

    Inputs:
        image - (str) image file path (opened with PIL's Image.open)

    Outputs:
        ITA - (float) individual typology angle in degrees; NaN when a
              channel has no usable values left after masking
    """
    # Read the pixels inside a context manager so the file handle is released
    # immediately, and force three channels so greyscale / palette / RGBA
    # files are converted instead of being rejected by rgb2lab.
    with Image.open(image) as img_file:
        rgb = np.asarray(img_file.convert("RGB"))

    # Convert to CIE-LAB color space
    lab = color.rgb2lab(rgb)

    # Get L and B (subset to +- 1 std from mean)
    lightness = _trim_to_one_std(lab[:, :, 0])
    yellow_blue = _trim_to_one_std(lab[:, :, 2])

    # Calculate ITA
    ita_degrees = math.atan2(np.nanmean(lightness) - 50, np.nanmean(yellow_blue)) * (180 / np.pi)

    return ita_degrees

def ITA_label(ITA, method):
    """
    Maps an input ITA to a fitzpatrick label given
    a choice method

    Inputs:
        ITA - (float) individual typology angle
        method - (str) 'kinyanjui' selects the thresholds from
                 kinyanjui et. al.; any other value (including None)
                 selects the empirical thresholds

    Outputs:
        (int) fitzpatrick type 1-6, or None when ITA is NaN
    """
    if method == 'kinyanjui':
        # Thresholds from kinyanjui et. al. (type 1 boundary is exclusive)
        is_type_one = ITA > 55
        lower_bounds = ((41, 2), (28, 3), (19, 4), (10, 5))
        floor = 10
    else:
        # Empirical thresholds (type 1 boundary is inclusive)
        is_type_one = ITA >= 45
        lower_bounds = ((28, 2), (17, 3), (5, 4), (-20, 5))
        floor = -20

    if is_type_one:
        return 1

    # Walk the remaining bands from lightest to darkest
    for bound, label in lower_bounds:
        if ITA > bound:
            return label

    # NaN fails every comparison above and this one, so it maps to None
    return 6 if ITA <= floor else None

In [None]:
# Gather the manual label and both ITA-derived labels for every row of the
# validation split (validation == 1) so they can be compared afterwards.
manual_labels = []
fitzpatrick_labels_emp = []
fitzpatrick_labels_kinyanjui = []
for idx, md5hash, in_validation, manual_label in zip(
    df.index, df["md5hash"], df["validation"], df["fitzpatrick_scale"]
):
    if in_validation == 1:
        manual_labels.append(manual_label)
        ita_angle = ITA(f"{data_path}/{md5hash}.jpg")
        # any method other than "kinyanjui" selects the empirical thresholds
        fitzpatrick_labels_emp.append(ITA_label(ita_angle, method="emperical"))
        fitzpatrick_labels_kinyanjui.append(ITA_label(ita_angle, method="kinyanjui"))
    # progress marker: printed once per 3000 rows of the whole table
    if (idx + 1) % 3000 == 0:
        print(idx)

In [None]:
# Exact-match accuracy: fraction of validation images whose ITA-derived label
# equals the manual Fitzpatrick label, for each threshold set.
acc_emp = np.mean(
    [int(manual == predicted) for manual, predicted in zip(manual_labels, fitzpatrick_labels_emp)]
)
acc_kin = np.mean(
    [int(manual == predicted) for manual, predicted in zip(manual_labels, fitzpatrick_labels_kinyanjui)]
)
print("Acc emperical: ", acc_emp)
print("Acc kinyanjui: ", acc_kin)

In [None]:
# "Within one" accuracy: a prediction counts as correct when it lies within one
# Fitzpatrick type of the manual label. ITA_label returns None when the ITA
# value is NaN, and None cannot be range-compared with a number (TypeError),
# so such predictions are counted as misses instead of aborting the cell.
one_acc_emp = np.mean([1 if (y is not None and x-1 <= y <= x+1) else 0 for x, y in zip(manual_labels, fitzpatrick_labels_emp)])
one_acc_kin = np.mean([1 if (y is not None and x-1 <= y <= x+1) else 0 for x, y in zip(manual_labels, fitzpatrick_labels_kinyanjui)])
print("+-1 Acc emperical: ", one_acc_emp)
print("+-1 Acc kinyanjui: ", one_acc_kin)