In [1]:
import os
import numpy as np
from keras.preprocessing import image
import cv2
from PIL import Image
from os import path
import shutil, copy
from image_cropper import *
from copy import deepcopy
from collections import defaultdict

In [2]:
mappings = [
  {
    "type": "MEL",
    "mappings": [
      { "DDIClassName": "melanoma", "HamClassName": "MEL" },
      { "DDIClassName": "nodular-melanoma-(nm)", "HamClassName": "MEL" },
      { "DDIClassName": "melanoma-acral-lentiginous", "HamClassName": "MEL" },
      { "DDIClassName": "melanoma-in-situ", "HamClassName": "MEL" }
    ]
  },
  {
    "type": "NV",
    "mappings": [
      { "DDIClassName": "melanocytic-nevi", "HamClassName": "NV" },
      {
        "DDIClassName": "atypical-spindle-cell-nevus-of-reed",
        "HamClassName": "NV"
      },
      { "DDIClassName": "blue-nevus", "HamClassName": "NV" },
      {
        "DDIClassName": "nevus-lipomatosus-superficialis",
        "HamClassName": "NV"
      },
      {
        "DDIClassName": "pigmented-spindle-cell-nevus-of-reed",
        "HamClassName": "NV"
      },
      { "DDIClassName": "acral-melanotic-macule", "HamClassName": "NV" }, # Pigmented macule, typically benign
      { "DDIClassName": "dysplastic-nevus", "HamClassName": "NV" } # Higher risk for melanoma, but still classified as nevus
    ]
  },
  {
    "type": "BCC",
    "mappings": [
      { "DDIClassName": "basal-cell-carcinoma", "HamClassName": "BCC" },
      {
        "DDIClassName": "basal-cell-carcinoma-nodular",
        "HamClassName": "BCC"
      },
      {
        "DDIClassName": "basal-cell-carcinoma-superficial",
        "HamClassName": "BCC"
      }
    ]
  },
  {
    "type": "AKIEC",
    "mappings": [
      { "DDIClassName": "actinic-keratosis", "HamClassName": "AKIEC" },
      {
        "DDIClassName": "squamous-cell-carcinoma-in-situ",
        "HamClassName": "AKIEC"
      }
    ]
  },
  {
    "type": "BKL",
    "mappings": [
      { "DDIClassName": "benign-keratosis", "HamClassName": "BKL" },
      { "DDIClassName": "seborrheic-keratosis", "HamClassName": "BKL" },
      {
        "DDIClassName": "seborrheic-keratosis-irritated",
        "HamClassName": "BKL"
      },
      { "DDIClassName": "lichenoid-keratosis", "HamClassName": "BKL" }
    ]
  },
  {
    "type": "DF",
    "mappings": [
      { "DDIClassName": "dermatofibroma", "HamClassName": "DF" },
      { "DDIClassName": "fibrous-papule", "HamClassName": "DF" } # Reason: Histologically similar benign fibrous lesions
    ]
  },
  {
    "type": "VASC",
    "mappings": [
      { "DDIClassName": "angioma", "HamClassName": "VASC" }, # General term for benign vascular tumors
      { "DDIClassName": "angioleiomyoma", "HamClassName": "VASC" }, # Benign smooth muscle tumor with vascular elements
      { "DDIClassName": "pyogenic-granuloma", "HamClassName": "VASC" }, # Benign vascular lesion, often post-injury
      { "DDIClassName": "arteriovenous-hemangioma", "HamClassName": "VASC" }, # Congenital vascular malformation
      { "DDIClassName": "glomangioma", "HamClassName": "VASC" } # Benign tumor from glomus cells, vascular origin
    ]
  }
]


In [3]:
def return_list(data_path, data_type):
    file_list = [file for file in os.listdir(data_path) if file.lower().endswith(data_type)]
    # print(str(len(file_list)))
    return file_list

def mk_dir(dir_path):
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    return dir_path

source_data_type = '.png'
target_data_type = '.jpg'
TARGET_SIZE = 384
ham_classes = ["MEL", "NV", "BCC", "AKIEC", "BKL", "DF", "VASC"]
HamLabels = defaultdict(lambda: "NONE")

In [4]:
metadata = pd.read_csv("../../datasets/DDI/metadata.csv")
target_metadata = deepcopy(metadata)
target_metadata['HamMappedLabel'] = HamLabels["DEFAULT"]

In [5]:
for ham_class_mapping in mappings:
    ham_class = ham_class_mapping["type"]
    for ddi_class_mapping in ham_class_mapping["mappings"]:
        HamLabels[ddi_class_mapping["DDIClassName"]] = ham_class
        target_metadata.loc[target_metadata.disease == ddi_class_mapping["DDIClassName"], "HamMappedLabel"] = ham_class

In [6]:
data_img_path = '../../datasets/DDI/images/'

data_save_path = '../../datasets/processed/DDI/roi_square_cropped/'
data_save_path_images = mk_dir(data_save_path + 'images/')

In [7]:
target_metadata.to_csv(f"{data_save_path}target_metadata_all.csv", index=False)
target_metadata = target_metadata[target_metadata.HamMappedLabel != "NONE"]
target_metadata.to_csv(f"{data_save_path}target_metadata.csv", index=False)

In [8]:
i=0
for file_name in list(set(target_metadata.DDI_file)):
    i+=1
    if i % 100 == 0:
        print('Processing Img {idx}: {file_name}'.format(idx=i, file_name=file_name))
    
    org_img = np.asarray(image.load_img(data_img_path + file_name))
    remove=min(org_img.shape[0], org_img.shape[1])//2
    x,y= org_img.shape[0]//2, org_img.shape[1]//2
    # centred square crop
    org_img = org_img[x-remove:x+remove, y-remove:y+remove]

    # roi square crop
    gray_image = cv2.cvtColor(org_img, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(gray_image, 60, 255, 0)
    try:
        rect = obtain_bb(thresh)

        x,y,w,h = rect
        int_ratio = intensity_ratio(gray_image, x, y, w, h)

        if float(int_ratio) > 1.1:
            print('Image {} cropped successfully.'.format(file_name))
            org_img = org_img[y:y+h, x:x+w]
    except:
        print('Image {} was not found'.format(file_name))
        continue

    # resize
    org_img = cv2.resize(org_img, (TARGET_SIZE, TARGET_SIZE), interpolation = cv2.INTER_AREA)

    cropImg = Image.fromarray(org_img)
    cropImg.save(path.join(data_save_path_images, file_name[:-4] + target_data_type))

Processing Img 100: 000639.png
Processing Img 200: 000526.png
Image 000092.png was not found
Image 000582.png cropped successfully.
Processing Img 300: 000530.png
