In [1]:
import os
import numpy as np
from keras.preprocessing import image
import cv2
from PIL import Image
from os import path
import shutil, copy
from image_cropper import *
from copy import deepcopy
from collections import defaultdict

In [2]:
mappings = [
  {
    "type": "MEL",
    "mappings": [
      { "SdClassName": "Malignant_Melanoma", "SdClassLabel": 115 },
      { "SdClassName": "Lentigo_Maligna_Melanoma", "SdClassLabel": 101 }
    ]
  },
  {
    "type": "NV",
    "mappings": [
      { "SdClassName": "Compound_Nevus", "SdClassLabel": 35 },
      { "SdClassName": "Congenital_Nevus", "SdClassLabel": 36 },
      { "SdClassName": "Junction_Nevus", "SdClassLabel": 92 },
      { "SdClassName": "Halo_Nevus", "SdClassLabel": 81 },
      { "SdClassName": "Becker's_Nevus", "SdClassLabel": 23 },
      { "SdClassName": "Blue_Nevus", "SdClassLabel": 26 },
      { "SdClassName": "Dysplastic_Nevus", "SdClassLabel": 53 },
      { "SdClassName": "Nevus_Sebaceous_of_Jadassohn", "SdClassLabel": 134 },
      { "SdClassName": "Nevus_Spilus", "SdClassLabel": 135 }
    ]
  },
  {
    "type": "BCC",
    "mappings": [
      { "SdClassName": "Basal_Cell_Carcinoma", "SdClassLabel": 21 }
    ]
  },
  {
    "type": "AKIEC",
    "mappings": [
      { "SdClassName": "Actinic_solar_Damage(Actinic_Keratosis)", "SdClassLabel": 5 },
      { "SdClassName": "Bowen's_Disease", "SdClassLabel": 27 }
    ]
  },
  {
    "type": "BKL",
    "mappings": [
      { "SdClassName": "Benign_Keratosis", "SdClassLabel": 25 },
      { "SdClassName": "Seborrheic_Keratosis", "SdClassLabel": 167 }
    ]
  },
  {
    "type": "DF",
    "mappings": [
      { "SdClassName": "Dermatofibroma", "SdClassLabel": 43 }
    ]
  },
  {
    "type": "VASC",
    "mappings": [
      { "SdClassName": "Angioma", "SdClassLabel": 15 },
      { "SdClassName": "Strawberry_Hemangioma", "SdClassLabel": 177 },
      { "SdClassName": "Pyogenic_Granuloma", "SdClassLabel": 156 }
    ]
  }
]


In [3]:
def return_list(data_path, data_type):
    file_list = [file for file in os.listdir(data_path) if file.lower().endswith(data_type)]
    # print(str(len(file_list)))
    return file_list

def mk_dir(dir_path):
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    return dir_path

data_type = '.jpg'
TARGET_SIZE = 384
ham_classes = ["MEL", "NV", "BCC", "AKIEC", "BKL", "DF", "VASC"]
HamLabels = defaultdict(lambda: "NONE")

In [4]:
metadata = pd.read_csv("../../datasets/SD-198/sd-198/metadata.csv")
target_metadata = deepcopy(metadata)
target_metadata['HamMappedLabel'] = HamLabels["DEFAULT"]

In [5]:
for ham_class_mapping in mappings:
    ham_class = ham_class_mapping["type"]
    for sd_class_mapping in ham_class_mapping["mappings"]:
        HamLabels[sd_class_mapping["SdClassName"]] = ham_class
        target_metadata.loc[target_metadata.class_name == sd_class_mapping["SdClassName"], "HamMappedLabel"] = ham_class

In [6]:
data_img_path = '../../datasets/SD-198/sd-198/images/'

data_save_path = '../../datasets/processed/SD-198/roi_square_cropped/'
data_save_path_images = mk_dir(data_save_path + 'images/')

In [7]:
target_metadata.to_csv(f"{data_save_path}target_metadata_all.csv", index=False)
target_metadata = target_metadata[target_metadata.HamMappedLabel != "NONE"]
target_metadata.to_csv(f"{data_save_path}target_metadata.csv", index=False)

In [8]:
i=0
for sd_class in list(set(target_metadata.class_name)):
    file_load_path = f"{data_img_path}{sd_class}/"
    file_images_list = return_list(file_load_path, data_type)

    for file_name in file_images_list:
        i+=1
        if i % 100 == 0:
            print('Processing Img {idx}: {file_name}'.format(idx=i, file_name=file_name))
        
        org_img = np.asarray(image.load_img(file_load_path + file_name))
        remove=min(org_img.shape[0], org_img.shape[1])//2
        x,y= org_img.shape[0]//2, org_img.shape[1]//2
        # centred square crop
        org_img = org_img[x-remove:x+remove, y-remove:y+remove]

        # roi square crop
        gray_image = cv2.cvtColor(org_img, cv2.COLOR_BGR2GRAY)
        ret, thresh = cv2.threshold(gray_image, 60, 255, 0)
        try:
            rect = obtain_bb(thresh)

            x,y,w,h = rect
            int_ratio = intensity_ratio(gray_image, x, y, w, h)

            if float(int_ratio) > 1.1:
                print('Image {} cropped successfully.'.format(file_name))
                org_img = org_img[y:y+h, x:x+w]
        except:
            print('Image {} was not found'.format(file_name))
            continue
    
        # resize
        org_img = cv2.resize(org_img, (TARGET_SIZE, TARGET_SIZE), interpolation = cv2.INTER_AREA)
    
        cropImg = Image.fromarray(org_img)
        cropImg.save(path.join(data_save_path_images, file_name[:-4] + '.jpg'))

Processing Img 100: 020740HB.jpg
Image 009086HB.jpg cropped successfully.
Image 045138HB.jpg cropped successfully.
Processing Img 200: 010332HB.jpg
Processing Img 300: 020078VB.jpg
Processing Img 400: 045678HB.jpg
Processing Img 500: 020203HB.jpg
Processing Img 600: 000607HB.jpg
Processing Img 700: 000375HB.jpg
Processing Img 800: 014247HB.jpg
Processing Img 900: 021724HB.jpg
Image 015028VB.jpg cropped successfully.
Processing Img 1000: 015174HB.jpg
