# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [1]:
# Mount Google Drive inside the Colab runtime so the dataset under
# /content/drive is reachable through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Preprocessing

In [2]:
import cv2
import glob
import json
import numpy as np
import os
from tqdm import tqdm
import re
import pandas as pd

In [3]:
def get_image_and_json_paths(src_path):
    """Collect image and annotation file paths below *src_path*.

    The directory tree is searched recursively for ``*.jpg`` and ``*.json``
    files. Each list is sorted independently of the other.

    Args:
        src_path: Root directory to search.

    Returns:
        A ``(image_paths, json_paths)`` tuple of sorted lists of path strings.
    """
    def _sorted_matches(file_pattern):
        # '**' together with recursive=True descends into every subdirectory.
        matches = glob.glob(os.path.join(src_path, '**', file_pattern), recursive=True)
        matches.sort()
        return matches

    image_paths = _sorted_matches('*.jpg')
    json_paths = _sorted_matches('*.json')
    return image_paths, json_paths


def get_image_data(image_paths):
    """Read every path in *image_paths* with ``cv2.imread``.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        A list holding the ``cv2.imread`` result for each path, in input order.
    """
    # NOTE(review): cv2.imread presumably yields None (not an exception) for
    # unreadable files -- confirm downstream code tolerates None entries.
    loaded_images = []
    for path in tqdm(image_paths, desc='Loading Images'):
        loaded_images.append(cv2.imread(path))
    return loaded_images

def extract_metadata_and_locations_from_json(json_paths):
    """Parse annotation JSON files into metadata and shape-location lists.

    Control characters (``\\x00-\\x1F`` and ``\\x7F-\\x9F``) are stripped from
    each file's text before it is handed to ``json.loads``.

    Args:
        json_paths: Iterable of paths to UTF-8 encoded JSON files.

    Returns:
        A ``(meta_data, polygon_data, box_data)`` tuple:

        * ``meta_data`` -- one dict per file with the keys ``breed``, ``age``,
          ``gender``, ``region`` and ``lesions``; a value is ``None`` when the
          key (or the whole ``metaData`` object) is absent.
        * ``polygon_data`` -- the ``location`` value of every ``polygon``
          labeling entry, flattened across all files.
        * ``box_data`` -- the ``location`` value of every ``box`` labeling
          entry, flattened across all files.

    Raises:
        KeyError: If a file has no ``labelingInfo`` key.
        json.JSONDecodeError: If a file is not valid JSON after cleaning.
    """
    # Compile once instead of re-parsing the pattern for every file.
    control_chars = re.compile(r'[\x00-\x1F\x7F-\x9F]')
    metadata_keys = ('breed', 'age', 'gender', 'region', 'lesions')

    meta_data, polygon_data, box_data = [], [], []
    for json_path in tqdm(json_paths, desc='Loading JSON', unit=' file'):
        # Keep the file open only for the read; parsing happens afterwards.
        with open(json_path, "r", encoding="utf-8") as file:
            raw_text = file.read()

        json_data = json.loads(control_chars.sub('', raw_text))
        labeling_info = json_data['labelingInfo']
        # A missing or null 'metaData' previously crashed with AttributeError
        # on `.get`; fall back to an empty mapping so every field becomes None.
        metadata = json_data.get('metaData') or {}

        filtered_metadata = {key: metadata.get(key, None) for key in metadata_keys}

        # NOTE(review): locations from all files end up in two flat lists, so
        # the link between a shape and its source file/image is not kept here.
        for entry in labeling_info:
            if 'polygon' in entry:
                polygon_data.append(entry['polygon'].get('location', None))
            if 'box' in entry:
                box_data.append(entry['box'].get('location', None))

        meta_data.append(filtered_metadata)

    return meta_data, polygon_data, box_data

def _polygon_points(entry):
    """Return the polygon in *entry* as an OpenCV point array, or None if absent."""
    # Non-dict entries (e.g. None) carry no 'polygon' key; skip them instead
    # of letting the membership test raise TypeError.
    if not isinstance(entry, dict) or 'polygon' not in entry:
        return None
    loc = entry['polygon']['location']
    if not loc:
        return None
    loc_array = [[coord['x'], coord['y']] for coord in loc]
    return np.array(loc_array, np.int32).reshape((-1, 1, 2))


def _box_corners(entry):
    """Return the two corner points of the box in *entry*, or None if absent."""
    if not isinstance(entry, dict) or 'box' not in entry:
        return None
    loc = entry['box']['location']
    if not loc:
        return None
    # The first two values form one corner, the remaining values the other.
    return tuple(loc[:2]), tuple(loc[2:])


def create_mask_maps(image_data, polygon_data, box_data):
    """Build a 4-channel ``uint8`` mask map for every image.

    Channel layout of each returned ``(height, width, 4)`` array:

    * 0 -- polygons filled with 255
    * 1 -- polygons filled with 1
    * 2 -- boxes filled with 255
    * 3 -- boxes filled with 1

    Args:
        image_data: Iterable of image arrays; only ``image.shape[:2]`` is used.
        polygon_data: Iterable of entries shaped like
            ``{'polygon': {'location': [{'x': ..., 'y': ...}, ...]}}``.
        box_data: Iterable of entries shaped like
            ``{'box': {'location': [...]}}`` whose location is split into two
            corner points (first two values / remaining values).

    Returns:
        A list with one mask map per input image.
    """
    # NOTE(review): extract_metadata_and_locations_from_json in this file
    # returns bare `location` values, not dicts keyed by 'polygon'/'box'; fed
    # with that output, no entry passes the key check and the masks stay empty.
    # NOTE(review): every entry is drawn onto every image -- there is no
    # per-image association here; confirm that this is intended.
    fill_value = 1
    mask_maps = []

    for image in tqdm(image_data, desc='Generating Mask Maps for Each Image'):
        height, width = image.shape[:2]
        # Draw on separate contiguous 2-D planes: a channel slice such as
        # `mask_map[..., 0]` is a strided view that OpenCV cannot reliably
        # draw into in place.
        polygon_mask = np.zeros((height, width), dtype=np.uint8)
        polygon_label = np.zeros((height, width), dtype=np.uint8)
        box_mask = np.zeros((height, width), dtype=np.uint8)
        box_label = np.zeros((height, width), dtype=np.uint8)

        for entry in tqdm(polygon_data, desc='Drawing Polygon', position=1, leave=False):
            polygon_points = _polygon_points(entry)
            if polygon_points is None:
                continue
            cv2.fillPoly(polygon_mask, [polygon_points], 255)
            cv2.fillPoly(polygon_label, [polygon_points], fill_value)

        for entry in tqdm(box_data, desc='Drawing Box', position=2, leave=False):
            corners = _box_corners(entry)
            if corners is None:
                continue
            top_left, bottom_right = corners
            cv2.rectangle(box_mask, top_left, bottom_right, 255, thickness=-1)
            cv2.rectangle(box_label, top_left, bottom_right, fill_value, thickness=-1)

        planes = [polygon_mask, polygon_label, box_mask, box_label]
        mask_maps.append(np.stack(planes, axis=-1))

    return mask_maps

def resize_images(images, width, height):
    """Resize each image to ``width`` x ``height`` using area interpolation.

    Args:
        images: Iterable of image arrays accepted by ``cv2.resize``.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        A list of resized arrays, in input order.
    """
    target_size = (width, height)  # cv2.resize takes (width, height)
    resized = []
    for image in tqdm(images, desc='Resizing Images'):
        resized.append(cv2.resize(image, target_size, interpolation=cv2.INTER_AREA))
    return resized

In [4]:
# Dataset root on the mounted shared drive; it is searched recursively for
# .jpg images and .json annotation files.
src_path = "/content/drive/Shareddrives/반려견"

In [5]:
# Gather the sorted image and annotation paths below src_path.
image_paths, json_paths = get_image_and_json_paths(src_path)

In [None]:
# Load every image into memory (one cv2.imread call per path).
image_data = get_image_data(image_paths)

Loading Images:  25%|██▌       | 362/1425 [02:50<12:44,  1.39it/s]

In [None]:
# Preview the first five loaded images.
image_data[:5]

In [None]:
# Parse the annotation files: per-file metadata plus flat lists of polygon
# and box locations.
meta_data, polygon_data, box_data = extract_metadata_and_locations_from_json(json_paths)

In [None]:
# Preview the first five metadata records.
meta_data[:5]

In [None]:
# Preview the first five polygon locations.
polygon_data[:5]

In [None]:
# Preview the first five box locations.
box_data[:5]

In [None]:
# Build the 4-channel mask maps at the images' loaded resolution.
# NOTE: the variable is spelled `masks_maps`; the later resize cell reads it
# under that exact name.
masks_maps = create_mask_maps(image_data, polygon_data, box_data)

In [None]:
# Preview channel 0 (polygons filled with 255) of the first five mask maps.
# The maps live in a plain Python list named `masks_maps`; a list cannot be
# indexed with `[..., 0]`, so the channel is taken from each array separately.
[mask_map[..., 0] for mask_map in masks_maps[:5]]

In [None]:
# Downscale the masks that were drawn at the loaded image resolution to 96x96.
# NOTE(review): despite the name, `original_size_mask_maps` holds 96x96 arrays;
# "original size" presumably refers to the resolution the masks were drawn at.
original_size_mask_maps = resize_images(masks_maps, 96, 96)

Resizing Images: 100%|██████████| 750/750 [00:08<00:00, 89.06it/s]


In [None]:
# Display the resized mask maps.
original_size_mask_maps

[array([[[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
 
        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
 
        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
 
        ...,
 
        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
 
        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
 
        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]]], dtype=uint8),
 array([[[0, 0, 0, 0],
         [0, 0, 0, 

In [None]:
# Alternative order: resize the images to 96x96 first, then draw the masks on
# the resized canvas.
# NOTE(review): polygon/box coordinates are passed through unchanged, so they
# are presumably still in original-image pixel space and would need scaling
# to the 96x96 canvas -- confirm.
resized_mask_maps = create_mask_maps(resize_images(image_data, 96, 96), polygon_data, box_data)

Resizing Images: 100%|██████████| 750/750 [00:06<00:00, 123.93it/s]
Generating Mask Maps for Each Image:   0%|          | 0/750 [00:00<?, ?it/s]
Drawing Polygon:   0%|          | 0/764 [00:00<?, ?it/s][A
                                                        [A

Drawing Box:   0%|          | 0/764 [00:00<?, ?it/s][A[A

                                                    [A[A
Drawing Polygon:   0%|          | 0/764 [00:00<?, ?it/s][A
                                                        [A

Drawing Box:   0%|          | 0/764 [00:00<?, ?it/s][A[A

                                                    [A[A
Drawing Polygon:   0%|          | 0/764 [00:00<?, ?it/s][A
                                                        [A

Drawing Box:   0%|          | 0/764 [00:00<?, ?it/s][A[A

                                                    [A[A
Drawing Polygon:   0%|          | 0/764 [00:00<?, ?it/s][A
                                                        [A

Drawing Box:  

In [None]:
# Display the mask maps drawn on the resized images.
resized_mask_maps

[array([[[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
 
        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
 
        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
 
        ...,
 
        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
 
        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]],
 
        [[0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         ...,
         [0, 0, 0, 0],
         [0, 0, 0, 0],
         [0, 0, 0, 0]]], dtype=uint8),
 array([[[0, 0, 0, 0],
         [0, 0, 0, 

In [None]:
# One row per annotation file, built from the metadata dicts
# (keys: breed, age, gender, region, lesions).
metadata_df = pd.DataFrame(meta_data)

In [None]:
# Display the metadata table.
metadata_df

Unnamed: 0,breed,age,gender,region,lesions,polygon_location,box_location
0,포메라니안,8,F,B,A7,[],[]
1,푸들,1,M,L,A7,[],[]
2,푸들,1,M,L,A7,[],[]
3,푸들,1,M,H,A7,[],[]
4,푸들,1,M,B,A7,[],[]
...,...,...,...,...,...,...,...
744,푸들,3,F,H,A3,[],[]
745,푸들,3,F,H,A3,[],[]
746,푸들,3,F,H,A3,[],[]
747,푸들,3,F,B,A3,[],[]


# Modeling

In [None]:
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# `train_test_split` is used below but was not imported anywhere in the
# visible notebook; import it here so this cell runs on a fresh kernel.
from sklearn.model_selection import train_test_split

# Split off the test set (20% of all samples).
test_size = 0.2
temp_img_data, test_img_data, temp_meta_data, test_meta_data, temp_orig_size_maps, test_orig_size_maps, temp_resized_maps, test_resized_maps = train_test_split(
    image_data, metadata_df, original_size_mask_maps, resized_mask_maps,
    test_size=test_size, random_state=42
)

# The validation share is expressed relative to the full dataset, so rescale
# it to the remaining 80%: 0.2 / (1 - 0.2) = 0.25 of the remainder.
val_size = 0.2
adjusted_val_size = val_size / (1.0 - test_size)

# Split the remainder into train and validation sets.
train_img_data, val_img_data, train_meta_data, val_meta_data, train_orig_size_maps, val_orig_size_maps, train_resized_maps, val_resized_maps = train_test_split(
    temp_img_data, temp_meta_data, temp_orig_size_maps, temp_resized_maps,
    test_size=adjusted_val_size, random_state=42
)

In [None]:
# Build the CNN as a Sequential model.
model = models.Sequential()

# Convolution block 1 (input: 96x96 images with 3 channels).
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(96,96,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3)))

# Dropout after block 1.
model.add(Dropout(0.25))

# Convolution block 2.
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Dropout after block 2.
model.add(Dropout(0.25))

# Convolution block 3.
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Dropout after block 3.
model.add(Dropout(0.25))

# Flatten the feature maps for the dense head.
model.add(Flatten())

# Fully connected layer.
model.add(Dense(units=1024, activation='relu'))
model.add(BatchNormalization())

# Dropout before the output layer.
model.add(Dropout(0.5))

# Output layer: 7-way softmax.
model.add(Dense(units=7, activation='softmax'))

# Compile the model.
# `lr=` and `decay=` are deprecated optimizer arguments that current Keras
# optimizers reject. The legacy `decay` scaled the rate as
# lr / (1 + decay * step); InverseTimeDecay with decay_steps=1 expresses the
# same schedule through the supported `learning_rate` argument.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001, decay_steps=1, decay_rate=0.00001)
opt = Adam(learning_rate=lr_schedule)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Print the architecture summary.
model.summary()

# Train the model.
epochs = 150
batch_size = 32

# NOTE(review): `train_data`, `val_data` and `test_data` are not defined in
# the visible notebook (the split cell produces `train_img_data`,
# `train_meta_data`, ...). They presumably need to be built from those
# splits as 96x96x3 inputs with 7-class one-hot labels -- confirm.
history = model.fit(train_data, epochs=epochs, batch_size=batch_size, validation_data=val_data)

# Evaluate on the held-out test set.
test_loss, test_acc = model.evaluate(test_data)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

In [None]:
# Convert the Keras model to the TensorFlow Lite format.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to disk.
with open('your_model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)