# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [15]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Preprocessing

In [16]:
import cv2
import glob
import json
import numpy as np
import os
from tqdm import tqdm
import re
import pandas as pd

In [25]:
def get_image_and_json_paths(src_path):
    image_paths = glob.glob(os.path.join(src_path, '**', '*.jpg'), recursive=True)
    image_paths.sort()

    json_paths = glob.glob(os.path.join(src_path, '**', '*.json'), recursive=True)
    json_paths.sort()

    return image_paths, json_paths


def get_image_data(image_paths):
    image_data = []
    for image_path in tqdm(image_paths, desc='Loading Images'):
        image = cv2.imread(image_path)
        image_data.append(image)

    return image_data

def extract_metadata_and_locations_from_json(json_paths):
    meta_data = []
    polygon_data = []
    box_data = []

    for json_path in tqdm(json_paths, desc='Loading JSON', unit=' file'):
        with open(json_path, "r", encoding="utf-8") as file:
            file_content = file.read()
            control_char_regex = r'[\x00-\x1F\x7F-\x9F]'
            cleaned_content = re.sub(control_char_regex, '', file_content)
            json_data = json.loads(cleaned_content)
            metadata = json_data.get('metaData', None)

            filtered_metadata = {
                'breed': metadata.get('breed', None),
                'age': metadata.get('age', None),
                'gender': metadata.get('gender', None),
                'region': metadata.get('region', None),
                'lesions': metadata.get('lesions', None),
                'polygon_location': [],
                'box_location': []
            }

            labeling_info = json_data.get('labelingInfo', [])

            for entry in labeling_info:
                if 'polygon' in entry:
                    polygon_metadata = filtered_metadata.copy()
                    polygon_metadata['polygon_location'] = entry['polygon'].get('location', None)
                    polygon_data.append(polygon_metadata)
                if 'box' in entry:
                    box_metadata = filtered_metadata.copy()
                    box_metadata['box_location'] = entry['box'].get('location', None)
                    box_data.append(box_metadata)

            meta_data.append(filtered_metadata)

    return meta_data, polygon_data, box_data

def create_mask_maps(image_data, polygon_data, box_data):
    mask_maps = []

    for image, polygons, boxes in tqdm(zip(image_data, polygon_data, box_data), desc='Generating Mask Maps'):
        mask_map = np.zeros(image.shape[:2] + (4,), dtype=np.uint8)

        for entry in tqdm(polygon_data, desc='Drawing Polygon'):
            loc = entry['polygon_location']
            fill_value = 1  # 수정된 부분
            if loc:
                polygon_points = np.array(loc, np.int32).reshape((-1, 1, 2))
                cv2.fillPoly(mask_map[..., 0], [polygon_points], 255)
                cv2.fillPoly(mask_map[..., 1], [polygon_points], fill_value)

        for entry in tqdm(box_data, desc='Drawing Box'):
            loc = entry['box_location']
            fill_value = 1  # 수정된 부분
            if loc:
                cv2.rectangle(mask_map[..., 2], tuple(loc[:2]), tuple(loc[2:]), 255, thickness=-1)
                cv2.rectangle(mask_map[..., 3], tuple(loc[:2]), tuple(loc[2:]), fill_value, thickness=-1)

        mask_maps.append(mask_map)

    return mask_maps

def resize_images(images, width, height):
    resized_images = []

    for image in tqdm(images, desc='Resizing Images'):
        resized_image = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
        resized_images.append(resized_image)

    return resized_images

def prepare_image_data_and_masks(src_path):
    image_paths, json_paths = get_image_and_json_paths(src_path)
    image_data = get_image_data(image_paths)
    meta_data, polygon_data, box_data = extract_metadata_and_locations_from_json(json_paths)
    masks_maps = create_mask_maps(image_data, polygon_data, box_data)
    original_size_mask_maps = resize_images(masks_maps, 96, 96)
    resized_mask_maps = create_mask_maps(resize_images(image_data, 96, 96), polygon_data, box_data)

    metadata_df = pd.DataFrame(meta_data)

    return image_data, metadata_df, original_size_mask_maps, resized_mask_maps

In [26]:
src_path = "/content/drive/Shareddrives/반려견"
image_data, metadata_df, original_size_mask_maps, resized_mask_maps = prepare_image_data_and_masks(src_path)

Loading Images: 100%|██████████| 750/750 [00:25<00:00, 28.91it/s]
Loading JSON: 100%|██████████| 749/749 [00:00<00:00, 952.63 file/s]
Generating Mask Maps: 0it [00:00, ?it/s]
Drawing Polygon:   0%|          | 0/764 [00:00<?, ?it/s]
Generating Mask Maps: 0it [00:00, ?it/s]


TypeError: ignored

# Modeling

In [None]:
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# train, test set 분할
test_size = 0.2
temp_img_data, test_img_data, temp_meta_data, test_meta_data, temp_orig_size_maps, test_orig_size_maps, temp_resized_maps, test_resized_maps = train_test_split(
    image_data, metadata_df, original_size_mask_maps, resized_mask_maps,
    test_size=test_size, random_state=42
)

# test set을 기준으로 적용된 비율로 val_size 계산
val_size = 0.2
adjusted_val_size = val_size / (1.0 - test_size)

# train, validation set 분할
train_img_data, val_img_data, train_meta_data, val_meta_data, train_orig_size_maps, val_orig_size_maps, train_resized_maps, val_resized_maps = train_test_split(
    temp_img_data, temp_meta_data, temp_orig_size_maps, temp_resized_maps,
    test_size=adjusted_val_size, random_state=42
)

In [None]:
# Sequential 모델 생성
model = models.Sequential()

# 첫번째 Conv2D 레이어
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(96,96,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3)))

# 첫번째 Dropout 레이어
model.add(Dropout(0.25))

# 두번째 Conv2D 레이어
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# 두번째 Dropout 레이어
model.add(Dropout(0.25))

# 세번째 Conv2D 레이어
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# 세번째 Dropout 레이어
model.add(Dropout(0.25))

# Flatten 레이어
model.add(Flatten())

# 첫번째 Dense 레이어
model.add(Dense(units=1024, activation='relu'))
model.add(BatchNormalization())

# 두번째 Dropout 레이어
model.add(Dropout(0.5))

# 두번째 Dense 레이어: 최종 출력 레이어
model.add(Dense(units=7, activation='softmax'))

# 모델 컴파일
opt = Adam(lr=0.001, decay=0.00001)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# 모델 구조 요약
model.summary()

# 모델 학습
epochs = 150
batch_size = 32

history = model.fit(train_data, epochs=epochs, batch_size=batch_size, validation_data=val_data)

# 모델 평가
test_loss, test_acc = model.evaluate(test_data)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

In [None]:
# TensorFlow Lite 모델로 변환
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# 변환된 모델을 파일로 저장
with open('your_model.tflite', 'wb') as f:
    f.write(tflite_model)