# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [1]:
# Mount Google Drive at /content/drive so files stored there can be read
# through ordinary filesystem paths (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def get_image_and_json_paths(src_path):
    """Find every ``.jpg`` and ``.json`` file at any depth below *src_path*.

    Args:
        src_path: Directory to search recursively.

    Returns:
        A ``(image_paths, json_paths)`` tuple of two lists of path strings.
        Each list is sorted on its own; nothing here checks that the two
        lists pair up one-to-one.
    """
    found = []
    for file_pattern in ('*.jpg', '*.json'):
        # '**' together with recursive=True also matches files directly in src_path.
        matches = glob.glob(os.path.join(src_path, '**', file_pattern), recursive=True)
        matches.sort()
        found.append(matches)

    image_files, json_files = found
    return image_files, json_files

def get_image_data(image_paths):
    """Decode every file in *image_paths* with ``cv2.imread``.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        A list with one entry per input path, in input order. ``cv2.imread``
        does not raise for a missing or undecodable file; it returns ``None``.
        Such entries are kept as ``None`` so list positions still correspond
        to *image_paths*, and the affected paths are printed once loading has
        finished so the problem is visible before a later ``image.shape``
        access fails.
    """
    images = []
    unreadable = []

    for image_path in tqdm(image_paths, desc='Loading Images'):
        image = cv2.imread(image_path)
        if image is None:
            unreadable.append(image_path)
        images.append(image)

    if unreadable:
        print(f"Warning: {len(unreadable)} image(s) could not be read; their entries are None:")
        for image_path in unreadable:
            print(f"  {image_path}")

    return images

def extract_metadata_and_locations_from_json(json_paths):
    """Parse annotation JSON files into metadata rows and annotation locations.

    Each file is read as UTF-8, stripped of control characters (which would
    otherwise make ``json.loads`` fail), and parsed. From every successfully
    processed file this collects:

    * one metadata dict with the keys ``breed``, ``age``, ``gender``,
      ``region`` and ``lesions`` (``None`` for any key that is absent; a file
      without a ``metaData`` object yields a row of all ``None``),
    * the ``location`` value of every ``polygon`` found in ``labelingInfo``,
    * the ``location`` value of every ``box`` found in ``labelingInfo``.

    Args:
        json_paths: Iterable of JSON file paths.

    Returns:
        ``(meta_data, polygon_data, box_data)``. ``meta_data`` has one dict per
        successfully processed file. ``polygon_data`` and ``box_data`` are flat
        lists across all files, so they carry no record of which file an
        annotation came from.

    A file that cannot be read or does not have the expected structure is
    reported on stdout and skipped as a whole: it contributes nothing to any
    of the three lists.
    """
    # Compiled once instead of once per file.
    control_chars = re.compile(r'[\x00-\x1F\x7F-\x9F]')
    metadata_keys = ('breed', 'age', 'gender', 'region', 'lesions')

    meta_data, polygon_data, box_data = [], [], []

    for json_path in tqdm(json_paths, desc='Loading JSON', unit=' file'):
        try:
            with open(json_path, "r", encoding="utf-8") as file:
                raw_text = file.read()

            json_data = json.loads(control_chars.sub('', raw_text))
            labeling_info = json_data['labelingInfo']
            # A missing or null 'metaData' becomes an empty mapping so the file
            # still produces a row instead of failing on None.get(...).
            metadata = json_data.get('metaData') or {}

            filtered_metadata = {key: metadata.get(key, None) for key in metadata_keys}

            # Gather this file's annotations locally and commit them only after
            # the whole file parsed, so a failure cannot leave partial results.
            file_polygons, file_boxes = [], []
            for entry in labeling_info:
                if 'polygon' in entry:
                    file_polygons.append(entry['polygon'].get('location', None))
                if 'box' in entry:
                    file_boxes.append(entry['box'].get('location', None))
        except (OSError, ValueError, KeyError, AttributeError, TypeError) as e:
            # Covers I/O, decoding/JSON syntax, and unexpected-structure errors.
            # Programming errors such as NameError are deliberately not hidden.
            print(f"Error occurred while processing file: {json_path}")
            print(f"Error message: {e}")
            continue

        meta_data.append(filtered_metadata)
        polygon_data.extend(file_polygons)
        box_data.extend(file_boxes)

    return meta_data, polygon_data, box_data

# camera dog

# camera cat

In [2]:
import cv2
import glob
import json
import numpy as np
import os
from tqdm import tqdm
import re
import pandas as pd
from google.colab.patches import cv2_imshow

In [None]:


def create_mask_maps(image_data, polygon_data, box_data):
    """Rasterise polygon and box annotations into a 4-channel mask per image.

    Channel layout of every returned ``uint8`` array of shape ``(H, W, 4)``:

    * 0 - polygons filled with 255
    * 1 - polygons filled with ``fill_value`` (1)
    * 2 - boxes filled with 255
    * 3 - boxes filled with ``fill_value`` (1)

    Args:
        image_data: Sequence of image arrays; only ``image.shape[:2]`` is used.
        polygon_data: Iterable of dicts of the form
            ``{'polygon': {'location': [{'x': ..., 'y': ...}, ...]}}``.
        box_data: Iterable of dicts of the form
            ``{'box': {'location': <sequence of four numbers>}}``; the first
            two values and the last two values are used as opposite corners.

    Returns:
        A list with one mask map per image.

    Entries that are not dicts (including ``None``), lack the expected key,
    or have an empty location are skipped. Every annotation is drawn onto
    every image, because the inputs carry no per-image association.

    NOTE(review): extract_metadata_and_locations_from_json in this file
    appends bare ``location`` values rather than dicts holding a
    'polygon'/'box' key, so its output is skipped here and the masks stay
    empty. Confirm the intended input format.
    """
    fill_value = 1

    # The annotation geometry does not depend on the image, so convert it once
    # instead of once per image.
    polygons = []
    for entry in polygon_data:
        if isinstance(entry, dict) and 'polygon' in entry:
            loc = entry['polygon']['location']
            if loc:
                loc_array = [[coord['x'], coord['y']] for coord in loc]
                polygons.append(np.array(loc_array, np.int32).reshape((-1, 1, 2)))

    boxes = []
    for entry in box_data:
        if isinstance(entry, dict) and 'box' in entry:
            loc = entry['box']['location']
            if loc:
                boxes.append((tuple(loc[:2]), tuple(loc[2:])))

    mask_maps = []

    for image in tqdm(image_data, desc='Generating Mask Maps for Each Image'):
        height, width = image.shape[:2]
        # OpenCV draws in place and needs a contiguous target. A channel view
        # such as mask_map[..., 0] of an (H, W, 4) array is strided, so each
        # channel is drawn on its own 2-D plane and the planes are stacked.
        planes = [np.zeros((height, width), dtype=np.uint8) for _ in range(4)]

        for polygon_points in polygons:
            cv2.fillPoly(planes[0], [polygon_points], 255)
            cv2.fillPoly(planes[1], [polygon_points], fill_value)

        for first_corner, second_corner in boxes:
            cv2.rectangle(planes[2], first_corner, second_corner, 255, thickness=-1)
            cv2.rectangle(planes[3], first_corner, second_corner, fill_value, thickness=-1)

        mask_maps.append(np.stack(planes, axis=-1))

    return mask_maps

def combine_images_and_masks(image_list, mask_list):
    """Append each mask to its image as one extra trailing channel.

    Args:
        image_list: Sequence of image arrays.
        mask_list: Sequence of 2-D mask arrays, indexed in step with
            *image_list*.

    Returns:
        A list holding, for every image, the image concatenated along the
        last axis with its mask expanded to a single channel.
    """
    stacked = []
    progress = tqdm(enumerate(image_list), total=len(image_list), desc='Combining Images and Masks')

    for position, picture in progress:
        # Give the 2-D mask a channel axis so it can be joined to the image.
        mask_plane = mask_list[position][:, :, np.newaxis]
        stacked.append(np.concatenate([picture, mask_plane], axis=-1))

    return stacked

def resize_images(images, width, height, interpolation=None):
    """Resize every array in *images* to ``width`` x ``height`` with OpenCV.

    Args:
        images: Iterable of image (or mask) arrays.
        width: Target width in pixels.
        height: Target height in pixels.
        interpolation: OpenCV interpolation flag. Defaults to
            ``cv2.INTER_AREA``, the behaviour this function always had. Pass
            ``cv2.INTER_NEAREST`` when resizing label masks so that label
            values are not averaged together.

    Returns:
        A list of resized arrays in input order.
    """
    # Resolved at call time so the default does not need cv2 at definition time.
    if interpolation is None:
        interpolation = cv2.INTER_AREA

    return [
        cv2.resize(image, (width, height), interpolation=interpolation)
        for image in tqdm(images, desc='Resizing Images')
    ]

In [4]:
# Root folder searched for images and JSON annotations; it lives on a shared
# drive under the Google Drive mount point.
src_path = "/content/drive/Shareddrives/반려묘"

In [5]:
# Collect the sorted .jpg and .json file paths found below src_path.
image_paths, json_paths = get_image_and_json_paths(src_path)

In [6]:
# Decode all images into memory (the recorded run below took about
# 1 h 47 min for 5500 files).
image_data = get_image_data(image_paths)

Loading Images: 100%|██████████| 5500/5500 [1:46:37<00:00,  1.16s/it]


In [None]:
# Parse the annotation files into metadata rows plus polygon and box locations.
meta_data, polygon_data, box_data = extract_metadata_and_locations_from_json(json_paths)

Loading JSON:  51%|█████▏    | 2821/5500 [28:25<29:41,  1.50 file/s]

In [None]:
# Build one 4-channel mask map per image from the polygon and box annotations.
masks_maps = create_mask_maps(image_data, polygon_data, box_data)

In [None]:
# Channel 0 of each mask map: polygons filled with 255.
polygon_binary_masks = [mask_map[..., 0] for mask_map in masks_maps]

In [None]:
# Append the polygon mask to each image as an extra channel.
# NOTE(review): this rebinds polygon_binary_masks to the combined image+mask
# arrays, so the bare masks are no longer reachable under this name.
polygon_binary_masks = combine_images_and_masks(image_data, polygon_binary_masks)

In [None]:
def check_image_mask_properties(image_list, mask_list):
    """Print the shape of every image/mask pair and flag inconsistencies.

    For each position this prints the image shape and the mask shape, then a
    warning line for each of the following conditions that holds:

    * the first two dimensions (height, width) of image and mask differ,
    * the image does not have 3 channels,
    * the mask does not have 1 channel.

    A 2-D array (no channel axis) counts as having one channel, so
    single-channel masks stored as ``(H, W)`` are accepted instead of raising
    ``IndexError`` on ``shape[2]``.

    Args:
        image_list: Sequence of objects exposing a ``shape`` tuple.
        mask_list: Sequence of objects exposing a ``shape`` tuple, indexed in
            step with *image_list*.

    Returns:
        None. All output goes to stdout.
    """
    def _channel_count(array):
        # Arrays without a third axis are treated as single-channel.
        shape = array.shape
        return shape[2] if len(shape) > 2 else 1

    for number, image in enumerate(image_list, start=1):
        mask = mask_list[number - 1]

        print(f"Image {number}:")
        print(f"  Image Shape: {image.shape}")
        print(f"  Mask Shape: {mask.shape}")

        if image.shape[:2] != mask.shape[:2]:
            print("  Image and mask shapes do not match!")

        if _channel_count(image) != 3:
            print("  Image does not have 3 color channels!")

        if _channel_count(mask) != 1:
            print("  Mask does not have 1 channel!")

        print()

# Check the properties of the image and mask data.
# NOTE(review): at this point polygon_binary_masks holds the combined
# image+mask arrays produced by combine_images_and_masks, not bare masks, so
# the one-channel check is applied to those combined arrays. Confirm intent.
check_image_mask_properties(image_data, polygon_binary_masks)

In [None]:
# Channel 1 of each mask map: polygons filled with the label value 1.
polygon_segmentation_maps = [mask_map[..., 1] for mask_map in masks_maps]

In [None]:
# Append the polygon label map to each image as an extra channel (rebinds the name).
polygon_segmentation_maps = combine_images_and_masks(image_data, polygon_segmentation_maps)

In [None]:
# Channel 2 of each mask map: boxes filled with 255.
box_binary_masks = [mask_map[..., 2] for mask_map in masks_maps]

In [None]:
# Append the box mask to each image as an extra channel (rebinds the name).
box_binary_masks = combine_images_and_masks(image_data, box_binary_masks)

In [None]:
# Channel 3 of each mask map: boxes filled with the label value 1.
box_segmentation_maps = [mask_map[..., 3] for mask_map in masks_maps]

In [None]:
# Append the box label map to each image as an extra channel (rebinds the name).
box_segmentation_maps = combine_images_and_masks(image_data, box_segmentation_maps)

In [None]:
# Shrink the mask maps that were drawn at full image size down to 96x96.
# NOTE(review): resize_images resamples with cv2.INTER_AREA, which presumably
# blends 0/1/255 values along mask edges. Confirm this is acceptable for
# label masks.
original_size_mask_maps = resize_images(masks_maps, 96, 96)

In [None]:
# Alternative: draw the mask maps directly on 96x96 canvases.
# NOTE(review): create_mask_maps uses the annotation coordinates unchanged,
# so they are not rescaled to the 96x96 images here. Confirm this is intended.
resized_mask_maps = create_mask_maps(resize_images(image_data, 96, 96), polygon_data, box_data)

In [None]:
# One row per parsed annotation file: breed, age, gender, region, lesions.
metadata_df = pd.DataFrame(meta_data)

In [None]:
# Display the metadata table as the cell output.
metadata_df

# microscope dog

# microscope cat

In [None]:
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import classification_report

In [None]:
# Stack the resized images into a single ndarray. train_test_split returns
# Python lists when given a list, and Keras does not accept a list of
# per-sample arrays as the input of a single-input model.
resized_image_array = np.stack(resize_images(image_data, 96, 96))

# One-hot encode the lesion labels: the model is compiled with
# categorical_crossentropy and the evaluation cell takes argmax over axis 1,
# both of which need a 2-D one-hot array rather than raw label values.
# Columns follow the sorted unique labels; rows with a missing label are all zero.
# NOTE(review): the network's output layer has 7 units. Confirm that
# lesion_one_hot has exactly 7 columns for this dataset.
lesion_one_hot = pd.get_dummies(metadata_df['lesions'])
one_hot_labels = lesion_one_hot.to_numpy(dtype=np.float32)

# 70 % train, then the remaining 30 % is halved into 15 % test and 15 % validation.
train_images, test_images, train_labels, test_labels = train_test_split(resized_image_array, one_hot_labels, test_size=0.3, random_state=42)
test_images, val_images, test_labels, val_labels = train_test_split(test_images, test_labels, test_size=0.5, random_state=42)

In [None]:
# Create the Sequential model
model = models.Sequential()

# Block 1: convolution + batch norm + 3x3 max pooling on 96x96x3 inputs
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(96,96,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3)))

# Dropout for regularisation
model.add(Dropout(0.25))

# Block 2: two 64-filter convolutions, then 2x2 max pooling
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Dropout(0.25))

# Block 3: two 128-filter convolutions, then 2x2 max pooling
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Dropout(0.25))

# Flatten layer
model.add(Flatten())

# Dense layer followed by batch norm and dropout
model.add(Dense(units=1024, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

# Final output layer: 7-way softmax
model.add(Dense(units=7, activation='softmax'))

# Compile the model.
# The `lr` and `decay` keyword arguments of Adam are deprecated or rejected by
# current Keras optimizers. `learning_rate` is the supported name, and the old
# `decay` behaviour, lr / (1 + decay * iterations), is reproduced exactly by an
# InverseTimeDecay schedule with decay_steps=1.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=0.00001,
)
opt = Adam(learning_rate=lr_schedule)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Print a summary of the model architecture
model.summary()

# Train the model
epochs = 150
batch_size = 32

history = model.fit(train_images, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(val_images, val_labels))

In [None]:
# Evaluate the model on the held-out test split
test_loss, test_acc = model.evaluate(test_images, test_labels)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

# Compute additional metrics for evaluating the classification model:
# turn predicted probabilities and the reference labels into class indices.
y_pred = model.predict(test_images)
y_pred_classes = np.argmax(y_pred, axis=1)
# NOTE(review): argmax over axis 1 assumes test_labels is a 2-D one-hot
# array. Confirm the labels are encoded that way before this cell runs.
y_true_classes = np.argmax(test_labels, axis=1)

# Report per-class precision/recall/F1 with classification_report
print(classification_report(y_true_classes, y_pred_classes))

In [None]:
# Convert the trained Keras model to a TensorFlow Lite model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the converted model to a file (written to the current working directory)
with open('your_model.tflite', 'wb') as f:
    f.write(tflite_model)