# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Preprocessing

In [6]:
import cv2
import glob
import json
import numpy as np
import os
from tqdm import tqdm
import re

def get_image_and_json_paths(src_path):
    image_paths = glob.glob(os.path.join(src_path, '**', '*.jpg'), recursive=True)
    image_paths.sort()

    json_paths = glob.glob(os.path.join(src_path, '**', '*.json'), recursive=True)
    json_paths.sort()

    return image_paths, json_paths


def get_image_data(image_paths):
    image_data = []
    for image_path in tqdm(image_paths, desc='Loading Images'):
        image = cv2.imread(image_path)
        image_data.append(image)

    return image_data

def get_data_info(json_paths):
    meta_data = []
    polygon_data = []
    box_data = []

    for json_path in tqdm(json_paths, desc='Loading JSON', unit=' file'):
        try:
            with open(json_path, "r", encoding="utf-8") as file:
                file_content = file.read()
                control_char_regex = r'[\x00-\x1F\x7F-\x9F]'
                cleaned_content = re.sub(control_char_regex, '', file_content)
                json_data = json.loads(cleaned_content)
                metadata = json_data.get('metaData', None)

                filtered_metadata = {
                    'breed': metadata.get('breed', None),
                    'age': metadata.get('age', None),
                    'gender': metadata.get('gender', None),
                    'region': metadata.get('region', None),
                    'species': metadata.get('species', None),
                    'lesions': metadata.get('lesions', None),
                    'polygon_location': [],
                    'box_location': []
                }

                labeling_info = json_data.get('labelingInfo', [])

                for entry in labeling_info:
                    if 'polygon' in entry:
                        polygon_metadata = filtered_metadata.copy()
                        polygon_metadata['polygon_location'] = entry['polygon'].get('location', None)
                        polygon_data.append(polygon_metadata)
                    if 'box' in entry:
                        box_metadata = filtered_metadata.copy()
                        box_metadata['box_location'] = entry['box'].get('location', None)
                        box_data.append(box_metadata)

                meta_data.append(filtered_metadata)

        except json.JSONDecodeError as e:
            print(f"Error decoding: {json_path} - {e}")

    return meta_data, polygon_data, box_data

def resize_image(image, width, height):
    tqdm.write(f'Resizing image from {image.shape[:2]} to {(width, height)}')
    resized_image = cv2.resize(image, (width, height), interpolation=cv2.INTER_AREA)
    return resized_image

def create_masks_maps(image, polygon_info, box_info):
    masks_map = np.zeros(image.shape[:2] + (4,), dtype=np.uint8)

    for entry in tqdm(polygon_info, desc='Drawing Polygon'):
        loc = entry.get('polygon_location', [])
        fill_value = entry.get('lesions', 0) + 1
        if loc:
            polygon_points = np.array(loc, np.int32).reshape((-1, 1, 2))
            cv2.fillPoly(masks_map[..., 0], [polygon_points], 255)
            cv2.fillPoly(masks_map[..., 1], [polygon_points], fill_value)

    for entry in tqdm(box_info, desc='Drawing Box'):
        loc = entry.get('box_location', [])
        fill_value = entry.get('lesions', 0) + 1
        if loc:
            cv2.rectangle(masks_map[..., 2], tuple(loc[:2]), tuple(loc[2:]), 255, thickness=-1)
            cv2.rectangle(masks_map[..., 3], tuple(loc[:2]), tuple(loc[2:]), fill_value, thickness=-1)

    return masks_map

def generate_masks_maps(image_data, polygon_data, box_data):
    masks_maps = []
    for data in tqdm(zip(image_data, polygon_data, box_data), desc='Generating Masks Maps'):
        masks_map = create_masks_maps(*data)
        masks_maps.append(masks_map)
    return masks_maps

In [7]:
src_path = "/content/drive/Shareddrives/152.반려동물 피부질환 데이터/validation/반려견"

In [None]:
image_paths, json_paths = get_image_and_json_paths(src_path)

In [None]:
image_data = get_image_data(image_paths)

NameError: ignored

In [None]:
meta_data, polygon_data, box_data = get_data_info(json_paths)

Loading JSON:   4%|▍         | 1748/40283 [04:46<00:59, 650.88 file/s]

Error decoding: /content/drive/Shareddrives/152.반려동물 피부질환 데이터/validation/반려견/무증상/A1_구진_플라크/IMG_D_A7_242015.json - Expecting value: line 1 column 1 (char 0)


Loading JSON:  23%|██▎       | 9246/40283 [1:07:29<3:46:34,  2.28 file/s]


FileNotFoundError: ignored

In [None]:
masks_maps = generate_masks_maps(image_data, polygon_data, box_data)

In [None]:
Before_Resize_Polygon_Binary_Mask.csv
After_Resize_Polygon_Binary_Mask.csv
Before_Resize_Box_Binary_Mask.csv
After_Resize_Box_Binary_Mask.csv
Before_Resize_Polygon_Segmentation_Map.csv
After_Resize_Polygon_Segmentation_Map.csv
Before_Resize_Box_Segmentation_Map.csv
After_Resize_Box_Segmentation_Map.csv
Before_Resize_Polygon_Box_Binary_Mask.csv
After_Resize_Polygon_Box_Binary_Mask.csv
Before_Resize_Polygon_Box_Segmentation_Map.csv
After_Resize_Polygon_Box_Segmentation_Map.csv
Before_Resize_Polygon_Binary_Mask_MetaData.csv
After_Resize_Polygon_Binary_Mask_MetaData.csv
Before_Resize_Box_Binary_Mask_MetaData.csv
After_Resize_Box_Binary_Mask_MetaData.csv
Before_Resize_Polygon_Segmentation_Map_MetaData.csv
After_Resize_Polygon_Segmentation_Map_MetaData.csv
Before_Resize_Box_Segmentation_Map_MetaData.csv
After_Resize_Box_Segmentation_Map_MetaData.csv

# Modeling

In [None]:
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [None]:
# 분류 모델과 하이퍼파라미터를 설정합니다
model_LR = LogisticRegression(random_state=9)
model_LDA = LinearDiscriminantAnalysis(solver='svd')
model_KNN = KNeighborsClassifier(n_neighbors=5)
model_DT = DecisionTreeClassifier(n_estimators=100)
model_RF = RandomForestClassifier(n_estimators=200, random_state=0)
model_GaussianNB = GaussianNB(var_smoothing=1e-09)
model_SVM = SVC(kernel='linear', C=1, random_state=0)

# 분류 모델들을 리스트에 담습니다
models = [model_LR, model_LDA, model_KNN, model_DT, model_RF, model_GaussianNB, model_SVM]

# 각 분류 모델을 학습시키고 예측 결과를 출력합니다
for model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f"{model.__class__.__name__}: {score}")

In [None]:
# Sequential 모델 생성
model = models.Sequential()

# 첫번째 Conv2D 레이어
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(96,96,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3)))

# 첫번째 Dropout 레이어
model.add(Dropout(0.25))

# 두번째 Conv2D 레이어
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# 두번째 Dropout 레이어
model.add(Dropout(0.25))

# 세번째 Conv2D 레이어
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# 세번째 Dropout 레이어
model.add(Dropout(0.25))

# Flatten 레이어
model.add(Flatten())

# 첫번째 Dense 레이어
model.add(Dense(units=1024, activation='relu'))
model.add(BatchNormalization())

# 두번째 Dropout 레이어
model.add(Dropout(0.5))

# 두번째 Dense 레이어: 최종 출력 레이어
model.add(Dense(units=7, activation='softmax'))

# 모델 컴파일
opt = Adam(lr=0.001, decay=0.00001)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# 모델 구조 요약
model.summary()

# 모델 학습
epochs = 150
batch_size = 32

history = model.fit(train_data, epochs=epochs, batch_size=batch_size, validation_data=val_data)

# 모델 평가
test_loss, test_acc = model.evaluate(test_data)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

In [None]:
# TensorFlow Lite 모델로 변환
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# 변환된 모델을 파일로 저장
with open('your_model.tflite', 'wb') as f:
    f.write(tflite_model)