# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the dataset
# paths used later in this notebook can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Preprocessing

In [None]:
# Operating system and file system utilities
import os
import glob

# JSON handling
import json

# Image processing
from PIL import Image
import cv2
from google.colab.patches import cv2_imshow
import numpy as np

# Machine learning models
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Deep learning models (TensorFlow)
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam


In [None]:
def get_paths(root_directory, extension):
    """Recursively collect files under ``root_directory`` matching a pattern.

    Args:
        root_directory: Directory whose whole tree is searched.
        extension: Glob pattern applied to file names, e.g. ``'*.jpg'``.

    Returns:
        A list of the matching paths in ascending (lexicographic) order.
    """
    # With recursive=True, '**' matches the root directory itself as well as
    # every directory nested below it.
    search_pattern = os.path.join(root_directory, '**', extension)
    return sorted(glob.glob(search_pattern, recursive=True))


def read_json_metadata(json_path):
    """Load a label JSON file and return its ``'metaData'`` entry.

    Args:
        json_path: Path of the JSON label file.

    Returns:
        The value stored under the top-level ``'metaData'`` key.

    Raises:
        KeyError: If the file has no top-level ``'metaData'`` key.
    """
    # The label files contain Korean text and JSON is UTF-8 by specification,
    # so decode explicitly instead of relying on the platform default
    # encoding (which is not UTF-8 on every system).
    with open(json_path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    return json_data['metaData']


def get_polygon_and_box_coordinates(json_path):
    """Extract polygon vertices and bounding boxes from a label JSON file.

    Every item of the top-level ``'labelingInfo'`` list is inspected. For an
    item with a ``'polygon'`` key, the first element of its ``'location'``
    list is read as a mapping with keys ``x1, y1, x2, y2, ...``. For an item
    with a ``'box'`` key, the first element of its ``'location'`` list is
    read as a mapping with keys ``x, y, width, height``.

    Args:
        json_path: Path of the JSON label file.

    Returns:
        A tuple ``(polygon_coordinates, box_location)`` where
        ``polygon_coordinates`` is a list with one ``[(x, y), ...]`` vertex
        list per polygon and ``box_location`` is a list of
        ``(x, y, width, height)`` tuples.

    Raises:
        KeyError: If ``'labelingInfo'`` or an expected coordinate key is
            missing.
    """
    # The label files contain Korean text and JSON is UTF-8 by specification,
    # so decode explicitly instead of relying on the platform default
    # encoding (which is not UTF-8 on every system).
    with open(json_path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    labeling_info = json_data['labelingInfo']
    polygon_coordinates = []
    box_location = []

    for item in labeling_info:
        if 'polygon' in item:
            location = item['polygon']['location'][0]
            # Each vertex contributes one 'x<k>' and one 'y<k>' key, so the
            # vertex count is half the number of keys.
            num_coordinates = len(location) // 2
            coordinates = [
                (location[f'x{i}'], location[f'y{i}'])
                for i in range(1, num_coordinates + 1)
            ]
            polygon_coordinates.append(coordinates)

        if 'box' in item:
            location = item['box']['location'][0]
            box_location.append(
                (location['x'], location['y'], location['width'], location['height'])
            )

    return polygon_coordinates, box_location


def crop_image_by_polygon(polygon_coordinates, image_path):
    """Black out everything outside one polygon of an image.

    Despite the name, the result keeps the full image size: pixels outside
    the polygon are zeroed by the mask rather than cut away.

    Args:
        polygon_coordinates: Sequence of ``(x, y)`` vertices describing ONE
            polygon (it is wrapped in a single-element list for
            ``cv2.fillPoly``).
        image_path: Path of the image file to read.

    Returns:
        A BGR ``numpy`` array with the same height and width as the image,
        in which only the pixels inside the polygon are kept.
    """
    # Use a context manager so the file handle is released right away, and
    # force 3-channel RGB so images stored in another mode (e.g. grayscale
    # or palette) do not break the RGB->BGR conversion below.
    with Image.open(image_path) as image:
        image_np = np.array(image.convert('RGB'))
    image_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # cv2.fillPoly requires int32 points. A plain np.array() of Python
    # numbers is typically int64 or float64, which OpenCV rejects, so round
    # and cast explicitly.
    points = np.rint(np.asarray(polygon_coordinates, dtype=np.float64)).astype(np.int32)

    mask = np.zeros(image_bgr.shape[:2], dtype=np.uint8)
    cv2.fillPoly(mask, [points], 255)

    return cv2.bitwise_and(image_bgr, image_bgr, mask=mask)


def crop_image_by_box(box_coordinate, image_path):
    """Cut a rectangular region out of an image.

    Args:
        box_coordinate: ``(x, y, width, height)`` of the box, where
            ``(x, y)`` is used as the top-left corner in pixel coordinates.
        image_path: Path of the image file to read.

    Returns:
        A BGR ``numpy`` array holding the pixels inside the box. Parts of the
        box that fall outside the image are dropped, so the result can be
        smaller than ``width`` x ``height``.
    """
    # Array slicing needs integers; round so float coordinates do not raise.
    x, y, width, height = (int(round(value)) for value in box_coordinate)

    # Use a context manager so the file handle is released right away, and
    # force 3-channel RGB so images stored in another mode (e.g. grayscale
    # or palette) do not break the RGB->BGR conversion below.
    with Image.open(image_path) as image:
        image_np = np.array(image.convert('RGB'))
    image_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # Clamp at zero: a negative slice index would otherwise count from the
    # opposite edge of the image and silently select the wrong region.
    left, top = max(x, 0), max(y, 0)
    right, bottom = max(x + width, 0), max(y + height, 0)

    return image_bgr[top:bottom, left:right]

In [None]:
# Glob patterns for the image files and for their JSON label files.
image_extensions = '*.jpg'
json_extension = '*.json'

# Dataset folder on the shared drive (the path points at Validation/반려묘).
root_directory = '/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/Validation/반려묘'

# Both lists come back sorted, so matching image/label files are expected to
# sit at the same index.
image_paths = get_paths(root_directory, image_extensions)
json_paths = get_paths(root_directory, json_extension)

In [None]:
# Preview the first five image/label path pairs, each followed by a
# 50-character separator line, to check that the two lists line up.
for image_path, json_path in zip(image_paths[:5], json_paths[:5]):
    print(image_path, json_path, '=' * 50, sep='\n')

/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/Validation/반려묘/피부/일반카메라/무증상/A2_비듬_각질_상피성잔고리/IMG_C_A7_017282.jpg
/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/Validation/반려묘/피부/일반카메라/무증상/A2_비듬_각질_상피성잔고리/IMG_C_A7_017282.json
/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/Validation/반려묘/피부/일반카메라/무증상/A2_비듬_각질_상피성잔고리/IMG_C_A7_017283.jpg
/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/Validation/반려묘/피부/일반카메라/무증상/A2_비듬_각질_상피성잔고리/IMG_C_A7_017283.json
/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/Validation/반려묘/피부/일반카메라/무증상/A2_비듬_각질_상피성잔고리/IMG_C_A7_017284.jpg
/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/Validation/반려묘/피부/일반카메라/무증상/A2_비듬_각

In [None]:
# Inspect the 'metaData' entry of the first JSON label file.
read_json_metadata(json_paths[0])

{'Raw data ID': 'IMG_C_A7_017282.jpg',
 'copyrighter': '㈜미소정보기술',
 'resolution': '1920X1080',
 'date': '2021-12-15',
 'breed': '코리안숏헤어',
 'age': '6',
 'gender': 'F',
 'region': 'L',
 'camera type': 'IMG',
 'species': 'C',
 'lesions': 'A7',
 'diagnosis': '',
 'Path': '무증상',
 'identifier': '피부질환',
 'src_path': '/라벨링데이터/반려묘/피부/일반카메라/무증상/A2_비듬_각질_상피성잔고리',
 'label_path': '/라벨링데이터/반려묘/피부/일반카메라/무증상/A2_비듬_각질_상피성잔고리',
 'type': 'json',
 'fileformat': 'jpg'}

In [None]:
# Read the first image with OpenCV and show it inline in the notebook.
cv2_imshow(cv2.imread(image_paths[0]))

Output hidden; open in https://colab.research.google.com to view.

# Modeling

In [None]:
# 분류 모델과 하이퍼파라미터를 설정합니다
# accuracy_score is used in the loop below but was never imported.
from sklearn.metrics import accuracy_score

# Configure the classical classifiers and their hyperparameters.
model_LR = LogisticRegression(random_state=9)
model_LDA = LinearDiscriminantAnalysis(solver='svd')
model_KNN = KNeighborsClassifier(n_neighbors=5)
# DecisionTreeClassifier is a single tree and has no `n_estimators` parameter
# (passing it raises TypeError); seed it like the other seeded models instead.
model_DT = DecisionTreeClassifier(random_state=0)
model_RF = RandomForestClassifier(n_estimators=200, random_state=0)
model_GaussianNB = GaussianNB(var_smoothing=1e-09)
model_SVM = SVC(kernel='linear', C=1, random_state=0)

# Collect the classifiers in a list. The list is deliberately NOT called
# `models`, because that would overwrite the imported `tensorflow.keras.models`
# module that the notebook relies on for `models.Sequential()`.
classifiers = [model_LR, model_LDA, model_KNN, model_DT, model_RF, model_GaussianNB, model_SVM]

# Train each classifier and print its accuracy on the test split.
# NOTE(review): X_train, X_test, y_train and y_test are not defined in the
# visible part of this notebook; they must be created before running this cell.
for clf in classifiers:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f"{clf.__class__.__name__}: {score}")

In [None]:
# Build the CNN as a Keras Sequential model.
# Referenced through `tf.keras` rather than the imported `models` name, which
# is easy to shadow in a notebook (e.g. by a list of models).
model = tf.keras.models.Sequential()

# Convolution block 1: one Conv2D layer on 96x96x3 inputs.
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(96,96,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3)))

# Dropout 1
model.add(Dropout(0.25))

# Convolution block 2: two Conv2D layers with 64 filters each.
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Dropout 2
model.add(Dropout(0.25))

# Convolution block 3: two Conv2D layers with 128 filters each.
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Dropout 3
model.add(Dropout(0.25))

# Flatten the feature maps for the dense layers.
model.add(Flatten())

# Dense layer 1
model.add(Dense(units=1024, activation='relu'))
model.add(BatchNormalization())

# Dropout 4 (before the output layer)
model.add(Dropout(0.5))

# Dense layer 2: final output layer, softmax over 7 classes.
model.add(Dense(units=7, activation='softmax'))

# Compile the model.
# `lr=` and `decay=` are no longer accepted by current Keras optimizers.
# The old `decay` meant lr_t = lr / (1 + decay * step), which is exactly an
# InverseTimeDecay schedule with decay_steps=1.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=0.00001,
)
opt = Adam(learning_rate=lr_schedule)
# NOTE(review): 'categorical_crossentropy' expects one-hot encoded labels;
# confirm that is what the data pipeline yields.
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Print the model architecture summary.
model.summary()

# Train the model.
epochs = 150
# NOTE(review): train_data, val_data and test_data are not defined in the
# visible part of this notebook. They are passed without separate labels, so
# they are presumably dataset/generator objects that already yield batches.
# Keras rejects `batch_size` for such inputs, so it is not passed to fit();
# apply this batch size when building the datasets instead.
batch_size = 32

history = model.fit(train_data, epochs=epochs, validation_data=val_data)

# Evaluate the model on the test data.
test_loss, test_acc = model.evaluate(test_data)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

In [None]:
# TensorFlow Lite 모델로 변환
# Convert the trained Keras model into a serialized TensorFlow Lite model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to disk in binary mode.
with open('your_model.tflite', 'wb') as tflite_file:
    tflite_file.write(tflite_model)