# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [None]:
from google.colab import drive

# Connect Google Drive to the Colab runtime at the '/content/drive' folder.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import zipfile
import shutil

# Zip archives on the shared drive: two under 1.Training (TL01, TL02)
# and one under 2.Validation (VL01).
zip_path_list = [
    '/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/01.데이터/1.Training/2_라벨링데이터_0605_add/TL01.zip',
    '/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/01.데이터/1.Training/2_라벨링데이터_0605_add/TL02.zip',
    '/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/01.데이터/2.Validation/2_라벨링데이터_0605_add/VL01.zip'
]

# Extraction targets on My Drive: index 0 is the training folder,
# index 1 is the validation folder.
destination_path_list = [
    '/content/drive/MyDrive/training',
    '/content/drive/MyDrive/validation'
]

def zip_extract(zip_path, destination_path):
    """Unpack every member of the zip archive at ``zip_path`` into ``destination_path``."""
    archive = zipfile.ZipFile(zip_path, mode='r')
    try:
        archive.extractall(path=destination_path)
    finally:
        # Always release the archive handle, even if extraction fails.
        archive.close()

# Extract only the first training archive into the training folder;
# the other two extractions are left disabled below.
zip_extract(zip_path_list[0], destination_path_list[0])
# zip_extract(zip_path_list[1], destination_path_list[0])
# zip_extract(zip_path_list[2], destination_path_list[1])

OSError: ignored

In [None]:
import shutil

directory_path = "/content/drive/MyDrive/반려견"

# Delete the directory together with every file and subdirectory inside it.
shutil.rmtree(directory_path)

In [None]:
import shutil

# Path of the extracted file or folder on the Colab runtime (example)
unzipped_file_path = '/content/unzipped_file_or_folder'

# Destination path on My Drive or on a shared drive (example)
destination_path = '/content/drive/MyDrive/some_destination_path' # for My Drive
# destination_path = '/content/drive/Shareddrives/some_desired_path' # for a shared drive

# Move the extracted file or folder to My Drive or the shared drive
shutil.move(unzipped_file_path, destination_path)


In [None]:
import shutil
# Move the '반려묘' folder from My Drive into the shared drive's training folder.
shutil.move('/content/drive/MyDrive/반려묘', '/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/training/')

'/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/training/반려묘'

# Preprocessing

In [1]:
import os
import glob
import json
import cv2
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from tensorflow.keras import models
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

In [None]:
# Data directory path
data_dir = '/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/01.데이터'

# List the directory once, in sorted order, so both lists below come from the
# same snapshot and share the same ordering.
_data_dir_entries = sorted(os.listdir(data_dir))

# JSON files directly inside data_dir, sorted by file name
json_files = [os.path.join(data_dir, f) for f in _data_dir_entries if f.endswith('.json')]

# Image files directly inside data_dir, sorted by file name as well.
# The processing loop pairs json_files and image_files by position, so both
# lists must be ordered the same way.
# NOTE(review): positional pairing presumably relies on each image sharing its
# file stem with its JSON file — confirm against the dataset.
image_files = [os.path.join(data_dir, f) for f in _data_dir_entries if f.endswith('.jpg')]

In [None]:
import os
import glob

def get_paths(root_directory, extensions):
    """Recursively collect paths under ``root_directory`` matching glob pattern(s).

    Args:
        root_directory: Directory to search; all nested subdirectories are
            searched as well.
        extensions: A single glob pattern such as ``'*.jpg'``, or an iterable
            of such patterns.

    Returns:
        A sorted list of matching paths without duplicates. Sorting makes the
        result reproducible (``glob`` itself returns entries in arbitrary
        order), so lists collected for different patterns are ordered
        consistently.
    """
    patterns = [extensions] if isinstance(extensions, str) else list(extensions)

    matches = set()
    for pattern in patterns:
        # '**' together with recursive=True matches zero or more directory levels.
        matches.update(
            glob.glob(os.path.join(root_directory, '**', pattern), recursive=True)
        )

    return sorted(matches)

# Glob patterns for the image files and the JSON files
image_extensions = '*.jpg'
json_extension = '*.json'

# Collect every matching path under the training folder on the shared drive
root_directory = '/content/drive/Shareddrives/데이터230705/152.반려동물 피부질환 데이터/training'
image_paths, json_paths = get_paths(root_directory, image_extensions), get_paths(root_directory, json_extension)

In [None]:
def read_json_metadata(json_path):
    """Read a JSON file and return its ``metaData`` entry.

    Args:
        json_path: Path to the JSON file.

    Returns:
        The value stored under the top-level ``'metaData'`` key.

    Raises:
        KeyError: If the file has no top-level ``'metaData'`` key.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # text encoding.
    with open(json_path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    return json_data['metaData']


def get_polygon_and_box_coordinates(json_path):
    """Extract polygon vertices and box rectangles from a JSON label file.

    Args:
        json_path: Path to a JSON file with a top-level ``'labelingInfo'`` list.

    Returns:
        A ``(polygon_coordinates, box_location)`` tuple:

        * ``polygon_coordinates`` – one list per ``'polygon'`` item, each a
          list of ``(x, y)`` vertex tuples in key order ``x1/y1, x2/y2, ...``.
        * ``box_location`` – one ``(x, y, width, height)`` tuple per ``'box'``
          item.

        Only the first element of each item's ``'location'`` list is read.

    Raises:
        KeyError: If ``'labelingInfo'`` or an expected coordinate key is missing.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # text encoding.
    with open(json_path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    polygon_coordinates = []
    box_location = []

    for item in json_data['labelingInfo']:
        if 'polygon' in item:
            location = item['polygon']['location'][0]
            # Vertices are stored as paired keys x1, y1, x2, y2, ..., so half
            # the number of keys is the number of vertices.
            num_coordinates = len(location) // 2
            polygon_coordinates.append(
                [(location[f'x{i}'], location[f'y{i}'])
                 for i in range(1, num_coordinates + 1)]
            )

        if 'box' in item:
            location = item['box']['location'][0]
            box_location.append(
                (location['x'], location['y'], location['width'], location['height'])
            )

    return polygon_coordinates, box_location


def crop_image_by_polygon(polygon_coordinates, image_path):
    """Black out everything outside the given polygon(s) of an image.

    Args:
        polygon_coordinates: Either a single polygon given as a sequence of
            ``(x, y)`` vertices, or a sequence of such polygons (the shape
            returned by ``get_polygon_and_box_coordinates``).
        image_path: Path of the image file to load.

    Returns:
        A BGR image array with the same height and width as the source image
        in which every pixel outside the polygon(s) is zero. The image is
        masked, not cut down to the polygon's bounding box.
    """
    # Load inside a context manager so the file handle is released, and force
    # three RGB channels so the colour conversion below gets the layout it
    # expects.
    with Image.open(image_path) as image:
        image_np = np.array(image.convert('RGB'))
    image_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # Accept one polygon as well as a list of polygons: a single polygon's
    # first element is an (x, y) pair whose first item is a scalar.
    polygons = polygon_coordinates
    if len(polygons) > 0 and np.isscalar(polygons[0][0]):
        polygons = [polygons]

    # cv2.fillPoly needs 32-bit integer point arrays.
    contours = [
        np.rint(np.asarray(polygon, dtype=np.float64)).astype(np.int32).reshape(-1, 2)
        for polygon in polygons
        if len(polygon) > 0
    ]

    mask = np.zeros(image_bgr.shape[:2], dtype=np.uint8)
    if contours:
        cv2.fillPoly(mask, contours, 255)

    return cv2.bitwise_and(image_bgr, image_bgr, mask=mask)


def crop_image_by_box(box_coordinate, image_path):
    """Cut the rectangle described by ``box_coordinate`` out of an image.

    Args:
        box_coordinate: ``(x, y, width, height)`` of the rectangle, where
            ``(x, y)`` is its top-left corner.
        image_path: Path of the image file to load.

    Returns:
        The selected region as a BGR image array. Parts of the rectangle that
        fall outside the image are dropped, so the result can be smaller than
        ``width`` x ``height`` (or empty).
    """
    x, y, width, height = box_coordinate

    # Load inside a context manager so the file handle is released, and force
    # three RGB channels so the colour conversion below gets the layout it
    # expects.
    with Image.open(image_path) as image:
        image_np = np.array(image.convert('RGB'))
    image_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # Slice bounds must be integers, and a negative bound would be read as an
    # offset from the far edge of the array, so clamp at zero.
    left, top = max(int(x), 0), max(int(y), 0)
    right, bottom = max(int(x + width), 0), max(int(y + height), 0)

    return image_bgr[top:bottom, left:right]


In [None]:
# Create the output folders once, before processing any file.
os.makedirs('cropped_image_polygon', exist_ok=True)
os.makedirs('cropped_image_box', exist_ok=True)

# json_files and image_files are paired by position.
for json_path, image_path in zip(json_files, image_files):
    # Read the JSON file (decoded explicitly as UTF-8 rather than with the
    # platform default encoding)
    with open(json_path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # metaData fields
    meta_data = json_data['metaData']
    breed = meta_data['breed']
    age = meta_data['age']
    gender = meta_data['gender']
    region = meta_data['region']
    camera_type = meta_data['camera type']
    species = meta_data['species']
    lesions = meta_data['lesions']

    # labelingInfo entries
    labeling_info = json_data['labelingInfo']

    # Polygon vertices: one list of (x, y) tuples per polygon
    polygon_coordinates = []

    # Boxes: one (x, y, width, height) tuple per box
    box_location = []

    for item in labeling_info:
        if 'polygon' in item:
            polygon = item['polygon']
            location = polygon['location'][0]
            # Vertices are stored as paired keys x1, y1, x2, y2, ...
            num_coordinates = len(location) // 2
            coordinates = [(location[f'x{i+1}'], location[f'y{i+1}']) for i in range(num_coordinates)]
            polygon_coordinates.append(coordinates)

        if 'box' in item:
            box = item['box']
            location = box['location'][0]
            box_location.append(
                (location['x'], location['y'], location['width'], location['height'])
            )

    # Print the parsed values
    print(f'견종: {breed}')
    print(f'나이: {age}')
    print(f'성별: {gender}')
    print(f'촬영위치: {region}')
    print(f'촬영장비: {camera_type}')
    print(f'반려종: {species}')
    print(f'증상: {lesions}')
    print('폴리곤 좌표:')
    for coords in polygon_coordinates:
        print(f'- {coords}')
    print('박스 위치:')
    for location in box_location:
        print(f'- x: {location[0]}, y: {location[1]}, width: {location[2]}, height: {location[3]}')
    print('=' * 50)

    # Output file name: same base name as the source image
    image_filename = os.path.basename(image_path)

    # Load the image; the context manager releases the file handle and
    # convert('RGB') guarantees the three-channel layout the colour
    # conversion expects.
    with Image.open(image_path) as image:
        image_np = np.array(image.convert('RGB'))
    image_bgr = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)

    # --- Polygon crop -------------------------------------------------------
    # Only when this file actually has polygons; otherwise a stale value from
    # a previous iteration would be used.
    if polygon_coordinates:
        # cv2.fillPoly needs 32-bit integer point arrays.
        contours = [
            np.rint(np.asarray(polygon, dtype=np.float64)).astype(np.int32)
            for polygon in polygon_coordinates
        ]
        mask = np.zeros(image_bgr.shape[:2], dtype=np.uint8)
        cv2.fillPoly(mask, contours, 255)

        # Keep only the pixels inside the polygons
        cropped_image = cv2.bitwise_and(image_bgr, image_bgr, mask=mask)

        # Resize and save the resized result
        cropped_resized_img = cv2.resize(cropped_image, (96, 96))
        cv2.imwrite(os.path.join('cropped_image_polygon', image_filename), cropped_resized_img)

    # --- Box crop -----------------------------------------------------------
    if box_location:
        # One output file per image: use the last box listed for this file.
        x, y, width, height = box_location[-1]

        # Slice bounds must be non-negative integers.
        left, top = max(int(x), 0), max(int(y), 0)
        right, bottom = max(int(x + width), 0), max(int(y + height), 0)
        cropped_image = image_bgr[top:bottom, left:right]

        # cv2.resize cannot handle an empty region (box fully outside the image).
        if cropped_image.size > 0:
            cropped_resized_img = cv2.resize(cropped_image, (96, 96))
            cv2.imwrite(os.path.join('cropped_image_box', image_filename), cropped_resized_img)

# Modeling

In [None]:
from sklearn.metrics import accuracy_score

# Configure the classifiers and their hyperparameters
model_LR = LogisticRegression(random_state=9)
model_LDA = LinearDiscriminantAnalysis(solver='svd')
model_KNN = KNeighborsClassifier(n_neighbors=5)
# A single decision tree has no n_estimators parameter (that option belongs to
# ensembles such as RandomForestClassifier); passing it raises a TypeError.
model_DT = DecisionTreeClassifier(random_state=0)
model_RF = RandomForestClassifier(n_estimators=200, random_state=0)
model_GaussianNB = GaussianNB(var_smoothing=1e-09)
model_SVM = SVC(kernel='linear', C=1, random_state=0)

# Collect the classifiers in a list. The list is deliberately not named
# `models`, because that name is the imported tensorflow.keras `models` module.
classifiers = [model_LR, model_LDA, model_KNN, model_DT, model_RF, model_GaussianNB, model_SVM]

# Train each classifier and print its accuracy on the test split.
# NOTE(review): X_train, y_train, X_test and y_test are not defined in the
# visible cells — they must exist before this cell runs.
for classifier in classifiers:
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f"{classifier.__class__.__name__}: {score}")

In [None]:
# Create the Sequential model. Built through `tf.keras` so this cell does not
# depend on the bare name `models`, which an earlier cell rebinds to a list.
model = tf.keras.Sequential()

# First convolution block
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(96,96,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3)))

# First Dropout layer
model.add(Dropout(0.25))

# Second convolution block (two Conv2D layers)
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second Dropout layer
model.add(Dropout(0.25))

# Third convolution block (two Conv2D layers)
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Third Dropout layer
model.add(Dropout(0.25))

# Flatten layer
model.add(Flatten())

# First Dense layer
model.add(Dense(units=1024, activation='relu'))
model.add(BatchNormalization())

# Fourth Dropout layer
model.add(Dropout(0.5))

# Second Dense layer: final output layer with 7 softmax outputs
model.add(Dense(units=7, activation='softmax'))

# Compile the model.
# The `lr` and `decay` keyword arguments are not accepted by current Keras
# optimizers. The old `decay` behaviour, lr / (1 + decay * step), is expressed
# with an InverseTimeDecay schedule using decay_steps=1.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=0.00001,
)
opt = Adam(learning_rate=lr_schedule)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Print the model structure
model.summary()

# Train the model
epochs = 150
batch_size = 32

# NOTE(review): train_data, val_data and test_data are not defined in the
# visible cells. If train_data is a tf.data.Dataset or a generator, Keras
# expects batching to be done by that object and rejects `batch_size` — confirm.
history = model.fit(train_data, epochs=epochs, batch_size=batch_size, validation_data=val_data)

# Evaluate the model
test_loss, test_acc = model.evaluate(test_data)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

In [None]:
# Convert the Keras model to a TensorFlow Lite model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to a file in binary mode
with open('your_model.tflite', 'wb') as f:
    f.write(tflite_model)