# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [1]:
# Mount Google Drive at /content/drive inside the Colab runtime; the dataset
# path used further down in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Preprocessing

In [2]:
import os
import glob
import json
import cv2

def get_image_data(src_path):
    """Load every ``.jpg`` image found anywhere below *src_path*.

    Args:
        src_path: Root directory that is searched recursively.

    Returns:
        A list with one entry per matched file, ordered by sorted file path.
        Each entry is whatever ``cv2.imread`` returned for that path.
        NOTE(review): OpenCV documents that ``imread`` yields ``None`` for a
        file it cannot decode, so entries are presumably not guaranteed to be
        arrays -- confirm before indexing into them.
    """
    pattern = os.path.join(src_path, '**', '*.jpg')
    # Sorting keeps the result order deterministic across runs.
    sorted_paths = sorted(glob.glob(pattern, recursive=True))

    loaded_images = []
    for path in sorted_paths:
        loaded_images.append(cv2.imread(path))
    return loaded_images

def get_meta_data(src_path):
    """Collect the ``metaData`` entry of every ``.json`` file below *src_path*.

    Files are visited in sorted path order. A file whose content is not valid
    JSON is reported on stdout and skipped, so the returned list can be shorter
    than the number of JSON files found.

    Args:
        src_path: Root directory that is searched recursively.

    Returns:
        A list holding, for each successfully parsed file, the value stored
        under its top-level ``metaData`` key, or ``None`` when the key is
        missing or the top-level JSON value is not an object.
    """
    json_paths = sorted(
        glob.glob(os.path.join(src_path, '**', '*.json'), recursive=True)
    )

    meta_data = []
    for json_path in json_paths:
        # "utf-8-sig" reads plain UTF-8 unchanged but also strips a leading
        # byte-order mark, which json.loads would otherwise reject.
        with open(json_path, "r", encoding="utf-8-sig") as file:
            file_content = file.read()

        try:
            json_data = json.loads(file_content)
        except json.JSONDecodeError as e:
            print(f"Error: {str(e)}")
            print(f"Invalid JSON file content: {json_path}:")
            print(file_content)
            print("\n")
            continue

        # Only JSON objects can carry a "metaData" key; any other top-level
        # value (list, number, ...) is recorded as having no metadata.
        if isinstance(json_data, dict):
            meta_data.append(json_data.get('metaData', None))
        else:
            meta_data.append(None)

    return meta_data

In [None]:
# Root folder of the validation split on the mounted shared drive.
src_path = "/content/drive/Shareddrives/152.반려동물 피부질환 데이터/일반카메라1/validation/반려묘"
# Image loading is currently disabled; only the metadata is read.
# image_data = get_image_data(src_path)
# get_meta_data parses every JSON file under src_path before the slice keeps
# the first five results, so parse errors are printed for all files.
meta_data = get_meta_data(src_path)[:5]

Error: Expecting value: line 1 column 1 (char 0)
Invalid JSON file content: /content/drive/Shareddrives/152.반려동물 피부질환 데이터/일반카메라1/validation/반려묘/무증상/A2_비듬_각질_상피성잔고리/IMG_C_A7_017596.json:



Error: Expecting value: line 1 column 1 (char 0)
Invalid JSON file content: /content/drive/Shareddrives/152.반려동물 피부질환 데이터/일반카메라1/validation/반려묘/무증상/A2_비듬_각질_상피성잔고리/IMG_C_A7_017597.json:



Error: Expecting value: line 1 column 1 (char 0)
Invalid JSON file content: /content/drive/Shareddrives/152.반려동물 피부질환 데이터/일반카메라1/validation/반려묘/무증상/A2_비듬_각질_상피성잔고리/IMG_C_A7_017598.json:



Error: Expecting value: line 1 column 1 (char 0)
Invalid JSON file content: /content/drive/Shareddrives/152.반려동물 피부질환 데이터/일반카메라1/validation/반려묘/무증상/A2_비듬_각질_상피성잔고리/IMG_C_A7_017599.json:



Error: Expecting value: line 1 column 1 (char 0)
Invalid JSON file content: /content/drive/Shareddrives/152.반려동물 피부질환 데이터/일반카메라1/validation/반려묘/무증상/A2_비드

In [None]:
Before_Resize_Polygon_Binary_Mask.csv
After_Resize_Polygon_Binary_Mask.csv
Before_Resize_Box_Binary_Mask.csv
After_Resize_Box_Binary_Mask.csv
Before_Resize_Polygon_Segmentation_Map.csv
After_Resize_Polygon_Segmentation_Map.csv
Before_Resize_Box_Segmentation_Map.csv
After_Resize_Box_Segmentation_Map.csv
Before_Resize_Polygon_Box_Binary_Mask.csv
After_Resize_Polygon_Box_Binary_Mask.csv
Before_Resize_Polygon_Box_Segmentation_Map.csv
After_Resize_Polygon_Box_Segmentation_Map.csv
Before_Resize_Polygon_Binary_Mask_MetaData.csv
After_Resize_Polygon_Binary_Mask_MetaData.csv
Before_Resize_Box_Binary_Mask_MetaData.csv
After_Resize_Box_Binary_Mask_MetaData.csv
Before_Resize_Polygon_Segmentation_Map_MetaData.csv
After_Resize_Polygon_Segmentation_Map_MetaData.csv
Before_Resize_Box_Segmentation_Map_MetaData.csv
After_Resize_Box_Segmentation_Map_MetaData.csv

# Modeling

In [None]:
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [None]:
# Load the six dataset variants. The relative file names are resolved against
# the current working directory of the notebook session.

# 1. Polygon-cropped images
polygon_cropped = pd.read_csv("polygon_cropped.csv")

# 2. Box-cropped images
box_cropped = pd.read_csv("box_cropped.csv")

# 3. Polygon-cropped images and box-cropped images
polygon_box_cropped = pd.read_csv("polygon_box_cropped.csv")

# 4. Polygon-cropped images with metadata
polygon_cropped_metadata = pd.read_csv("polygon_cropped_metadata.csv")

# 5. Box-cropped images with metadata
box_cropped_metadata = pd.read_csv("box_cropped_metadata.csv")

# 6. Polygon-cropped images and box-cropped images with metadata
polygon_box_cropped_metadata = pd.read_csv("polygon_box_cropped_metadata.csv")

In [None]:
# Configure the classical classifiers and their hyperparameters.
model_LR = LogisticRegression(random_state=9)
model_LDA = LinearDiscriminantAnalysis(solver='svd')
model_KNN = KNeighborsClassifier(n_neighbors=5)
# A single decision tree has no `n_estimators` parameter (that belongs to
# ensembles such as RandomForestClassifier) and passing it raises TypeError.
# A fixed seed keeps the tree reproducible, like the other seeded models here.
model_DT = DecisionTreeClassifier(random_state=0)
model_RF = RandomForestClassifier(n_estimators=200, random_state=0)
model_GaussianNB = GaussianNB(var_smoothing=1e-09)
model_SVM = SVC(kernel='linear', C=1, random_state=0)

# Collect the classifiers in a list. The list is deliberately not called
# `models`: that name is the `tensorflow.keras.models` module imported at the
# top of the modeling section, and rebinding it would break later uses.
classifiers = [model_LR, model_LDA, model_KNN, model_DT, model_RF, model_GaussianNB, model_SVM]

# Fit each classifier and print its accuracy on the held-out test split.
# NOTE(review): X_train, y_train, X_test and y_test are not defined in the
# visible cells -- they must be created before this cell runs.
for model in classifiers:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f"{model.__class__.__name__}: {score}")

In [None]:
# Build the CNN as a Keras Sequential model.
# The directly imported `Sequential` class is used so that this cell does not
# depend on what the name `models` is bound to in the notebook session.
model = Sequential()

# First convolution stage: 96x96x3 input, 3x3 pooling.
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(96,96,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3)))

# First Dropout layer
model.add(Dropout(0.25))

# Second convolution stage: two 64-filter convolutions, 2x2 pooling.
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second Dropout layer
model.add(Dropout(0.25))

# Third convolution stage: two 128-filter convolutions, 2x2 pooling.
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Third Dropout layer
model.add(Dropout(0.25))

# Flatten the feature maps into a vector for the dense head.
model.add(Flatten())

# First Dense layer
model.add(Dense(units=1024, activation='relu'))
model.add(BatchNormalization())

# Fourth Dropout layer
model.add(Dropout(0.5))

# Second Dense layer: final softmax output.
# NOTE(review): 7 output units are hard-coded -- confirm this matches the
# number of classes in the labels fed to `fit`.
model.add(Dense(units=7, activation='softmax'))

# Compile the model.
# `lr` and `decay` are no longer accepted by current Keras optimizers. The
# legacy `decay` rule was lr / (1 + decay * iterations), which is what
# InverseTimeDecay computes with decay_steps=1.
opt = Adam(
    learning_rate=tf.keras.optimizers.schedules.InverseTimeDecay(
        initial_learning_rate=0.001,
        decay_steps=1,
        decay_rate=0.00001,
    )
)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Print the layer-by-layer summary.
model.summary()

# Train the model.
epochs = 150
batch_size = 32

# NOTE(review): train_data, val_data and test_data are not defined in the
# visible cells. If train_data is a generator or tf.data.Dataset it already
# yields batches, and the Keras docs say not to pass `batch_size` -- confirm.
history = model.fit(train_data, epochs=epochs, batch_size=batch_size, validation_data=val_data)

# Evaluate on the test data.
test_loss, test_acc = model.evaluate(test_data)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

In [None]:
# Convert the Keras model held in `model` to a TensorFlow Lite model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to 'your_model.tflite' in binary mode; the relative
# path is resolved against the current working directory.
with open('your_model.tflite', 'wb') as f:
    f.write(tflite_model)