# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [2]:
# Mount Google Drive into the Colab runtime so the files under
# /content/drive can be read by the cells below (only works inside Colab).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Preprocessing

In [8]:
import cv2
import json
import itertools
import glob
import os
import numpy as np
import pandas as pd

class ImageProcessor:
    """Build lesion masks from images and JSON annotations and export them as CSV.

    ``*.jpg`` images and ``*.json`` annotation files are discovered recursively
    under ``src_path`` and paired by their position in the two sorted lists.

    NOTE(review): pairing by sorted position silently mis-pairs (or truncates)
    when an image has no annotation file or vice versa -- confirm the folder
    always holds exactly one JSON per image.
    """

    def __init__(self, src_path):
        """Store ``src_path`` and index the image / annotation files below it."""
        self.src_path = src_path
        self.image_paths, self.json_paths = self.preprocess_data()

    def resize_image(self, image, target_size=(96, 96)):
        """Return ``image`` resized by ``cv2.resize`` with ``target_size`` as ``dsize``."""
        return cv2.resize(image, target_size)

    @staticmethod
    def _is_box(coord):
        """Return True when ``coord`` is a flat ``[x, y, w, h]`` sequence."""
        shape = np.shape(coord)
        return len(shape) == 1 and shape[0] == 4

    @staticmethod
    def _collect_coordinates(labeling_info, label_keys):
        """Gather the ``location`` shapes of the given annotation kinds.

        Args:
            labeling_info: iterable of annotation dicts, each optionally holding
                a ``'polygon'`` and/or ``'box'`` entry with a ``'location'``.
            label_keys: annotation kinds to collect, e.g. ``['box', 'polygon']``.

        Returns:
            A flat list of shapes. A ``location`` that is a list is treated as
            a list of shapes; any other value is taken as a single shape.
        """
        coordinates = []
        for label_info in labeling_info:
            for label_key in label_keys:
                entry = label_info.get(label_key)
                if not entry:
                    continue
                location = entry['location']
                if isinstance(location, list):
                    coordinates.extend(location)
                else:
                    coordinates.append(location)
        return coordinates

    @staticmethod
    def _scale_coordinate(coord, scale_x, scale_y):
        """Scale one shape from original-image pixels to resized-image pixels.

        Boxes (``[x, y, w, h]``) and polygons (``[[x, y], ...]``) are both
        supported; the result is rounded to integers and returned as lists.
        """
        values = np.asarray(coord, dtype=float)
        if ImageProcessor._is_box(coord):
            factors = np.array([scale_x, scale_y, scale_x, scale_y])
        else:
            factors = np.array([scale_x, scale_y])
        return np.rint(values * factors).astype(np.int32).tolist()

    def create_masks_and_maps(self, image, coordinates, mask_type):
        """Rasterise ``coordinates`` into a single-channel mask shaped like ``image``.

        Args:
            image: 3-channel image; only its shape and dtype are used.
            coordinates: list of shapes, each either a flat ``[x, y, w, h]`` box
                or a polygon given as a list of ``[x, y]`` points.
            mask_type: ``"binary_mask"`` paints every shape with 255; any other
                value paints shape number ``i`` (0-based) with ``i + 1``.

        Returns:
            The mask converted to grayscale with ``cv2.cvtColor``.

        NOTE(review): the TypeError recorded in the notebook output hints that
        the dataset's ``location`` entries may not be plain numeric lists
        (e.g. dicts) -- confirm the annotation schema before relying on this.
        """
        mask = np.zeros_like(image)

        for idx, coord in enumerate(coordinates):
            value = 255 if mask_type == "binary_mask" else idx + 1
            color = (value, value, value)
            # Decide by structure, not by length: a 4-element box also has
            # len > 2 and used to be sent to fillPoly by mistake.
            if self._is_box(coord):  # box case
                x, y, w, h = (int(v) for v in coord)
                mask[y:y + h, x:x + w] = color
            else:  # polygon case
                points = np.array([coord], np.int32)
                mask = cv2.fillPoly(mask, points, color)

        return cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

    def preprocess_data(self):
        """Return the sorted lists of ``*.jpg`` and ``*.json`` paths under ``src_path``."""
        image_paths = glob.glob(os.path.join(self.src_path, '**', '*.jpg'), recursive=True)
        json_paths = glob.glob(os.path.join(self.src_path, '**', '*.json'), recursive=True)
        image_paths.sort()
        json_paths.sort()
        return image_paths, json_paths

    def process_images(self):
        """Create every mask variant per image and write one CSV per variant.

        For each image one row is produced for every combination of resize
        option (before/after resize), label type (polygon, box, polygon_box),
        mask type (binary mask, segmentation map) and metadata option. An image
        without shapes of a given label type still yields an all-zero mask.
        Rows of the same combination are concatenated and written to
        ``{resize_option}_{label_type}_{mask_type}{_MetaData}.csv`` in the
        current working directory.

        Returns:
            The list of combined DataFrames, in the order they were written.

        Raises:
            FileNotFoundError: if an image file cannot be decoded by OpenCV.

        NOTE(review): each mask is stored as an ndarray inside a DataFrame
        cell, so ``to_csv`` writes its string form -- presumably lossy for
        large masks; confirm this is the intended storage format.
        """
        results = {}

        # Output label name -> annotation kinds merged into that output.
        labeling_types = (
            ('polygon', ['polygon']),
            ('box', ['box']),
            ('polygon_box', ['box', 'polygon']),
        )
        mask_types = ('binary_mask', 'segmentation_map')
        resize_options = ('before_resize', 'after_resize')
        metadata_options = ('no_metadata', 'metadata')

        for image_path, json_path in zip(self.image_paths, self.json_paths):
            with open(json_path, encoding='utf-8') as f:
                json_data = json.load(f)

            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise FileNotFoundError(f"Could not read image: {image_path}")

            metadata_df = pd.DataFrame(json_data['metaData'], index=[0])
            resized_image = self.resize_image(image)

            # Annotations are in original-image pixels; the resized canvas
            # needs them scaled by the same factors as the image itself.
            scale_x = resized_image.shape[1] / image.shape[1]
            scale_y = resized_image.shape[0] / image.shape[0]

            for label_type, label_keys in labeling_types:
                coordinates = self._collect_coordinates(json_data['labelingInfo'], label_keys)
                scaled_coordinates = [
                    self._scale_coordinate(coord, scale_x, scale_y) for coord in coordinates
                ]

                for mask_type, resize_option in itertools.product(mask_types, resize_options):
                    if resize_option == 'after_resize':
                        mask = self.create_masks_and_maps(resized_image, scaled_coordinates, mask_type)
                    else:
                        mask = self.create_masks_and_maps(image, coordinates, mask_type)

                    for metadata_option in metadata_options:
                        key = (resize_option, label_type, metadata_option, mask_type)
                        output_df = pd.DataFrame({f"{key}": [mask]})
                        if metadata_option == 'metadata':
                            result = pd.concat([metadata_df, output_df], axis=1)
                        else:
                            result = output_df
                        results.setdefault(key, []).append(result)

        result_dataframes = []

        # Naming pattern of the result files.
        file_name_format = "{resize_option}_{label_type}_{mask_type}{meta_data_suffix}.csv"

        for key, result_list in results.items():
            resize_option, label_type, metadata_option, mask_type = key
            meta_data_suffix = "_MetaData" if metadata_option == 'metadata' else ""
            file_name = file_name_format.format(resize_option=resize_option, label_type=label_type,
                                                mask_type=mask_type, meta_data_suffix=meta_data_suffix)
            combined_df = pd.concat(result_list, ignore_index=True)
            combined_df.to_csv(file_name, index=False)
            result_dataframes.append(combined_df)

        return result_dataframes

In [9]:
# Index the images / annotations under the given Drive folder, then build all
# mask variants and write the resulting CSV files to the working directory.
processor = ImageProcessor('/content/drive/Shareddrives/152.반려동물 피부질환 데이터/일반카메라/2.Validation/반려묘')
processor.process_images()

TypeError: ignored

In [None]:
Before_Resize_Polygon_Binary_Mask.csv
After_Resize_Polygon_Binary_Mask.csv
Before_Resize_Box_Binary_Mask.csv
After_Resize_Box_Binary_Mask.csv
Before_Resize_Polygon_Segmentation_Map.csv
After_Resize_Polygon_Segmentation_Map.csv
Before_Resize_Box_Segmentation_Map.csv
After_Resize_Box_Segmentation_Map.csv
Before_Resize_Polygon_Box_Binary_Mask.csv
After_Resize_Polygon_Box_Binary_Mask.csv
Before_Resize_Polygon_Box_Segmentation_Map.csv
After_Resize_Polygon_Box_Segmentation_Map.csv
Before_Resize_Polygon_Binary_Mask_MetaData.csv
After_Resize_Polygon_Binary_Mask_MetaData.csv
Before_Resize_Box_Binary_Mask_MetaData.csv
After_Resize_Box_Binary_Mask_MetaData.csv
Before_Resize_Polygon_Segmentation_Map_MetaData.csv
After_Resize_Polygon_Segmentation_Map_MetaData.csv
Before_Resize_Box_Segmentation_Map_MetaData.csv
After_Resize_Box_Segmentation_Map_MetaData.csv

# Modeling

In [None]:
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [None]:
# NOTE(review): these file names differ from the ones written by
# ImageProcessor.process_images ("{resize_option}_{label_type}_{mask_type}...") --
# confirm which step produces the *_cropped*.csv files.

# 1. Polygon-cropped images
polygon_cropped = pd.read_csv("polygon_cropped.csv")

# 2. Box-cropped images
box_cropped = pd.read_csv("box_cropped.csv")

# 3. Polygon-cropped and box-cropped images
polygon_box_cropped = pd.read_csv("polygon_box_cropped.csv")

# 4. Polygon-cropped images with metadata
polygon_cropped_metadata = pd.read_csv("polygon_cropped_metadata.csv")

# 5. Box-cropped images with metadata
box_cropped_metadata = pd.read_csv("box_cropped_metadata.csv")

# 6. Polygon-cropped and box-cropped images with metadata
polygon_box_cropped_metadata = pd.read_csv("polygon_box_cropped_metadata.csv")

In [None]:
# Configure the classical classifiers and their hyperparameters.
model_LR = LogisticRegression(random_state=9)
model_LDA = LinearDiscriminantAnalysis(solver='svd')
model_KNN = KNeighborsClassifier(n_neighbors=5)
# A single decision tree has no `n_estimators` parameter (that belongs to
# ensembles such as RandomForestClassifier); passing it raises TypeError.
model_DT = DecisionTreeClassifier(random_state=0)
model_RF = RandomForestClassifier(n_estimators=200, random_state=0)
model_GaussianNB = GaussianNB(var_smoothing=1e-09)
model_SVM = SVC(kernel='linear', C=1, random_state=0)

# Collect the classifiers in a list. It is deliberately not called `models`,
# which would rebind the `tensorflow.keras.models` module imported above.
classifiers = [model_LR, model_LDA, model_KNN, model_DT, model_RF, model_GaussianNB, model_SVM]

# Fit each classifier and print its accuracy on the test split.
# NOTE(review): X_train / y_train / X_test / y_test are not defined in the
# visible cells -- they must be prepared before this cell runs.
for model in classifiers:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f"{model.__class__.__name__}: {score}")

In [None]:
# Build the CNN as a Sequential model. `Sequential` is used directly because
# the name `models` may have been rebound to a list of classifiers earlier.
model = Sequential()

# First convolution stage
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(96,96,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3)))

# First Dropout layer
model.add(Dropout(0.25))

# Second convolution stage
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second Dropout layer
model.add(Dropout(0.25))

# Third convolution stage
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Third Dropout layer
model.add(Dropout(0.25))

# Flatten layer
model.add(Flatten())

# First Dense layer
model.add(Dense(units=1024, activation='relu'))
model.add(BatchNormalization())

# Fourth Dropout layer
model.add(Dropout(0.5))

# Second Dense layer: final output layer (7 classes, softmax)
model.add(Dense(units=7, activation='softmax'))

# Compile the model.
# The legacy `lr=` / `decay=` optimizer arguments are rejected by current Keras
# optimizers. InverseTimeDecay with decay_steps=1 reproduces the legacy rule
# lr / (1 + decay * iterations).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001, decay_steps=1, decay_rate=0.00001)
opt = Adam(learning_rate=lr_schedule)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Print the model architecture summary
model.summary()

# Train the model
epochs = 150
batch_size = 32

# NOTE(review): train_data / val_data / test_data are not defined in the
# visible cells. If train_data is a generator or tf.data.Dataset, Keras expects
# batching to be done by the data source and `batch_size` must be dropped here.
history = model.fit(train_data, epochs=epochs, batch_size=batch_size, validation_data=val_data)

# Evaluate the model
test_loss, test_acc = model.evaluate(test_data)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

In [None]:
# Convert the trained Keras model to a TensorFlow Lite model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the converted model to a file (binary mode: convert() output is written as-is)
with open('your_model.tflite', 'wb') as f:
    f.write(tflite_model)