# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [None]:
# Mount Google Drive at /content/drive so that files stored there can be
# reached through ordinary file paths from this notebook.
from google.colab import drive
drive.mount('/content/drive')

# Preprocessing

In [None]:
import os
import glob
import cv2
import json
import pandas as pd
import numpy as np

In [None]:
def get_image_and_json_paths(directory):
    """Collect every ``.jpg`` and ``.json`` file found anywhere under ``directory``.

    Args:
        directory: Root folder; it is searched recursively.

    Returns:
        A ``(image_paths, json_paths)`` tuple of two lists of path strings.
        Each list is sorted in ascending string order, independently of the
        other one.
    """
    def _find_sorted(file_glob):
        # '**' together with recursive=True matches any depth of sub-folders,
        # including the root folder itself.
        pattern = os.path.join(directory, '**', file_glob)
        return sorted(glob.glob(pattern, recursive=True))

    return _find_sorted('*.jpg'), _find_sorted('*.json')


def crop_image(image, coordinates):
    """Cut an annotated region out of ``image``.

    Args:
        image: Image array indexed as ``image[row, column, ...]``.
        coordinates: Either a polygon, given as a list of ``(x, y)`` points,
            or a box, given as the four values ``x, y, w, h`` (list or tuple).

    Returns:
        For a polygon: the polygon's bounding rectangle, with every pixel
        outside the polygon set to zero.
        For a box: the slice ``image[y:y + h, x:x + w]``.
    """
    # Values loaded from JSON arrive as plain lists, so "is it a list?" alone
    # cannot tell a box [x, y, w, h] from a polygon [[x, y], ...]. A flat
    # sequence of exactly four numbers is a box; a polygon is a list of points.
    is_flat_box = (
        isinstance(coordinates, (list, tuple))
        and len(coordinates) == 4
        and all(
            isinstance(value, (int, float)) and not isinstance(value, bool)
            for value in coordinates
        )
    )

    if isinstance(coordinates, list) and not is_flat_box:  # polygon
        points = np.array(coordinates, np.int32)
        # Single-channel mask: 255 inside the polygon, 0 elsewhere.
        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        cv2.fillPoly(mask, [points], (255))
        x, y, w, h = cv2.boundingRect(points)
        region = image[y:y + h, x:x + w]
        # AND-ing the region with itself under the mask keeps the pixels
        # inside the polygon and zeroes the rest.
        cropped_image = cv2.bitwise_and(region, region, mask=mask[y:y + h, x:x + w])
    else:  # box
        x, y, w, h = coordinates
        if is_flat_box:
            # Slice bounds must be integers; JSON numbers may be floats.
            x, y, w, h = (int(value) for value in (x, y, w, h))
        cropped_image = image[y:y + h, x:x + w]
    return cropped_image

def _as_object_column(values):
    """Pack ``values`` into a 1-D object array, one element per table row."""
    # Filling element by element keeps each value (possibly a multi-dimensional
    # image array) as a single cell instead of letting NumPy stack them.
    column = np.empty(len(values), dtype=object)
    for position, value in enumerate(values):
        column[position] = value
    return column


def process_images(image_paths, json_paths):
    """Crop the annotated regions of each image and join them with its metadata.

    ``image_paths[i]`` is paired with ``json_paths[i]``. For every pair the
    JSON file is read, its ``metaData`` entry (assumed to be a flat dict;
    TODO confirm against the dataset) is kept, and each ``box`` / ``polygon``
    found in its ``labelingInfo`` list is cropped with ``crop_image``.

    Args:
        image_paths: Paths of the images.
        json_paths: Paths of the annotation files, in the same order.

    Returns:
        A DataFrame whose leading columns are the metadata fields and whose
        last two columns are ``cropped_polygon_image`` and
        ``cropped_box_image`` (image arrays, or ``None`` when absent).
        Each file pair yields at least one row. The n-th polygon crop and the
        n-th box crop of a pair share a row; extra annotations get extra rows
        that repeat the pair's metadata.

    Raises:
        ValueError: If the two path lists differ in length, or an image that
            has annotations cannot be read.
    """
    from itertools import zip_longest

    # zip() would silently drop the surplus paths and shift every later pair.
    if len(image_paths) != len(json_paths):
        raise ValueError(
            f"image/json count mismatch: {len(image_paths)} images "
            f"vs {len(json_paths)} json files"
        )

    metadata_rows = []
    polygon_cells = []
    box_cells = []

    for image_path, json_path in zip(image_paths, json_paths):
        with open(json_path, encoding='utf-8') as f:
            json_data = json.load(f)

        metadata = json_data.get('metaData') or {}
        polygon_crops = []
        box_crops = []
        image = None  # decoded lazily: only when there is something to crop

        for label_info in json_data.get('labelingInfo') or []:
            for key, crops in (('box', box_crops), ('polygon', polygon_crops)):
                coordinates = label_info.get(key)
                if not coordinates:
                    continue
                if image is None:
                    image = cv2.imread(image_path)
                    # cv2.imread signals failure by returning None.
                    if image is None:
                        raise ValueError(f"could not read image: {image_path}")
                crops.append(crop_image(image, coordinates))

        # Keep one row even when nothing was cropped so the metadata survives.
        paired_crops = list(zip_longest(polygon_crops, box_crops)) or [(None, None)]
        for polygon_crop, box_crop in paired_crops:
            metadata_rows.append(metadata)
            polygon_cells.append(polygon_crop)
            box_cells.append(box_crop)

    # Metadata first, then the two crop columns, all built from lists of the
    # same length so the rows stay aligned.
    images_metadata_df = pd.DataFrame(metadata_rows, index=range(len(metadata_rows)))
    images_metadata_df['cropped_polygon_image'] = _as_object_column(polygon_cells)
    images_metadata_df['cropped_box_image'] = _as_object_column(box_cells)

    return images_metadata_df

In [None]:
# Root folder of the dataset, located on the shared drive under /content/drive.
src_path = '/content/drive/Shareddrives/152.반려동물 피부질환 데이터'
# Gather the sorted .jpg and .json path lists found under src_path.
image_paths, json_paths = get_image_and_json_paths(src_path)
# Build the combined table from the two path lists.
result_df = process_images(image_paths, json_paths)

# Show the table as the cell output.
result_df

In [None]:
# Every column except the last two counts as metadata; the second-to-last
# column is used as the polygon crop and the last one as the box crop.
metadata_columns = result_df.shape[1] - 2
metadata_positions = list(range(metadata_columns))

# NOTE(review): if the crop cells hold image arrays, to_csv presumably stores
# only their text form - confirm that the CSV files are usable downstream.

# 1. Polygon crops only
polygon_cropped = result_df.iloc[:, [-2]]

# 2. Box crops only
box_cropped = result_df.iloc[:, [-1]]

# 3. Polygon crops and box crops
polygon_box_cropped = result_df.iloc[:, [-2, -1]]

# 4. Polygon crops with metadata
polygon_cropped_metadata = result_df.iloc[:, metadata_positions + [-2]]

# 5. Box crops with metadata
box_cropped_metadata = result_df.iloc[:, metadata_positions + [-1]]

# 6. Polygon crops and box crops with metadata
polygon_box_cropped_metadata = result_df.iloc[:, metadata_positions + [-2, -1]]

# Write each selection to its own CSV file, leaving out the row index.
csv_exports = (
    (polygon_cropped, "polygon_cropped.csv"),
    (box_cropped, "box_cropped.csv"),
    (polygon_box_cropped, "polygon_box_cropped.csv"),
    (polygon_cropped_metadata, "polygon_cropped_metadata.csv"),
    (box_cropped_metadata, "box_cropped_metadata.csv"),
    (polygon_box_cropped_metadata, "polygon_box_cropped_metadata.csv"),
)
for selection, csv_name in csv_exports:
    selection.to_csv(csv_name, index=False)

# Modeling

In [None]:
# Configure the classifiers and their hyperparameters.
# NOTE(review): the estimator classes, accuracy_score and the
# X_train / y_train / X_test / y_test splits are not defined in the cells
# shown here - they are expected to come from elsewhere in the notebook.
model_LR = LogisticRegression(random_state=9)
model_LDA = LinearDiscriminantAnalysis(solver='svd')
model_KNN = KNeighborsClassifier(n_neighbors=5)
# A single decision tree has no `n_estimators` argument (that belongs to
# ensembles such as the random forest below) and passing it raises TypeError.
# random_state is fixed so the tree is reproducible like the other models.
model_DT = DecisionTreeClassifier(random_state=0)
model_RF = RandomForestClassifier(n_estimators=200, random_state=0)
model_GaussianNB = GaussianNB(var_smoothing=1e-09)
model_SVM = SVC(kernel='linear', C=1, random_state=0)

# Collect the classifiers in a list. The list is deliberately not called
# `models`: the CNN cell calls `models.Sequential()`, so that name has to keep
# referring to the Keras models module.
classifiers = [model_LR, model_LDA, model_KNN, model_DT, model_RF, model_GaussianNB, model_SVM]

# Train each classifier and print its accuracy on the test split.
for classifier in classifiers:
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f"{classifier.__class__.__name__}: {score}")

In [None]:
# Build the CNN as a linear stack of layers.
# NOTE(review): `models` has to be the Keras models module here; if an earlier
# cell has bound a list to the same name, this line fails with AttributeError.
model = models.Sequential()

# Convolution block 1: 32 filters on 96x96 RGB input, then 3x3 pooling.
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(96,96,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3)))

# Dropout 1
model.add(Dropout(0.25))

# Convolution block 2: two 64-filter convolutions, then 2x2 pooling.
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Dropout 2
model.add(Dropout(0.25))

# Convolution block 3: two 128-filter convolutions, then 2x2 pooling.
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Dropout 3
model.add(Dropout(0.25))

# Flatten the feature maps into a vector.
model.add(Flatten())

# Dense layer 1
model.add(Dense(units=1024, activation='relu'))
model.add(BatchNormalization())

# Dropout 4
model.add(Dropout(0.5))

# Dense layer 2: final output layer, softmax over 7 classes.
model.add(Dense(units=7, activation='softmax'))

# Compile the model.
# `lr` and `decay` are legacy Adam arguments that current Keras optimizers
# reject. The rate is passed as `learning_rate`, and the decay is expressed as
# a schedule: InverseTimeDecay with decay_steps=1 yields
# 0.001 / (1 + 0.00001 * step), the formula the legacy `decay` applied.
# `tf` is the TensorFlow alias this notebook also uses for TFLite conversion.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=0.00001,
)
opt = Adam(learning_rate=lr_schedule)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Print the layer-by-layer summary.
model.summary()

# Train the model.
epochs = 150
batch_size = 32

# No targets are passed to fit(), so train_data has to yield
# (inputs, targets) batches itself (dataset / generator / Sequence). Keras
# requires that `batch_size` is NOT given for such inputs, so it is left out.
# NOTE(review): apply `batch_size` where train_data / val_data are built.
history = model.fit(train_data, epochs=epochs, validation_data=val_data)

# Evaluate on the test data.
test_loss, test_acc = model.evaluate(test_data)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

In [None]:
# Convert the Keras model held in `model` to the TensorFlow Lite format.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Write the converted model to disk in binary mode.
with open('your_model.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)