# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [None]:
# Mount Google Drive at /content/drive; the dataset path used below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Preprocessing

In [None]:
import glob
import os
import json
import numpy as np
import cv2
import pandas as pd

# Data loading: find every image and JSON path under a directory and return both lists sorted.
def preprocess_data(directory):
    """Return ``(image_paths, json_paths)`` found recursively under ``directory``.

    Each list is sorted independently. Callers in this notebook pair the two
    lists by position with ``zip``, so the pairing is only right when every
    image has exactly one JSON file that sorts into the same position.
    """
    def _sorted_matches(file_pattern):
        # '**' together with recursive=True also matches files directly in `directory`.
        search = os.path.join(directory, '**', file_pattern)
        return sorted(glob.glob(search, recursive=True))

    return _sorted_matches('*.jpg'), _sorted_matches('*.json')

# Image processing: crop and resize every labelled region and return the crops with their metadata.
def process_images(image_paths, json_paths):
    """Crop and resize the labelled regions of every image and collect them in a DataFrame.

    Args:
        image_paths: Image file paths, paired by position with ``json_paths``.
        json_paths: Annotation JSON paths. Each file may hold a ``metaData``
            dict and a ``labelingInfo`` list whose entries carry a ``box``
            and/or a ``polygon`` value.

    Returns:
        A DataFrame whose metadata columns come first and whose last two
        columns are ``cropped_polygon_image`` and ``cropped_box_image``
        (object columns holding the resized crops, ``None`` where missing).
        This is the column order ``save_results`` indexes by position.
        The k-th polygon crop and the k-th box crop of an image share a row;
        an image without any crop still contributes one metadata-only row.

    NOTE(review): relies on ``crop_image`` and ``resize_image`` being defined
    before this function is called; ``crop_image`` is not defined in the
    visible part of this notebook.
    """
    image_columns = ["cropped_polygon_image", "cropped_box_image"]
    rows = []

    for image_path, json_path in zip(image_paths, json_paths):
        with open(json_path) as f:
            json_data = json.load(f)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot be decoded.
            continue

        metadata = json_data.get('metaData') or {}
        polygon_crops = []
        box_crops = []
        for label_info in json_data.get('labelingInfo') or []:
            if label_info.get('box'):
                box_crops.append(resize_image(crop_image(image, label_info['box'])))
            if label_info.get('polygon'):
                polygon_crops.append(resize_image(crop_image(image, label_info['polygon'])))

        # One row per crop pair keeps every crop attached to its own image's
        # metadata; a per-column DataFrame cannot hold 3-D arrays, so rows are
        # built as dicts and the arrays are stored as cell objects.
        for index in range(max(len(polygon_crops), len(box_crops), 1)):
            row = dict(metadata)
            row["cropped_polygon_image"] = polygon_crops[index] if index < len(polygon_crops) else None
            row["cropped_box_image"] = box_crops[index] if index < len(box_crops) else None
            rows.append(row)

    images_metadata_df = pd.DataFrame(rows)
    metadata_columns = [column for column in images_metadata_df.columns if column not in image_columns]
    # reindex pins the two image columns to the end (and creates them when there are no rows).
    return images_metadata_df.reindex(columns=metadata_columns + image_columns)

# Saving: write six column subsets of the result DataFrame as CSV files in the working directory.
def save_results(result_df):
    """Write six CSV views of ``result_df``, selected by column position.

    The last two columns are treated as the polygon crop (position -2) and
    the box crop (position -1); every column before them counts as metadata.
    Files are written without the index.
    """
    polygon_position, box_position = -2, -1
    metadata_positions = list(range(len(result_df.columns) - 2))

    selections = {
        "polygon_cropped.csv": [polygon_position],
        "box_cropped.csv": [box_position],
        "polygon_box_cropped.csv": [polygon_position, box_position],
        "polygon_cropped_metadata.csv": metadata_positions + [polygon_position],
        "box_cropped_metadata.csv": metadata_positions + [box_position],
        "polygon_box_cropped_metadata.csv": metadata_positions + [polygon_position, box_position],
    }

    # Slice every view first so a bad column position fails before any file is written.
    views = [(target, result_df.iloc[:, positions]) for target, positions in selections.items()]

    for target, view in views:
        view.to_csv(target, index=False)

# Dataset root on the mounted drive.
src_path = '/content/drive/Shareddrives/152.반려동물 피부질환 데이터'

# Run the pipeline: collect the paths, build the crop/metadata DataFrame, write the CSV files.
# NOTE(review): process_images calls crop_image and resize_image; crop_image is not defined in
# the visible part of this notebook and resize_image is only defined in a later cell.
image_paths, json_paths = preprocess_data(src_path)
result_df = process_images(image_paths, json_paths)
save_results(result_df)

In [None]:
def resize_image(image, target_size=(128, 128)):
    """Return ``image`` resized to ``target_size`` using ``cv2.resize`` defaults.

    ``target_size`` is handed to ``cv2.resize`` as its size argument, which
    OpenCV reads as (width, height).
    """
    resized = cv2.resize(image, target_size)
    return resized

def create_binary_mask(image, coordinates):
    """Return a single-channel mask that is 255 inside the labelled region and 0 elsewhere.

    Args:
        image: Array whose first two dimensions give the mask height and width.
        coordinates: Either a box ``(x, y, w, h)`` made of four scalars, or a
            polygon given as a sequence of ``(x, y)`` points (a flat
            ``[x1, y1, x2, y2, ...]`` list is accepted too).

    Returns:
        2-D array of ``image.dtype`` with the height and width of ``image``.
    """
    # Draw straight into a single-channel canvas; no colour conversion is needed afterwards.
    mask = np.zeros(image.shape[:2], dtype=image.dtype)

    # A box is exactly four scalars. Checking only the length cannot tell a box
    # from a polygon, because a polygon may also have four entries (four points).
    is_box = len(coordinates) == 4 and all(np.isscalar(value) for value in coordinates)
    if is_box:
        x, y, w, h = (int(value) for value in coordinates)
        # Clamp at 0 so a box starting outside the image does not wrap around via negative slicing.
        mask[max(y, 0):max(y + h, 0), max(x, 0):max(x + w, 0)] = 255
    else:
        points = np.array(coordinates, np.int32).reshape(-1, 2)
        cv2.fillPoly(mask, [points], 255)
    return mask

def create_segmentation_map(image, coordinates):
    """Return a single-channel label map: shape number k (1-based) is filled with k, background is 0.

    Args:
        image: Array whose first two dimensions give the map height and width.
        coordinates: A single shape or a sequence of shapes. A shape is either
            a box ``(x, y, w, h)`` of four scalars or a polygon given as a
            sequence of ``(x, y)`` points (or a flat ``[x1, y1, ...]`` list).

    Returns:
        2-D array of ``image.dtype`` with the height and width of ``image``.
        Later shapes overwrite earlier ones where they overlap.
    """
    def _all_scalars(values):
        return all(np.isscalar(value) for value in values)

    def _is_point(item):
        return (not np.isscalar(item)) and len(item) == 2 and _all_scalars(item)

    # Callers in this notebook pass one shape, while the loop below expects a
    # list of shapes; normalise both forms. A first element that is a scalar
    # (box / flat polygon) or an (x, y) point means `coordinates` is ONE shape.
    if len(coordinates) == 0:
        shapes = []
    elif np.isscalar(coordinates[0]) or _is_point(coordinates[0]):
        shapes = [coordinates]
    else:
        shapes = coordinates

    segmentation_map = np.zeros(image.shape[:2], dtype=image.dtype)
    for label, shape in enumerate(shapes, start=1):
        if len(shape) == 4 and _all_scalars(shape):  # box
            x, y, w, h = (int(value) for value in shape)
            # Clamp at 0 so a box starting outside the image does not wrap around.
            segmentation_map[max(y, 0):max(y + h, 0), max(x, 0):max(x + w, 0)] = label
        else:  # polygon
            points = np.array(shape, np.int32).reshape(-1, 2)
            cv2.fillPoly(segmentation_map, [points], label)
    return segmentation_map

def create_masks_and_maps(image, coordinates, resize=False):
    """Return ``(binary_mask, segmentation_map)`` for ``coordinates`` drawn on ``image``.

    Args:
        image: Image the coordinates refer to (original resolution).
        coordinates: Shape description forwarded to ``create_binary_mask`` and
            ``create_segmentation_map``.
        resize: When true, both outputs are scaled to the resolution that
            ``resize_image`` produces for ``image``.

    The masks are always drawn at the original resolution, because the
    coordinates are expressed in original-image pixels. Drawing them on an
    already-resized image would put the region in the wrong place or outside
    the canvas.
    """
    binary_mask = create_binary_mask(image, coordinates)
    segmentation_map = create_segmentation_map(image, coordinates)

    if resize:
        # Take the target size from resize_image so both stay in sync.
        target_height, target_width = resize_image(image).shape[:2]
        size = (target_width, target_height)  # cv2.resize expects (width, height)
        # Nearest-neighbour keeps mask/label values discrete (no blended in-between values).
        binary_mask = cv2.resize(binary_mask, size, interpolation=cv2.INTER_NEAREST)
        segmentation_map = cv2.resize(segmentation_map, size, interpolation=cv2.INTER_NEAREST)

    return binary_mask, segmentation_map

# Data loading: find every image and JSON path under a directory and return both lists sorted.
def preprocess_data(directory):
    """Collect ``*.jpg`` and ``*.json`` paths below ``directory`` (recursively).

    Returns a pair ``(image_paths, json_paths)``; each list is sorted on its
    own, and the caller pairs them by position.
    """
    image_paths, json_paths = (
        sorted(glob.glob(os.path.join(directory, '**', file_pattern), recursive=True))
        for file_pattern in ('*.jpg', '*.json')
    )
    return image_paths, json_paths

# Image processing: build binary masks and segmentation maps from the JSON labels and save them as CSV.
def process_images(image_paths, json_paths):
    """Create masks for every labelled region, before and after resizing, and write them to CSV.

    For each image/JSON pair, every ``labelingInfo`` entry with a ``box`` or
    ``polygon`` (the box wins when both are present) yields a binary mask and
    a segmentation map at the original resolution ("before_resize") and at
    the ``resize_image`` resolution ("after_resize").

    Each mask is written to ``{stage}_{i}_{j}.csv`` in the working directory:
    ``i`` is the position of the pair in the input lists and ``j`` numbers the
    masks of that image (binary mask, then segmentation map, per label entry).
    A file holds the image's ``metaData`` columns (filled in the first row
    only) followed by one column per mask pixel column.

    Args:
        image_paths: Image file paths, paired by position with ``json_paths``.
        json_paths: Annotation JSON paths.

    Returns:
        None. The CSV files are the only output.
    """
    for image_index, (image_path, json_path) in enumerate(zip(image_paths, json_paths)):
        with open(json_path) as f:
            json_data = json.load(f)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None instead of raising when a file cannot be decoded.
            continue

        masks = {
            "before_resize": [],
            "after_resize": [],
        }
        # cv2.resize expects (width, height); take the size from resize_image so both agree.
        target_height, target_width = resize_image(image).shape[:2]
        target_size = (target_width, target_height)

        for label_info in json_data.get('labelingInfo') or []:
            coordinates = label_info.get('box') or label_info.get('polygon')
            if not coordinates:
                continue

            binary_mask, segmentation_map = create_masks_and_maps(image, coordinates)
            masks["before_resize"] += [binary_mask, segmentation_map]

            # The coordinates are in original-image pixels, so the resized masks
            # are obtained by scaling the finished masks. Nearest-neighbour keeps
            # the mask/label values discrete.
            masks["after_resize"] += [
                cv2.resize(binary_mask, target_size, interpolation=cv2.INTER_NEAREST),
                cv2.resize(segmentation_map, target_size, interpolation=cv2.INTER_NEAREST),
            ]

        # Write per image instead of collecting every mask of the dataset in memory first.
        metadata_df = pd.DataFrame([json_data.get('metaData') or {}])
        for stage, stage_masks in masks.items():
            for mask_index, mask in enumerate(stage_masks):
                result_df = pd.concat([metadata_df, pd.DataFrame(mask)], axis=1)
                result_df.to_csv(f"{stage}_{image_index}_{mask_index}.csv", index=False)

# Dataset root on the mounted drive.
src_path = '/content/drive/Shareddrives/152.반려동물 피부질환 데이터'

# Collect the image/JSON paths, then generate the masks and write them to CSV files.
image_paths, json_paths = preprocess_data(src_path)
process_images(image_paths, json_paths)

In [None]:
import os
import json
import cv2
import glob
import numpy as np
import pandas as pd

class ImageProcessor:
    """Build binary masks and segmentation maps for a directory of labelled images.

    The constructor scans ``src_path`` for ``*.jpg`` and ``*.json`` files;
    ``process_images`` then writes one CSV file per generated mask.
    """

    def __init__(self, src_path):
        """Remember ``src_path`` and collect the sorted image and JSON paths below it."""
        self.src_path = src_path
        self.image_paths, self.json_paths = self.preprocess_data()

    def resize_image(self, image, target_size=(96, 96)):
        """Return ``image`` resized to ``target_size`` (read by OpenCV as (width, height))."""
        return cv2.resize(image, target_size)

    @staticmethod
    def _is_box(shape):
        """Return True when ``shape`` is a box: exactly four scalars ``(x, y, w, h)``."""
        return len(shape) == 4 and all(np.isscalar(value) for value in shape)

    @staticmethod
    def _as_shapes(coordinates):
        """Normalise ``coordinates`` (one shape or a sequence of shapes) to a list of shapes."""
        if len(coordinates) == 0:
            return []
        first = coordinates[0]
        first_is_point = (
            not np.isscalar(first)
            and len(first) == 2
            and all(np.isscalar(value) for value in first)
        )
        # A scalar (box / flat polygon) or an (x, y) point as first element
        # means `coordinates` itself is a single shape.
        if np.isscalar(first) or first_is_point:
            return [coordinates]
        return list(coordinates)

    @classmethod
    def _fill_shape(cls, canvas, shape, value):
        """Fill ``shape`` (box or polygon) on the single-channel ``canvas`` with ``value``."""
        if cls._is_box(shape):
            x, y, w, h = (int(number) for number in shape)
            # Clamp at 0 so a box starting outside the image does not wrap around.
            canvas[max(y, 0):max(y + h, 0), max(x, 0):max(x + w, 0)] = value
        else:
            points = np.array(shape, np.int32).reshape(-1, 2)
            cv2.fillPoly(canvas, [points], value)

    def create_binary_mask(self, image, coordinates):
        """Return a 2-D mask that is 255 inside the box/polygon ``coordinates`` and 0 elsewhere.

        ``coordinates`` is either a box ``(x, y, w, h)`` of four scalars or a
        polygon given as ``(x, y)`` points (or a flat ``[x1, y1, ...]`` list).
        A length check alone cannot separate the two, since a polygon may
        also have four entries.
        """
        mask = np.zeros(image.shape[:2], dtype=image.dtype)
        self._fill_shape(mask, coordinates, 255)
        return mask

    def create_segmentation_map(self, image, coordinates):
        """Return a 2-D label map where shape number k (1-based) is filled with k.

        ``coordinates`` may be one shape or a sequence of shapes; later shapes
        overwrite earlier ones where they overlap.
        """
        segmentation_map = np.zeros(image.shape[:2], dtype=image.dtype)
        for label, shape in enumerate(self._as_shapes(coordinates), start=1):
            self._fill_shape(segmentation_map, shape, label)
        return segmentation_map

    def create_masks_and_maps(self, image, coordinates, resize=False):
        """Return ``(binary_mask, segmentation_map)`` for ``coordinates`` on ``image``.

        The masks are always drawn at the original resolution, because the
        coordinates are in original-image pixels. With ``resize=True`` the
        finished masks are scaled to the ``resize_image`` resolution.
        """
        binary_mask = self.create_binary_mask(image, coordinates)
        segmentation_map = self.create_segmentation_map(image, coordinates)

        if resize:
            target_height, target_width = self.resize_image(image).shape[:2]
            size = (target_width, target_height)  # cv2.resize expects (width, height)
            # Nearest-neighbour keeps mask/label values discrete.
            binary_mask = cv2.resize(binary_mask, size, interpolation=cv2.INTER_NEAREST)
            segmentation_map = cv2.resize(segmentation_map, size, interpolation=cv2.INTER_NEAREST)

        return binary_mask, segmentation_map

    def preprocess_data(self):
        """Return the sorted ``*.jpg`` paths and sorted ``*.json`` paths below ``self.src_path``.

        The two lists are sorted independently and paired by position later.
        """
        image_paths = sorted(glob.glob(os.path.join(self.src_path, '**', '*.jpg'), recursive=True))
        json_paths = sorted(glob.glob(os.path.join(self.src_path, '**', '*.json'), recursive=True))
        return image_paths, json_paths

    def process_images(self):
        """Create masks for every labelled region, before and after resizing, and write them to CSV.

        For each image/JSON pair, every ``labelingInfo`` entry with a ``box``
        or ``polygon`` (the box wins when both are present) yields a binary
        mask and a segmentation map at the original resolution
        ("before_resize") and at the ``resize_image`` resolution
        ("after_resize").

        Each mask is written to ``{stage}_{i}_{j}.csv`` in the working
        directory: ``i`` is the position of the pair in the path lists and
        ``j`` numbers the masks of that image (binary mask, then segmentation
        map, per label entry). A file holds the image's ``metaData`` columns
        (filled in the first row only) followed by one column per mask pixel
        column.
        """
        for image_index, (image_path, json_path) in enumerate(zip(self.image_paths, self.json_paths)):
            with open(json_path) as f:
                json_data = json.load(f)

            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread returns None instead of raising when a file cannot be decoded.
                continue

            masks = {
                "before_resize": [],
                "after_resize": [],
            }

            for label_info in json_data.get('labelingInfo') or []:
                coordinates = label_info.get('box') or label_info.get('polygon')
                if not coordinates:
                    continue

                masks["before_resize"] += list(self.create_masks_and_maps(image, coordinates))
                masks["after_resize"] += list(self.create_masks_and_maps(image, coordinates, resize=True))

            # Write per image instead of collecting every mask of the dataset in memory first.
            metadata_df = pd.DataFrame([json_data.get('metaData') or {}])
            for stage, stage_masks in masks.items():
                for mask_index, mask in enumerate(stage_masks):
                    result_df = pd.concat([metadata_df, pd.DataFrame(mask)], axis=1)
                    result_df.to_csv(f"{stage}_{image_index}_{mask_index}.csv", index=False)

# Scan the dataset directory on the mounted drive, then generate the masks and write the CSV files.
processor=ImageProcessor('/content/drive/Shareddrives/152.반려동물 피부질환 데이터')
processor.process_images()

# Modeling

In [None]:
import tensorflow as tf
from tensorflow.keras import models
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [None]:
# Load the six CSV files produced by save_results (read from the current working directory).
# 1. Polygon-cropped images
polygon_cropped = pd.read_csv("polygon_cropped.csv")

# 2. Box-cropped images
box_cropped = pd.read_csv("box_cropped.csv")

# 3. Polygon-cropped and box-cropped images
polygon_box_cropped = pd.read_csv("polygon_box_cropped.csv")

# 4. Polygon-cropped images with metadata
polygon_cropped_metadata = pd.read_csv("polygon_cropped_metadata.csv")

# 5. Box-cropped images with metadata
box_cropped_metadata = pd.read_csv("box_cropped_metadata.csv")

# 6. Polygon-cropped and box-cropped images with metadata
polygon_box_cropped_metadata = pd.read_csv("polygon_box_cropped_metadata.csv")

In [None]:
# Configure the classifiers and their hyperparameters.
model_LR = LogisticRegression(random_state=9)
model_LDA = LinearDiscriminantAnalysis(solver='svd')
model_KNN = KNeighborsClassifier(n_neighbors=5)
# DecisionTreeClassifier has no `n_estimators` parameter (that belongs to ensembles such as
# RandomForestClassifier) and raises TypeError when given one; a fixed seed keeps the tree reproducible.
model_DT = DecisionTreeClassifier(random_state=0)
model_RF = RandomForestClassifier(n_estimators=200, random_state=0)
model_GaussianNB = GaussianNB(var_smoothing=1e-09)
model_SVM = SVC(kernel='linear', C=1, random_state=0)

# Collect the classifiers in a list. It is deliberately not called `models`: that name is the
# `tensorflow.keras.models` module imported above, and the CNN cell still needs it.
classifiers = [model_LR, model_LDA, model_KNN, model_DT, model_RF, model_GaussianNB, model_SVM]

# Fit every classifier and print its accuracy on the test split.
# NOTE(review): X_train, y_train, X_test and y_test are not defined in the visible part of this
# notebook; they must be created from the loaded CSV data before this cell runs.
for classifier in classifiers:
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f"{classifier.__class__.__name__}: {score}")

In [None]:
# Build the Sequential model. `Sequential` is imported directly above; going through
# `models.Sequential()` breaks once the name `models` has been rebound to something else.
model = Sequential()

# First convolution block (input: 96x96 images with 3 channels)
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(96,96,3)))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(3, 3)))

# First Dropout layer
model.add(Dropout(0.25))

# Second convolution block
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second Dropout layer
model.add(Dropout(0.25))

# Third convolution block
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2)))

# Third Dropout layer
model.add(Dropout(0.25))

# Flatten layer
model.add(Flatten())

# First Dense layer
model.add(Dense(units=1024, activation='relu'))
model.add(BatchNormalization())

# Fourth Dropout layer
model.add(Dropout(0.5))

# Second Dense layer: final output layer (7 classes, softmax)
model.add(Dense(units=7, activation='softmax'))

# Compile the model.
# The `lr` and `decay` arguments of Adam are deprecated or removed in current Keras optimizers.
# `learning_rate` with an InverseTimeDecay schedule reproduces the old behaviour:
# lr_t = 0.001 / (1 + 0.00001 * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=0.00001,
)
opt = Adam(learning_rate=lr_schedule)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Print the model architecture.
model.summary()

# Train the model.
epochs = 150
batch_size = 32

# NOTE(review): train_data, val_data and test_data are not defined in the visible part of this
# notebook. If they are generators or tf.data datasets that already batch their output, Keras
# rejects an explicit `batch_size` here; confirm their type.
history = model.fit(train_data, epochs=epochs, batch_size=batch_size, validation_data=val_data)

# Evaluate the model.
test_loss, test_acc = model.evaluate(test_data)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

In [None]:
# Convert the trained Keras model to a TensorFlow Lite model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the converted model to a file (convert() output is written in binary mode).
with open('your_model.tflite', 'wb') as f:
    f.write(tflite_model)