# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import glob
import cv2
import numpy as np
from tqdm import tqdm

from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import LabelEncoder

from skimage.feature import graycomatrix

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization, Concatenate

In [3]:
def get_path_and_label(src_path, num_files_per_folder):
    image_paths = []
    lesions = []

    for root, dirs, files in tqdm(os.walk(src_path), desc='Walking directories', unit=' dir'):
        for dir in dirs:
            dir_path = os.path.join(root, dir)
            image_files = sorted(glob.glob(os.path.join(dir_path, '*.jpg')))

            for image_file in image_files[:num_files_per_folder]:
                filename = os.path.basename(image_file)
                parts = filename.split('_')
                if len(parts) >= 3:
                    second_part = parts[2]
                    lesions.append(second_part)
                    image_paths.append(image_file)

    return image_paths, lesions

def preprocess_images(image_paths):
    images = []
    gray_images = []

    for image_path in tqdm(image_paths, desc='Preprocessing Images'):
        image = cv2.imread(image_path)
        if image is None:
            print(f"Error loading image: {image_path}")
            continue
        image = cv2.resize(image, target_size)

        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        gray_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)

        images.append(rgb_image)
        gray_images.append(gray_image)

    return np.array(images), np.array(gray_images)

def extract_color_histograms(images):
    histograms = np.zeros((images.shape[0], 512))

    for i, image in tqdm(enumerate(images), desc='Extracting Color Histograms'):
        histogram = cv2.calcHist([image], [0, 1, 2], None,[8 ,8 ,8 ], [0 ,256 ,0 ,256 ,0 ,256])
        histogram = cv2.normalize(histogram,histogram).flatten()

        histograms[i] = histogram

    return histograms

def extract_hu_moments(gray_images):
    moments_list = [cv2.HuMoments(cv2.moments(gray_image)).flatten() for gray_image in tqdm(gray_images, desc='Extracting Hu Moments')]
    return np.array(moments_list)

def extract_haralick_textures(gray_images):
    textures_list = [np.mean(graycomatrix(gray_image, distances=[1], angles=[0], symmetric=True, normed=True), axis=(0, 1)).flatten() for gray_image in tqdm(gray_images, desc='Extracting Haralick Textures')]
    return np.array(textures_list)

def extract_features(image_paths):
    images, gray_images = preprocess_images(image_paths)
    color_histograms = extract_color_histograms(images)
    hu_moments = extract_hu_moments(gray_images)
    haralick_textures = extract_haralick_textures(gray_images)

    feature_vectors = np.concatenate([color_histograms,
                                      hu_moments,
                                      haralick_textures], axis=1)

    return feature_vectors

def create_dataset(image_paths, image_feature_vectors, lesions):
    encoder = LabelEncoder()
    encoded_labels = encoder.fit_transform(lesions)

    num_classes = len(encoder.classes_)
    one_hot_labels = tf.one_hot(encoded_labels, depth=num_classes)

    dataset = tf.data.Dataset.from_tensor_slices(({"image_input": image_paths,
                                                   "global_feature_input": image_feature_vectors},
                                                  one_hot_labels))

    def load_and_preprocess(inputs_dict, label):
        img_raw = tf.io.read_file(inputs_dict["image_input"])
        img_tensor = tf.image.decode_jpeg(img_raw, channels=3)
        img = tf.image.resize(img_tensor, target_size)
        img = img / 255.0

        return {"image_input": img,
                "global_feature_input": inputs_dict["global_feature_input"]}, label

    dataset = dataset.map(load_and_preprocess)

    return dataset

# camera cat

In [4]:
src_path = '/content/drive/Shareddrives/반려묘/일반카메라/Training'
num_files_per_folder = 2000

image_paths, lesions = get_path_and_label(src_path, num_files_per_folder)

Walking directories: 14 dir [00:12,  1.08 dir/s]


In [None]:
target_size = (256, 256)

image_feature_vectors = extract_features(image_paths)

Preprocessing Images:  16%|█▌        | 3244/20490 [01:20<04:25, 64.96it/s]

In [18]:
print('Number of image paths: ', len(image_paths))
print('Number of image feature vectors: ', len(image_feature_vectors))
print('Number of labels: ', len(lesions))

Number of image paths:  20490
Number of image feature vectors:  20490
Number of labels:  20490


In [11]:
print('image paths:\n', image_paths[:5])
print('image feature vectors:\n', image_feature_vectors[:5])
print('labels:\n', lesions[:5])

image paths:
 ['/content/drive/Shareddrives/반려묘/일반카메라/Training/유증상/A6_결절_종괴_잔여/IMG_C_A6_807019.jpg', '/content/drive/Shareddrives/반려묘/일반카메라/Training/유증상/A6_결절_종괴_잔여/IMG_C_A6_807020.jpg', '/content/drive/Shareddrives/반려묘/일반카메라/Training/유증상/A6_결절_종괴_잔여/IMG_C_A6_807021.jpg', '/content/drive/Shareddrives/반려묘/일반카메라/Training/유증상/A6_결절_종괴_잔여/IMG_C_A6_807022.jpg', '/content/drive/Shareddrives/반려묘/일반카메라/Training/유증상/A6_결절_종괴_잔여/IMG_C_A6_807023.jpg']
image feature vectors:
 [[ 3.81492637e-02  2.71042722e-04  0.00000000e+00 ... -3.34269860e-16
  -3.60531132e-23  1.52587891e-05]
 [ 6.17622733e-02  4.69318184e-05  0.00000000e+00 ...  3.94745778e-16
  -4.53436791e-23  1.52587891e-05]
 [ 2.59372950e-01  2.52708830e-02  0.00000000e+00 ...  6.14236736e-14
   1.20199478e-20  1.52587891e-05]
 [ 3.76893729e-02  2.93792435e-03  0.00000000e+00 ... -7.87975735e-17
  -3.94002962e-2

In [12]:
buffer_size = 1000
batch_size = 64

dataset = create_dataset(image_paths, image_feature_vectors, lesions)
dataset = dataset.shuffle(buffer_size).batch(batch_size)

In [13]:
dataset_size = tf.data.experimental.cardinality(dataset).numpy()

train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1

train_size = int(train_ratio * dataset_size)
val_size = int(val_ratio * dataset_size)
test_size = int(test_ratio * dataset_size)

train_dataset = dataset.take(train_size)
val_dataset_temp = dataset.skip(train_size)

val_dataset = val_dataset_temp.take(val_size)
test_dataset = val_dataset_temp.skip(val_size)

In [15]:
print('Expected train_dataset size: ', train_size)
print('Expected val_dataset size: ', val_size)
print('Expected test_dataset size: ', test_size)

print('Actual train_dataset size: ', tf.data.experimental.cardinality(train_dataset).numpy())
print('Actual val_dataset size: ', tf.data.experimental.cardinality(val_dataset).numpy())
print('Actual test_dataset size: ', tf.data.experimental.cardinality(test_dataset).numpy())

Expected train_dataset size:  256
Expected val_dataset size:  32
Expected test_dataset size:  32
Actual train_dataset size:  256
Actual val_dataset size:  32
Actual test_dataset size:  33


In [16]:
image_input = Input(shape=(256, 256, 3), name='image_input')
x = Conv2D(32, (3, 3), padding='same', activation='relu')(image_input)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(3 , 3))(x)

x = Dropout(0.25)(x)

x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2 , 2))(x)

x = Dropout(0.25)(x)

x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
x = Conv2D(128, (3, 3), padding='same', activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2 , 2))(x)

x = Dropout(0.25)(x)

global_feature_input = Input(shape=(520,), name='global_feature_input')

combined_features= Concatenate()([Flatten()(x), global_feature_input])

combined_features= Dense(1024, activation='relu')(combined_features)
combined_features= BatchNormalization ()(combined_features)
combined_features= Dropout(0.5)(combined_features)

outputs= Dense(4, activation='softmax')(combined_features)

model= Model(inputs=[image_input, global_feature_input], outputs=outputs)

model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [17]:
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=150,
    batch_size=64
)

Epoch 1/150

InvalidArgumentError: ignored

In [None]:
y_true = np.concatenate([y.numpy() for x, y in test_dataset], axis=0)
y_pred = model.predict(test_dataset)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_true, axis=1)

print(classification_report(y_true_classes, y_pred_classes))

print(confusion_matrix(y_true_classes, y_pred_classes))

roc_auc = roc_auc_score(y_true, y_pred , multi_class='ovr')
print('ROC-AUC score:', roc_auc)

In [None]:
# TensorFlow Lite 모델로 변환
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# 변환된 모델을 파일로 저장
with open('3000_256.tflite', 'wb') as f:
    f.write(tflite_model)

# microscope cat

In [None]:
from tensorflow.keras.layers import PReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.layers import Concatenate, MaxPooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import concatenate
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import mean_squared_error

def inception_module(input_layer, filters):
    conv1x1 = Conv2D(filters[0], (1, 1), activation='relu')(input_layer)
    conv3x3_reduce = Conv2D(filters[1], (1, 1), activation='relu')(input_layer)
    conv3x3 = Conv2D(filters[2], (3, 3), padding='same', activation='relu')(conv3x3_reduce)
    conv5x5_reduce = Conv2D(filters[3], (1, 1), activation='relu')(input_layer)
    conv5x5 = Conv2D(filters[4], (5, 5), padding='same', activation='relu')(conv5x5_reduce)
    maxpool = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_layer)
    maxpool_conv = Conv2D(filters[5], (1, 1), activation='relu')(maxpool)
    inception_output = Concatenate(axis=-1)([conv1x1, conv3x3, conv5x5, maxpool_conv])
    return inception_output

# Input
input_shape = (128, 128, 3)
input_layer = Input(shape=input_shape)

# Inception block
inception_output = inception_module(input_layer, filters=[64, 128, 192, 32, 96, 64])
inception_output = inception_module(inception_output, filters=[64, 128, 192, 32, 96, 64])
# Add more inception modules if needed

# Primary Capsule layer
primary_capsules = Conv2D(32, (1, 1), activation='relu')(inception_output)

# Higher Capsule layers
# (Add imperative routing mechanism layers here)

# PReLU activation for routing
higher_capsules_prelu = PReLU()(higher_capsules)

# Flatten and Fully Connected layers
capsule_flatten = Flatten()(higher_capsules_prelu)  # Flatten higher capsules
output_layer = Dense(2, activation='softmax')(capsule_flatten)  # Two capsules: parasitized and uninfected

# Create the model
model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model with Adam optimizer and custom loss function
optimizer = Adam(learning_rate=0.007, beta_1=0.8)
loss_fn = custom_loss_function # Define the custom loss function as described in the paper
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# Print the model summary
model.summary()