# Skin lesion classification of dermoscopic images using machine learning and convolutional neural network

19 December 2022

https://www.nature.com/articles/s41598-022-22644-9#Tab7

https://aihub.or.kr/aihubdata/data/view.do?currMenu=&topMenu=&aihubDataSe=realm&dataSetSn=561

In [None]:
# Mount Google Drive into the Colab runtime so that files stored on Drive
# become reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import glob
import cv2
import numpy as np
from tqdm import tqdm

from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import LabelEncoder

from skimage.feature import graycomatrix

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization, Concatenate

In [None]:
class ImageLoader:
    """Collect image paths and lesion labels from a directory tree.

    Every sub-directory found while walking ``src_path`` is scanned for
    ``*.jpg`` files (files lying directly in ``src_path`` itself are not
    scanned). The label of an image is the third ``_``-separated field of its
    file name.

    Args:
        src_path: Root directory to walk.
        num_files_per_folder: Maximum number of images taken from each
            sub-directory; the first ones in sorted order are kept.
    """

    def __init__(self, src_path, num_files_per_folder):
        self.src_path = src_path
        self.num_files_per_folder = num_files_per_folder

    def get_path_and_label(self):
        """Return two parallel lists: image paths and their lesion labels.

        Files whose name has fewer than three ``_``-separated fields carry no
        label field; they are reported and skipped instead of aborting the
        whole walk with an ``IndexError``. Paths and labels are appended
        together, so both lists always have the same length.

        Returns:
            tuple[list[str], list[str]]: ``(image_paths, labels)``.
        """
        image_paths = []
        dermatological_lesions = []

        for root, dirs, _files in tqdm(os.walk(self.src_path), desc='Walking directories', unit=' dir'):
            for dir_name in dirs:
                # Escape the directory part so that names containing glob
                # metacharacters ('[', '*', '?') are matched literally.
                dir_path = glob.escape(os.path.join(root, dir_name))
                image_files = sorted(glob.glob(os.path.join(dir_path, '*.jpg')))

                for image_file in image_files[:self.num_files_per_folder]:
                    parts = os.path.basename(image_file).split('_')
                    if len(parts) < 3:
                        print(f"Skipping file with unexpected name: {image_file}")
                        continue

                    image_paths.append(image_file)
                    # Third underscore-separated field is the lesion label.
                    dermatological_lesions.append(parts[2])

        return image_paths, dermatological_lesions

class ImagePreprocessor:
    """Load images from disk and produce resized RGB and grayscale arrays.

    Args:
        image_paths: Iterable of image file paths to read.
        target_size: Size passed to ``cv2.resize`` as ``dsize``.

    Attributes:
        loaded_paths: Paths read successfully by the most recent
            ``preprocess_images`` call, in the same order as the returned
            arrays. Use it to re-align labels when some files were skipped.
        failed_paths: Paths that could not be read by that call.
    """

    def __init__(self, image_paths, target_size):
        self.image_paths = image_paths
        self.target_size = target_size
        self.loaded_paths = []
        self.failed_paths = []

    def preprocess_images(self):
        """Read, resize and colour-convert every image in ``image_paths``.

        Unreadable files are reported and skipped, so the returned arrays can
        be shorter than ``image_paths``; ``loaded_paths`` / ``failed_paths``
        record exactly which entries were kept and which were dropped.

        Returns:
            tuple[np.ndarray, np.ndarray]: ``(rgb_images, gray_images)``.
        """
        images = []
        gray_images = []
        loaded_paths = []
        failed_paths = []

        for image_path in tqdm(self.image_paths, desc='Preprocessing Images'):
            image = cv2.imread(image_path)

            # cv2.imread signals failure by returning None instead of raising.
            if image is None:
                print(f"Error loading image: {image_path}")
                failed_paths.append(image_path)
                continue

            image = cv2.resize(image, self.target_size)

            # OpenCV decodes to BGR; convert to RGB, then derive grayscale.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

            images.append(image)
            gray_images.append(gray_image)
            loaded_paths.append(image_path)

        self.loaded_paths = loaded_paths
        self.failed_paths = failed_paths

        return np.array(images), np.array(gray_images)

class FeatureExtractor:
    """Compute hand-crafted global features from preprocessed images.

    Args:
        images: Array of colour images; indexed along its first axis.
        gray_images: Array of the matching grayscale images.
    """

    def __init__(self, images, gray_images):
        self.images = images
        self.gray_images = gray_images

    def extract_color_histograms(self):
        """Return one normalised 8x8x8 colour histogram per image.

        Returns:
            np.ndarray: Shape ``(num_images, 512)``.
        """
        color_histograms = np.zeros((self.images.shape[0], 512))

        # Wrap the array itself (not the enumerate object) so tqdm can read
        # its length and display a real progress bar with a total.
        for i, image in enumerate(tqdm(self.images, desc='Extracting Color Histograms')):
            histogram = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
            histogram = cv2.normalize(histogram, histogram).flatten()
            color_histograms[i] = histogram

        return color_histograms

    def extract_hu_moments(self):
        """Return the seven Hu moments of every grayscale image.

        Returns:
            np.ndarray: Shape ``(num_images, 7)`` (also for zero images).
        """
        moments_list = [
            cv2.HuMoments(cv2.moments(gray_image)).flatten()
            for gray_image in tqdm(self.gray_images, desc='Extracting Hu Moments')
        ]
        # The explicit reshape keeps the result 2-D when there are no images,
        # so it can still be concatenated column-wise with other features.
        hu_moments = np.array(moments_list).reshape(-1, 7)

        return hu_moments

    def extract_haralick_textures(self, props=('contrast',)):
        """Return GLCM texture statistics for every grayscale image.

        A normalised co-occurrence matrix sums to 1, so averaging its entries
        (as was done before) always gives the constant ``1 / levels**2`` and
        carries no information about the image. The statistics are therefore
        computed with ``graycoprops`` instead.

        Args:
            props: Names of the ``graycoprops`` properties to compute. Each
                one contributes a single column, because a single distance
                and a single angle are used. The default keeps the feature
                width at one column.

        Returns:
            np.ndarray: Shape ``(num_images, len(props))``.
        """
        # Imported locally: it lives next to graycomatrix in skimage.feature.
        from skimage.feature import graycoprops

        textures_list = []
        for gray_image in tqdm(self.gray_images, desc='Extracting Haralick Textures'):
            glcm = graycomatrix(gray_image, distances=[1], angles=[0], symmetric=True, normed=True)
            textures_list.append(
                np.concatenate([graycoprops(glcm, prop).ravel() for prop in props])
            )

        haralick_textures = np.array(textures_list).reshape(-1, len(props))

        return haralick_textures

class DatasetCreator:
    """Run loading, preprocessing and feature extraction, then build a
    ``tf.data.Dataset`` of ``({image, global features}, one-hot label)``.

    Args:
        loader: Provides ``get_path_and_label()``.
        preprocessor: Provides ``preprocess_images()``; its ``image_paths``
            is overwritten with the paths found by ``loader``.
        feature_extractor: Provides the three ``extract_*`` methods; its
            ``images`` / ``gray_images`` are overwritten with the output of
            ``preprocessor``.
    """

    def __init__(self, loader: ImageLoader,
                 preprocessor: ImagePreprocessor,
                 feature_extractor: FeatureExtractor):

        self.loader = loader
        self.preprocessor = preprocessor
        self.feature_extractor = feature_extractor

    def create_dataset(self):
        """Build the dataset and store it in ``self.dataset``.

        The individual steps live on the collaborators, not on this class, so
        each one is delegated and its output is fed into the next stage.

        Returns:
            tf.data.Dataset: The (unbatched, unshuffled) dataset, also
            available as ``self.dataset``.

        Raises:
            ValueError: If the number of loaded images differs from the
                number of labels (e.g. because unreadable files were skipped
                during preprocessing), which would pair images with the
                wrong labels.
        """
        image_paths, self.lesions = self.loader.get_path_and_label()

        # Preprocess exactly the files the loader found.
        self.preprocessor.image_paths = image_paths
        self.images, self.gray_images = self.preprocessor.preprocess_images()

        if len(self.images) != len(self.lesions):
            raise ValueError(
                f"Loaded {len(self.images)} images for {len(self.lesions)} labels; "
                "images and labels are no longer aligned."
            )

        # Extract features from exactly the images that were just loaded.
        self.feature_extractor.images = self.images
        self.feature_extractor.gray_images = self.gray_images
        self.color_histograms = self.feature_extractor.extract_color_histograms()
        self.hu_moments = self.feature_extractor.extract_hu_moments()
        self.haralick_textures = self.feature_extractor.extract_haralick_textures()

        self.feature_vectors = np.concatenate([self.color_histograms,
                                               self.hu_moments,
                                               self.haralick_textures], axis=1)

        encoder = LabelEncoder()
        encoded_labels = encoder.fit_transform(self.lesions)
        num_classes = len(encoder.classes_)
        one_hot_labels = tf.one_hot(encoded_labels, depth=num_classes)

        # Scale pixel values to [0, 1]. All images are held in memory as
        # float32 at this point.
        self.image = tf.convert_to_tensor(self.images, dtype=tf.float32)
        self.image = self.image / tf.constant(255.0, dtype=tf.float32)

        # The dict keys must match the names of the model's Input layers.
        self.dataset = tf.data.Dataset.from_tensor_slices(({"image_input": self.image,
                                                            "global_feature_input": self.feature_vectors},
                                                           one_hot_labels))

        return self.dataset

class Pipeline:
    """Lazily stream feature vectors: load -> preprocess -> extract.

    The collaborators are used through duck typing and must provide
    ``load_images()``, ``preprocess_image(image)`` and
    ``extract_features(image)`` respectively.

    NOTE(review): the ``ImageLoader``, ``ImagePreprocessor`` and
    ``FeatureExtractor`` classes defined in this file do not expose methods
    with these names - confirm which implementations are meant to be used
    with this class.
    """

    def __init__(self, loader: ImageLoader,
                 preprocessor: ImagePreprocessor,
                 feature_extractor: FeatureExtractor):

        self.loader = loader
        self.preprocessor = preprocessor
        self.feature_extractor = feature_extractor

    def execute(self):
        """Yield the extracted features of each loaded image, one at a time."""
        loaded_images = self.loader.load_images()
        for raw_image in loaded_images:
            prepared = self.preprocessor.preprocess_image(raw_image)
            yield self.feature_extractor.extract_features(prepared)

# Cat dataset: standard camera images

In [None]:
# Dataset location and preprocessing / batching hyper-parameters.
src_path = '/content/drive/Shareddrives/반려묘/일반카메라/Training'
num_files_per_folder = 2000
target_size = (256, 256)
buffer_size = 1000
batch_size = 64

# get_path_and_label is a method of ImageLoader, not a free function; calling
# it without an instance raises NameError.
loader = ImageLoader(src_path, num_files_per_folder)
image_paths, lesions = loader.get_path_and_label()

NameError: ignored

In [None]:


# No free function `extract_features` is defined in this notebook; build the
# global feature vectors with the preprocessing / feature-extraction classes.
preprocessor = ImagePreprocessor(image_paths, target_size)
images, gray_images = preprocessor.preprocess_images()

feature_extractor = FeatureExtractor(images, gray_images)
# Column order: colour histogram, Hu moments, texture statistics.
image_feature_vectors = np.concatenate([
    feature_extractor.extract_color_histograms(),
    feature_extractor.extract_hu_moments(),
    feature_extractor.extract_haralick_textures(),
], axis=1)

Preprocessing Images: 100%|██████████| 20490/20490 [09:01<00:00, 37.85it/s]
Extracting Color Histograms: 20490it [00:03, 5848.99it/s]
Extracting Hu Moments: 100%|██████████| 20490/20490 [00:01<00:00, 15993.48it/s]
Extracting Haralick Textures: 100%|██████████| 20490/20490 [00:08<00:00, 2517.30it/s]


In [None]:
# Sanity check: paths, feature vectors and labels should all have the same
# number of entries.
for _caption, _items in (
    ('Number of image paths: ', image_paths),
    ('Number of image feature vectors: ', image_feature_vectors),
    ('Number of labels: ', lesions),
):
    print(_caption, len(_items))

Number of image paths:  20490
Number of image feature vectors:  20490
Number of labels:  20490


In [None]:
# NOTE(review): no free function `create_dataset` is defined in the visible
# part of this notebook (only `DatasetCreator.create_dataset`) - confirm where
# this call is supposed to resolve to.
dataset = create_dataset(image_paths, image_feature_vectors, lesions)

# Shuffle with a fixed order. By default `shuffle` draws a new order every
# time the dataset is iterated, so train/val/test splits made afterwards with
# take()/skip() would contain different (overlapping) samples on each pass.
# NOTE(review): buffer_size is smaller than the dataset, so mixing is only
# local - confirm the samples are not stored grouped by class.
dataset = dataset.shuffle(buffer_size, reshuffle_each_iteration=False).batch(batch_size)

In [None]:
# Number of elements in `dataset`. If the dataset is batched, this counts
# batches, not individual samples.
dataset_size = int(tf.data.experimental.cardinality(dataset).numpy())

# cardinality() reports negative sentinels for infinite / unknown sizes; the
# ratio arithmetic below would then produce negative counts, and take(-1)
# silently takes the whole dataset.
if dataset_size < 0:
    raise ValueError(
        f"Cannot split a dataset of unknown or infinite size (cardinality={dataset_size})."
    )

train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1

train_size = int(train_ratio * dataset_size)
val_size = int(val_ratio * dataset_size)
# The test split receives everything left after train and val, so derive its
# size from the remainder; rounding each ratio down independently would
# under-report it.
test_size = dataset_size - train_size - val_size

train_dataset = dataset.take(train_size)
val_dataset_temp = dataset.skip(train_size)

val_dataset = val_dataset_temp.take(val_size)
test_dataset = val_dataset_temp.skip(val_size)

In [None]:
# Report the split sizes computed from the ratios ...
for _caption, _expected in (
    ('Expected train_dataset size: ', train_size),
    ('Expected val_dataset size: ', val_size),
    ('Expected test_dataset size: ', test_size),
):
    print(_caption, _expected)

# ... next to the sizes the split datasets actually report.
for _caption, _split in (
    ('Actual train_dataset size: ', train_dataset),
    ('Actual val_dataset size: ', val_dataset),
    ('Actual test_dataset size: ', test_dataset),
):
    print(_caption, tf.data.experimental.cardinality(_split).numpy())

Expected train_dataset size:  256
Expected val_dataset size:  32
Expected test_dataset size:  32
Actual train_dataset size:  256
Actual val_dataset size:  32
Actual test_dataset size:  33


In [None]:
# Image branch: three convolutional stages. Each stage is a run of 3x3
# same-padded ReLU convolutions followed by batch normalisation, max pooling
# and dropout. Stage spec: (filters, number of convolutions, pool size).
image_input = Input(shape=(256, 256, 3), name='image_input')
x = image_input
for _filters, _num_convs, _pool_size in (
    (32, 1, (3, 3)),
    (64, 2, (2, 2)),
    (128, 2, (2, 2)),
):
    for _conv_index in range(_num_convs):
        x = Conv2D(_filters, (3, 3), padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=_pool_size)(x)
    x = Dropout(0.25)(x)

# Second input: the 520-dimensional hand-crafted global feature vector.
global_feature_input = Input(shape=(520,), name='global_feature_input')

# Fuse the flattened convolutional features with the global features.
_merge_layer = Concatenate()
combined_features = _merge_layer([Flatten()(x), global_feature_input])

# Fully connected head.
combined_features = Dense(1024, activation='relu')(combined_features)
combined_features = BatchNormalization()(combined_features)
combined_features = Dropout(0.5)(combined_features)

# Four-way softmax classifier.
outputs = Dense(4, activation='softmax')(combined_features)

model = Model(inputs=[image_input, global_feature_input], outputs=outputs)

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# `batch_size` must not be passed when the input is a tf.data.Dataset: the
# dataset already yields batches, so batching is configured on the dataset.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=150,
)

Epoch 1/150

InvalidArgumentError: ignored

In [None]:
# Collect labels and predictions in ONE pass over the test set. Iterating the
# dataset once for the labels and a second time inside model.predict() pairs
# labels with the wrong predictions whenever the input pipeline yields its
# elements in a different order on each iteration (e.g. after shuffle()).
y_true_batches = []
y_pred_batches = []
for batch_inputs, batch_labels in test_dataset:
    y_true_batches.append(batch_labels.numpy())
    y_pred_batches.append(model.predict_on_batch(batch_inputs))

y_true = np.concatenate(y_true_batches, axis=0)
y_pred = np.concatenate(y_pred_batches, axis=0)

# Convert one-hot labels / class probabilities to class indices.
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_true, axis=1)

print(classification_report(y_true_classes, y_pred_classes))

print(confusion_matrix(y_true_classes, y_pred_classes))

roc_auc = roc_auc_score(y_true, y_pred , multi_class='ovr')
print('ROC-AUC score:', roc_auc)

In [None]:
# Convert the Keras model to a TensorFlow Lite model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the converted model to a file.
with open('3000_256.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

# Cat dataset: microscope images

In [None]:
from tensorflow.keras.layers import PReLU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.layers import Concatenate, MaxPooling2D, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import concatenate
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.losses import mean_squared_error

def inception_module(input_layer, filters):
    """Build an Inception-style block of four parallel branches.

    Args:
        input_layer: Tensor that every branch is applied to.
        filters: Indexable with six filter counts, in this order:
            1x1 branch, 3x3 reduction, 3x3 convolution, 5x5 reduction,
            5x5 convolution, pooling-branch projection.

    Returns:
        The four branch outputs concatenated along the last axis.
    """
    # Branch 1: plain 1x1 convolution.
    branch_1x1 = Conv2D(filters[0], (1, 1), activation='relu')(input_layer)

    # Branch 2: 1x1 reduction followed by a 3x3 convolution.
    branch_3x3 = Conv2D(filters[1], (1, 1), activation='relu')(input_layer)
    branch_3x3 = Conv2D(filters[2], (3, 3), padding='same', activation='relu')(branch_3x3)

    # Branch 3: 1x1 reduction followed by a 5x5 convolution.
    branch_5x5 = Conv2D(filters[3], (1, 1), activation='relu')(input_layer)
    branch_5x5 = Conv2D(filters[4], (5, 5), padding='same', activation='relu')(branch_5x5)

    # Branch 4: 3x3 max pooling (stride 1) followed by a 1x1 projection.
    branch_pool = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(input_layer)
    branch_pool = Conv2D(filters[5], (1, 1), activation='relu')(branch_pool)

    branches = [branch_1x1, branch_3x3, branch_5x5, branch_pool]
    return Concatenate(axis=-1)(branches)

# Input
input_shape = (128, 128, 3)
input_layer = Input(shape=input_shape)

# Inception block: two stacked modules with the same filter configuration.
inception_output = inception_module(input_layer, filters=[64, 128, 192, 32, 96, 64])
inception_output = inception_module(inception_output, filters=[64, 128, 192, 32, 96, 64])
# Add more inception modules if needed

# Primary Capsule layer
primary_capsules = Conv2D(32, (1, 1), activation='relu')(inception_output)

# Higher Capsule layers
# TODO: the routing layers are not implemented yet. Until they are, the
# primary capsules are passed straight through so that `higher_capsules` is
# defined and the model can be built (the name was previously undefined and
# this cell failed with NameError).
higher_capsules = primary_capsules

# PReLU activation for routing
higher_capsules_prelu = PReLU()(higher_capsules)

# Flatten and Fully Connected layers
capsule_flatten = Flatten()(higher_capsules_prelu)  # Flatten higher capsules
output_layer = Dense(2, activation='softmax')(capsule_flatten)  # Two-class softmax output

# Create the model
model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model with the Adam optimizer.
optimizer = Adam(learning_rate=0.007, beta_1=0.8)
# TODO: `custom_loss_function` was never defined (NameError). Categorical
# cross-entropy - the loss used for the other softmax classifier in this
# notebook - is a stand-in until the intended custom loss is implemented.
# It expects one-hot encoded labels.
loss_fn = 'categorical_crossentropy'
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# Print the model summary
model.summary()