# Imports

In [None]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Rescaling, Dropout, Resizing
from keras.layers import RandomFlip, RandomTranslation, RandomRotation, RandomZoom
from keras.models import Sequential
from keras.metrics import F1Score, Precision
from keras.losses import CategoricalFocalCrossentropy
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE, ADASYN, KMeansSMOTE
from imblearn.under_sampling import TomekLinks, RepeatedEditedNearestNeighbours
from imblearn.combine import SMOTETomek, SMOTEENN
from sklearn.utils import compute_class_weight
from keras.utils import to_categorical
import os
from PIL import Image

2025-04-04 13:38:56.079846: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743766736.318406  271164 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743766736.383324  271164 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-04 13:38:56.823779: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Move images into class folders

In [None]:
def move_and_separate_images(list_of_folders, df_metadata):
    """Move every image found under ``list_of_folders`` into ``dataset/<dx>/``.

    Parameters
    ----------
    list_of_folders : iterable of str
        Folders that are walked recursively for image files.
    df_metadata : pandas.DataFrame
        Metadata table with an ``image_id`` column (file name without
        extension) and a ``dx`` column (class name used as sub-folder).

    Notes
    -----
    Files whose name (minus extension) has no entry in ``df_metadata`` are
    left in place and reported. The function moves files on disk and prints
    a progress line every 100 moved images; it returns ``None``.
    """
    dx_by_image = df_metadata.set_index('image_id', drop=True)['dx']
    count = 0
    for folder in list_of_folders:
        for root, dirs, files in os.walk(folder):
            for file in files:
                # splitext drops only the extension. str.strip('.jpg') would
                # strip any leading/trailing '.', 'j', 'p', 'g' characters.
                image_id = os.path.splitext(file)[0]
                if image_id not in dx_by_image.index:
                    print(f'Skipping {file}: no metadata entry')
                    continue

                class_dir = os.path.join('dataset', dx_by_image.loc[image_id])
                # exist_ok avoids using an exception as control flow, so the
                # first image of each class is moved like every other one.
                os.makedirs(class_dir, exist_ok=True)
                os.rename(os.path.join(root, file), os.path.join(class_dir, file))

                count += 1
                if count % 100 == 0:
                    print(f'{count} images were processed')

In [None]:
# Read the metadata table, then sort the raw image folders into dataset/<dx>/.
df = pd.read_csv('HAM10000_metadata.csv')

move_and_separate_images(
    list_of_folders=['HAM10000_images_part_1', 'HAM10000_images_part_2'],
    df_metadata=df,
)

# Make arrays

In [4]:
def array_from_images(folder, df_metadata, dict_of_labels, h=224, w=224, channels=3, dtype=np.float64):
    """Load every image under ``folder`` into one array plus a label vector.

    Parameters
    ----------
    folder : str
        Root folder, walked recursively. Every file found is opened as an image.
    df_metadata : pandas.DataFrame
        Table with ``image_id`` (file name without extension) and ``dx`` columns.
    dict_of_labels : dict
        Maps each ``dx`` value to its numeric label.
    h, w : int
        Target height and width in pixels.
    channels : int
        Size of the last axis of the image array.
    dtype : numpy dtype, optional
        Element type of the image array. The default keeps the previous
        float64 behaviour; ``np.uint8`` or ``np.float32`` use far less memory.

    Returns
    -------
    array : numpy.ndarray of shape (n_files, h, w, channels)
    labels : numpy.ndarray of shape (n_files,)
    """
    # Collect the paths once, in sorted order, so row order (and therefore any
    # seeded split made later) does not depend on filesystem listing order.
    image_paths = []
    for root, dirs, files in os.walk(folder):
        dirs.sort()
        image_paths.extend(os.path.join(root, file) for file in sorted(files))

    array = np.zeros(shape=(len(image_paths), h, w, channels), dtype=dtype)
    labels = np.zeros(shape=(len(image_paths),))

    dx_by_image = df_metadata.set_index('image_id', drop=True)['dx']

    for count, path in enumerate(image_paths, start=1):
        # splitext drops only the extension. str.strip('.jpg') would strip
        # any leading/trailing '.', 'j', 'p', 'g' characters from the id.
        image_id = os.path.splitext(os.path.basename(path))[0]
        with Image.open(path) as im:
            # Greyscale/RGBA/palette files would not fit a 3-channel slot.
            img = im.convert('RGB') if channels == 3 and im.mode != 'RGB' else im
            # PIL expects (width, height); np.asarray then yields (h, w, channels).
            array[count - 1] = np.asarray(img.resize((w, h)))
        labels[count - 1] = dict_of_labels[dx_by_image.loc[image_id]]
        if count % 1000 == 0:
            print(f'{count} images were processed')
    return array, labels

In [5]:
df = pd.read_csv('HAM10000_metadata.csv')
# Sort the class names before numbering them: iterating a set of strings gives
# an order that can change between kernel sessions, which would silently change
# the meaning of every label. Arrays saved with the old mapping must be rebuilt.
dict_label = {dx: idx for idx, dx in enumerate(sorted(df['dx'].unique()))}

X, y = array_from_images('dataset/', df, dict_label, h=64, w=64)


1000 images were processed
2000 images were processed
3000 images were processed
4000 images were processed
5000 images were processed
6000 images were processed
7000 images were processed
8000 images were processed
9000 images were processed
10000 images were processed


In [6]:
# Positional arrays are stored under the key 'arr_0' in X.npz / y.npz.
# No allow_pickle argument: these are plain numeric arrays, and NumPy versions
# whose savez_compressed has no such parameter would store it as an extra array.
np.savez_compressed('X', X)
np.savez_compressed('y', y)

# Load arrays

In [2]:
# Open the compressed archives written by the save cell.
data_X, data_y = np.load('X.npz'), np.load('y.npz')

In [3]:
# 'arr_0' is the key of the single positional array in each archive.
X, y = data_X['arr_0'], data_y['arr_0']

In [6]:
# train_test_split is already imported in the imports cell at the top.
# 80 % train, then the remaining 20 % is halved into test and validation
# (10 % each); both splits are stratified on the class label.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, stratify=y, test_size=0.2, random_state=38)

# Free the full arrays as soon as they are no longer needed.
del X, y

X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, stratify=y_temp, test_size=0.5, random_state=38)

del X_temp, y_temp

# SMOTETOMEK

In [7]:
# Remember the image shape so it can be restored after resampling; this name
# was previously used without ever being defined in the notebook.
shape_origin = X_train.shape

# SMOTETomek works on 2-D (samples, features) data, so flatten each image.
X_train = np.reshape(X_train, (shape_origin[0], shape_origin[1]*shape_origin[2]*shape_origin[3]))

smotetomek = SMOTETomek(random_state=38, n_jobs=-1)

X_train, y_train = smotetomek.fit_resample(X_train, y_train)

# Resampling changes the number of rows; only the per-image shape is restored.
X_train = np.reshape(X_train, (X_train.shape[0], shape_origin[1], shape_origin[2], shape_origin[3]))



(37541, 12288)

# To categorical

In [7]:
# Fix the one-hot width explicitly. Without num_classes the width is inferred
# from the largest label present in each split, so a split missing the highest
# class would get fewer columns than the model's 7 outputs.
NUM_CLASSES = 7

y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
y_val = to_categorical(y_val, num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

# Helper functions

In [8]:
def get_earlystopping(patience=10):
    """Build an EarlyStopping callback that tracks the validation F1 score.

    Parameters
    ----------
    patience : int
        Number of epochs without improvement before training is stopped.

    Returns
    -------
    keras.callbacks.EarlyStopping
        Callback that restores the best weights when it stops training.
    """
    early_stopping = EarlyStopping(
        monitor='val_f1_score',
        # F1 must be maximised. Stating it explicitly avoids relying on the
        # callback's automatic direction inference for this metric name.
        mode='max',
        patience=patience,
        verbose=1,
        restore_best_weights=True)
    return early_stopping

In [9]:
def plot_learning_curves(model):
    """Plot training and validation curves from ``model.history.history``.

    Three panels are drawn: F1 score, accuracy (or a precision metric when
    accuracy was not recorded), and loss.

    Parameters
    ----------
    model : object
        Trained model whose ``history.history`` dict holds the per-epoch
        values under ``<metric>`` and ``val_<metric>`` keys.

    Raises
    ------
    KeyError
        If a required metric is missing from the history.
    """
    history = model.history.history
    fig, ax = plt.subplots(1, 3, figsize=(15, 5))

    def _plot_pair(axis, metric):
        # One panel: validation and training curve of a single metric.
        axis.plot(history[f'val_{metric}'], label=f'val_{metric}')
        axis.plot(history[metric], label=metric)
        axis.set_title(metric)
        axis.set_xlabel('epoch')
        axis.legend()

    _plot_pair(ax[0], 'f1_score')

    # Decide up front which metric goes in the middle panel. Catching an
    # exception halfway through plotting could leave a partially drawn axis
    # and would also hide unrelated errors.
    if 'accuracy' in history and 'val_accuracy' in history:
        middle_metric = 'accuracy'
    else:
        # The precision key can carry a numeric suffix, so search by prefix
        # and fall back to the name that was hard-coded before.
        middle_metric = next(
            (key for key in history if key.startswith('precision')),
            'precision_1',
        )
    _plot_pair(ax[1], middle_metric)

    _plot_pair(ax[2], 'loss')

    # The previous `fig.show;` only referenced the method without calling it.
    plt.show()

In [10]:
def get_analysis(model, testX, testy):
    """Plot the learning curves, evaluate the model and print a class report.

    ``testy`` is passed to ``classification_report`` together with the argmax
    of the predictions, i.e. as class indices.

    Returns
    -------
    tuple
        ``(y_pred, y_res)``: the raw ``model.predict`` output and its
        per-row argmax.
    """
    plot_learning_curves(model)

    # evaluate() must yield exactly three values: loss, accuracy and F1.
    loss, acc, f1 = model.evaluate(testX, testy)
    summary_lines = (
        'The model gave',
        f'Loss: {loss:.2f}',
        f'Accuracy: {acc:.2f}',
        f'F1 Macro: {f1:.2f}',
    )
    for line in summary_lines:
        print(line)

    probabilities = model.predict(testX)
    predicted_classes = np.argmax(probabilities, axis=1)
    print(classification_report(testy, predicted_classes))
    return probabilities, predicted_classes

In [11]:
def get_analysis_cat(model, testX, testy):
    """Plot the learning curves, evaluate the model and print a class report.

    Variant for one-hot encoded labels: the predictions are converted to
    one-hot rows before being compared with ``testy``.

    Parameters
    ----------
    model : object
        Trained model; ``evaluate`` must return loss, accuracy and F1.
    testX : array-like
        Evaluation inputs.
    testy : array-like
        Evaluation labels, passed to ``classification_report`` as given.

    Returns
    -------
    tuple
        ``(predy, resy)``: the raw ``model.predict`` output and the one-hot
        encoded predicted classes.
    """
    plot_learning_curves(model)
    loss, acc, f1 = model.evaluate(testX, testy)
    print('The model gave')
    print(f'Loss: {loss:.2f}')
    print(f'Accuracy: {acc:.2f}')
    print(f'F1 Macro: {f1:.2f}')
    predy = model.predict(testX)
    # Pin the one-hot width to the number of model outputs. Otherwise it is
    # inferred from the highest class actually predicted, and resy ends up
    # narrower than testy when the last class is never predicted.
    resy = to_categorical(np.argmax(predy, axis=1), num_classes=predy.shape[1])
    print(classification_report(testy, resy))
    return predy, resy

In [12]:
def compile_and_train(model, loss, opt, metrics, epochs, patience=None, steps=None):
    """Compile ``model``, print its summary and fit it.

    Training uses the notebook-level ``X_train``/``y_train`` and validates on
    ``X_val``/``y_val``.

    Parameters
    ----------
    model : keras model
        Model to compile and train (modified in place).
    loss, opt, metrics
        Forwarded to ``model.compile`` as ``loss``, ``optimizer``, ``metrics``.
    epochs : int
        Maximum number of epochs.
    patience : int or None
        When given, an early-stopping callback with this patience is added.
    steps : int or None
        Forwarded to ``model.fit`` as ``steps_per_epoch``.

    Returns
    -------
    The same ``model`` object, after training.
    """
    model.compile(loss=loss,
                  optimizer=opt,
                  metrics=metrics)

    model.summary()

    # A single fit call; only the callback list depends on `patience`.
    callbacks = [get_earlystopping(patience)] if patience is not None else None

    model.fit(
        X_train,
        y_train,
        epochs=epochs,
        validation_data=(X_val, y_val),
        callbacks=callbacks,
        steps_per_epoch=steps
        )

    return model

# Model

In [16]:
# Base filter count: the model uses filters, filters*2 and filters*4 in its
# convolution layers and `filters` units in the hidden Dense layer.
filters = 32

In [18]:
def _conv_stage(base_filters, dropout_rate=0.2):
    """Return fresh layers for one stage: three convs, pool, batch-norm, dropout."""
    return [
        Conv2D(base_filters, kernel_size=3, padding='SAME', activation='relu'),
        Conv2D(base_filters*2, kernel_size=3, padding='SAME', activation='relu'),
        Conv2D(base_filters*4, kernel_size=3, padding='SAME', activation='relu'),
        MaxPooling2D(pool_size=2),
        BatchNormalization(),
        Dropout(dropout_rate),
    ]


model_020 = Sequential([
    Input(shape=(X_train.shape[1:])),
    Rescaling(1./255),
    # The mode string needs underscores; 'horizontal and vertical' is not one
    # of the accepted values ('horizontal', 'vertical', 'horizontal_and_vertical').
    RandomFlip('horizontal_and_vertical'),
    RandomRotation(factor=(-0.3, 0.3)),
    RandomTranslation(height_factor=(-0.3,0.3), width_factor=(-0.3, 0.3)),
    RandomZoom(height_factor=(-0.3,0.3), width_factor=(-0.3, 0.3)),
    # Four identical stages, each halving the spatial size via max-pooling.
    *_conv_stage(filters),
    *_conv_stage(filters),
    *_conv_stage(filters),
    *_conv_stage(filters),
    Flatten(),
    Dense(filters, activation='relu'),
    Dense(7, activation='softmax')
])

# NOTE(review): the recorded run of this cell ended in a GPU
# ResourceExhaustedError. Possibly the training array is too large to be placed
# on the device as float64; confirm, and consider a smaller dtype.
# compile_and_train returns the trained model itself, not a History object.
history020 = compile_and_train(model_020,
                               loss=CategoricalFocalCrossentropy(),
                               opt='adam',
                               metrics=['accuracy', F1Score(average='macro')],
                               epochs=200,
                               patience=None)

2025-04-04 11:40:57.922206: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:497] Allocator (GPU_0_bfc) ran out of memory trying to allocate 18.0KiB (rounded to 18432)requested by op Mul
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2025-04-04 11:40:57.922251: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1053] BFCAllocator dump for GPU_0_bfc
2025-04-04 11:40:57.922261: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1060] Bin (256): 	Total Chunks: 65, Chunks in use: 65. 16.2KiB allocated for chunks. 16.2KiB in use in bin. 3.9KiB client-requested in use in bin.
2025-04-04 11:40:57.922269: I external/local_xla/xla/tsl/framework/bfc_allocator.cc:1060] Bin (512): 	Total Chunks: 1, Chunks in use: 1. 512B allocated for chunks. 512B in use in bin. 448B client-requested in use in bin.
2025-04-04 11:40:57.922276

ResourceExhaustedError: {{function_node __wrapped__Mul_device_/job:localhost/replica:0/task:0/device:GPU:0}} failed to allocate memory [Op:Mul] name: 

In [None]:
# Evaluate the trained model on the held-out test split (one-hot labels).
get_analysis_cat(model=model_020, testX=X_test, testy=y_test)

## Custom loss

In [17]:
import tensorflow as tf
from keras.losses import CategoricalFocalCrossentropy
from keras import backend as K
from keras import ops

def f1macro(y_true, y_pred):
    """Loss combining categorical focal cross-entropy with a soft macro-F1 term.

    The F1 part is computed from the predicted probabilities directly (no
    thresholding): per-class true/false positives and false negatives are
    summed over the batch axis, turned into per-class F1, then averaged.

    Parameters
    ----------
    y_true : tensor
        Target tensor, same shape as ``y_pred``.
        Assumed to be one-hot encoded (batch, n_classes) — TODO confirm.
    y_pred : tensor
        Predicted class probabilities, (batch, n_classes).

    Returns
    -------
    Scalar tensor: ``focal_crossentropy + (1 - macro_f1)``.
    """
    # Cast first so integer targets can be multiplied with float predictions.
    y_true = ops.cast(y_true, 'float32')
    y_pred = ops.cast(y_pred, 'float32')
    eps = K.epsilon()

    # Summing over axis 0 handles every class at once, whatever their number.
    tp = ops.sum(y_true * y_pred, axis=0)
    fp = ops.sum((1 - y_true) * y_pred, axis=0)
    fn = ops.sum(y_true * (1 - y_pred), axis=0)

    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)
    f1 = 2 * (precision * recall) / (precision + recall + eps)

    f1macroscore = ops.mean(f1)
    # Guard against NaN propagating from the inputs.
    f1macroscore = ops.where(ops.isnan(f1macroscore), ops.zeros_like(f1macroscore), f1macroscore)

    # The loss object has to be applied to the batch; adding the object itself
    # to a tensor is not a valid operation.
    focal = CategoricalFocalCrossentropy()(y_true, y_pred)

    return focal + (1 - f1macroscore)