In [2]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

## Gather data

In [3]:
# Gather metadata
metadata_path = 'dataverse_files/HAM10000_metadata'
metadata = pd.read_csv(metadata_path)
for idx, row in metadata.iterrows():
    metadata.at[idx, 'img_path'] = 'dataverse_files/Images/' + row['image_id'] + '.jpg'

## Data augmentation

In [4]:
# Prepare image data
train_datagen = ImageDataGenerator(
    rescale=1.0/255,
    rotation_range=180,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

test_datagen = ImageDataGenerator(
    rescale=1.0/255,
    validation_split=0.2
)

In [5]:
# Apply augmentation
batch_size = 32
seed = 123

train_generator = train_datagen.flow_from_dataframe(
    dataframe=metadata,
    seed=seed,
    target_size=(150, 150),
    x_col='img_path',
    y_col='dx',
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=metadata,
    seed=seed,
    target_size=(150, 150),
    x_col='img_path',
    y_col='dx',
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
)

Found 8012 validated image filenames belonging to 7 classes.
Found 2003 validated image filenames belonging to 7 classes.


## Train

In [6]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
history = model.fit(
    train_generator, 
    steps_per_epoch = train_generator.samples // batch_size,
    epochs=5, 
    validation_data=test_generator,
    validation_steps = test_generator.samples // batch_size,
)

Epoch 1/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 205ms/step - accuracy: 0.8670 - loss: 0.4026 - val_accuracy: 0.0010 - val_loss: 7.1178
Epoch 2/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130us/step - accuracy: 0.9375 - loss: 0.1892 - val_accuracy: 0.0000e+00 - val_loss: 6.6146
Epoch 3/5


2024-05-12 17:12:17.166238: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
  self.gen.throw(value)
2024-05-12 17:12:17.193891: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 205ms/step - accuracy: 0.8623 - loss: 0.4061 - val_accuracy: 0.0010 - val_loss: 7.7812
Epoch 4/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116us/step - accuracy: 0.8750 - loss: 0.4344 - val_accuracy: 0.0000e+00 - val_loss: 8.1501
Epoch 5/5


2024-05-12 17:13:09.494543: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
2024-05-12 17:13:09.519921: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 203ms/step - accuracy: 0.8695 - loss: 0.3847 - val_accuracy: 0.0010 - val_loss: 7.3536


In [20]:
# print(test_generator.class_indices)
# model.predict(test_generator, steps=test_generator.samples//batch_size)

{'akiec': 0, 'bcc': 1, 'bkl': 2, 'df': 3, 'mel': 4, 'nv': 5, 'vasc': 6}
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 102ms/step


array([[5.5260941e-02, 5.6012049e-02, 2.7041954e-03, ..., 1.0241340e-01,
        7.8099126e-01, 2.6121612e-03],
       [4.4427207e-01, 3.1853583e-01, 2.9852355e-03, ..., 4.9562000e-02,
        1.7236327e-01, 1.2146307e-02],
       [4.3315914e-02, 3.2830402e-02, 2.9189333e-03, ..., 1.0747351e-01,
        8.1149328e-01, 1.9623975e-03],
       ...,
       [1.7450683e-01, 6.0560156e-02, 2.3388257e-03, ..., 9.2700094e-02,
        6.6780001e-01, 2.0812652e-03],
       [1.9614326e-02, 2.2181813e-02, 5.7444642e-05, ..., 1.4577818e-02,
        9.4305706e-01, 5.1158556e-04],
       [2.8952625e-02, 4.3899585e-03, 1.2760797e-05, ..., 1.8032838e-02,
        9.4832152e-01, 2.9017971e-04]], dtype=float32)

## Accuracy

In [29]:
# Final accuracy
print(history.history['accuracy'][-1])

0.8703007698059082
