In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

In [27]:
metadata = pd.read_csv('HAM10000/HAM10000_metadata.csv')
metadata.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [28]:
image_data = pd.read_csv('HAM10000/hmnist_28_28_RGB.csv')
image_data.head()

Unnamed: 0,pixel0000,pixel0001,pixel0002,pixel0003,pixel0004,pixel0005,pixel0006,pixel0007,pixel0008,pixel0009,...,pixel2343,pixel2344,pixel2345,pixel2346,pixel2347,pixel2348,pixel2349,pixel2350,pixel2351,label
0,192,153,193,195,155,192,197,154,185,202,...,173,124,138,183,147,166,185,154,177,2
1,25,14,30,68,48,75,123,93,126,158,...,60,39,55,25,14,28,25,14,27,2
2,192,138,153,200,145,163,201,142,160,206,...,167,129,143,159,124,142,136,104,117,2
3,38,19,30,95,59,72,143,103,119,171,...,44,26,36,25,12,17,25,12,15,2
4,158,113,139,194,144,174,215,162,191,225,...,209,166,185,172,135,149,109,78,92,2


In [29]:
lesion_types = metadata['dx'].value_counts()
lesion_types

dx
nv       6705
mel      1113
bkl      1099
bcc       514
akiec     327
vasc      142
df        115
Name: count, dtype: int64

In [31]:
# Reshape image data to 28x28x3 and normalize
images = image_data.drop('label', axis=1).values.reshape(-1, 28, 28, 3) / 255.0

# Encode labels to one-hot
labels = image_data['label']
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)
labels_one_hot = to_categorical(labels_encoded)

# Check shapes and data types
print(images.shape)  # Should show (number of images, 28, 28, 3)
print(labels_one_hot.shape)  # Should show (number of images, number of classes)


(10015, 28, 28, 3)
(10015, 7)


In [32]:
num_classes = 7 # 7 different types of skin lesions: 'akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc'

model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(28, 28, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax') 
])

  super().__init__(


In [33]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [34]:
model.fit(images, labels_one_hot, epochs=10, validation_split=0.2)

Epoch 1/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.6237 - loss: 1.1179 - val_accuracy: 0.6086 - val_loss: 1.6340
Epoch 2/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.6762 - loss: 0.8472 - val_accuracy: 0.3550 - val_loss: 2.2383
Epoch 3/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.6950 - loss: 0.7827 - val_accuracy: 0.3719 - val_loss: 2.4914
Epoch 4/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.7036 - loss: 0.7667 - val_accuracy: 0.6021 - val_loss: 2.1489
Epoch 5/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.7075 - loss: 0.7145 - val_accuracy: 0.3505 - val_loss: 2.7351
Epoch 6/10
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.7292 - loss: 0.6949 - val_accuracy: 0.2506 - val_loss: 2.7579
Epoch 7/10
[1m251/251[0m 

In [40]:
test_loss, test_accuracy = model.evaluate(images, labels_one_hot)

print('Test loss: {:.2f}'.format(test_loss))
print('Test accuracy: {:.2f}'.format(test_accuracy))


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7018 - loss: 0.7621
Test loss: 1.03
Test accuracy: 0.70
