# Get the data from:
https://www.kaggle.com/datasets/farjanakabirsamanta/skin-cancer-dataset

In [1]:
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Show the TensorFlow version this notebook is running against.
tf.__version__

'2.10.0'

In [3]:
# List the compute devices (CPU/GPU) that TensorFlow can see.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# Matching the images to the csv file

In [4]:
# Load the HAM10000 metadata table; it carries one row per image with the
# diagnosis label in `dx` and the image identifier in `image_id`.
metadata = pd.read_csv("./skin_cancer/HAM10000_metadata.csv")

In [5]:
# Count images per diagnosis class to see how balanced the labels are.
metadata.dx.value_counts()

nv       6705
mel      1113
bkl      1099
bcc       514
akiec     327
vasc      142
df        115
Name: dx, dtype: int64

In [6]:
# Derive the on-disk file name for every image id (adds a `filename` column).
metadata = metadata.assign(filename=metadata['image_id'] + '.jpg')

# Importing the data into TensorFlow

In [7]:
# Display the metadata frame, now including the derived `filename` column.
metadata

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,filename
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,ISIC_0027419.jpg
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,ISIC_0025030.jpg
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,ISIC_0026769.jpg
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,ISIC_0025661.jpg
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,ISIC_0031633.jpg
...,...,...,...,...,...,...,...,...
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen,ISIC_0033084.jpg
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen,ISIC_0033550.jpg
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen,ISIC_0033536.jpg
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face,ISIC_0032854.jpg


In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# Hold out 10% of the rows as the test set.
# - random_state pins the shuffle so the split is identical after a kernel restart.
# - stratify keeps the per-class proportions of `dx` the same in both partitions,
#   which matters because the classes are heavily imbalanced.
# NOTE(review): several rows share a lesion_id, so a row-level split can put
# images of the same lesion on both sides -- consider a grouped split.
train_df, test_df = train_test_split(
    metadata,
    test_size=0.1,
    random_state=42,
    stratify=metadata['dx'],
)

In [10]:
# Display the training partition produced by the split.
train_df

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,filename
3115,HAM_0005661,ISIC_0025053,nv,follow_up,50.0,female,abdomen,ISIC_0025053.jpg
8776,HAM_0002518,ISIC_0032028,nv,histo,60.0,male,abdomen,ISIC_0032028.jpg
1864,HAM_0000176,ISIC_0033073,mel,histo,65.0,male,upper extremity,ISIC_0033073.jpg
2898,HAM_0006904,ISIC_0027291,bcc,histo,70.0,male,back,ISIC_0027291.jpg
7948,HAM_0006559,ISIC_0034056,nv,histo,25.0,female,ear,ISIC_0034056.jpg
...,...,...,...,...,...,...,...,...
5475,HAM_0003658,ISIC_0026379,nv,follow_up,45.0,male,trunk,ISIC_0026379.jpg
3669,HAM_0005088,ISIC_0031647,nv,follow_up,50.0,female,trunk,ISIC_0031647.jpg
8472,HAM_0006210,ISIC_0026454,nv,histo,50.0,male,back,ISIC_0026454.jpg
5223,HAM_0002125,ISIC_0025502,nv,follow_up,50.0,male,trunk,ISIC_0025502.jpg


In [11]:
# Display the held-out test partition produced by the split.
test_df

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,filename
7368,HAM_0005248,ISIC_0032902,nv,histo,45.0,female,upper extremity,ISIC_0032902.jpg
9322,HAM_0001902,ISIC_0029668,nv,consensus,5.0,female,lower extremity,ISIC_0029668.jpg
9967,HAM_0000462,ISIC_0033869,akiec,histo,60.0,female,lower extremity,ISIC_0033869.jpg
7494,HAM_0003824,ISIC_0034138,nv,histo,15.0,female,lower extremity,ISIC_0034138.jpg
6792,HAM_0002856,ISIC_0028154,nv,histo,70.0,male,upper extremity,ISIC_0028154.jpg
...,...,...,...,...,...,...,...,...
8038,HAM_0002601,ISIC_0032510,nv,consensus,25.0,male,unknown,ISIC_0032510.jpg
9609,HAM_0003561,ISIC_0034154,nv,consensus,,unknown,unknown,ISIC_0034154.jpg
7867,HAM_0000578,ISIC_0033288,nv,histo,30.0,female,back,ISIC_0033288.jpg
6859,HAM_0006030,ISIC_0024618,nv,histo,25.0,male,face,ISIC_0024618.jpg


In [12]:
# Directory that the image generators read the .jpg files from.
dir_path = './skin_cancer/Skin Cancer/Skin Cancer/'

In [13]:
# One shared image generator: multiplies pixel values by 1/255 and reserves
# 10% of the rows it is given for the 'validation' subset.
data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.1,
)

In [14]:
# Mini-batch size and the (height, width) every image is resized to.
batch_size, target_size = 32, (224, 224)

In [15]:
# Flow image data
# Arguments shared by the three generators, defined once so they cannot drift
# apart:
# - classes: an explicit, sorted label list so every generator uses the same
#   class -> index mapping even if a partition happens to miss a rare class.
# - batch_size: the notebook-level setting is passed through explicitly rather
#   than relying on the generator's default.
flow_kwargs = dict(
    directory=dir_path,
    x_col='filename',
    y_col='dx',
    classes=sorted(metadata['dx'].unique().tolist()),
    class_mode='categorical',
    target_size=target_size,
    batch_size=batch_size,
)

# Training subset of train_df: reshuffled every epoch.
train_images = data_generator.flow_from_dataframe(
    dataframe=train_df,
    subset='training',
    shuffle=True,
    **flow_kwargs,
)

# Validation subset of train_df: a fixed order keeps evaluation deterministic.
val_images = data_generator.flow_from_dataframe(
    dataframe=train_df,
    subset='validation',
    shuffle=False,
    **flow_kwargs,
)

# Held-out test frame: a fixed order keeps predictions aligned with test_df rows.
test_images = data_generator.flow_from_dataframe(
    dataframe=test_df,
    shuffle=False,
    **flow_kwargs,
)

Found 8112 validated image filenames belonging to 7 classes.
Found 901 validated image filenames belonging to 7 classes.
Found 1002 validated image filenames belonging to 7 classes.


In [16]:
# Class labels in the order of the generator's class_indices mapping, so that
# position i in a one-hot label corresponds to class_names[i].
class_names = [*train_images.class_indices]
class_names

['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']

In [17]:
# Pull ONE batch and inspect both of its parts. Calling next() twice would
# consume two batches and report shapes that belong to different batches.
sample_images, sample_labels = next(train_images)
sample_images.shape, sample_labels.shape

((32, 224, 224, 3), (32, 7))

In [18]:
# Per-image shape produced by the generator; reused as the model's input_shape.
train_images.image_shape

(224, 224, 3)

### Function to plot some images

In [19]:
def plot_images(train_images):
    """Show up to eight images from the first batch, titled with their class.

    Parameters
    ----------
    train_images : indexable batch source
        ``train_images[0]`` must return ``(samples, labels)`` where ``labels``
        holds one one-hot row per sample.

    Notes
    -----
    Class names are looked up in the notebook-level ``class_names`` list, so
    that list must follow the same index order as the one-hot labels.
    """
    rows = 2
    cols = 4
    samples, labels = train_images[0]
    # A batch may hold fewer images than the grid has cells.
    n_shown = min(rows * cols, len(samples))

    plt.figure(figsize=(3.5 * cols, 3.5 * rows))
    for i in range(n_shown):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(samples[i], cmap='gray')
        # Labels are one-hot: the class index is the POSITION of the largest
        # entry, not the value stored in column 1 of the row.
        class_idx = int(labels[i].argmax())
        plt.title(f'Tumor : {class_names[class_idx]}')
    plt.show()

# Now the CNN

In [20]:
from keras import Sequential
from keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D
from keras.losses import CategoricalCrossentropy

In [None]:
# Size the softmax layer from the generator's class mapping so the model's
# output width always matches the width of the one-hot labels it is trained on.
num_classes = len(train_images.class_indices)

model = Sequential([
    # Feature extractor: two 5x5 convolution stages, each followed by 2x2 pooling.
    Conv2D(filters=32, kernel_size=(5, 5), activation='relu', input_shape=train_images.image_shape),
    MaxPooling2D((2, 2), padding='same'),
    Conv2D(filters=16, kernel_size=(5, 5), activation='relu'),
    MaxPooling2D((2, 2), padding='same'),
    # Classifier head: two dense layers with dropout, then one unit per class.
    Flatten(),
    Dense(units=128, activation='relu'),
    Dropout(0.25),
    Dense(units=64, activation='relu'),
    Dropout(0.25),
    Dense(units=num_classes, activation='softmax'),
])
# Categorical cross-entropy matches the one-hot labels (class_mode='categorical').
model.compile(optimizer='adam', loss=CategoricalCrossentropy(), metrics=['accuracy'])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [None]:
# Train for 5 epochs, validating on val_images after each epoch.
# `batch_size` is deliberately NOT passed: the generators already yield
# batches, and Keras documents that batch_size must not be specified for
# generator / Sequence inputs.
train = model.fit(train_images, epochs=5, validation_data=val_images)

In [None]:
# Score the trained model on the held-out test generator.
# NOTE(review): with the compile() settings above this should be
# [loss, accuracy] -- confirm before indexing into `result`.
result = model.evaluate(test_images)

In [None]:
# Plot the per-epoch training loss from the History object returned by fit()
# (captured in `train`) instead of reading the `model.history` side attribute.
fig, ax = plt.subplots()
ax.plot(train.history['loss'])
ax.set_title("Training loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
plt.show()