# Downloading the data and splitting it (train, val, test)

In [9]:
# Download the dataset archive from Kaggle into raw_data/.
#   --fail         exit non-zero on an HTTP error instead of saving the error page as the .zip
#   --location     follow redirects (same as -L)
#   --create-dirs  create the target directory if it does not exist yet
!curl --fail --location --create-dirs \
  --output ~/code/ViniciusGalina1995/skin_lens/raw_data/multiple-skin-disease-detection-and-classification.zip \
  https://www.kaggle.com/api/v1/datasets/download/pritpal2873/multiple-skin-disease-detection-and-classification

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  784M  100  784M    0     0  25.6M      0  0:00:30  0:00:30 --:--:-- 24.7M 0:00:30  0:00:24  0:00:06 26.4M


In [13]:
# Extract the archive into raw_data/: -q keeps the output quiet, -n never overwrites
# files that already exist, so re-running this cell is harmless.
!unzip -q -n ~/code/ViniciusGalina1995/skin_lens/raw_data/multiple-skin-disease-detection-and-classification.zip \
  -d ~/code/ViniciusGalina1995/skin_lens/raw_data/


In [14]:
from pathlib import Path

import splitfolders

# Resolve the data directory from the current user's home instead of hard-coding
# /home/<user>; this mirrors the ~/code/... location used by the download and unzip cells.
raw_data_dir = Path.home() / "code" / "ViniciusGalina1995" / "skin_lens" / "raw_data"
input_folder = str(raw_data_dir / "Skin Cancer Dataset")
output_folder = str(raw_data_dir / "Split")

# Split the images of input_folder into train/val/test sub-folders of output_folder.
# `ratio` is (train, val, test); pass a 2-tuple such as (.8, .2) to create only
# train and val. The fixed seed is there to keep the split reproducible.
splitfolders.ratio(
    input_folder,
    output=output_folder,
    seed=42,
    ratio=(.7, .1, .2),
    group_prefix=None,  # presumably the library default (no grouping of related files) — confirm in the split-folders docs
)

Copying files: 4109 files [00:03, 1175.81 files/s]


# Assigning the directory paths and plotting an image

In [2]:
import os


In [3]:
# Folder that holds the train/val/test sub-folders, relative to the working directory.
split_root = os.path.join("..", "raw_data", "Split")

# One path per subset, built from the shared root.
train_dir, val_dir, test_dir = (
    os.path.join(split_root, subset) for subset in ("train", "val", "test")
)

In [4]:
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img

# Load one training image as a quick sanity check that the split folders are readable.
# The class folder name is spelled exactly as it appears on disk.
sample_path = f"{train_dir}/Acitinic Keratosis/ISIC_0053515.jpg"
img = load_img(sample_path)

2025-01-28 15:18:17.759280: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-01-28 15:18:18.451270: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2025-01-28 15:18:18.451308: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2025-01-28 15:18:18.541852: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-28 15:18:20.849426: W tensorflow/stream_executor/platform/de

# Loading the data in batches

In [22]:
# Number of images per batch; read as a global by load_data() when it builds the datasets.
batch_size = 64

In [23]:
from tensorflow.keras.utils import image_dataset_from_directory # type: ignore

def load_data(data_dir: str, image_size: tuple = (150, 150), shuffle: bool = True):
    """Build a batched image dataset from a directory with one sub-folder per class.

    Args:
        data_dir: Directory whose sub-folders are the classes; labels are inferred
            from those sub-folders.
        image_size: Size every image is resized to. Defaults to (150, 150), the
            spatial input size of the baseline model.
        shuffle: Whether the dataset is shuffled (with the fixed seed 123).
            Defaults to True, which is what Keras does when the argument is
            omitted. Pass False for evaluation sets whose predictions must stay
            aligned with the label order.

    Returns:
        The dataset created by ``image_dataset_from_directory``, with categorical
        (one-hot) labels.

    Note:
        The batch size is taken from the module-level ``batch_size`` variable, so
        that variable must be defined before this function is called.
    """
    return image_dataset_from_directory(
        data_dir,
        labels="inferred",
        label_mode="categorical",
        seed=123,
        image_size=image_size,
        shuffle=shuffle,
        batch_size=batch_size,
    )

In [24]:
# Build the three datasets in train → val → test order with the shared loader.
train_ds, val_ds, test_ds = (
    load_data(subset_dir) for subset_dir in (train_dir, val_dir, test_dir)
)

Found 2874 files belonging to 9 classes.
Found 409 files belonging to 9 classes.
Found 824 files belonging to 9 classes.


In [9]:
# Show the class labels of the training dataset (inferred by load_data from the folder structure).
train_ds.class_names

['Acitinic Keratosis',
 'Basal Cell Carcinoma',
 'Dermatofibroma',
 'Melanoma',
 'Nevus',
 'Pigmented Benign Keratosis',
 'Seborrheic Keratosis',
 'Squamous Cell Carcinoma',
 'Vascular Lesion']

In [10]:
# Inspect the concrete dataset type returned by load_data().
type(train_ds)

tensorflow.python.data.ops.dataset_ops.BatchDataset

# Initializing the baseline model

In [25]:
from tensorflow.keras import layers, optimizers, callbacks
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.layers import Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import math
import seaborn as sns

In [26]:
# Baseline CNN: pixel rescaling, four Conv2D → MaxPooling2D stages, then a small
# dense head with dropout and a 9-way softmax (one unit per class).
model = Sequential()

model.add(layers.Input((150, 150, 3)))
model.add(layers.Rescaling(1./255))

# Convolutional feature extractor; the filter count grows with depth.
for n_filters in (32, 32, 64, 128):
    model.add(
        layers.Conv2D(
            filters=n_filters,
            kernel_size=(3, 3),
            activation="relu",
            padding="same",
        )
    )
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Classifier head.
model.add(layers.Flatten())
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(9, activation="softmax"))

# Adam with its default settings; categorical cross-entropy matches the
# categorical labels produced by load_data().
adam = optimizers.Adam()
model.compile(
    loss='categorical_crossentropy',
    optimizer=adam,
    metrics=['accuracy'],
)

model.summary()
##################################

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling_2 (Rescaling)     (None, 150, 150, 3)       0         
                                                                 
 conv2d_6 (Conv2D)           (None, 150, 150, 32)      896       
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 75, 75, 32)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 75, 75, 32)        9248      
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 38, 38, 32)       0         
 2D)                                                             
                                                                 
 conv2d_8 (Conv2D)           (None, 38, 38, 64)       

In [28]:
# Callbacks
#
# A learning-rate scheduler is currently disabled. It is kept here as a real
# comment (not a bare string expression, which would be evaluated at runtime).
# To re-enable it, uncomment the lines below and add `LRreducer` to the
# `callbacks` list passed to model.fit():
#
# LRreducer = callbacks.ReduceLROnPlateau(monitor="val_loss",
#                                         factor=0.1,
#                                         patience=3,
#                                         verbose=1,
#                                         min_lr=0)

# Stop training once val_loss has not improved for 20 consecutive epochs and
# restore the weights of the best epoch.
EarlyStopper = callbacks.EarlyStopping(monitor='val_loss',
                                       patience=20,
                                       verbose=0,
                                       restore_best_weights=True)

# Fit the model on the training data, validating on val_ds after every epoch.
# NOTE(review): the saved output of this cell shows "/20" epochs and
# ReduceLROnPlateau messages, so it was produced by an earlier version of this
# cell — re-run to refresh it.
history = model.fit(
        train_ds,
        epochs=50,
        validation_data=val_ds,
        callbacks=[EarlyStopper]
        )


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.0009999999776482583.
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 10: ReduceLROnPlateau reducing learning rate to 9.999999310821295e-05.
Epoch 11/20
Epoch 12/20
Epoch 13/20

KeyboardInterrupt: 