In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.metrics import categorical_accuracy, top_k_categorical_accuracy
import os

In [2]:
# mount google drive where preprocessed images are
# Mount drive and download data from EDA step 1
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# make sure these are consistent with the preprocessing operation
TARGET_SIZE = (192,256) # in preparation for CNN, From EDA, size in S1 is (450,600)
BATCH_SIZE = 32
RANDOM_SEED = 42

# configure google drive folders from EDA_Preprocessing.ipynb S1 file
GDIR_PROJECT = '/content/drive/MyDrive/AAI-521/Module7/TeamProject'
GDIR_PROJECT_EDA = GDIR_PROJECT + '/EDA_PrePro'
GDIR_PROJECT_EDA_S2 = GDIR_PROJECT_EDA + '/S2'
GDIR_PROJECT_EDA_S2_VAL = GDIR_PROJECT_EDA_S2 + '/val'
GDIR_PROJECT_EDA_S2_TRAIN = GDIR_PROJECT_EDA_S2 + '/train'

Mounted at /content/drive


In [3]:
!pwd

/content


In [4]:
# copy gdrive files to virtual environment folder
VDIR_PROJECT = '/content/skin_cancer/training'
os.makedirs(VDIR_PROJECT, exist_ok=True)


In [7]:
# copy zipped file to virtual
SOURCE_FILE = '/content/drive/MyDrive/AAI-521/Module7/TeamProject/EDA_PrePro/S2-20241201T123737Z-001.zip'

!cp -r {SOURCE_FILE} {VDIR_PROJECT}


In [8]:
# unzip S2
VIRTUAL_FILE = '/content/skin_cancer/training/S2-20241201T123737Z-001.zip'
!unzip {VIRTUAL_FILE} -d {VDIR_PROJECT}

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1971.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1529.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1418.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1592.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1406.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1399.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1370.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1603.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1899.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1526.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_2010.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/image_1354.jpg  
  inflating: /content/skin_cancer/training/S2/train/bkl/ima

In [9]:


VDIR_PROJECT_S2 = VDIR_PROJECT + '/S2'
VDIR_PROJECT_S2_VAL = VDIR_PROJECT_S2 + '/val'
VDIR_PROJECT_S2_TRAIN = VDIR_PROJECT_S2 + '/train'

In [24]:
# create image iterators
datagen = ImageDataGenerator(rescale=1./255)  # Normalize pixel values

# create train dataset generator
train_generator = datagen.flow_from_directory(
    VDIR_PROJECT_S2_TRAIN,
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode = 'categorical',
    seed = RANDOM_SEED)


# create validation dataset generator
val_generator = datagen.flow_from_directory(
    VDIR_PROJECT_S2_VAL,
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode = 'categorical',
    seed = RANDOM_SEED)



Found 37989 images belonging to 7 classes.
Found 1094 images belonging to 7 classes.


In [25]:
# compute training steps
GDIR_PROJECT_EDA_S1 = GDIR_PROJECT_EDA + '/S1'
GDIR_PROJECT_EDA_S1_VAL = GDIR_PROJECT_EDA_S1 + '/val'
GDIR_PROJECT_EDA_S1_TRAIN = GDIR_PROJECT_EDA_S1 + '/train'
# read train and val set metadata from S1
metadata_df_train = pd.read_csv(GDIR_PROJECT_EDA_S1_TRAIN + '/metadata_train.csv')
metadata_df_val = pd.read_csv(GDIR_PROJECT_EDA_S1_VAL + '/metadata_val.csv')

# get number of train and validation samples
orig_train_samples = len(metadata_df_train)
orig_val_samples = len(metadata_df_val)
print("ORIG TRAIN SAMPLES: ", orig_train_samples)
print("ORIG VAL SAMPLES: ", orig_val_samples)

train_steps = np.ceil(orig_train_samples / BATCH_SIZE).astype('uint32')

val_steps = np.ceil(orig_val_samples / BATCH_SIZE).astype('uint32')

print("TRAIN STEPS: ", train_steps)
print("VAL STEPS: ", val_steps)

ORIG TRAIN SAMPLES:  8864
ORIG VAL SAMPLES:  1094
TRAIN STEPS:  277
VAL STEPS:  35


In [26]:
len(train_generator)

1188

In [27]:
# create base classifier model
# Load the DenseNet model with pre-trained weights from ImageNet
input_shape = (TARGET_SIZE[0],TARGET_SIZE[1], 3)
print("input_shape = ", input_shape)
base_model = tf.keras.applications.DenseNet121(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the base model layers
base_model.trainable = False

# Create a new model on top of the base model
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(7, activation='softmax')  # Use 'softmax' for multi-class classification
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=[categorical_accuracy])

input_shape =  (192, 256, 3)


In [15]:
base_model.summary()

In [16]:
model.summary()

In [28]:
# train model
history = model.fit(
    train_generator,
    steps_per_epoch=train_steps,
    #class_weight=class_weights,
    validation_data=val_generator,
    validation_steps=val_steps,
    epochs=15, verbose=1)

Epoch 1/15


  self._warn_if_super_not_called()


[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 147ms/step - categorical_accuracy: 0.4373 - loss: 1.4920

  self._warn_if_super_not_called()


[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 200ms/step - categorical_accuracy: 0.4376 - loss: 1.4912 - val_categorical_accuracy: 0.8190 - val_loss: 0.5590
Epoch 2/15
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 170ms/step - categorical_accuracy: 0.5867 - loss: 1.0781
Epoch 3/15
[1m  2/277[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m20s[0m 73ms/step - categorical_accuracy: 0.6484 - loss: 0.9186

  self.gen.throw(typ, value, traceback)


[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 153ms/step - categorical_accuracy: 0.6212 - loss: 0.9839 - val_categorical_accuracy: 0.8346 - val_loss: 0.5029
Epoch 4/15
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 139ms/step - categorical_accuracy: 0.6362 - loss: 0.9597
Epoch 5/15
[1m  2/277[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m20s[0m 76ms/step - categorical_accuracy: 0.4375 - loss: 1.1673

  self.gen.throw(typ, value, traceback)


[1m 80/277[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m34s[0m 174ms/step - categorical_accuracy: 0.6465 - loss: 0.9237

  self.gen.throw(typ, value, traceback)


[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 59ms/step - categorical_accuracy: 0.6526 - loss: 0.9119 - val_categorical_accuracy: 0.8373 - val_loss: 0.4724
Epoch 6/15
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 144ms/step - categorical_accuracy: 0.6722 - loss: 0.8652
Epoch 7/15
[1m  1/277[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m22s[0m 82ms/step - categorical_accuracy: 0.6562 - loss: 0.9805

  self.gen.throw(typ, value, traceback)


[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 160ms/step - categorical_accuracy: 0.6628 - loss: 0.8886 - val_categorical_accuracy: 0.8391 - val_loss: 0.4744
Epoch 8/15
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 138ms/step - categorical_accuracy: 0.6863 - loss: 0.8338
Epoch 9/15
[1m  2/277[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m22s[0m 83ms/step - categorical_accuracy: 0.6328 - loss: 0.7475

  self.gen.throw(typ, value, traceback)


[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 160ms/step - categorical_accuracy: 0.6820 - loss: 0.8224 - val_categorical_accuracy: 0.8410 - val_loss: 0.4678
Epoch 10/15
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 40ms/step - categorical_accuracy: 0.6846 - loss: 0.8048 


  self.gen.throw(typ, value, traceback)


Epoch 11/15
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 151ms/step - categorical_accuracy: 0.7000 - loss: 0.7821 - val_categorical_accuracy: 0.8355 - val_loss: 0.4809
Epoch 12/15
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step - categorical_accuracy: 0.7124 - loss: 0.7757

  self.gen.throw(typ, value, traceback)


[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 139ms/step - categorical_accuracy: 0.7124 - loss: 0.7756
Epoch 13/15
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 143ms/step - categorical_accuracy: 0.7096 - loss: 0.7564 - val_categorical_accuracy: 0.8455 - val_loss: 0.4650
Epoch 14/15
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 121ms/step - categorical_accuracy: 0.7177 - loss: 0.7543
Epoch 15/15
[1m  2/277[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m21s[0m 78ms/step - categorical_accuracy: 0.6562 - loss: 0.6774

  self.gen.throw(typ, value, traceback)


[1m 80/277[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m30s[0m 153ms/step - categorical_accuracy: 0.7269 - loss: 0.7087

  self.gen.throw(typ, value, traceback)


[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 54ms/step - categorical_accuracy: 0.7286 - loss: 0.7060 - val_categorical_accuracy: 0.8501 - val_loss: 0.4656


In [29]:
train_batches = datagen.flow_from_directory(train_path,
                                            target_size=(image_size,image_size),
                                            batch_size=train_batch_size)


277.0