In [101]:
import os
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import layers, models
from tensorflow.keras.metrics import categorical_accuracy, top_k_categorical_accuracy
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [4]:
# Attach Google Drive, where the preprocessed images (EDA step 1 output) are stored.
# force_remount=True re-mounts even if the drive is already attached in this session.
from google.colab import drive

drive.mount(mountpoint='/content/drive', force_remount=True)

Mounted at /content/drive


In [28]:
# create directory in virtual environment
# this takes about 3 mins, please be patient
VDIR_PROJECT = '/content/skin_cancer/training'
os.makedirs(VDIR_PROJECT, exist_ok=True)

# define source directory from Gdrive
GDIR_PROJECT_EDA_S1 = '/content/drive/MyDrive/AAI-521/Module7/TeamProject/EDA_PrePro/S1'

# Copy S1 onto virtual env
!cp -r {GDIR_PROJECT_EDA_S1}/* {VDIR_PROJECT}

# clone repo on virtual directory.
# for now copy files needed
# import evaluation functions from another ipynb file, and run that ipynb
COMMON_FUNCTIONS_DIR = '/content/drive/MyDrive/AAI-521/Module7/TeamProject/Common_Functions'
EVAL_FUNCTIONS_IPYNB = COMMON_FUNCTIONS_DIR + '/Evaluation_Functions.ipynb'

!cp -r {EVAL_FUNCTIONS_IPYNB} {VDIR_PROJECT}


^C
^C


In [31]:
!cp -r {EVAL_FUNCTIONS_IPYNB} {VDIR_PROJECT}

EVAL_FUNCTIONS_PY = COMMON_FUNCTIONS_DIR + '/evaluation_functions.py'
!cp -r {EVAL_FUNCTIONS_PY} {VDIR_PROJECT}


cp: cannot stat '/content/drive/MyDrive/AAI-521/Module7/TeamProject/Common_Functions/evaluation_functions.py': No such file or directory


In [6]:
# Train / validation image folders inside the working directory
VDIR_PROJECT_TRAIN = f'{VDIR_PROJECT}/train'
VDIR_PROJECT_VAL = f'{VDIR_PROJECT}/val'

In [7]:
# make sure these are consistent with the preprocessing operation
TARGET_SIZE = (192,256)   # passed to Keras as target_size, i.e. (height, width)
BATCH_SIZE = 32
RANDOM_SEED = 42

# create image iterators
datagen = ImageDataGenerator(rescale=1./255)  # Normalize pixel values

# create train dataset generator (default shuffling, reproducible through the seed)
train_generator = datagen.flow_from_directory(
    VDIR_PROJECT_TRAIN,
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode = 'categorical',
    seed = RANDOM_SEED)


# create validation dataset generator
# shuffle=False keeps the batch order identical to val_generator.classes /
# val_generator.filenames, so predictions can be compared against the true labels.
# Validation loss/accuracy computed during fit do not depend on the order.
val_generator = datagen.flow_from_directory(
    VDIR_PROJECT_VAL,
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode = 'categorical',
    shuffle = False,
    seed = RANDOM_SEED)



Found 8864 images belonging to 7 classes.
Found 1094 images belonging to 7 classes.


In [8]:
# Compute class weights
# 'balanced' weights are inversely proportional to class frequency, so rare
# classes contribute more to the loss during training.
classes = train_generator.classes
class_labels = np.unique(classes)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=classes
)

# Key every weight by the label it was computed for. Enumerating the weights
# would silently shift them onto the wrong classes if a label were missing
# from the training set (np.unique only returns labels that actually occur).
class_weight_dict = {
    int(label): float(weight)
    for label, weight in zip(class_labels, class_weights)
}
class_weight_dict



{0: 4.263588263588264,
 1: 2.643602743811512,
 2: 1.2624982196268337,
 3: 11.834445927903872,
 4: 1.1890006706908116,
 5: 0.21892906540209445,
 6: 9.816168327796234}

In [93]:
# Access the class-name -> integer-index mapping used by the training generator.
# Note: despite its name, class_list is a dict (see the printed output).
class_list = train_generator.class_indices

# Print the mapping
print(class_list)

{'akiec': 0, 'bcc': 1, 'bkl': 2, 'df': 3, 'mel': 4, 'nv': 5, 'vasc': 6}


In [9]:
# create base classifier model
# Load the DenseNet model with pre-trained weights from ImageNet
input_shape = (TARGET_SIZE[0],TARGET_SIZE[1], 3)
print("input_shape = ", input_shape)
base_model = tf.keras.applications.DenseNet121(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the base model layers so only the new classification head is trained
base_model.trainable = False

# Size the output layer from the dataset instead of hard-coding the class count,
# so the head always matches the one-hot labels produced by the generator
num_classes = len(train_generator.class_indices)

# Create a new model on top of the base model
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')  # Use 'softmax' for multi-class classification
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=[categorical_accuracy])

input_shape =  (192, 256, 3)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [10]:
# Show the layer summary of the frozen backbone first, then of the full classifier
for network in (base_model, model):
    network.summary()

In [11]:
# Fit the classifier on the training generator.
# Steps per epoch / validation steps are not set explicitly; the training log
# shows 277 steps per epoch for 8864 images at batch size 32.
fit_options = dict(
    class_weight=class_weight_dict,   # compensate for class imbalance
    validation_data=val_generator,
    epochs=30,
    verbose=1,
)
history = model.fit(train_generator, **fit_options)

Epoch 1/30


  self._warn_if_super_not_called()


[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 112ms/step - categorical_accuracy: 0.3756 - loss: 1.9090 - val_categorical_accuracy: 0.7276 - val_loss: 0.9155
Epoch 2/30
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 42ms/step - categorical_accuracy: 0.5109 - loss: 1.3978 - val_categorical_accuracy: 0.7605 - val_loss: 0.7870
Epoch 3/30
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 41ms/step - categorical_accuracy: 0.5555 - loss: 1.1790 - val_categorical_accuracy: 0.7697 - val_loss: 0.7565
Epoch 4/30
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 42ms/step - categorical_accuracy: 0.5606 - loss: 1.1274 - val_categorical_accuracy: 0.7358 - val_loss: 0.7789
Epoch 5/30
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 41ms/step - categorical_accuracy: 0.5817 - loss: 1.0580 - val_categorical_accuracy: 0.6627 - val_loss: 0.9063
Epoch 6/30
[1m277/277[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [142]:
# define a function that saves the model weights, training history in the virtual environment
def save_model(TARGET_DIR, model_name="default", model=[], history=[]):
  """Save a trained model and its training history under TARGET_DIR/model_name.

  Writes two files into the new folder:
    - model.keras           (written by model.save)
    - training_history.pkl  (pickle of the `history` argument, exactly as passed)

  Args:
    TARGET_DIR: parent directory that holds one sub-folder per saved model.
    model_name: name of the sub-folder to create for this model.
    model: object exposing a `save(path)` method (e.g. a Keras model).
    history: picklable training history to store next to the model.
      NOTE(review): if this is a Keras History object, pickling only
      `history.history` may be lighter - confirm what readers of the file expect.

  Returns:
    None. If the target folder already exists nothing is written, so earlier
    results are never overwritten.

  Raises:
    TypeError: if `model` has no `save` method. This is checked before any
      folder is created, so a bad call cannot leave an empty folder behind
      that would block later saves under the same name.
  """
  model_path = os.path.join(TARGET_DIR, model_name)

  # check if model exists already to avoid overwriting previous ones
  if os.path.isdir(model_path):
    print("Folder exists, exiting")
    return

  # validate before touching the filesystem
  if not hasattr(model, "save"):
    raise TypeError("save_model expects `model` to provide a save(path) method")

  print(f"Saving model and training history in :{model_path}")
  os.makedirs(model_path, exist_ok=True)

  model.save(os.path.join(model_path, 'model.keras'))
  with open(os.path.join(model_path, 'training_history.pkl'), 'wb') as file:
    pickle.dump(history, file)


In [145]:
# Store the trained baseline under <working dir>/models/base
VDIR_PROJECT_MODELS = f'{VDIR_PROJECT}/models'
save_model(VDIR_PROJECT_MODELS, model_name="base", model=model, history=history)

Saving model and training history in :/content/skin_cancer/training/models/base


In [140]:
# Destination folder on Google Drive for the training results.
# This cell only creates the folder (no error if it already exists);
# the files themselves are copied in a separate step.
GDIR_PROJECT_TRAINING = '/content/drive/MyDrive/AAI-521/Module7/TeamProject/Training/'
os.makedirs(GDIR_PROJECT_TRAINING, exist_ok=True)

In [146]:
# Copy the training results from the virtual environment to Gdrive:
# recursively copies the local models folder (VDIR_PROJECT_MODELS)
# into the Drive training folder (GDIR_PROJECT_TRAINING).
!cp -r {VDIR_PROJECT_MODELS} {GDIR_PROJECT_TRAINING}