In [None]:
# Mount Google Drive inside the Colab VM; later cells read and write paths under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [None]:
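# One-time step, kept for reference only: pack the dataset folder on Drive into a single tar archive.
#!tar -cf /content/drive/MyDrive/skin_cancer/final_data.tar -C /content/drive/MyDrive/skin_cancer final_data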

In [None]:
# Unpack the dataset archive stored on Drive into /content/ (the Colab VM's disk);
# the training and evaluation cells read their images from /content/final_data.
!tar -xf /content/drive/MyDrive/skin_cancer/final_data.tar -C /content/

In [None]:
import os
import numpy as np
import tensorflow as tf
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.metrics import Precision, Recall, AUC
from tensorflow.keras.losses import CategoricalFocalCrossentropy

# PATHS
# Image folders for the three dataset splits.
TRAIN_DIR, VAL_DIR, TEST_DIR = (
    "/content/final_data/train",
    "/content/final_data/valid/sorted",
    "/content/final_data/test/sorted",
)

# Output locations. Note that, despite its name, LOCAL_CHECKPOINT_PATH points at
# a folder on the mounted Google Drive, as does the final model file.
FINAL_MODEL_PATH = "/content/drive/MyDrive/skin_cancer/models/resnet50_new_full_model.h5"
LOCAL_CHECKPOINT_PATH = "/content/drive/MyDrive/skin_cancer/models/resnet50_new_checkpoints"
# Make sure the checkpoint folder and the final model's parent folder exist
# before training starts writing to them.
for output_dir in (LOCAL_CHECKPOINT_PATH, os.path.dirname(FINAL_MODEL_PATH)):
    os.makedirs(output_dir, exist_ok=True)

# PARAMETERS
EPOCHS = 20            # `epochs` value passed to the fine-tuning fit() call
BATCH_SIZE = 32        # images per batch produced by every generator
IMG_SIZE = (224, 224)  # size the generators and tf.image.resize scale images to

# CENTER CROP FUNCTION
def center_crop_and_preprocess(img):
    """Center-crop an image to a square, resize it to IMG_SIZE and apply ResNet50 preprocessing.

    Args:
        img: Rank-3 image array (height, width, channels).

    Returns:
        A NumPy array with the cropped, resized and `preprocess_input`-normalised image.

    NOTE(review): this function is used as an ImageDataGenerator
    `preprocessing_function`. Keras documents that hook as running after the
    image has already been resized to `target_size`, so with
    `target_size=IMG_SIZE` the input is presumably already square and the crop
    is a no-op — confirm whether aspect-ratio-preserving cropping was intended.
    """
    h, w, _ = img.shape
    min_side = min(h, w)
    # Offsets that centre a min_side x min_side window inside the image.
    top = (h - min_side) // 2
    left = (w - min_side) // 2
    img = img[top:top + min_side, left:left + min_side]
    img = tf.image.resize(img, IMG_SIZE)
    img = preprocess_input(img)
    # tf.image.resize produces a tensor; the preprocessing_function contract is
    # to hand back a NumPy array, so convert explicitly instead of relying on
    # implicit conversion inside the generator.
    return np.asarray(img)

# DATA AUGMENTATION
# Random geometric augmentation is applied to the training split only; the
# validation and test generators just run the shared preprocessing function.
augmentation_options = dict(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.3,
    shear_range=0.15,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",
)
train_datagen = ImageDataGenerator(
    preprocessing_function=center_crop_and_preprocess, **augmentation_options
)
val_datagen = ImageDataGenerator(preprocessing_function=center_crop_and_preprocess)
test_datagen = ImageDataGenerator(preprocessing_function=center_crop_and_preprocess)

# Settings shared by all three directory iterators.
flow_options = dict(target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='categorical')

# Only the training iterator shuffles; validation/test keep file order so that
# predictions line up with `.classes`.
train_gen = train_datagen.flow_from_directory(TRAIN_DIR, shuffle=True, **flow_options)
val_gen = val_datagen.flow_from_directory(VAL_DIR, shuffle=False, **flow_options)
test_gen = test_datagen.flow_from_directory(TEST_DIR, shuffle=False, **flow_options)

# CLASS WEIGHTS → for Focal Loss alpha
# "balanced" weights are computed from the per-image class indices of the
# training generator.
labels = train_gen.classes
class_ids = np.unique(labels)
class_weights_array = compute_class_weight(class_weight='balanced', classes=class_ids, y=labels)
class_weights_dict = {class_id: weight for class_id, weight in zip(class_ids, class_weights_array)}
print("Class weights:", class_weights_dict)

# Normalize α for focal loss
# Weights are taken in ascending class-index order and rescaled to sum to 1.
alpha_array = np.array(
    [class_weights_dict[class_id] for class_id in sorted(class_weights_dict)],
    dtype=np.float32,
)
alpha_array = alpha_array / alpha_array.sum()
print("Alpha array:", alpha_array)

# MODEL (ResNet50)
# ImageNet-pretrained backbone without its classification head.
base_model = ResNet50(include_top=False, weights="imagenet", input_shape=IMG_SIZE + (3,))

# Freeze base model first (warmup)
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Classification head: global pooling, then two regularised
# Dense -> BatchNorm -> Dropout stages, then a softmax over the classes
# discovered by the training generator.
x = layers.GlobalAveragePooling2D()(base_model.output)
for units, drop_rate in ((512, 0.5), (256, 0.4)):
    x = layers.Dense(units, activation='relu', kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(drop_rate)(x)
output = layers.Dense(train_gen.num_classes, activation="softmax")(x)

model = models.Model(inputs=base_model.input, outputs=output)

# LOSS FUNCTION
# Focal cross-entropy on softmax probabilities, with the normalised class
# weights as per-class alpha and light label smoothing.
loss_fn = CategoricalFocalCrossentropy(
    alpha=alpha_array, gamma=2, label_smoothing=0.05, from_logits=False
)

# COMPILE (Warmup Phase)
warmup_optimizer = optimizers.Adam(learning_rate=1e-4)
warmup_metrics = [
    "accuracy",
    Precision(name="precision"),
    Recall(name="recall"),
    AUC(name="auc"),
]
model.compile(optimizer=warmup_optimizer, loss=loss_fn, metrics=warmup_metrics)

# CALLBACKS
# All three callbacks watch the validation loss.
monitored_metric = "val_loss"

# Write a full-model checkpoint (one file per epoch number) whenever the
# monitored metric improves.
checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(LOCAL_CHECKPOINT_PATH, "ckpt-{epoch:02d}.keras"),
    monitor=monitored_metric,
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop after 6 epochs without improvement and roll back to the best weights.
early_stop = EarlyStopping(
    monitor=monitored_metric,
    patience=6,
    restore_best_weights=True,
)

# Halve the learning rate after 4 stagnant epochs, never going below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor=monitored_metric,
    factor=0.5,
    patience=4,
    min_lr=1e-6,
    verbose=1,
)

# PHASE 1: Warmup (Top Layers)
# One epoch with the backbone frozen so only the new head is updated.
training_callbacks = (checkpoint_callback, early_stop, reduce_lr)
history_warmup = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=1,
    callbacks=list(training_callbacks),
    verbose=1,
)

# PHASE 2: Fine-tune deeper layers
# Walk the backbone in order; from the first layer whose name mentions one of
# the markers onward, every layer is made trainable.
# NOTE(review): this also unfreezes any BatchNormalization layers from that
# point on; the Keras transfer-learning guide recommends keeping them in
# inference mode during fine-tuning — confirm this is intended.
unfreeze_markers = ("conv3", "conv4", "conv5")
set_trainable = False
for layer in base_model.layers:
    if any(marker in layer.name for marker in unfreeze_markers):
        set_trainable = True
    if set_trainable:
        layer.trainable = True

# Recompile so the changed `trainable` flags take effect, with a fresh
# optimizer and fresh metric objects.
finetune_optimizer = optimizers.Adam(learning_rate=1e-4)
finetune_metrics = [
    "accuracy",
    Precision(name="precision"),
    Recall(name="recall"),
    AUC(name="auc"),
]
model.compile(optimizer=finetune_optimizer, loss=loss_fn, metrics=finetune_metrics)

# Continue epoch numbering after the single warmup epoch.
history_finetune = model.fit(
    train_gen,
    validation_data=val_gen,
    initial_epoch=1,
    epochs=EPOCHS,
    callbacks=list(training_callbacks),
    verbose=1,
)

# SAVE FINAL MODEL
# Let Keras infer the on-disk format from the file extension of
# FINAL_MODEL_PATH. The previous explicit save_format="tf" contradicted the
# ".h5" extension and is deprecated in Keras 3.
# NOTE(review): ".h5" is the legacy HDF5 format; consider switching the path to
# ".keras" to match the checkpoints.
model.save(FINAL_MODEL_PATH)
print(f"✅ Training complete! Full model saved at {FINAL_MODEL_PATH}")

Found 27205 images belonging to 7 classes.
Found 235 images belonging to 7 classes.
Found 1470 images belonging to 7 classes.
Class weights: {np.int32(0): np.float64(1.2954761904761904), np.int32(1): np.float64(1.1104081632653062), np.int32(2): np.float64(0.9716071428571429), np.int32(3): np.float64(1.2954761904761904), np.int32(4): np.float64(0.9716071428571429), np.int32(5): np.float64(0.5796314051347609), np.int32(6): np.float64(1.2954761904761904)}
Alpha array: [0.17227803 0.14766689 0.12920852 0.17227803 0.12920852 0.07708189
 0.17227803]


  self._warn_if_super_not_called()


[1m851/851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 646ms/step - accuracy: 0.3612 - auc: 0.7369 - loss: 0.3435 - precision: 0.4201 - recall: 0.2675
Epoch 1: val_loss improved from inf to 0.23253, saving model to /content/drive/MyDrive/skin_cancer/models/resnet50_new_checkpoints/ckpt-01.keras
[1m851/851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m586s[0m 672ms/step - accuracy: 0.3612 - auc: 0.7369 - loss: 0.3435 - precision: 0.4202 - recall: 0.2675 - val_accuracy: 0.6511 - val_auc: 0.9079 - val_loss: 0.2325 - val_precision: 0.7605 - val_recall: 0.5404 - learning_rate: 1.0000e-04
Epoch 2/20
[1m851/851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 767ms/step - accuracy: 0.5381 - auc: 0.8695 - loss: 0.2527 - precision: 0.6200 - recall: 0.4356
Epoch 2: val_loss improved from 0.23253 to 0.18856, saving model to /content/drive/MyDrive/skin_cancer/models/resnet50_new_checkpoints/ckpt-02.keras
[1m851/851[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m730s[0m 794ms/

KeyboardInterrupt: 

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import preprocess_input
from sklearn.metrics import classification_report, confusion_matrix, f1_score

# Paths
# Checkpoint file to evaluate (a specific epoch's file is hard-coded here) and
# the folder holding the test images.
TEST_DIR = "/content/final_data/test/sorted"
CHECKPOINT_PATH = "/content/drive/MyDrive/skin_cancer/models/resnet50_new_checkpoints/ckpt-11.keras"

# Generator settings for evaluation.
BATCH_SIZE = 32
IMG_SIZE = (224, 224)

# Preprocessing and Generators
def center_crop_and_preprocess(img):
    """Center-crop an image to a square, resize it to IMG_SIZE and apply ResNet50 preprocessing.

    Args:
        img: Rank-3 image array (height, width, channels).

    Returns:
        A NumPy array with the cropped, resized and `preprocess_input`-normalised image.

    NOTE(review): this function is used as an ImageDataGenerator
    `preprocessing_function`. Keras documents that hook as running after the
    image has already been resized to `target_size`, so with
    `target_size=IMG_SIZE` the input is presumably already square and the crop
    is a no-op — confirm whether aspect-ratio-preserving cropping was intended.
    """
    h, w, _ = img.shape
    min_side = min(h, w)
    # Offsets that centre a min_side x min_side window inside the image.
    top = (h - min_side) // 2
    left = (w - min_side) // 2
    img = img[top:top + min_side, left:left + min_side]
    img = tf.image.resize(img, IMG_SIZE)
    img = preprocess_input(img)
    # tf.image.resize produces a tensor; the preprocessing_function contract is
    # to hand back a NumPy array, so convert explicitly instead of relying on
    # implicit conversion inside the generator.
    return np.asarray(img)

# Test images get the shared preprocessing only (no augmentation).
test_datagen = ImageDataGenerator(preprocessing_function=center_crop_and_preprocess)

# shuffle=False keeps file order, so predictions line up with `test_gen.classes`.
test_flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,
)
test_gen = test_datagen.flow_from_directory(TEST_DIR, **test_flow_options)

# Load Model & Evaluate
# compile=False: the checkpoint is only used for inference here, so the
# training loss and metrics are not restored.
model = tf.keras.models.load_model(CHECKPOINT_PATH, compile=False)

# One pass over the test set, rounding up so the final partial batch is included.
steps = int(np.ceil(test_gen.samples / test_gen.batch_size))
preds = model.predict(test_gen, steps=steps, verbose=1)

y_true = test_gen.classes
y_pred = np.argmax(preds, axis=1)
assert len(y_pred) == len(y_true), "prediction count does not match number of test images"

# Order class names by their integer index and pass the full label set
# explicitly. Without `labels`, classification_report raises when some class is
# absent from both y_true and y_pred, because the number of classes found no
# longer matches len(target_names).
target_names = sorted(test_gen.class_indices, key=test_gen.class_indices.get)
label_ids = np.arange(len(target_names))

print(classification_report(y_true, y_pred, labels=label_ids, target_names=target_names, digits=4))
print("Macro F1:", f1_score(y_true, y_pred, labels=label_ids, average='macro'))
print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred, labels=label_ids))

Found 1470 images belonging to 7 classes.


  self._warn_if_super_not_called()


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 455ms/step
              precision    recall  f1-score   support

       AKIEC     0.4857    0.5667    0.5231        30
         BCC     0.7468    0.6344    0.6860        93
         BKL     0.6884    0.6313    0.6587       217
          DF     0.5000    0.7200    0.5902        25
         MEL     0.4505    0.7719    0.5690       171
          NV     0.9264    0.8174    0.8685       909
        VASC     0.8462    0.8800    0.8627        25

    accuracy                         0.7673      1470
   macro avg     0.6634    0.7174    0.6797      1470
weighted avg     0.8070    0.7673    0.7793      1470

Macro F1: 0.6797356835683035
Confusion Matrix:
 [[ 17   4   3   3   2   1   0]
 [  5  59   6   0  16   7   0]
 [  9   8 137   0  43  20   0]
 [  1   0   1  18   2   2   1]
 [  1   0   8   3 132  27   0]
 [  2   8  44  11  98 743   3]
 [  0   0   0   1   0   2  22]]
