After Augmentation

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Activation
from keras.layers import Dense, Dropout, Flatten
from keras.optimizers import *
from keras.metrics import *
from keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.utils.class_weight import compute_class_weight
from sklearn.utils import resample
from tensorflow.keras import backend as K

from sklearn.model_selection import train_test_split

In [None]:
import pandas as pd

# Define full lesion names
type_dictionary = {
    'MEL': 'Melanoma',
    'NV': 'Melanocytic nevi',
    'BCC': 'Basal cell carcinoma',
    'AK': 'Actinic keratoses',
    'BKL': 'Benign keratosis',
    'DF': 'Dermatofibroma',
    'VASC': 'Vascular skin lesions',
    'SCC': 'Squamous cell carcinoma'
}

root_dir = '/content/drive/MyDrive/Final_Project/Classification/'

# Load metadata and ground truth
metadata_path = root_dir + 'ISIC_2019_Training_Metadata.csv'
groundtruth_path = root_dir + 'ISIC_2019_Training_GroundTruth.csv'

meta_df = pd.read_csv(metadata_path)
gt_df = pd.read_csv(groundtruth_path)

# Merge metadata and ground truth using 'image' column
df = pd.merge(meta_df, gt_df, on='image')

# Keep label columns as is (multi-label)
label_columns = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC']

# Map full names (optional, useful for display)
df['type'] = df[label_columns].apply(lambda row: [type_dictionary[c] for c in label_columns if row[c] == 1], axis=1)

# Add image path column
df['path'] = root_dir + 'ISIC_2019_Training_Data/' + df['image'] + '.jpg'

# Drop unnecessary columns except the label columns and image info
cols_to_drop = ['lesion_id', 'sex', 'age_approx', 'anatom_site_general']
df = df.drop(columns=cols_to_drop)

# Your df now has:
# - image column (filename)
# - path column (full path to image)
# - label columns (binary 0/1 multi-label targets)
# - type column (list of full lesion names present)

df.head()






Unnamed: 0,image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK,type,path
0,ISIC_0000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[Melanocytic nevi],/content/drive/MyDrive/Final_Project/Classific...
1,ISIC_0000001,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[Melanocytic nevi],/content/drive/MyDrive/Final_Project/Classific...
2,ISIC_0000002,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[Melanoma],/content/drive/MyDrive/Final_Project/Classific...
3,ISIC_0000003,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[Melanocytic nevi],/content/drive/MyDrive/Final_Project/Classific...
4,ISIC_0000004,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,[Melanoma],/content/drive/MyDrive/Final_Project/Classific...


In [None]:
# Get class weights
y_train = np.argmax(df[label_columns].values, axis=1)  # Convert multi-label to single-label for weighting
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = dict(enumerate(class_weights))


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.metrics import AUC
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from imblearn.over_sampling import RandomOverSampler

from tensorflow.keras import Sequential
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom, RandomTranslation
# ====== PATHS ======
image_dir = "/content/drive/MyDrive/Final_Project/Classification/preprocessed_data/images"
label_dir = "/content/drive/MyDrive/Final_Project/Classification/preprocessed_data/labels"


# Use this instead of ImageDataGenerator
img_augmentation = Sequential([
    RandomFlip("horizontal"),
    RandomRotation(0.2),
    RandomZoom(0.15),
    RandomTranslation(0.1, 0.1)
])

# ====== Load training paths ======
train_image_paths = sorted([os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.startswith("train_")])
train_label_paths = sorted([os.path.join(label_dir, f) for f in os.listdir(label_dir) if f.startswith("train_")])

# ====== Oversample labels ======
y_raw = np.array([np.load(p) for p in train_label_paths])
indices = np.arange(len(y_raw)).reshape(-1, 1)
ros = RandomOverSampler()
resampled_indices, _ = ros.fit_resample(indices, y_raw)

# ====== Data Generators ======
class BalancedNpyGenerator(Sequence):
        def __init__(self, image_paths, label_paths, resampled_indices, batch_size=32, datagen=None):
            self.image_paths = np.array(image_paths)[resampled_indices.flatten()]
            self.label_paths = np.array(label_paths)[resampled_indices.flatten()]
            self.batch_size = batch_size
            self.datagen = datagen
            self.label_values = np.array([np.load(p) for p in self.label_paths])

        def __len__(self):
            return int(np.ceil(len(self.image_paths) / self.batch_size))

        def __getitem__(self, idx):
            # Stratified sampling for this batch
            batch_indices = resample(
                np.arange(len(self.image_paths)),
                stratify=np.argmax(self.label_values, axis=1),
                replace=True,
                n_samples=self.batch_size
            )

            x = np.array([np.load(self.image_paths[i]) for i in batch_indices]).astype(np.float32)
            y = np.array([np.load(self.label_paths[i]) for i in batch_indices]).astype(np.float32)
            x = preprocess_input(x * 255.0)

            if self.datagen:
                  flow = self.datagen.flow(x, y, batch_size=self.batch_size, shuffle=False)
                  x, y = next(flow)
            return x, y

# === Validation Generator ===
val_image_paths = sorted([os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.startswith("val_")])
val_label_paths = sorted([os.path.join(label_dir, f) for f in os.listdir(label_dir) if f.startswith("val_")])

class SimpleNpyGenerator(Sequence):
    def __init__(self, image_paths, label_paths, batch_size=32):
        self.image_paths = image_paths
        self.label_paths = label_paths
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.image_paths) / self.batch_size))

    def __getitem__(self, idx):
        x = np.array([np.load(p) for p in self.image_paths[idx*self.batch_size:(idx+1)*self.batch_size]]).astype(np.float32)
        y = np.array([np.load(p) for p in self.label_paths[idx*self.batch_size:(idx+1)*self.batch_size]]).astype(np.float32)
        x = x / 255.0
        x = preprocess_input(x)
        return x, y



# ====== Create Generators ======
train_gen = BalancedNpyGenerator(
    image_paths=train_image_paths,
    label_paths=train_label_paths,
    resampled_indices=resampled_indices,
    batch_size=32,
    datagen=ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.05,
        brightness_range=[0.9, 1.1],
        horizontal_flip=True
    )
)

val_gen = SimpleNpyGenerator(val_image_paths, val_label_paths, batch_size=32)


In [None]:
import os
import numpy as np
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.metrics import AUC

# === Set your phase: 1, 2, or 3 ===
phase = 3  # Change this manually for next phase

# === Directory to save weights and epoch progress ===
weights_dir = "/content/drive/MyDrive/Final_Project/Classification/Phase3_Augmented/reweights/"
weights_path = os.path.join(weights_dir, f"resnet50_phase{phase}.weights.h5")
checkpoint_path = os.path.join(weights_dir, f"resnet50_phase{phase}_checkpoint.weights.h5")
previous_weights_path = os.path.join(weights_dir, f"resnet50_phase{phase - 1}.weights.h5")
epoch_file = os.path.join(weights_dir, f"phase{phase}_last_epoch.txt")


In [None]:
def get_freeze_index(phase):
    if phase == 1:
        return len(keras.applications.ResNet50().layers)  # freeze all
    elif phase == 2:
        return -30  # unfreeze last 30 layers
    elif phase == 3:
         return -60
    else:
        raise ValueError("Phase must be 1, 2, or 3")

def build_model(freeze_until):
    base = keras.applications.ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=(75, 100, 3),
        pooling='avg'
    )

    for layer in base.layers:
        layer.trainable = False

    if freeze_until < 0:
        for layer in base.layers[:freeze_until]:  # keep early layers frozen
            layer.trainable = False
    else:
        for layer in base.layers[:freeze_until]:  # freeze first layers
            layer.trainable = False

    from tensorflow.keras.regularizers import l2

    model = Sequential([
        base,
        Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.3),
        Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
        Dropout(0.5),  # Increased dropout to prevent overfitting
        Dense(8, activation='sigmoid', kernel_regularizer=l2(0.001))  # Apply L2 here too
    ])

    return model

freeze_until = get_freeze_index(phase)
model = build_model(freeze_until)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [None]:
# Adjust learning rate based on phase
learning_rate = 1e-6

def focal_loss(gamma=2.0, alpha=0.25):
    def loss(y_true, y_pred):
        pt = y_true * y_pred + (1 - y_true) * (1 - y_pred)
        return -K.mean(alpha * K.pow(1 - pt, gamma) * K.log(pt + 1e-7))
    return loss


model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
    loss=focal_loss(),  # Changed to focal loss
    metrics=['accuracy', AUC(name='auc')]
)

# Load weights
if os.path.exists(checkpoint_path):
    print(f" Resuming from checkpoint: {checkpoint_path}")
    model.load_weights(checkpoint_path)
elif phase > 1 and os.path.exists(previous_weights_path):
    print(f" Starting from previous phase weights: {previous_weights_path}")
    model.load_weights(previous_weights_path)
else:
    print(" Starting fresh training")



 Resuming from checkpoint: /content/drive/MyDrive/Final_Project/Classification/Phase3_Augmented/reweights/resnet50_phase3_checkpoint.weights.h5


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
# Callback to save weights after every epoch
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_best_only=False,
    save_freq='epoch',
    verbose=1
)
epoch_save ="/content/drive/MyDrive/Final_Project/Classification/Phase3_Augmented/reweights/epoch_save"
epochwise_checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath=os.path.join(epoch_save, f"resnet50_phase{phase}_epoch_" + "{epoch:02d}.weights.h5"),
    save_weights_only=True,
    save_best_only=False,
    save_freq='epoch',
    verbose=0  # Silent save; reduce log clutter
)
# Custom callback to save current epoch to .txt file
class SaveEpochCallback(keras.callbacks.Callback):
    def __init__(self, path):
        super().__init__()
        self.path = path

    def on_epoch_end(self, epoch, logs=None):
        with open(self.path, "w") as f:
            f.write(str(epoch + 1))  # Save next epoch index

save_epoch_cb = SaveEpochCallback(epoch_file)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_auc',
    patience=5,
    mode='max',
    restore_best_weights=True
)


In [None]:
# Read last completed epoch
initial_epoch = 20
if os.path.exists(epoch_file):
    with open(epoch_file, "r") as f:
        initial_epoch = int(f.read().strip())

print(f" Resuming from epoch {initial_epoch}")

# Train the model
hist=model.fit(
    train_gen,
    validation_data=val_gen,
    initial_epoch=initial_epoch,
    epochs=25,
    callbacks=[checkpoint_cb, save_epoch_cb , reduce_lr, early_stop, epochwise_checkpoint_cb],
    class_weight=class_weight_dict,
    verbose=1
)


  self._warn_if_super_not_called()


 Resuming from epoch 20
Epoch 21/25
[1m2318/2318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - accuracy: 0.9585 - auc: 0.9987 - loss: 0.0262

  self._warn_if_super_not_called()



Epoch 21: saving model to /content/drive/MyDrive/Final_Project/Classification/Phase3_Augmented/reweights/resnet50_phase3_checkpoint.weights.h5
[1m2318/2318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9628s[0m 4s/step - accuracy: 0.9585 - auc: 0.9987 - loss: 0.0262 - val_accuracy: 0.5081 - val_auc: 0.8299 - val_loss: 0.0402 - learning_rate: 1.0000e-06
Epoch 22/25
[1m2318/2318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.9617 - auc: 0.9989 - loss: 0.0251
Epoch 22: saving model to /content/drive/MyDrive/Final_Project/Classification/Phase3_Augmented/reweights/resnet50_phase3_checkpoint.weights.h5
[1m2318/2318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7927s[0m 3s/step - accuracy: 0.9617 - auc: 0.9989 - loss: 0.0251 - val_accuracy: 0.5081 - val_auc: 0.8299 - val_loss: 0.0403 - learning_rate: 1.0000e-06
Epoch 23/25
[1m2318/2318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - accuracy: 0.9642 - auc: 0.9990 - loss: 0.0243
Epoch 23: s

In [None]:
model.save_weights(weights_path)
print(f"✅ Final weights saved to: {weights_path}")

In [None]:
for layer in base_model.layers:
    print(layer.name, layer.trainable)

input_layer_2 False
conv1_pad False
conv1_conv False
conv1_bn False
conv1_relu False
pool1_pad False
pool1_pool False
conv2_block1_1_conv False
conv2_block1_1_bn False
conv2_block1_1_relu False
conv2_block1_2_conv False
conv2_block1_2_bn False
conv2_block1_2_relu False
conv2_block1_0_conv False
conv2_block1_3_conv False
conv2_block1_0_bn False
conv2_block1_3_bn False
conv2_block1_add False
conv2_block1_out False
conv2_block2_1_conv False
conv2_block2_1_bn False
conv2_block2_1_relu False
conv2_block2_2_conv False
conv2_block2_2_bn False
conv2_block2_2_relu False
conv2_block2_3_conv False
conv2_block2_3_bn False
conv2_block2_add False
conv2_block2_out False
conv2_block3_1_conv False
conv2_block3_1_bn False
conv2_block3_1_relu False
conv2_block3_2_conv False
conv2_block3_2_bn False
conv2_block3_2_relu False
conv2_block3_3_conv False
conv2_block3_3_bn False
conv2_block3_add False
conv2_block3_out False
conv3_block1_1_conv False
conv3_block1_1_bn False
conv3_block1_1_relu False
conv3_block1

In [None]:
# 1. First get the base ResNet50 model
base_model = ResNet50(include_top=False, weights='imagenet')

# 2. Apply your unfreezing logic
phase = 3  # Your current phase
freeze_until = -70  # Your setting for phase 3

for i, layer in enumerate(base_model.layers):
    layer.trainable = (i >= len(base_model.layers) + freeze_until ) # + because freeze_until is negative

# 3. Count and display layer status
total_layers = len(base_model.layers)
trainable_layers = sum([1 for layer in base_model.layers if layer.trainable])
frozen_layers = total_layers - trainable_layers

print(f"\nTotal layers: {total_layers}")
print(f"Trainable layers: {trainable_layers} (last {freeze_until})")
print(f"Frozen layers: {frozen_layers}\n")

# 4. Print the first 5 frozen and last 5 trainable layers
print("First 5 FROZEN layers:")
for layer in base_model.layers[:5]:
    print(f"{layer.name:25} | Trainable: {layer.trainable}")

print("\nLast 5 TRAINABLE layers:")
for layer in base_model.layers[-5:]:
    print(f"{layer.name:25} | Trainable: {layer.trainable}")

# 5. Verify BatchNorm layers are frozen (recommended)
print("\nBatchNorm layer status:")
bn_layers = [l for l in base_model.layers if 'bn' in l.name]
for bn in bn_layers[-3:]:  # Print last 3 BN layers
    print(f"{bn.name:25} | Trainable: {bn.trainable} (should typically be False)")


Total layers: 175
Trainable layers: 70 (last -70)
Frozen layers: 105

First 5 FROZEN layers:
input_layer_2             | Trainable: False
conv1_pad                 | Trainable: False
conv1_conv                | Trainable: False
conv1_bn                  | Trainable: False
conv1_relu                | Trainable: False

Last 5 TRAINABLE layers:
conv5_block3_2_relu       | Trainable: True
conv5_block3_3_conv       | Trainable: True
conv5_block3_3_bn         | Trainable: True
conv5_block3_add          | Trainable: True
conv5_block3_out          | Trainable: True

BatchNorm layer status:
conv5_block3_1_bn         | Trainable: True (should typically be False)
conv5_block3_2_bn         | Trainable: True (should typically be False)
conv5_block3_3_bn         | Trainable: True (should typically be False)


In [None]:
import numpy as np
from tensorflow import keras
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score

# === Load your model architecture ===
base_model = keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(75, 100, 3),
    pooling='avg'
)

# Build your full model
model = keras.Sequential([
    base_model,
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(8, activation='sigmoid')
])

# Load saved weights (change the path as needed)
weights_path = "/content/drive/MyDrive/Final_Project/Classification/Phase3_Augmented/reweights/resnet50_phase3_checkpoint.weights.h5"
model.load_weights(weights_path)

# === Prepare your validation data generator ===
# (Make sure val_gen is defined as in your training pipeline)
# Example:
# val_gen = SimpleNpyGenerator(val_image_paths, val_label_paths, batch_size=32)

# Load full validation data into numpy arrays
x_val = []
y_val = []

for batch_x, batch_y in val_gen:
    x_val.append(batch_x)
    y_val.append(batch_y)
    # Stop after one full pass
    if len(x_val) * val_gen.batch_size >= len(val_gen.image_paths):
        break

x_val = np.vstack(x_val)
y_val = np.vstack(y_val)

# === Predict on validation set ===
y_pred = model.predict(x_val, verbose=0)

# Binarize predictions at 0.5 threshold
y_pred_binary = (y_pred >= 0.5).astype(int)

# Calculate exact match accuracy (strict multi-label accuracy)
exact_match_acc = accuracy_score(y_val, y_pred_binary)

# Calculate macro AUC
auc_score = roc_auc_score(y_val, y_pred, average='macro')

# Print classification report
print("📊 Classification Report (Validation Set):")
print(classification_report(y_val, y_pred_binary, target_names=[
    'MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC'
]))

print(f"✅ Exact Match Accuracy (Validation): {exact_match_acc:.4f}")
print(f"🔥 Macro AUC Score (Validation): {auc_score:.4f}")

# Per-class accuracy
class_accuracies = (y_pred_binary == y_val).mean(axis=0)
labels = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC']
print("\n🔹 Per-class Accuracy (Validation):")
for i, label in enumerate(labels):
    print(f"{label}: {class_accuracies[i]:.4f}")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
📊 Classification Report (Validation Set):
              precision    recall  f1-score   support

         MEL       0.42      0.40      0.41       362
          NV       0.83      0.68      0.75      1030
         BCC       0.53      0.38      0.45       266
          AK       0.52      0.16      0.24        70
         BKL       0.41      0.34      0.37       210
          DF       0.55      0.32      0.40        19
        VASC       0.83      0.25      0.38        20
         SCC       0.29      0.10      0.15        50

   micro avg       0.65      0.52      0.57      2027
   macro avg       0.55      0.33      0.39      2027
weighted avg       0.65      0.52      0.57      2027
 samples avg       0.51      0.52      0.51      2027

✅ Exact Match Accuracy (V

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
