In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so the files referenced below are reachable.
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import pandas as pd

# Location of the balanced binary metadata file on the mounted Drive
metadata_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary.csv"
df = pd.read_csv(metadata_path)

# Three quick overviews: a row preview, the column dtypes, and null counts per column.
overview_sections = (
    ("📌 First 5 rows of the metadata:", df.head()),
    ("\n📌 Column names and types:", df.dtypes),
    ("\n📌 Missing values per column:", df.isna().sum()),
)
for heading, content in overview_sections:
    print(heading)
    print(content)


📌 First 5 rows of the metadata:
     lesion_id  dx    dx_type   age     sex     localization binary_label  \
0  HAM_0000550  nv  follow_up  45.0    male            trunk       Benign   
1  HAM_0003577  nv  follow_up  50.0    male  lower extremity       Benign   
2  HAM_0001477  nv  follow_up  55.0  female            trunk       Benign   
3  HAM_0000484  nv  follow_up  40.0    male            trunk       Benign   
4  HAM_0000981  nv  follow_up  75.0  female             back       Benign   

       image_id  
0  ISIC_0024306  
1  ISIC_0024307  
2  ISIC_0024308  
3  ISIC_0024309  
4  ISIC_0024311  

📌 Column names and types:
lesion_id        object
dx               object
dx_type          object
age             float64
sex              object
localization     object
binary_label     object
image_id         object
dtype: object

📌 Missing values per column:
lesion_id        0
dx               0
dx_type          0
age             57
sex              0
localization     0
binary_label     0
i

In [None]:
import pandas as pd

# Re-read the balanced metadata file
metadata_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary.csv"
df = pd.read_csv(metadata_path)

# Column listing first, then a short row preview
print("📌 All Columns in Metadata:", list(df.columns), sep="\n")
print("\n📌 Sample Data:", df.head(), sep="\n")


📌 All Columns in Metadata:
['lesion_id', 'dx', 'dx_type', 'age', 'sex', 'localization', 'binary_label', 'image_id']

📌 Sample Data:
     lesion_id  dx    dx_type   age     sex     localization binary_label  \
0  HAM_0000550  nv  follow_up  45.0    male            trunk       Benign   
1  HAM_0003577  nv  follow_up  50.0    male  lower extremity       Benign   
2  HAM_0001477  nv  follow_up  55.0  female            trunk       Benign   
3  HAM_0000484  nv  follow_up  40.0    male            trunk       Benign   
4  HAM_0000981  nv  follow_up  75.0  female             back       Benign   

       image_id  
0  ISIC_0024306  
1  ISIC_0024307  
2  ISIC_0024308  
3  ISIC_0024309  
4  ISIC_0024311  


In [None]:
import pandas as pd

# Load original metadata CSV
metadata_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary.csv"
df = pd.read_csv(metadata_path)

# Replace spaces with underscores in 'localization' so the derived column names contain no spaces
df['localization'] = df['localization'].str.replace(' ', '_')

# One-hot encode 'localization' (one 'loc_<value>' column per distinct value)
localization_dummies = pd.get_dummies(df['localization'], prefix='loc')

# Convert 'sex' to numeric: female=0, male=1.
# Series.map turns every value that is not a key of sex_map into NaN, so report
# those rows instead of letting them turn into NaN unnoticed.
sex_map = {'female': 0, 'male': 1}
unmapped_sex = df['sex'].notna() & ~df['sex'].isin(list(sex_map))
if unmapped_sex.any():
    print(
        f"⚠️ {int(unmapped_sex.sum())} rows have a 'sex' value outside {sorted(sex_map)} "
        f"and will become NaN: {df.loc[unmapped_sex, 'sex'].value_counts().to_dict()}"
    )
df['sex'] = df['sex'].map(sex_map)

# Drop original 'localization' column
df = df.drop('localization', axis=1)

# Concatenate one-hot encoded localization columns
df = pd.concat([df, localization_dummies], axis=1)

# Save processed metadata to new CSV file
processed_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary_processed.csv"
df.to_csv(processed_path, index=False)

print(f"Processed metadata saved to: {processed_path}")
print(df.head())


Processed metadata saved to: /content/drive/MyDrive/DermAI/balanced_metadata_binary_processed.csv
     lesion_id  dx    dx_type   age  sex binary_label      image_id  \
0  HAM_0000550  nv  follow_up  45.0  1.0       Benign  ISIC_0024306   
1  HAM_0003577  nv  follow_up  50.0  1.0       Benign  ISIC_0024307   
2  HAM_0001477  nv  follow_up  55.0  0.0       Benign  ISIC_0024308   
3  HAM_0000484  nv  follow_up  40.0  1.0       Benign  ISIC_0024309   
4  HAM_0000981  nv  follow_up  75.0  0.0       Benign  ISIC_0024311   

   loc_abdomen  loc_acral  loc_back  ...  loc_face  loc_foot  loc_genital  \
0        False      False     False  ...     False     False        False   
1        False      False     False  ...     False     False        False   
2        False      False     False  ...     False     False        False   
3        False      False     False  ...     False     False        False   
4        False      False      True  ...     False     False        False   

   loc_hand 

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Read the CSV written by the previous processing step
processed_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary_processed.csv"
df = pd.read_csv(processed_path)

# The localization one-hot columns are the ones whose name starts with 'loc_'
one_hot_cols = [name for name in df.columns if name.startswith('loc_')]

# Cast the one-hot indicators to integers (0 or 1)
df[one_hot_cols] = df[one_hot_cols].astype(int)

# Swap the 'loc_' text for 'binary_' in each one-hot column name
renamed_columns = {name: name.replace('loc_', 'binary_') for name in one_hot_cols}
df = df.rename(columns=renamed_columns)

# Standardize 'age' into a new 'age_normalized' column.
# The scaler is fit on every row of this file (no train/test split has happened yet).
scaler = StandardScaler()
df['age_normalized'] = scaler.fit_transform(df[['age']])

# Write the result to its own CSV so the processed file stays untouched
normalized_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary_normalized.csv"
df.to_csv(normalized_path, index=False)

print(f"Normalized and updated metadata saved to: {normalized_path}")
print(df.head())


Normalized and updated metadata saved to: /content/drive/MyDrive/DermAI/balanced_metadata_binary_normalized.csv
     lesion_id  dx    dx_type   age  sex binary_label      image_id  \
0  HAM_0000550  nv  follow_up  45.0  1.0       Benign  ISIC_0024306   
1  HAM_0003577  nv  follow_up  50.0  1.0       Benign  ISIC_0024307   
2  HAM_0001477  nv  follow_up  55.0  0.0       Benign  ISIC_0024308   
3  HAM_0000484  nv  follow_up  40.0  1.0       Benign  ISIC_0024309   
4  HAM_0000981  nv  follow_up  75.0  0.0       Benign  ISIC_0024311   

   binary_abdomen  binary_acral  binary_back  ...  binary_foot  \
0               0             0            0  ...            0   
1               0             0            0  ...            0   
2               0             0            0  ...            0   
3               0             0            0  ...            0   
4               0             0            1  ...            0   

   binary_genital  binary_hand  binary_lower_extremity  binary_n

In [None]:
import joblib

# StandardScaler is imported here so this cell does not rely on an import made
# by a different cell.
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the 'age' column of the in-memory frame
# (this also recomputes 'age_normalized' on that frame)
scaler = StandardScaler()
df['age_normalized'] = scaler.fit_transform(df[['age']])

# Save the fitted scaler to disk so the same transformation can be re-applied later
scaler_path = "/content/drive/MyDrive/DermAI/age_scaler.save"
joblib.dump(scaler, scaler_path)

print(f"Scaler saved to: {scaler_path}")


Scaler saved to: /content/drive/MyDrive/DermAI/age_scaler.save


In [None]:
import os
import pandas as pd

# Paths
root_dir = "/content/drive/MyDrive/DermAI/balanced_two_classes"
metadata_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary_normalized.csv"

# Gather the extension-less names of all image files one level below root_dir
image_suffixes = (".jpg", ".jpeg", ".png")
image_ids_in_folder = set()
for class_folder in os.listdir(root_dir):
    class_path = os.path.join(root_dir, class_folder)
    if not os.path.isdir(class_path):
        continue
    image_ids_in_folder.update(
        os.path.splitext(img_file)[0]
        for img_file in os.listdir(class_path)
        if img_file.lower().endswith(image_suffixes)
    )

# Image IDs listed in the metadata CSV, as strings
df = pd.read_csv(metadata_path)
image_ids_in_metadata = set(df["image_id"].astype(str))

# Set differences in both directions
missing_from_folder = image_ids_in_metadata - image_ids_in_folder
extra_in_folder = image_ids_in_folder - image_ids_in_metadata

print(f"\n📌 Total images listed in metadata: {len(image_ids_in_metadata)}")
print(f"📌 Total images found in folder: {len(image_ids_in_folder)}")
print(f"❌ Missing images (in metadata but NOT found in folder): {len(missing_from_folder)}")
print(f"⚠️ Extra images (in folder but NOT in metadata): {len(extra_in_folder)}")

# Only list the IDs when there is a discrepancy to show
if missing_from_folder:
    print(f"\nList of missing images:\n{missing_from_folder}")

if extra_in_folder:
    print(f"\nList of extra images:\n{extra_in_folder}")



📌 Total images listed in metadata: 12061
📌 Total images found in folder: 12061
❌ Missing images (in metadata but NOT found in folder): 0
⚠️ Extra images (in folder but NOT in metadata): 0


In [None]:
import pandas as pd

# Path to your metadata CSV
metadata_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary_normalized.csv"

# Load the metadata
df = pd.read_csv(metadata_path)

# Map binary_label to numeric values: Benign -> 0, Malignant -> 1
label_map = {'Benign': 0, 'Malignant': 1}
df['label'] = df['binary_label'].map(label_map)

# Series.map yields NaN for any value that is not a key of label_map.
# Stop here rather than writing rows without a usable label back to the CSV.
unmapped_labels = df['label'].isna()
if unmapped_labels.any():
    raise ValueError(
        f"{int(unmapped_labels.sum())} rows have a binary_label outside {sorted(label_map)}: "
        f"{df.loc[unmapped_labels, 'binary_label'].unique().tolist()}"
    )
df['label'] = df['label'].astype(int)

# Verify the change
print(df[['binary_label', 'label']].head())

# Save back to the same path (or change filename if you want to keep original)
df.to_csv(metadata_path, index=False)

print(f"✅ Updated metadata with numeric 'label' column saved to: {metadata_path}")


  binary_label  label
0       Benign      0
1       Benign      0
2       Benign      0
3       Benign      0
4       Benign      0
✅ Updated metadata with numeric 'label' column saved to: /content/drive/MyDrive/DermAI/balanced_metadata_binary_normalized.csv


In [None]:
import pandas as pd

# Read the normalized metadata file
metadata_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary_normalized.csv"
df = pd.read_csv(metadata_path)

# Column listing first, then a short row preview
print("📌 Column Names:", list(df.columns), sep="\n")
print("\n📌 First 5 Rows:", df.head(), sep="\n")


📌 Column Names:
['lesion_id', 'dx', 'dx_type', 'age', 'sex', 'binary_label', 'image_id', 'binary_abdomen', 'binary_acral', 'binary_back', 'binary_chest', 'binary_ear', 'binary_face', 'binary_foot', 'binary_genital', 'binary_hand', 'binary_lower_extremity', 'binary_neck', 'binary_scalp', 'binary_trunk', 'binary_unknown', 'binary_upper_extremity', 'age_normalized', 'label']

📌 First 5 Rows:
     lesion_id  dx    dx_type   age  sex binary_label      image_id  \
0  HAM_0000550  nv  follow_up  45.0  1.0       Benign  ISIC_0024306   
1  HAM_0003577  nv  follow_up  50.0  1.0       Benign  ISIC_0024307   
2  HAM_0001477  nv  follow_up  55.0  0.0       Benign  ISIC_0024308   
3  HAM_0000484  nv  follow_up  40.0  1.0       Benign  ISIC_0024309   
4  HAM_0000981  nv  follow_up  75.0  0.0       Benign  ISIC_0024311   

   binary_abdomen  binary_acral  binary_back  ...  binary_genital  \
0               0             0            0  ...               0   
1               0             0            

In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf

# === Paths ===
metadata_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary_normalized.csv"
image_root_dir = "/content/drive/MyDrive/DermAI/balanced_two_classes"

# === Load metadata ===
df = pd.read_csv(metadata_path)

# ✅ Select valid metadata features (exclude 'binary_label' since it's a string)
metadata_features = ['age_normalized', 'sex'] + [
    col for col in df.columns if col.startswith('binary_') and col != 'binary_label'
]

# ✅ 'label' column already contains 0/1, so no mapping needed
print(f"Metadata features used: {metadata_features}")
print("Label distribution:\n", df['label'].value_counts())

# === Split into train, val, test ===
# NOTE(review): the split is done per image_id. The frame also has a 'lesion_id'
# column; if several images share a lesion, they can land in different splits —
# confirm whether a group-aware split is needed.
image_ids = df['image_id'].values
labels = df['label'].values

# First 70% train, 30% test+val, stratified on the label. The labels are split
# alongside the IDs so the held-out labels are available for the second split.
train_ids, test_val_ids, train_labels, test_val_labels = train_test_split(
    image_ids, labels, test_size=0.3, random_state=42, stratify=labels
)

# Then split test+val 50/50, also stratified, so validation and test keep the
# same class ratio as the training set.
val_ids, test_ids = train_test_split(
    test_val_ids, test_size=0.5, random_state=42, stratify=test_val_labels
)

# Create DataFrames
train_df = df[df['image_id'].isin(train_ids)]
val_df = df[df['image_id'].isin(val_ids)]
test_df = df[df['image_id'].isin(test_ids)]

# --- Shuffle train_df BEFORE dataset creation ---
train_df = train_df.sample(frac=1, random_state=42).reset_index(drop=True)

print(f"Train samples: {len(train_df)}, Val samples: {len(val_df)}, Test samples: {len(test_df)}")

# === Image Preprocessing ===
IMG_SIZE = 380

@tf.function  # Compiles preprocessing into a TensorFlow graph
def preprocess_image(img_path):
    """Load one JPEG and return it resized to IMG_SIZE x IMG_SIZE with 3 channels.

    Args:
        img_path: path of the image file, passed to ``tf.io.read_file``.

    Returns:
        The resized image divided by 255.0.

    NOTE(review): the Keras EfficientNet applications document their expected
    input as pixel values in the [0, 255] range — confirm that dividing by 255
    here is intended for the EfficientNetB4 backbone used later.
    """
    encoded = tf.io.read_file(img_path)
    decoded = tf.image.decode_jpeg(encoded, channels=3)
    resized = tf.image.resize(decoded, [IMG_SIZE, IMG_SIZE])
    return resized / 255.0

# Lookup built on first use: {image_id: path} for the image root it was built from.
# Listing the class folders once replaces a directory listing plus existence
# checks on every single lookup. Re-running this cell resets the cache.
_IMAGE_PATH_CACHE = {"root": None, "paths": {}}


def _index_jpg_paths(root):
    """Map the stem of every '*.jpg' file one level below ``root`` to its full path."""
    paths = {}
    for class_folder in os.listdir(root):
        class_path = os.path.join(root, class_folder)
        if not os.path.isdir(class_path):
            continue
        for file_name in os.listdir(class_path):
            if file_name.endswith(".jpg"):
                # setdefault keeps the first class folder (in listing order) holding the file
                paths.setdefault(file_name[:-len(".jpg")], os.path.join(class_path, file_name))
    return paths


def get_image_path(image_id):
    """Return the path of ``<image_id>.jpg`` inside one of the class folders of image_root_dir.

    Args:
        image_id: file name without the '.jpg' extension.

    Returns:
        ``os.path.join(image_root_dir, <class folder>, image_id + ".jpg")`` for the
        first class folder that contains the file.

    Raises:
        FileNotFoundError: if no class folder contains the file.
    """
    # (Re)build the index when it is empty or image_root_dir has changed
    if _IMAGE_PATH_CACHE["root"] != image_root_dir:
        _IMAGE_PATH_CACHE["paths"] = _index_jpg_paths(image_root_dir)
        _IMAGE_PATH_CACHE["root"] = image_root_dir
    try:
        return _IMAGE_PATH_CACHE["paths"][image_id]
    except KeyError:
        raise FileNotFoundError(f"Image {image_id}.jpg not found.") from None

# === Dataset Creation ===
def create_dataset(dataframe, batch_size=14, shuffle=False):
    """Build a batched ``tf.data.Dataset`` yielding ``((image, metadata), label)``.

    Args:
        dataframe: frame with an 'image_id' column, a 'label' column and the
            columns named in ``metadata_features``.
        batch_size: number of examples per batch; the last batch may be smaller.
        shuffle: when True, the examples are shuffled on every iteration.

    Returns:
        A dataset whose elements are ``((images, metadata), labels)`` with
        float32 images of shape (batch, IMG_SIZE, IMG_SIZE, 3), float32 metadata
        of shape (batch, len(metadata_features)) and float32 labels of shape (batch,).

    Raises:
        FileNotFoundError: from ``get_image_path`` when an image file is missing;
            paths are resolved when the dataset is created.
    """
    # Resolve every path once up front instead of on each pass over the data
    image_paths = tf.constant(
        [get_image_path(image_id) for image_id in dataframe['image_id'].values],
        dtype=tf.string,
    )
    labels = dataframe['label'].values.astype(np.float32)
    metadata = dataframe[metadata_features].values.astype(np.float32)

    ds = tf.data.Dataset.from_tensor_slices((image_paths, metadata, labels))

    if shuffle:
        # Shuffle before decoding: the buffer then holds path strings and small
        # vectors rather than decoded images, so it can span the whole split.
        ds = ds.shuffle(buffer_size=max(len(dataframe), 1))

    def load_example(img_path, meta, label):
        # Decode/resize lazily so only the batches in flight hold image data
        return (preprocess_image(img_path), meta), label

    ds = ds.map(load_example, num_parallel_calls=tf.data.AUTOTUNE)

    return ds.batch(batch_size, drop_remainder=False).prefetch(tf.data.AUTOTUNE)  # No caching here

# === Create Datasets ===
batch_size = 14 # Safe for T4 GPU with 380x380 images

# Only the training split is shuffled; validation and test keep their row order
train_ds, val_ds, test_ds = (
    create_dataset(split_df, batch_size=batch_size, shuffle=shuffle_split)
    for split_df, shuffle_split in ((train_df, True), (val_df, False), (test_df, False))
)

print("✅ Datasets are ready for training!")


Metadata features used: ['age_normalized', 'sex', 'binary_abdomen', 'binary_acral', 'binary_back', 'binary_chest', 'binary_ear', 'binary_face', 'binary_foot', 'binary_genital', 'binary_hand', 'binary_lower_extremity', 'binary_neck', 'binary_scalp', 'binary_trunk', 'binary_unknown', 'binary_upper_extremity']
Label distribution:
 label
0    7996
1    3998
Name: count, dtype: int64
Train samples: 8395, Val samples: 1799, Test samples: 1800
✅ Datasets are ready for training!


In [None]:
# Count how many examples of each label the first 10 training batches contain
for batch_no, (_inputs, batch_labels) in enumerate(train_ds.take(10), start=1):
    values, occurrences = np.unique(batch_labels.numpy().astype(int), return_counts=True)
    print(f"Batch {batch_no} label distribution: {dict(zip(values, occurrences))}")


In [None]:
import os
import pandas as pd

# Paths
metadata_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary_normalized.csv"
image_root_dir = "/content/drive/MyDrive/DermAI/balanced_two_classes"

# Load metadata
df = pd.read_csv(metadata_path)

# Count rows with NaNs in metadata columns (mainly age_normalized, sex)
na_rows = df[df[['age_normalized', 'sex']].isna().any(axis=1)]

print(f"Removing {len(na_rows)} rows with NaNs from metadata and corresponding images.")

# Remove corresponding image files.
# Images are stored as <image_root_dir>/<class folder>/<image_id><ext>, so each
# class folder has to be searched; a path built directly under image_root_dir
# never matches an existing file.
class_dirs = [
    os.path.join(image_root_dir, entry)
    for entry in os.listdir(image_root_dir)
    if os.path.isdir(os.path.join(image_root_dir, entry))
]
for img_id in na_rows['image_id']:
    candidate_paths = (
        os.path.join(class_dir, img_id + ext)
        for class_dir in class_dirs
        for ext in ['.jpg', '.jpeg', '.png']
    )
    # Delete at most one file per image ID: the first candidate that exists
    img_path = next((path for path in candidate_paths if os.path.exists(path)), None)
    if img_path is None:
        print(f"No image file found for: {img_id}")
    else:
        os.remove(img_path)
        print(f"Deleted image: {img_path}")

# Drop NaN rows from dataframe
df_cleaned = df.dropna(subset=['age_normalized', 'sex'])

# Save cleaned metadata back to same file
df_cleaned.to_csv(metadata_path, index=False)
print("Updated metadata CSV saved.")


Removing 0 rows with NaNs from metadata and corresponding images.
Updated metadata CSV saved.


In [None]:
# === Sanity checks on metadata and labels ===
feature_frame = df[metadata_features]

print("\n--- Metadata NaN Check ---")
print(feature_frame.isna().sum())   # NaN count for each metadata feature

print("\n--- Labels NaN Check ---")
print(df['label'].isna().sum())  # Should be 0

print("\n--- Metadata Statistics ---")
print(feature_frame.describe())

# === Check that all images exist in image folders ===
# The listing of image_root_dir does not change inside the loop, so read it
# once instead of once per image ID.
class_folders = os.listdir(image_root_dir)

# An image counts as missing when no class folder contains '<image_id>.jpg'
missing_images = [
    img_id
    for img_id in df['image_id']
    if not any(
        os.path.exists(os.path.join(image_root_dir, class_folder, img_id + ".jpg"))
        for class_folder in class_folders
    )
]

print(f"\nMissing images count: {len(missing_images)}")
if missing_images:
    print("Example missing image IDs:", missing_images[:5])

# === Load and check sample image + metadata shape ===
sample_idx = 0
sample_image_id = df['image_id'].iloc[sample_idx]
print(f"\nSample image ID: {sample_image_id}")

try:
    sample_img = None
    # First class folder that contains '<sample_image_id>.jpg', if any
    candidate_paths = (
        os.path.join(image_root_dir, class_folder, sample_image_id + ".jpg")
        for class_folder in os.listdir(image_root_dir)
    )
    path = next((candidate for candidate in candidate_paths if os.path.exists(candidate)), None)
    if path is None:
        print("Sample image not found!")
    else:
        sample_img = tf.io.read_file(path)
        sample_img = tf.image.decode_jpeg(sample_img, channels=3)
        print("Sample image shape (original):", sample_img.shape)
        sample_img = tf.image.resize(sample_img, [IMG_SIZE, IMG_SIZE]) / 255.0
        print("Sample image shape (resized):", sample_img.shape)
except Exception as e:
    # Any failure while locating or decoding the sample is reported, not raised
    print("Error loading sample image:", e)

print("\nSample metadata values:")
print(df.loc[sample_idx, metadata_features].to_numpy())

# === Quick check for NaNs in train/val/test splits metadata and labels ===
split_frames = (("Train", train_df), ("Val", val_df), ("Test", test_df))
nan_counters = (
    ("metadata", lambda frame: frame[metadata_features].isna().sum().sum()),
    ("labels", lambda frame: frame['label'].isna().sum()),
)

for column_kind, count_nans in nan_counters:
    for position, (split_name, split_frame) in enumerate(split_frames):
        # A blank line separates the metadata group from the labels group
        lead = "\n" if position == 0 else ""
        print(f"{lead}{split_name} NaNs in {column_kind}:", count_nans(split_frame))



--- Metadata NaN Check ---
age_normalized            0
sex                       0
binary_abdomen            0
binary_acral              0
binary_back               0
binary_chest              0
binary_ear                0
binary_face               0
binary_foot               0
binary_genital            0
binary_hand               0
binary_lower_extremity    0
binary_neck               0
binary_scalp              0
binary_trunk              0
binary_unknown            0
binary_upper_extremity    0
dtype: int64

--- Labels NaN Check ---
0

--- Metadata Statistics ---
       age_normalized           sex  binary_abdomen  binary_acral  \
count    11994.000000  11994.000000    11994.000000  11994.000000   
mean         0.000795      0.557862        0.092963      0.000584   
std          0.999448      0.496661        0.290393      0.024152   
min         -3.137979      0.000000        0.000000      0.000000   
25%         -0.808047      0.000000        0.000000      0.000000   
50%         

In [None]:
import pandas as pd
# Reload the metadata and show how many rows carry each numeric label.
# (A bare `df.head()` in the middle of a cell is never displayed, so it is omitted.)
metadata_path = "/content/drive/MyDrive/DermAI/balanced_metadata_binary_normalized.csv"
df = pd.read_csv(metadata_path)
df['label'].value_counts()



Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
0,7996
1,3998


In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Labels of every row in the metadata frame
labels_array = df['label'].values

# `classes` is passed as a NumPy array; the returned weights follow its order
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.array([0, 1]),
    y=labels_array,
)

# Position in the weights array doubles as the class index (0, 1)
class_weight_dict = dict(enumerate(class_weights))

print("Class weights:", class_weight_dict)


Class weights: {0: np.float64(0.75), 1: np.float64(1.5)}


In [None]:
from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Dropout, Concatenate, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC, Precision, Recall

# Two-input binary classifier: an EfficientNetB4 image branch and a small dense
# metadata branch are concatenated and fed to a sigmoid output.

# === Input shapes ===
# Width of the metadata vector = number of selected metadata feature columns
num_metadata_features = len(metadata_features)

# --- Image Branch ---
image_input = Input(shape=(380, 380, 3), name='image_input')
# include_top=False drops the ImageNet classification head; the backbone is
# wired directly onto image_input
base_model = EfficientNetB4(weights='imagenet', include_top=False, input_tensor=image_input)
base_model.trainable = False  # Freeze EfficientNetB4 layers for initial training

# Pool the backbone feature map to one vector per image, then regularize
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.3)(x)

# --- Metadata Branch ---
# Two Dense+BatchNormalization stages (128 then 64 units) with dropout in between
meta_input = Input(shape=(num_metadata_features,), name='meta_input')
m = Dense(128, activation='relu')(meta_input)
m = BatchNormalization()(m)
m = Dropout(0.3)(m)
m = Dense(64, activation='relu')(m)
m = BatchNormalization()(m)

# --- Fusion ---
# Join the image vector and the metadata vector, then mix them in one hidden layer
combined = Concatenate()([x, m])
z = Dense(128, activation='relu')(combined)
z = Dropout(0.4)(z)

# --- Output Layer ---
# Single sigmoid unit, paired with the binary cross-entropy loss below
output = Dense(1, activation='sigmoid')(z)

# === Final Model ===
# Input order is [image, metadata]
model = Model(inputs=[image_input, meta_input], outputs=output)

# === Compile with additional metrics ===
# The AUC metric is named 'auc', so its validation value is reported as 'val_auc'
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy', AUC(name='auc'), Precision(name='precision'), Recall(name='recall')]
)

# === Model Summary ===
model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5
[1m71686520/71686520[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# === Checkpoint path for saving best model ===
# Uses the '.keras' file name that the fine-tuning cell loads
# ('best_model_b4_binary_initial.keras'), so the checkpoint written here is the
# file that cell reads.
checkpoint_path = '/content/drive/MyDrive/DermAI/best_model_b4_binary_initial.keras'

# === Save the best model based on validation AUC ===
checkpoint_cb = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_auc',
    save_best_only=True,
    mode='max',      # Maximize AUC
    verbose=1
)

# === Stop training early if val_auc doesn't improve ===
earlystop_cb = EarlyStopping(
    monitor='val_auc',
    patience=5,
    restore_best_weights=True,
    mode='max',
    verbose=1
)

# === Reduce LR if no improvement in val_auc for 'patience' epochs ===
reduce_lr_cb = ReduceLROnPlateau(
    monitor='val_auc',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
    mode='max',
    verbose=1
)

# === Combine all callbacks ===
callbacks = [checkpoint_cb, earlystop_cb, reduce_lr_cb]


In [None]:
# Spot-check the model on the first 5 training batches: prediction range vs. true label counts
for batch_no, ((img_batch, meta_batch), label_batch) in enumerate(train_ds.take(5), start=1):
    preds = model.predict([img_batch, meta_batch])
    true_label_counts = np.bincount(label_batch.numpy().astype(int))
    print(f"Batch {batch_no} predictions mean: {preds.mean()}, min: {preds.min()}, max: {preds.max()}")
    print(f"Batch {batch_no} true label distribution: {true_label_counts}")


In [None]:
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC, Precision, Recall

# ✅ Load the model saved by the initial training stage
model_path = '/content/drive/MyDrive/DermAI/best_model_b4_binary_initial.keras'
model = load_model(model_path)

print("✅ Model loaded successfully!")

# ✅ Mark every layer trainable first; each layer is assigned again just below
for layer in model.layers:
    if hasattr(layer, 'trainable'):
        layer.trainable = True

# Walk the layers from the output side: the first 30 stay trainable, the rest are frozen
trainable_count = 0
for layer in reversed(model.layers):
    if not hasattr(layer, 'trainable'):
        continue
    keep_trainable = trainable_count < 30
    layer.trainable = keep_trainable
    trainable_count += int(keep_trainable)

# ✅ Recompile so the new trainable flags take effect, with a lower learning rate
fine_tune_metrics = [
    'accuracy',
    AUC(name='auc'),
    Precision(name='precision'),
    Recall(name='recall'),
]
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='binary_crossentropy',
    metrics=fine_tune_metrics,
)


✅ Model loaded successfully!


  saveable.load_own_variables(weights_store.get(inner_path))


In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Where the best model of the resumed run is written
checkpoint_path = '/content/drive/MyDrive/DermAI/best_model_b4_binary_resume.keras'

# All three callbacks watch validation AUC, treat higher as better, and log verbosely
monitor_options = dict(monitor='val_auc', mode='max', verbose=1)

# Keep only the best-scoring model on disk
checkpoint_cb = ModelCheckpoint(filepath=checkpoint_path, save_best_only=True, **monitor_options)

# Stop after 5 epochs without improvement and roll back to the best weights
earlystop_cb = EarlyStopping(patience=5, restore_best_weights=True, **monitor_options)

# Halve the learning rate after 3 epochs without improvement, down to 1e-6 at most
reduce_lr_cb = ReduceLROnPlateau(factor=0.5, patience=3, min_lr=1e-6, **monitor_options)

callbacks = [checkpoint_cb, earlystop_cb, reduce_lr_cb]


In [None]:
# Rebuild the train/val pipelines and repeat them endlessly; the step counts
# below define how many batches make up one epoch.
train_ds = create_dataset(train_df, batch_size=batch_size, shuffle=True).repeat()
val_ds = create_dataset(val_df, batch_size=batch_size, shuffle=False).repeat()

# Ceiling division: batches needed to cover every row once
steps_per_epoch = -(-len(train_df) // batch_size)
validation_steps = -(-len(val_df) // batch_size)


In [None]:
# Train for up to 20 epochs on the repeated datasets, using the explicit step
# counts, the callbacks defined earlier and the per-class loss weights.
fit_options = dict(
    epochs=20,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps,
    class_weight=class_weight_dict,
    callbacks=callbacks,
    verbose=1,
)
history = model.fit(train_ds, **fit_options)


Epoch 1/20
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.7087 - auc: 0.7726 - loss: 0.5738 - precision: 0.5462 - recall: 0.7364
Epoch 1: val_auc improved from -inf to 0.79821, saving model to /content/drive/MyDrive/DermAI/best_model_b4_binary_resume.keras
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4049s[0m 6s/step - accuracy: 0.7088 - auc: 0.7726 - loss: 0.5738 - precision: 0.5462 - recall: 0.7364 - val_accuracy: 0.7204 - val_auc: 0.7982 - val_loss: 0.5613 - val_precision: 0.5564 - val_recall: 0.7772 - learning_rate: 1.0000e-05
Epoch 2/20
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 215ms/step - accuracy: 0.6945 - auc: 0.7560 - loss: 0.5900 - precision: 0.5288 - recall: 0.7257
Epoch 2: val_auc did not improve from 0.79821
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m185s[0m 308ms/step - accuracy: 0.6945 - auc: 0.7561 - loss: 0.5900 - precision: 0.5288 - recall: 0.7257 - val_accuracy: 0.7315 -

In [None]:
from tensorflow import keras

# Reload the checkpoint file written by the resumed training run
model_path = "/content/drive/MyDrive/DermAI/best_model_b4_binary_resume.keras"
model = keras.models.load_model(model_path)

print("✅ Best resumed model loaded!")


✅ Best resumed model loaded!


In [None]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# === Make predictions ===
# Collect true labels and thresholded predictions batch by batch
y_true, y_pred = [], []

for (img_batch, meta_batch), label_batch in test_ds:
    probs = model.predict([img_batch, meta_batch], verbose=0)
    preds = (probs > 0.5).astype(int)  # probability above 0.5 -> class 1

    y_true += label_batch.numpy().tolist()
    y_pred += preds.ravel().tolist()

y_true = np.array(y_true, dtype=int)
y_pred = np.array(y_pred, dtype=int)

# === Reverse Label Encoding ===
# The fitted encoder's classes_ supply the display names used in the report
label_encoder = LabelEncoder().fit(['benign', 'malignant'])
labels = label_encoder.classes_

# === Confusion Matrix & Report ===
print("🧮 Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))

print("\n📊 Classification Report:")
print(classification_report(y_true, y_pred, target_names=labels))
