SUNBURN CLASSIFICATION

In [None]:
# Step 1: Mount Google Drive at /content/drive so files stored in Drive
# (the dataset ZIP used in the next step) are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Step 2: Unzip the dataset into /content/HAM10000
# 🔁 Replace the path below with the exact path to your ZIP file inside your Google Drive.
# `-o` overwrites existing files without prompting; `-d` sets the extraction directory.
!unzip -o '/content/drive/MyDrive/HAM 10000.zip' -d '/content/HAM10000'

In [None]:
# Print the entries found directly inside the extraction folder.
import os

root_path = "/content/HAM10000"
extracted_entries = os.listdir(root_path)
print("Contents:", extracted_entries)

In [None]:
import pandas as pd

# Read the HAM10000 metadata table into `df`.
metadata_csv = '/content/HAM10000/HAM10000_metadata.csv'
df = pd.read_csv(metadata_csv)

# Preview the schema (column index) and the first rows.
metadata_preview = df.head()
print("Columns in the metadata:", df.columns)
print("\nFirst few rows of metadata:\n", metadata_preview)

In [None]:
import os

# List the entries directly inside the extracted HAM10000 folder and print
# the raw list of names exactly as os.listdir returns them.
extracted_files = os.listdir('/content/HAM10000')
print(extracted_files)

In [None]:
import os

# The images are stored in two folders; both paths are kept as globals.
image_dir_1 = '/content/HAM10000/HAM10000_images_part_1'
image_dir_2 = '/content/HAM10000/HAM10000_images_part_2'

# Directory listings for each part (bare file names, no directory prefix).
image_files_1, image_files_2 = (
    os.listdir(folder) for folder in (image_dir_1, image_dir_2)
)

# One combined list: part 1 entries first, then part 2 entries.
image_files = [*image_files_1, *image_files_2]

# Preview the first ten file names.
print(image_files[:10])

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Use the first listed file as the preview sample.
sample_image = image_files[0]

# Resolve its folder: part 1 if the name is listed there, otherwise part 2.
if sample_image in image_files_1:
    image_path = os.path.join(image_dir_1, sample_image)
else:
    image_path = os.path.join(image_dir_2, sample_image)

# Load the pixels and render them with the axes hidden.
img = mpimg.imread(image_path)
plt.imshow(img)
plt.axis('off')
plt.show()

In [None]:
#Phase 1
# Builds in-memory arrays (X, y) of resized images and "sunburn severity"
# labels derived from the metadata `dx` column, then makes a train/test split.
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

# --- Image File Mapping ---
# Collect all image file names (kept as globals; a later cell reads image_files_1)
image_files_1 = os.listdir('/content/HAM10000/HAM10000_images_part_1')
image_files_2 = os.listdir('/content/HAM10000/HAM10000_images_part_2')

# Map image_id (file name without its extension) to the full path of the .jpg.
# splitext keeps the whole stem even if a name contains extra dots.
image_id_to_path = {}
for folder, filenames in (
    ('/content/HAM10000/HAM10000_images_part_1', image_files_1),
    ('/content/HAM10000/HAM10000_images_part_2', image_files_2),
):
    for filename in filenames:
        if filename.endswith('.jpg'):
            image_id = os.path.splitext(filename)[0]
            image_id_to_path[image_id] = os.path.join(folder, filename)

# --- Load Metadata ---
df = pd.read_csv('/content/HAM10000/HAM10000_metadata.csv')
# Keep only rows whose image was found on disk. The explicit copy makes the
# column assignments below act on an independent frame (no
# SettingWithCopyWarning).
df = df[df['image_id'].isin(image_id_to_path.keys())].copy()

# --- Optional: Sunburn Severity Mapping ---
# You can define this mapping with proper references from medical literature
# NOTE(review): these labels are a proxy derived from lesion diagnosis codes,
# not from sunburn annotations — confirm the mapping before relying on results.
sunburn_mapping = {
    'nv': 'no_sunburn',          # Melanocytic nevi
    'mel': 'moderate_sunburn',   # Melanoma (linked with UV)
    'bkl': 'mild_sunburn',       # Benign keratosis (from chronic sun)
    'akiec': 'severe_sunburn',   # Actinic keratoses (UV-induced pre-cancer)
    'bcc': 'moderate_sunburn',   # Basal cell carcinoma (from sun)
    'vasc': 'no_sunburn',        # Vascular lesions
    'df': 'no_sunburn'           # Dermatofibroma
}

df['sunburn_severity'] = df['dx'].map(sunburn_mapping)

# Fail early on diagnosis codes that are missing from the mapping: Series.map
# turns them into NaN, which would otherwise flow into the label encoder.
unmapped_dx = sorted(
    df.loc[df['sunburn_severity'].isna(), 'dx'].astype(str).unique()
)
if unmapped_dx:
    raise ValueError(f"No sunburn severity mapping for dx values: {unmapped_dx}")

# Encode severity labels as integers (le_severity.classes_ holds the names)
le_severity = LabelEncoder()
df['severity_label'] = le_severity.fit_transform(df['sunburn_severity'])

# --- Load and preprocess images ---
image_size = (128, 128)
images = []
labels = []

print("Loading and processing images with severity mapping...")
for image_id, label in tqdm(zip(df['image_id'], df['severity_label']), total=len(df)):
    image_path = image_id_to_path[image_id]

    img = load_img(image_path, target_size=image_size)
    # Divide by 255 so 8-bit pixel values land in [0, 1]
    img_array = img_to_array(img) / 255.0

    images.append(img_array)
    labels.append(label)

X = np.array(images)
y = np.array(labels)

# --- Split Data ---
# Stratify on the label so both splits keep the class proportions, matching
# the stratified split used for the on-disk train/val folders in this file.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# --- Summary ---
print("\n✅ Data preparation complete with sunburn severity mapping:")
print(f"Total images: {len(X)}")
print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")
print(f"Image shape: {X_train[0].shape}")
print(f"Unique severity labels: {np.unique(y)} => {le_severity.classes_}")

In [None]:
#Phase_2: Imports
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16, MobileNetV2, EfficientNetB0
from tensorflow.keras.applications.vgg16 import preprocess_input as vgg_preprocess
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as mobilenet_preprocess
from tensorflow.keras.applications.efficientnet import preprocess_input as efficientnet_preprocess
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

In [None]:
#Phase_2: Image Data Generators
# Target image size and batch size used by every generator created below.
img_size = (224, 224)
batch_size = 32

# Roots of the train/validation image trees (one sub-folder per class label).
train_dir = '/content/HAM10000/sunburn_data/train'
val_dir = '/content/HAM10000/sunburn_data/val'

# One ImageDataGenerator per backbone key, each wired to that backbone's own
# preprocess_input function.
datagens = {
    key: ImageDataGenerator(preprocessing_function=preprocess_fn)
    for key, preprocess_fn in (
        ("vgg16", vgg_preprocess),
        ("mobilenetv2", mobilenet_preprocess),
        ("efficientnetb0", efficientnet_preprocess),
    )
}

def create_data_generators(preprocessor):
    """Return ``(train_iterator, val_iterator)`` for one preprocessing key.

    ``preprocessor`` must be a key of the module-level ``datagens`` dict.
    Both iterators read from ``train_dir`` / ``val_dir`` with the shared
    ``img_size``, ``batch_size`` and ``class_mode='categorical'``; the
    validation iterator is additionally created with ``shuffle=False``.
    """
    generator = datagens[preprocessor]
    shared_options = dict(
        target_size=img_size,
        batch_size=batch_size,
        class_mode='categorical',
    )
    training_flow = generator.flow_from_directory(train_dir, **shared_options)
    validation_flow = generator.flow_from_directory(val_dir, shuffle=False, **shared_options)
    return training_flow, validation_flow

In [None]:
#Phase_2: Model Builder Function
def build_model(base_model, input_shape=(224, 224, 3), num_classes=4):
    """Put a softmax classification head on top of a frozen ``base_model``.

    The base model is marked non-trainable, its output is globally
    average-pooled, passed through ``Dropout(0.3)`` and fed to a
    ``Dense(num_classes, activation='softmax')`` layer.

    ``input_shape`` is accepted but not read by this function; the returned
    ``Model`` takes its input from ``base_model.input``.
    """
    base_model.trainable = False  # only the new head layers are left trainable

    pooled = GlobalAveragePooling2D()(base_model.output)
    regularized = Dropout(0.3)(pooled)
    predictions = Dense(num_classes, activation='softmax')(regularized)

    return Model(inputs=base_model.input, outputs=predictions)

In [None]:
#Phase_2: Training Function
def train_model(model, train_gen, val_gen, name, epochs=10, patience=3):
    """Compile, fit and evaluate ``model``; return the ``fit`` history.

    Args:
        model: Keras model with a softmax output (one unit per class).
        train_gen: Training iterator passed to ``model.fit``.
        val_gen: Validation iterator; its ``classes`` and ``class_indices``
            attributes are used for the report, so it must not shuffle.
        name: Display name used in the printed report and the plot title.
        epochs: Maximum number of training epochs (default 10).
        patience: Epochs without ``val_loss`` improvement before early
            stopping triggers (default 3).

    Returns:
        The object returned by ``model.fit``.
    """
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Stop once val_loss stalls and roll back to the best weights seen.
    es = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)

    history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=epochs,
        callbacks=[es]
    )

    # Evaluation
    val_preds = model.predict(val_gen)
    y_true = val_gen.classes
    y_pred = np.argmax(val_preds, axis=1)

    # Concrete list of class names ordered by class index, so row/column k of
    # the report and matrix is guaranteed to be labelled with class k.
    class_names = [
        label
        for label, _ in sorted(val_gen.class_indices.items(), key=lambda item: item[1])
    ]
    # Pin the full label set: a class missing from both y_true and y_pred
    # would otherwise shrink the outputs and mismatch the names.
    class_ids = list(range(len(class_names)))

    print(f"\nClassification Report for {name}:")
    print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_names, yticklabels=class_names, cmap='Blues')
    plt.title(f'{name} - Confusion Matrix')
    plt.show()

    return history

In [None]:
import shutil
from sklearn.model_selection import train_test_split

# Create the <base_dir>/<split>/<label> folder tree read by flow_from_directory
base_dir = "/content/HAM10000/sunburn_data"
os.makedirs(base_dir, exist_ok=True)

for split in ['train', 'val']:
    for label in df['sunburn_severity'].unique():
        os.makedirs(os.path.join(base_dir, split, label), exist_ok=True)

# Split metadata 80/20, stratified on the severity label
# NOTE(review): if the metadata holds several images of the same lesion, an
# image-level split can put one lesion in both train and val — confirm whether
# a grouped split is needed.
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['sunburn_severity'], random_state=42)

# Copy files to respective folders
def copy_images(dataframe, split):
    """Copy every image listed in ``dataframe`` into ``base_dir/split/<label>/``.

    Args:
        dataframe: Frame with ``image_id`` and ``sunburn_severity`` columns.
        split: Name of the split sub-folder (``"train"`` or ``"val"``).

    Source paths come from the ``image_id_to_path`` dict built in Phase 1, a
    constant-time lookup instead of scanning the part-1 file list per row.
    Destination files are named ``<image_id>.jpg``; existing files are
    overwritten by ``shutil.copyfile``.
    """
    for image_id, label in zip(dataframe['image_id'], dataframe['sunburn_severity']):
        src_path = image_id_to_path[image_id]
        dst_path = os.path.join(base_dir, split, label, image_id + ".jpg")
        shutil.copyfile(src_path, dst_path)

copy_images(train_df, "train")
copy_images(val_df, "val")

print("✅ Dataset split complete. Ready for training!")

In [None]:
import shutil
from sklearn.model_selection import train_test_split

# NOTE: this cell repeats the previous split/copy cell. Re-running it
# recreates the same folders (exist_ok=True), the same split (fixed
# random_state) and overwrites the same destination files.

# Create the <base_dir>/<split>/<label> folder tree read by flow_from_directory
base_dir = "/content/HAM10000/sunburn_data"
os.makedirs(base_dir, exist_ok=True)

for split in ['train', 'val']:
    for label in df['sunburn_severity'].unique():
        os.makedirs(os.path.join(base_dir, split, label), exist_ok=True)

# Split metadata 80/20, stratified on the severity label
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['sunburn_severity'], random_state=42)

# Copy files to respective folders
def copy_images(dataframe, split):
    """Copy every image listed in ``dataframe`` into ``base_dir/split/<label>/``.

    Args:
        dataframe: Frame with ``image_id`` and ``sunburn_severity`` columns.
        split: Name of the split sub-folder (``"train"`` or ``"val"``).

    Source paths come from the ``image_id_to_path`` dict built in Phase 1, a
    constant-time lookup instead of scanning the part-1 file list per row.
    Destination files are named ``<image_id>.jpg``; existing files are
    overwritten by ``shutil.copyfile``.
    """
    for image_id, label in zip(dataframe['image_id'], dataframe['sunburn_severity']):
        src_path = image_id_to_path[image_id]
        dst_path = os.path.join(base_dir, split, label, image_id + ".jpg")
        shutil.copyfile(src_path, dst_path)

copy_images(train_df, "train")
copy_images(val_df, "val")

print("✅ Dataset split complete. Ready for training!")

In [None]:
# Define the models and preprocessing keys.
# Each entry is (ImageNet-pretrained backbone without its top, key into `datagens`).
models_to_train = {
    "VGG16": (VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3)), "vgg16"),
    "MobileNetV2": (MobileNetV2(weights="imagenet", include_top=False, input_shape=(224, 224, 3)), "mobilenetv2"),
    "EfficientNetB0": (EfficientNetB0(weights="imagenet", include_top=False, input_shape=(224, 224, 3)), "efficientnetb0")
}

# Training history per model name; `history` alone only ever holds the last one.
histories = {}

# Loop through each model for training
for name, (base_model, preprocess_key) in models_to_train.items():
    print(f"\n🔥 Now Training {name} Model")

    # Create generators using this backbone's preprocessing
    train_gen, val_gen = create_data_generators(preprocess_key)

    # Build the model, sizing the softmax head from the class folders found
    # on disk so it always matches the generator's one-hot labels.
    model = build_model(base_model, num_classes=len(train_gen.class_indices))

    # Train and evaluate
    history = train_model(model, train_gen, val_gen, name)
    histories[name] = history

    # Optional: Save the trained model
    model.save(f"{name}_sunburn_model.h5")
    print(f"✅ Saved {name} model to disk.")