In [2]:
%pip install tensorflow pandas numpy
%pip install opencv-python


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import os

IMG_DIR = "../data/HAM10000_images_all"  
CSV_PATH = "../data/HAM10000_metadata.csv"  

df = pd.read_csv(CSV_PATH)

df["full_path"] = df["image_id"].apply(lambda x: os.path.join(IMG_DIR, x + ".jpg"))

df = df[df["full_path"].apply(os.path.exists)]

df.to_csv("../data/processed_metadata.csv", index=False)

print(df.head())



      lesion_id      image_id   dx dx_type   age     sex localization  \
2   HAM_0002730  ISIC_0026769  bkl   histo  80.0    male        scalp   
3   HAM_0002730  ISIC_0025661  bkl   histo  80.0    male        scalp   
4   HAM_0001466  ISIC_0031633  bkl   histo  75.0    male          ear   
11  HAM_0004234  ISIC_0029396  bkl   histo  85.0  female        chest   
14  HAM_0001949  ISIC_0032417  bkl   histo  70.0    male        trunk   

                                       full_path  
2   ../data/HAM10000_images_all/ISIC_0026769.jpg  
3   ../data/HAM10000_images_all/ISIC_0025661.jpg  
4   ../data/HAM10000_images_all/ISIC_0031633.jpg  
11  ../data/HAM10000_images_all/ISIC_0029396.jpg  
14  ../data/HAM10000_images_all/ISIC_0032417.jpg  


In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
data_gen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_data = data_gen.flow_from_dataframe(
    dataframe=df,
    directory=None, 
    x_col="full_path", 
    y_col="dx",
    target_size=(224, 224),
    batch_size=16,
    subset="training",
    class_mode="categorical"
)


val_data = data_gen.flow_from_dataframe(
    dataframe=df,
    directory=None, 
    x_col="full_path",
    y_col="dx",
    target_size=(224, 224),
    batch_size=16,
    subset="validation",
    class_mode="categorical"
)



Found 4822 validated image filenames belonging to 7 classes.
Found 1205 validated image filenames belonging to 7 classes.


In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["dx"], random_state=42)

data_gen = ImageDataGenerator(rescale=1./255)

train_data = data_gen.flow_from_dataframe(
    dataframe=train_df,
    directory=None,
    x_col="full_path",
    y_col="dx",
    target_size=(224, 224),
    batch_size=16,
    shuffle=True,
    class_mode="categorical"
)

val_data = data_gen.flow_from_dataframe(
    dataframe=val_df,
    directory=None,
    x_col="full_path",
    y_col="dx",
    target_size=(224, 224),
    batch_size=16,
    shuffle=False,
    class_mode="categorical"
)


Found 4821 validated image filenames belonging to 7 classes.
Found 1206 validated image filenames belonging to 7 classes.


In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

classes = np.unique(train_df["dx"])

class_weights = compute_class_weight(
    class_weight="balanced",
    classes=classes,
    y=train_df["dx"]
)

class_indices = train_data.class_indices
index_to_class = {v: k for k, v in class_indices.items()}
class_weights_dict = {i: class_weights[np.where(classes == index_to_class[i])[0][0]] for i in range(len(class_weights))}

print("Class weights:", class_weights_dict)


Class weights: {0: 5.139658848614072, 1: 2.881649731022116, 2: 1.3399110617009449, 3: 12.298469387755102, 4: 1.1346199105671924, 5: 0.21515597804257597, 6: 9.838775510204082}


In [10]:
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout

base_model = EfficientNetB3(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(x)
x = Dense(len(train_data.class_indices), activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=x)

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])


In [None]:
model.fit(
    train_data,
    validation_data=val_data,
    epochs=8,
    class_weight=class_weights_dict
)
model.save("skin_model.h5")
