In [2]:
import numpy as np
import pandas as pd
import os
import tensorflow as tf

from tensorflow.keras import layers, models
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
)

In [3]:
def build_advanced_nn(input_shape):
    model = models.Sequential()
    model.add(layers.Dense(128, activation="relu", input_shape=input_shape))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.4))

    model.add(layers.Dense(64, activation="relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.4))

    model.add(layers.Dense(32, activation="relu"))
    model.add(layers.BatchNormalization())
    model.add(layers.Dropout(0.4))

    model.add(layers.Dense(1, activation="sigmoid"))

    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model


In [4]:
train_metadata_path = (
    "E:/W/ML NUST/(T5) CNN, RNN, LSTM/isic-2024-data/train-metadata.csv"
)
test_metadata_path = "E:/W/ML NUST/(T5) CNN, RNN, LSTM/isic-2024-data/test-metadata.csv"

train_metadata = pd.read_csv(train_metadata_path)
test_metadata = pd.read_csv(test_metadata_path)

train_metadata.head()

  train_metadata = pd.read_csv(train_metadata_path)


Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,lesion_id,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,,Benign,Benign,,,,,,,97.517282
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.1,TBP tile: close-up,3D: white,31.71257,...,IL_6727506,Benign,Benign,,,,,,,3.141455
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.4,TBP tile: close-up,3D: XP,22.57583,...,,Benign,Benign,,,,,,,99.80404
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,,Benign,Benign,,,,,,,99.989998
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.72552,...,,Benign,Benign,,,,,,,70.44251


In [5]:
# Checking for cols. with missing values.
missing_values = train_metadata.isnull().sum()
print("Missing Values=\n", missing_values)

Missing Values=
 isic_id                              0
target                               0
patient_id                           0
age_approx                        2798
sex                              11517
anatom_site_general               5756
clin_size_long_diam_mm               0
image_type                           0
tbp_tile_type                        0
tbp_lv_A                             0
tbp_lv_Aext                          0
tbp_lv_B                             0
tbp_lv_Bext                          0
tbp_lv_C                             0
tbp_lv_Cext                          0
tbp_lv_H                             0
tbp_lv_Hext                          0
tbp_lv_L                             0
tbp_lv_Lext                          0
tbp_lv_areaMM2                       0
tbp_lv_area_perim_ratio              0
tbp_lv_color_std_mean                0
tbp_lv_deltaA                        0
tbp_lv_deltaB                        0
tbp_lv_deltaL                        0
tbp_lv_d

In [6]:
columns_to_remove = [
    "lesion_id",
    "iddx_2",
    "iddx_3",
    "iddx_4",
    "iddx_5",
    "mel_mitotic_index",
    "mel_thick_mm",
]
train_metadata_cleaned = train_metadata.drop(columns=columns_to_remove, errors="ignore")
train_metadata_cleaned_no_nulls = train_metadata_cleaned.dropna()

In [7]:
train_metadata_cleaned_no_nulls["target"].value_counts()

target
0    381533
1       381
Name: count, dtype: int64

In [8]:
majority = train_metadata_cleaned_no_nulls[
    train_metadata_cleaned_no_nulls["target"] == 0
]
minority = train_metadata_cleaned_no_nulls[
    train_metadata_cleaned_no_nulls["target"] == 1
]
majority_class_downsample = resample(
    majority, replace=False, n_samples=len(minority), random_state=42
)
train_metadata_balanced = pd.concat([majority_class_downsample, minority])


print("Shpareee after balancing classes: ", train_metadata_balanced.shape)
print(
    "Class Dist. after balancing: \n", train_metadata_balanced["target"].value_counts()
)

Shpareee after balancing classes:  (762, 48)
Class Dist. after balancing: 
 target
0    381
1    381
Name: count, dtype: int64


In [9]:
train_metadata_balanced.head()

Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,tbp_lv_symm_2axis,tbp_lv_symm_2axis_angle,tbp_lv_x,tbp_lv_y,tbp_lv_z,attribution,copyright_license,iddx_full,iddx_1,tbp_lv_dnn_lesion_confidence
44730,ISIC_1176500,0,IP_4013104,50.0,female,posterior torso,3.35,TBP tile: close-up,3D: white,21.195278,...,0.726667,60,-103.079308,1195.573975,227.486694,Memorial Sloan Kettering Cancer Center,CC-BY,Benign,Benign,99.990058
362429,ISIC_9047349,0,IP_9057861,80.0,male,lower extremity,2.82,TBP tile: close-up,3D: white,17.32398,...,0.460432,110,-170.181,764.25,29.22083,Memorial Sloan Kettering Cancer Center,CC-BY,Benign,Benign,99.99927
17389,ISIC_0500366,0,IP_6894380,45.0,female,posterior torso,4.81,TBP tile: close-up,3D: white,19.0215,...,0.207865,15,66.21948,1457.846,78.27917,Memorial Sloan Kettering Cancer Center,CC-BY,Benign,Benign,99.99999
367815,ISIC_9176171,0,IP_6422845,70.0,male,posterior torso,4.19,TBP tile: close-up,3D: white,19.50162,...,0.334615,55,-39.38957,1057.866,169.1728,Memorial Sloan Kettering Cancer Center,CC-BY,Benign,Benign,99.99703
192982,ISIC_4857877,0,IP_7331742,65.0,male,lower extremity,2.52,TBP tile: close-up,3D: XP,21.71443,...,0.185185,55,-150.6311,749.153,-3.548462,"Department of Dermatology, Hospital Clínic de ...",CC-BY-NC,Benign,Benign,99.99995


In [10]:
train_metadata_balanced["sex"] = train_metadata_balanced["sex"].map(
    {"male": 1, "female": 0}
)

anatom_site_mapping = {
    "posterior torso": 1,
    "lower extremity": 2,
    "anterior torso": 3,
    "upper extremity": 4,
    "head/neck": 5,
}

tbp_lv_location_mapping = {
    "Torso Front Top Half": 1,
    "Torso Back Top Third": 2,
    "Head & Neck": 3,
    "Torso Back Middle Third": 4,
    "Left Leg - Lower": 5,
    "Right Leg - Lower": 6,
    "Torso Front Bottom Half": 7,
    "Left Arm - Upper": 8,
    "Left Leg - Upper": 9,
    "Right Arm - Upper": 10,
    "Right Leg - Upper": 11,
    "Left Arm - Lower": 12,
    "Right Arm - Lower": 13,
    "Torso Back Bottom Third": 14,
    "Left Leg": 15,
    "Right Leg": 16,
    "Left Arm": 17,
    "Right Arm": 18,
}


tbp_lv_location_simple_mapping = {
    "Torso Back": 1,
    "Torso Front": 2,
    "Left Leg": 3,
    "Head & Neck": 4,
    "Right Leg": 5,
    "Left Arm": 6,
    "Right Arm": 7,
}


train_metadata_balanced["anatom_site_general"] = train_metadata_balanced[
    "anatom_site_general"
].apply(lambda x: anatom_site_mapping.get(x, 0))

train_metadata_balanced["tbp_lv_location"] = train_metadata_balanced[
    "tbp_lv_location"
].apply(lambda x: tbp_lv_location_mapping.get(x, 0))

train_metadata_balanced["tbp_lv_location_simple"] = train_metadata_balanced[
    "tbp_lv_location_simple"
].apply(lambda x: tbp_lv_location_simple_mapping.get(x, 0))

In [11]:
X = train_metadata_balanced.drop(
    columns=[
        "isic_id",
        "target",
        "patient_id",
        "image_type",
        "tbp_tile_type",
        "attribution",
        "copyright_license",
        "iddx_full",
        "iddx_1",
        "tbp_lv_dnn_lesion_confidence",
    ]
)
y = train_metadata_balanced["target"]

In [12]:
scalar = StandardScaler()
X_scaled = scalar.fit_transform(X)

In [13]:
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [14]:
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .batch(64)
    .shuffle(buffer_size=1024)
)
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(64)

In [15]:
print(X_train.shape)

(609, 38)


In [16]:
input_shape = (38,)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

model = build_advanced_nn(input_shape)
history = model.fit(
    train_dataset, 
    validation_data=val_dataset, 
    epochs=50, 
    callbacks=[early_stopping, reduce_lr]
)

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 36ms/step - accuracy: 0.5320 - loss: 0.9591 - val_accuracy: 0.6667 - val_loss: 0.6314 - learning_rate: 0.0010
Epoch 2/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6025 - loss: 0.7405 - val_accuracy: 0.7451 - val_loss: 0.5902 - learning_rate: 0.0010
Epoch 3/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6734 - loss: 0.6343 - val_accuracy: 0.7516 - val_loss: 0.5609 - learning_rate: 0.0010
Epoch 4/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6636 - loss: 0.6231 - val_accuracy: 0.7582 - val_loss: 0.5396 - learning_rate: 0.0010
Epoch 5/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7382 - loss: 0.6083 - val_accuracy: 0.7843 - val_loss: 0.5202 - learning_rate: 0.0010
Epoch 6/50
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accu

In [17]:
val_loss, val_accuracy = model.evaluate(val_dataset)
print(f"Validation Accuracy: {val_accuracy:.4f}")

[1m1/3[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 25ms/step - accuracy: 0.8281 - loss: 0.3755

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8063 - loss: 0.4330 
Validation Accuracy: 0.8039


In [18]:
test_metadata_cleaned = test_metadata.drop(columns=columns_to_remove, errors="ignore")
test_metadata_cleaned["sex"] = test_metadata_cleaned["sex"].map(
    {"male": 1, "female": 0}
)
test_metadata_cleaned["anatom_site_general"] = test_metadata_cleaned[
    "anatom_site_general"
].apply(lambda x: anatom_site_mapping.get(x, 0))
test_metadata_cleaned["tbp_lv_location"] = test_metadata_cleaned[
    "tbp_lv_location"
].apply(lambda x: tbp_lv_location_mapping.get(x, 0))
test_metadata_cleaned["tbp_lv_location_simple"] = test_metadata_cleaned[
    "tbp_lv_location_simple"
].apply(lambda x: tbp_lv_location_simple_mapping.get(x, 0))

In [19]:
X_test = test_metadata_cleaned.drop(
    columns=[
        "isic_id",
        "patient_id",
        "image_type",
        "tbp_tile_type",
        "attribution",
        "copyright_license",
    ]
)
X_test_scaled = scalar.transform(X_test)

In [20]:
test_predictions = model.predict(X_test_scaled)
test_predicted_labels = (test_predictions > 0.5).astype(int).flatten()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step


In [21]:
submission = pd.DataFrame(
    {"isic_id": test_metadata["isic_id"], "target": test_predicted_labels}
)

submission.to_csv(
    "C:/Users/jahme/Desktop/Prac/Kaggle/ISIC_Skin_Cancer/output/submission.csv",
    index=False,
)