### Data Preprocessing

In [1]:
import numpy as np
import pandas as pd
import os
import PIL
import PIL.Image
import tensorflow as tf

ROOT = "./face_skin_dataset/"
BATCH_SIZE = 8
IMG_SIZE = (512, 512)
AUTOTUNE = tf.data.AUTOTUNE # automatically tunes CPU usage

train_annotations = pd.read_csv(os.path.join(ROOT, "train", "_annotations.csv"))
val_annotations = pd.read_csv(os.path.join(ROOT, "valid", "_annotations.csv"))
test_annotations = pd.read_csv(os.path.join(ROOT, "test",  "_annotations.csv"))

# create path/to/img lists
def make_paths(df, split):
    return [os.path.join(ROOT, split, fname) for fname in df["filename"]]

train_paths = make_paths(train_annotations, "train")
val_paths = make_paths(val_annotations,   "valid")
test_paths = make_paths(test_annotations,  "test")

# creating labels lists
class_names = sorted(train_annotations["class"].unique())
num_classes = len(class_names)
name2id = {name: i for i, name in enumerate(class_names)}

train_labels = train_annotations["class"].map(name2id).astype("int32")
val_labels   = val_annotations["class"].map(name2id).astype("int32")
test_labels  = test_annotations["class"].map(name2id).astype("int32")

# create a tensorFlow dataset with ( path/to/img, label) pairs
train_ds = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
val_ds = tf.data.Dataset.from_tensor_slices((val_paths, val_labels))
test_ds = tf.data.Dataset.from_tensor_slices((test_paths, test_labels))

2025-11-12 16:38:56.841263: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-12 16:38:56.881822: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-12 16:38:57.680662: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-12 16:38:58.157298: E external/local_xla/xla/stream_executor/cuda/cuda_p

In [2]:
def load_image(path, label):
    image = tf.io.read_file(path) # read image from disk
    image = tf.image.decode_jpeg(image, channels=3) # decode jpeg image
    image = tf.image.resize(image, IMG_SIZE) # ensure images 512 x 512
    image = tf.image.convert_image_dtype(image, tf.float32) # normalize image pixel scale from [0, 255] to [0, 1]
    return image, label

# apply load_image function to each image in dataset using .map function
# shuffle images and batch in sets of 32
def pipeline(paths, labels, training):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if training:
        ds = ds.shuffle(buffer_size=min(len(paths), 1000), reshuffle_each_iteration=True)
    ds = ds.map(load_image, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(BATCH_SIZE)
    ds = ds.prefetch(AUTOTUNE)
    return ds

train_ds = pipeline(train_paths, train_labels, True)
val_ds = pipeline(val_paths, val_labels, False)
test_ds = pipeline(test_paths, test_labels, False)

### Model Architecture

In [3]:
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

model = models.Sequential(
    [
        # convolution 1
        layers.Conv2D(filters=64, kernel_size=7, strides=2, padding="same", input_shape=(512,512,3)),
        layers.BatchNormalization(),
        layers.Activation('relu'),

        # pool 1
        layers.MaxPooling2D(pool_size=3, strides=2),

        # convolution 2
        layers.Conv2D(filters=128, kernel_size=3, padding="same", use_bias=False),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        
        # convolution 3
        layers.Conv2D(filters=128, kernel_size=3, padding="same", use_bias=False),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        
        # pool 2
        layers.MaxPooling2D(pool_size=3, strides=2),
        
        # convolution 3     
        layers.Conv2D(filters=256, kernel_size=3, padding="same", use_bias=False),
        layers.BatchNormalization(),
        layers.Activation('relu'),

        # convolution 4
        layers.Conv2D(filters=256, kernel_size=3, padding="same", use_bias=False),
        layers.BatchNormalization(),
        layers.Activation('relu'),

        # pool 3        
        layers.MaxPooling2D(pool_size=3, strides=2),

        # fully connected layers
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax')
    ]
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [4]:
model.summary()

In [5]:
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

### Model Training

In [None]:
history = model.fit(train_ds, validation_data=val_ds, epochs=10)

Epoch 1/10
[1m  49/1787[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:06:49[0m 2s/step - accuracy: 0.3450 - loss: 2.2997