# Pull in Data

Starting With:
* 26 Black Positive
* 14 White Positive
* 14 Black None
* 11 White None
* 8 Black Other
* 15 White Other


In [1]:
from google.colab import drive
import os
from glob import glob

# Mount Google Drive so the image folders are reachable from the Colab VM.
drive.mount('/content/drive')

# Root folder on Drive that holds one sub-folder per class.
base_dir = "/content/drive/My Drive/images"

# Sub-folder names, one per (skin tone, label) combination.
folders = ["blacknone", "blackother", "blackpositive", "whitenone", "whiteother", "whitepositive"]

# Glob every entry of each class folder once, keyed by folder name
# (dict order follows `folders`, so the folders are scanned in the same order).
paths_by_folder = {
    folder_name: glob(os.path.join(base_dir, folder_name, "*"))
    for folder_name in folders
}

# Keep one module-level list per class under the original variable names.
blacknone = paths_by_folder["blacknone"]
blackother = paths_by_folder["blackother"]
blackpositive = paths_by_folder["blackpositive"]
whitenone = paths_by_folder["whitenone"]
whiteother = paths_by_folder["whiteother"]
whitepositive = paths_by_folder["whitepositive"]

# Sanity check: report how many files were found in each class folder.
for folder_name, folder_paths in paths_by_folder.items():
    print(f"{folder_name}:", len(folder_paths))


Mounted at /content/drive
blacknone: 14
blackother: 8
blackpositive: 26
whitenone: 11
whiteother: 15
whitepositive: 14


# Baseline Classification Model

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

# Define paths
base_dir = "/content/drive/My Drive/images"

# Load only the six real-image class folders. Without an explicit list,
# flow_from_directory treats every sub-folder of base_dir as a class, so the
# "synwhitepositive" folder written later in this notebook would appear as a
# seventh class on a re-run and change the label layout of the baseline.
# The list order fixes the class indices (blacknone=0 ... whitepositive=5),
# which is the same mapping the alphabetical default produced.
class_names = [
    "blacknone", "blackother", "blackpositive",
    "whitenone", "whiteother", "whitepositive",
]

# Set parameters
img_size = (224, 224)
batch_size = 32

# One generator serves both subsets: pixels are rescaled to [0, 1] and
# validation_split reserves 20% of each class for validation.
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2  # 80/20 train/val split
)

# Arguments shared by the training and validation iterators, kept in one
# place so the two cannot drift apart.
common_flow_args = dict(
    directory=base_dir,
    classes=class_names,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
)

# Training subset: shuffled every epoch.
train_data = datagen.flow_from_directory(
    subset='training',
    shuffle=True,
    **common_flow_args
)

# Validation subset: fixed order so per-sample predictions stay aligned
# with the file list.
val_data = datagen.flow_from_directory(
    subset='validation',
    shuffle=False,
    **common_flow_args
)

# Show class indices
print("Class Indices:", train_data.class_indices)




Found 73 images belonging to 6 classes.
Found 15 images belonging to 6 classes.
Class Indices: {'blacknone': 0, 'blackother': 1, 'blackpositive': 2, 'whitenone': 3, 'whiteother': 4, 'whitepositive': 5}


In [3]:
# Size the softmax head from the classes the training iterator actually
# found instead of hard-coding 6, so the model always matches the labels.
num_classes = len(train_data.class_indices)

# Baseline CNN: three Conv/MaxPool stages followed by a small dense head.
# The input shape is declared with an explicit Input layer rather than
# `input_shape=` on the first Conv2D; this appears to be what triggered the
# warning shown under the original cell, and it matches how the second
# classifier in this notebook is built.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(*img_size, 3)),

    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),

    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),

    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')  # one unit per class
])

# categorical_crossentropy pairs with the one-hot labels produced by
# class_mode='categorical' in the data iterators.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [4]:
# Number of full passes over the training iterator.
epochs = 7

# Train the baseline CNN. val_data is scored at the end of every epoch and
# the per-epoch metrics are kept in `history`.
history = model.fit(train_data, epochs=epochs, validation_data=val_data)


  self._warn_if_super_not_called()


Epoch 1/7
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7s/step - accuracy: 0.1302 - loss: 3.0497 - val_accuracy: 0.3333 - val_loss: 1.6150
Epoch 2/7
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 681ms/step - accuracy: 0.3187 - loss: 1.6096 - val_accuracy: 0.3333 - val_loss: 1.5201
Epoch 3/7
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - accuracy: 0.4591 - loss: 1.4526 - val_accuracy: 0.4667 - val_loss: 1.4520
Epoch 4/7
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 726ms/step - accuracy: 0.4307 - loss: 1.4037 - val_accuracy: 0.3333 - val_loss: 1.4322
Epoch 5/7
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 634ms/step - accuracy: 0.3793 - loss: 1.2848 - val_accuracy: 0.2667 - val_loss: 1.3450
Epoch 6/7
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 912ms/step - accuracy: 0.5931 - loss: 1.1328 - val_accuracy: 0.5333 - val_loss: 1.2379
Epoch 7/7
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

For baseline model:
* Accuracy: 0.65
* Loss: 0.95
* Validation Accuracy: 0.47
* Validation Loss: 1.3231

# Generate White Positive (CycleGAN)

In [5]:
%%capture
!pip install tensorflow tensorflow-datasets matplotlib

In [6]:
%%capture
!pip install git+https://github.com/tensorflow/examples.git
!pip install tensorflow matplotlib

In [7]:
from google.colab import drive
from glob import glob
import os

# Make sure Drive is mounted (prints a notice if it already is).
drive.mount('/content/drive')

base_dir = "/content/drive/My Drive/images"

# File paths of the two "positive" class folders, one list per skin tone.
blackpositive_paths, whitepositive_paths = (
    glob(os.path.join(base_dir, positive_folder, "*"))
    for positive_folder in ("blackpositive", "whitepositive")
)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
from PIL import Image
import numpy as np
import tensorflow as tf

IMG_SIZE = 256  # side length, in pixels, of the square images fed to the generator

def load_and_preprocess_image(path):
    """Load one image file as a float32 RGB array scaled to [-1, 1].

    Args:
        path: Filesystem path of an image that PIL can open.

    Returns:
        A float32 numpy array of shape (IMG_SIZE, IMG_SIZE, 3) whose values
        lie in [-1, 1].
    """
    # The context manager closes the underlying file as soon as the pixel
    # data has been copied out; convert() and resize() return new images
    # that no longer depend on the open file.
    with Image.open(path) as source:
        resized = source.convert("RGB").resize((IMG_SIZE, IMG_SIZE))
    pixels = np.array(resized).astype(np.float32)
    return (pixels / 127.5) - 1.0  # map [0, 255] to [-1, 1]

def make_dataset(path_list):
    """Build a shuffled tf.data pipeline that yields one image per batch.

    Every path is loaded eagerly with load_and_preprocess_image, so the
    whole image set is held in memory before the dataset is created.
    """
    preprocessed = []
    for image_path in path_list:
        preprocessed.append(load_and_preprocess_image(image_path))

    dataset = tf.data.Dataset.from_tensor_slices(preprocessed)
    dataset = dataset.shuffle(100)  # shuffle buffer of 100 elements
    return dataset.batch(1)

# One shuffled, batch-size-1 dataset per skin tone, built in the same order
# as before (black first, then white).
black_ds, white_ds = (
    make_dataset(positive_paths)
    for positive_paths in (blackpositive_paths, whitepositive_paths)
)


In [9]:
from tensorflow_examples.models.pix2pix import pix2pix

# Channel count handed to the generator; 3 matches the RGB arrays produced
# by load_and_preprocess_image (which converts every image to "RGB").
OUTPUT_CHANNELS = 3
# Build the U-Net generator from the pix2pix example module with instance
# normalization.
# NOTE(review): no training step for generator_g is visible in this notebook
# before it is used to produce images, so as written it appears to run with
# freshly initialized weights -- confirm whether CycleGAN training (or a
# checkpoint restore) is meant to happen before the images are generated.
generator_g = pix2pix.unet_generator(OUTPUT_CHANNELS, norm_type='instancenorm')


In [10]:
import matplotlib.pyplot as plt

# Pull one batch from the black-positive dataset; make_dataset uses batch(1),
# so this is a single image with a leading batch dimension.
example = next(iter(black_ds))
# Run it through generator_g in inference mode (training=False).
fake_white = generator_g(example, training=False)

def show_images(original, generated):
    """Plot the first image of two batches side by side.

    Args:
        original: Batch whose first element is the input image, in [-1, 1].
        generated: Batch whose first element is the generator output, in [-1, 1].
    """
    plt.figure(figsize=(8, 4))
    panels = (
        ('Black Positive (Input)', original[0]),
        ('Synthetic White (Output)', generated[0]),
    )

    for position, (panel_title, normalized) in enumerate(panels, start=1):
        pixels = (normalized + 1) * 127.5  # map [-1, 1] back to [0, 255]
        plt.subplot(1, 2, position)
        plt.title(panel_title)
        plt.imshow(tf.cast(pixels, tf.uint8))
        plt.axis("off")
    plt.show()

#show_images(example, fake_white)


# Save Images to Drive

In [11]:
import os
from PIL import Image

# Define output directory.
# Note: this folder sits directly under base_dir, next to the real class
# folders, so any loader that treats every sub-folder of base_dir as a class
# will pick it up as well.
output_dir = os.path.join(base_dir, "synwhitepositive")
os.makedirs(output_dir, exist_ok=True)

# How many synthetic images to write; used for the loop bound and the
# shortfall message so the two cannot disagree.
num_synthetic = 12

# take() stops after num_synthetic batches, or earlier if black_ds has fewer,
# so no manual iterator / StopIteration handling is needed.
saved_count = 0
for saved_count, input_image in enumerate(black_ds.take(num_synthetic), start=1):
    fake_white = generator_g(input_image, training=False)

    # Convert from [-1, 1] to [0, 255] and to uint8 for PIL.
    output_img = (fake_white[0].numpy() + 1.0) * 127.5
    output_img = tf.clip_by_value(output_img, 0, 255)
    output_img = tf.cast(output_img, tf.uint8).numpy()

    # Save image using PIL; files are numbered 01, 02, ...
    save_path = os.path.join(output_dir, f"syn_white_{saved_count:02}.jpg")
    Image.fromarray(output_img).save(save_path)
    print(f"Saved: {save_path}")

# Report a shortfall when the source dataset ran out early.
if saved_count < num_synthetic:
    print(f"Not enough images in blackpositive to generate {num_synthetic}.")


Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_01.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_02.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_03.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_04.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_05.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_06.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_07.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_08.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_09.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_10.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_11.jpg
Saved: /content/drive/My Drive/images/synwhitepositive/syn_white_12.jpg


# Classification Model With Synthetic Images

In [12]:
from glob import glob
from PIL import Image
import numpy as np

def load_images_from_folder(folder, label, size=(128, 128)):
    """Load every image file in a folder as a normalized (array, label) pair.

    Args:
        folder: Directory whose direct entries are image files.
        label: Label attached to every image loaded from this folder.
        size: (width, height) each image is resized to. Defaults to the
            128x128 resolution used by the classifier in this notebook.

    Returns:
        A list of (image, label) tuples, where image is a float32 RGB array
        with values in [0, 1]. The list is empty when the folder has no
        files (or does not exist).
    """
    data = []
    for path in glob(os.path.join(folder, "*")):
        # The "*" pattern also matches sub-directories; skip them instead of
        # failing inside Image.open.
        if not os.path.isfile(path):
            continue
        # The context manager closes the file once the pixel data has been
        # copied; convert()/resize() return images independent of it.
        with Image.open(path) as source:
            resized = source.convert("RGB").resize(size)
        pixels = np.array(resized).astype(np.float32) / 255.0  # normalize
        data.append((pixels, label))
    return data

# Three-way label shared by both skin-tone groups.
label_map = {"none": 0, "other": 1, "positive": 2}

# (folder name, integer label) for every real-image folder, in the same
# order as before: all "black*" folders first, then all "white*" folders.
class_folders = [
    (f"{skintone}{subtype}", label_map[subtype])
    for skintone in ("black", "white")
    for subtype in ("none", "other", "positive")
]

# Collect (image, label) pairs from the real-image folders.
all_data = []
for folder_name, label in class_folders:
    class_dir = os.path.join(base_dir, folder_name)
    all_data += load_images_from_folder(class_dir, label)

# Append the generated images, labelled as "positive".
syn_folder = os.path.join(base_dir, "synwhitepositive")
all_data += load_images_from_folder(syn_folder, label_map["positive"])


In [13]:
import random
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Fixed seed so the shuffle, the stratified split and the training-batch
# order are the same on every Restart & Run All.
split_seed = 42

# zip(*all_data) on an empty list fails with an opaque unpacking error;
# fail early with a message that points at the likely cause instead.
if not all_data:
    raise ValueError(
        "all_data is empty; check that the image folders under base_dir contain files."
    )

# Shuffle a copy so that re-running this cell does not keep reordering
# all_data in place.
shuffled_data = list(all_data)
random.Random(split_seed).shuffle(shuffled_data)
X, y = zip(*shuffled_data)
X = np.array(X)
y = np.array(y)

# Train/test split, stratified on the label.
# NOTE(review): all_data mixes the real images with the ones loaded from
# "synwhitepositive", so generated images can end up in X_test. Consider
# splitting the real images first and adding synthetic ones to the training
# portion only, so the test score reflects real data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=split_seed
)

# Convert to TensorFlow datasets; only the training set is shuffled.
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(1000, seed=split_seed)
    .batch(32)
)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(32)


In [14]:
from tensorflow.keras import layers, models

# Two Conv/MaxPool stages with 32 and then 64 filters.
conv_stages = []
for filter_count in (32, 64):
    conv_stages.append(layers.Conv2D(filter_count, 3, activation='relu'))
    conv_stages.append(layers.MaxPooling2D())

# Small CNN over 128x128 RGB inputs with a 3-way softmax head
# (none / other / positive).
model = models.Sequential([
    layers.Input(shape=(128, 128, 3)),
    *conv_stages,
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(3, activation='softmax'),  # 3 classes
])

# Sparse loss because the labels are plain integers, not one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 9 epochs, scoring test_ds after each one.
history = model.fit(train_ds, validation_data=test_ds, epochs=9)


Epoch 1/9
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 0.3906 - loss: 1.1545 - val_accuracy: 0.2500 - val_loss: 2.3849
Epoch 2/9
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.4430 - loss: 1.6433 - val_accuracy: 0.5000 - val_loss: 1.3775
Epoch 3/9
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6055 - loss: 0.8664 - val_accuracy: 0.7000 - val_loss: 0.8043
Epoch 4/9
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.6539 - loss: 0.7373 - val_accuracy: 0.6000 - val_loss: 0.7885
Epoch 5/9
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.7023 - loss: 0.6489 - val_accuracy: 0.6500 - val_loss: 0.7436
Epoch 6/9
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.8094 - loss: 0.4611 - val_accuracy: 0.6000 - val_loss: 0.5816
Epoch 7/9
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [15]:
# Score the trained classifier on test_ds; with the compiled metrics this
# returns the loss followed by the accuracy.
# NOTE(review): test_ds was also passed as validation_data to model.fit, so
# this is the same data that was monitored during training rather than a
# separate hold-out set.
test_loss, test_acc = model.evaluate(test_ds)
print(f"Test accuracy with synthetic boost: {test_acc:.4f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7500 - loss: 0.5688
Test accuracy with synthetic boost: 0.7500


Test Accuracy: 75%