In [None]:
pip install mediapipe


Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Using cached protobuf-4.25.8-cp310-abi3-win_amd64.whl.metadata (541 bytes)
Using cached protobuf-4.25.8-cp310-abi3-win_amd64.whl (413 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 6.32.0
    Uninstalling protobuf-6.32.0:
      Successfully uninstalled protobuf-6.32.0
Successfully installed protobuf-4.25.8
Note: you may need to restart the kernel to use updated packages.


  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
grpcio-status 1.74.0 requires protobuf<7.0.0,>=6.31.1, but you have protobuf 4.25.8 which is incompatible.


In [4]:
import cv2
import mediapipe as mp
import os
import shutil
import numpy as np
import pandas as pd

# Input UTKFace folder
input_dir = "UTKFace"

# Output folders
long_dir = "hair_data/long"
short_dir = "hair_data/short"
os.makedirs(long_dir, exist_ok=True)
os.makedirs(short_dir, exist_ok=True)

# Mediapipe Selfie Segmentation
mp_selfie_segmentation = mp.solutions.selfie_segmentation
segment = mp_selfie_segmentation.SelfieSegmentation(model_selection=1)

# CSV log
results = []

def auto_label_image(image_path):
    img = cv2.imread(image_path)
    if img is None:
        return None

    # Convert BGR → RGB
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Segmentation mask
    results_mp = segment.process(rgb)
    mask = results_mp.segmentation_mask

    # Threshold: hair/background separation
    binary_mask = (mask > 0.5).astype(np.uint8) * 255

    # Count pixels above head (hair region)
    h, w = binary_mask.shape
    top_half = binary_mask[0:h//2, :]
    hair_pixels = cv2.countNonZero(top_half)

    # Decide long vs short
    if hair_pixels > (h * w * 0.05):  # 5% pixels as threshold
        return "long"
    else:
        return "short"

# Process all images
for filename in os.listdir(input_dir):
    if filename.lower().endswith((".jpg", ".jpeg", ".png")):
        img_path = os.path.join(input_dir, filename)
        label = auto_label_image(img_path)

        if label == "long":
            shutil.copy(img_path, os.path.join(long_dir, filename))
        elif label == "short":
            shutil.copy(img_path, os.path.join(short_dir, filename))

        # Save to CSV
        results.append([filename, label])

# Save results in CSV
df = pd.DataFrame(results, columns=["filename", "label"])
df.to_csv("hair_labels.csv", index=False)

print("✅ Dataset split completed. Labels saved in hair_labels.csv")


✅ Dataset split completed. Labels saved in hair_labels.csv


In [5]:
import os
import shutil
import random

# Paths
dataset_dir = "hair_dataset"   # your current dataset with "short" and "long"
output_dir = "hair_dataset_split"

# Train/Val split ratio
split_ratio = 0.8

# Create output structure
for split in ["train", "val"]:
    for label in ["short", "long"]:
        os.makedirs(os.path.join(output_dir, split, label), exist_ok=True)

# Process each class
for label in ["short", "long"]:
    img_dir = os.path.join(dataset_dir, label)
    imgs = os.listdir(img_dir)
    random.shuffle(imgs)

    split_idx = int(len(imgs) * split_ratio)
    train_imgs = imgs[:split_idx]
    val_imgs = imgs[split_idx:]

    # Copy files
    for img in train_imgs:
        shutil.copy(os.path.join(img_dir, img), os.path.join(output_dir, "train", label, img))
    for img in val_imgs:
        shutil.copy(os.path.join(img_dir, img), os.path.join(output_dir, "val", label, img))

print("✅ Dataset split completed. Saved in:", output_dir)


✅ Dataset split completed. Saved in: hair_dataset_split


In [6]:
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [7]:
# Dataset path
dataset_dir = "hair_dataset_split"

In [8]:
# Image settings
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

# Data generators (with augmentation for training)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_datagen = ImageDataGenerator(rescale=1./255)

In [9]:
train_generator = train_datagen.flow_from_directory(
    os.path.join(dataset_dir, "train"),
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary"   # since short vs long = 2 classes
)


Found 6532 images belonging to 2 classes.


In [10]:
val_generator = val_datagen.flow_from_directory(
    os.path.join(dataset_dir, "val"),
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary"
)

Found 1634 images belonging to 2 classes.


In [11]:
# Model
model = Sequential([
    Conv2D(32, (3,3), activation="relu", input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    MaxPooling2D(2,2),

    Conv2D(64, (3,3), activation="relu"),
    MaxPooling2D(2,2),

    Conv2D(128, (3,3), activation="relu"),
    MaxPooling2D(2,2),

    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(1, activation="sigmoid")  # binary output
])

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
# Training
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=15
)

# Save model
model.save("hair_length_model.h5")
print("✅ Training complete. Model saved as hair_length_model.h5")


  self._warn_if_super_not_called()


Epoch 1/15
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 471ms/step - accuracy: 0.9639 - loss: 0.1765 - val_accuracy: 0.9645 - val_loss: 0.1594
Epoch 2/15
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 292ms/step - accuracy: 0.9645 - loss: 0.1740 - val_accuracy: 0.9645 - val_loss: 0.1521
Epoch 3/15
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 297ms/step - accuracy: 0.9645 - loss: 0.1606 - val_accuracy: 0.9645 - val_loss: 0.1576
Epoch 4/15
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 299ms/step - accuracy: 0.9645 - loss: 0.1616 - val_accuracy: 0.9645 - val_loss: 0.1513
Epoch 5/15
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 301ms/step - accuracy: 0.9645 - loss: 0.1620 - val_accuracy: 0.9645 - val_loss: 0.1541
Epoch 6/15
[1m205/205[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 302ms/step - accuracy: 0.9645 - loss: 0.1621 - val_accuracy: 0.9645 - val_loss: 0.1514
Epoch 7/15



✅ Training complete. Model saved as hair_length_model.h5
