# Skin Color Diversity Metrics
## Skin Tone Histogram & Entropy Analysis

This notebook evaluates the diversity of skin tones in the dataset using training-free color space thresholding (YCbCr + HSV).

## 1. Setup and Installation

In [None]:
# Install dependencies (most are preinstalled in Colab; this makes sure OpenCV is available)
!pip install -q opencv-python-headless matplotlib numpy pillow tqdm

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from pathlib import Path
from tqdm import tqdm
import json

# Report the installed OpenCV version in the cell output.
print(f"OpenCV version: {cv2.__version__}")

## 2. Skin Detection Utils
We use a heuristic approach that combines standard YCbCr and HSV skin-color ranges.
It is lightweight and generally effective in uncontrolled settings.

In [None]:
def extract_skin_mask(image_rgb):
    """Return a binary skin mask built from combined YCrCb and HSV thresholds.

    A pixel is kept only when it lies inside BOTH the YCrCb and the HSV skin
    ranges. The combined mask is then cleaned with a morphological open
    followed by a close.

    Args:
        image_rgb: 3-channel image in RGB channel order. The thresholds are
            expressed in uint8 value ranges, so an 8-bit image is expected.

    Returns:
        Single-channel uint8 mask with the same height and width as the
        input: 255 where skin was detected, 0 elsewhere.
    """
    # NOTE: OpenCV returns the channels in (Y, Cr, Cb) order, not (Y, Cb, Cr).
    img_ycrcb = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2YCrCb)
    img_hsv = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2HSV)

    # Standard skin range: Y >= 80, 135 <= Cr <= 180, 85 <= Cb <= 135.
    # The bounds are listed in (Y, Cr, Cb) order to match the conversion above.
    lower_ycrcb = np.array([80, 135, 85], dtype=np.uint8)
    upper_ycrcb = np.array([255, 180, 135], dtype=np.uint8)
    mask_ycrcb = cv2.inRange(img_ycrcb, lower_ycrcb, upper_ycrcb)

    # Broad HSV skin range: H in [0, 25], S in [48, 255], V in [0, 255].
    # For 8-bit images OpenCV stores hue as degrees / 2 (0..179).
    lower_hsv = np.array([0, 48, 0], dtype=np.uint8)
    upper_hsv = np.array([25, 255, 255], dtype=np.uint8)
    mask_hsv = cv2.inRange(img_hsv, lower_hsv, upper_hsv)

    # Require both colour-space tests to agree before calling a pixel skin.
    skin_mask = cv2.bitwise_and(mask_ycrcb, mask_hsv)

    # Open removes isolated speckles; close then fills small holes.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    skin_mask = cv2.morphologyEx(skin_mask, cv2.MORPH_OPEN, kernel, iterations=2)
    skin_mask = cv2.morphologyEx(skin_mask, cv2.MORPH_CLOSE, kernel, iterations=2)

    return skin_mask

def get_skin_pixels(image_path):
    """Read an image from disk and collect the RGB values of its skin pixels.

    Args:
        image_path: Path (or string) of the image file to read.

    Returns:
        An (N, 3) array holding the RGB values of every pixel selected by
        ``extract_skin_mask``, or ``None`` when the image cannot be read or
        any step raises.
    """
    try:
        bgr = cv2.imread(str(image_path))
        if bgr is not None:
            # OpenCV loads BGR; the skin detector works on RGB.
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
            # Boolean indexing keeps only the pixels flagged by the mask.
            return rgb[extract_skin_mask(rgb) > 0]
        return None
    except Exception as e:
        print(f"Error: {e}")
        return None

## 3. Metrics Calculation
We calculate the **entropy** of the skin color distribution over a 3D RGB histogram, and the **Individual Typology Angle (ITA)** of each skin pixel in CIELAB space.

In [None]:
def compute_histogram_entropy(pixels, bins=32):
    """Compute the Shannon entropy (in nats) of a 3D RGB colour histogram.

    The pixels are binned into a ``bins x bins x bins`` RGB histogram, the
    bin counts are normalised into probabilities that sum to 1, and the
    entropy ``-sum(p * log(p))`` is taken over the occupied bins. RGB is
    used as a simple proxy for raw colour diversity.

    Args:
        pixels: (N, 3) array of RGB values in the 0-255 range.
        bins: Number of histogram bins per colour channel.

    Returns:
        The entropy as a float, between 0.0 (all pixels in one bin) and
        ``3 * log(bins)`` (uniform over all bins). Returns 0.0 when
        ``pixels`` is empty.
    """
    if len(pixels) == 0:
        return 0.0

    # Use raw counts: ``density=True`` would divide by the bin volume and
    # yield a probability *density*, which does not sum to 1.
    # The upper edge is 256 so every bin covers the same number of integers.
    counts, _ = np.histogramdd(pixels, bins=bins, range=((0, 256),) * 3)

    total = counts.sum()
    if total == 0:
        # Every sample fell outside the histogram range.
        return 0.0

    # Empty bins contribute nothing to the entropy and would produce log(0).
    prob = counts[counts > 0] / total

    return float(-np.sum(prob * np.log(prob)))

def compute_ita(pixels):
    """Return the Individual Typology Angle (ITA), in degrees, of each pixel.

    ITA is a standard skin-tone measure defined in Lab colour space as
    ``arctan((L - 50) / b) * 180 / pi``. It is evaluated here with
    ``arctan2(L - 50, b)``, which avoids dividing by zero when ``b`` is 0.

    Args:
        pixels: (N, 3) array of RGB values on a 0-255 scale.

    Returns:
        A 1-D array with one ITA value per pixel, or an empty list when
        ``pixels`` is empty.
    """
    if len(pixels) == 0:
        return []

    # cv2.cvtColor expects an image-shaped array, so present the pixels as an
    # N x 1 image. Float32 input scaled to [0, 1] makes OpenCV return L in
    # 0..100 with a and b roughly in -128..127.
    as_image = pixels.reshape(-1, 1, 3).astype(np.float32) / 255.0
    lab = cv2.cvtColor(as_image, cv2.COLOR_RGB2Lab).reshape(-1, 3)

    lightness = lab[:, 0]
    yellow_blue = lab[:, 2]

    return np.arctan2(lightness - 50, yellow_blue) * (180 / np.pi)

## 4. Evaluation Loop

In [None]:
# Dataset locations: read them from the JSON config when it is present,
# otherwise fall back to the default /content/datasets layout.
config_path = Path('/content/datasets/dataset_config.json')
config = {}
if not config_path.exists():
    config.update({
        'vitonhd': '/content/datasets/vitonhd',
        'deepfashion1': '/content/datasets/deepfashion1',
        'dresscode': '/content/datasets/dresscode',
    })
else:
    with config_path.open() as f:
        config = json.load(f)

def get_image_paths(directory, max_images=None):
    """Collect image files from a dataset directory in a deterministic order.

    The subfolders ``image``, ``images`` and ``img`` are tried first, so that
    sibling folders (for example masks) are not picked up; the dataset root
    itself is the last resort. The first location that contains any image
    wins, and it is searched recursively.

    Args:
        directory: Dataset root, as a string or ``Path``.
        max_images: Maximum number of paths to return, or ``None`` for all.

    Returns:
        A sorted list of ``Path`` objects for ``.jpg``, ``.jpeg`` and ``.png``
        files (extension matched case-insensitively), truncated to
        ``max_images`` entries. Empty when nothing is found.
    """
    directory = Path(directory)
    image_suffixes = {'.jpg', '.jpeg', '.png'}

    # '' resolves to the dataset root and acts as the fallback.
    for sub in ('image', 'images', 'img', ''):
        folder = directory / sub
        if not folder.is_dir():
            continue

        # Sort so that truncation is reproducible regardless of the order in
        # which the filesystem happens to list entries.
        files = sorted(
            f for f in folder.rglob('*')
            if f.suffix.lower() in image_suffixes and f.is_file()
        )
        if files:
            # ``is None`` rather than truthiness, so max_images=0 means zero.
            return files if max_images is None else files[:max_images]

    return []

def evaluate_skin_diversity(dataset_name, dataset_path, max_images=500):
    """Measure skin-tone diversity over a random sample of a dataset's images.

    For each sampled image the skin pixels are extracted; images with 100 or
    fewer skin pixels are ignored, and at most 1000 randomly chosen skin
    pixels are kept per image to bound memory. The pooled pixels feed a
    global colour-histogram entropy, and per-pixel ITA values are summarised
    by their mean and standard deviation.

    ITA categories for reference:
        > 55: Very Light, 41-55: Light, 28-41: Intermediate,
        10-28: Tan, < 10: Brown/Dark.

    Args:
        dataset_name: Label used in the printed output and in the results.
        dataset_path: Root directory of the dataset.
        max_images: Maximum number of images to evaluate, or ``None`` to
            evaluate every image found.

    Returns:
        ``None`` when no images are found or no skin is detected. Otherwise
        a tuple ``(results, total_pixels, ita_values)`` where ``results`` is
        a JSON-serialisable summary dict, ``total_pixels`` is the stacked
        array of sampled skin pixels and ``ita_values`` is a list with one
        ITA value per sampled pixel.
    """
    import random

    print(f"\n{'='*60}")
    print(f"Evaluating: {dataset_name}")

    # Fetch every path and sample here: asking get_image_paths to cap the
    # list would always yield the same leading files and make the random
    # sample below a no-op.
    paths = get_image_paths(dataset_path)
    if not paths:
        print("No images found.")
        return None

    if max_images is not None and len(paths) > max_images:
        paths = random.sample(paths, max_images)

    all_skin_pixels = []
    ita_chunks = []
    valid_images = 0

    for p in tqdm(paths):
        pixels = get_skin_pixels(p)
        # Skip unreadable images and those with too little detected skin.
        if pixels is None or len(pixels) <= 100:
            continue

        # Subsample pixels for memory efficiency.
        if len(pixels) > 1000:
            pixels = pixels[np.random.choice(len(pixels), 1000, replace=False)]

        all_skin_pixels.append(pixels)
        ita_chunks.append(compute_ita(pixels))
        valid_images += 1

    if not all_skin_pixels:
        print("No skin detected in dataset.")
        return None

    # Pool the per-image samples into single arrays.
    total_pixels = np.vstack(all_skin_pixels)
    ita_array = np.concatenate(ita_chunks)

    # 1. Global colour entropy.
    entropy = compute_histogram_entropy(total_pixels)

    # 2. ITA statistics (skin tone distribution).
    ita_mean = np.mean(ita_array, dtype=np.float64)
    ita_std = np.std(ita_array, dtype=np.float64)

    results = {
        'dataset': dataset_name,
        'num_images': valid_images,
        'skin_color_entropy': float(entropy),
        'ita_mean': float(ita_mean),
        'ita_std': float(ita_std)
    }

    print(f"Results for {dataset_name}:")
    print(f"  - Skin Entropy: {entropy:.4f}")
    print(f"  - ITA Mean (Tone Angle): {ita_mean:.2f} (Std: {ita_std:.2f})")

    # Hand the ITA values back as a plain list, as callers expect.
    return results, total_pixels, ita_array.tolist()

In [None]:
# Per-dataset summaries and raw ITA values, keyed by the config name.
all_results = {}
all_itas = {}

for name, path in config.items():
    # Only the three supported datasets are evaluated.
    if name not in ('vitonhd', 'deepfashion1', 'dresscode'):
        continue

    res = evaluate_skin_diversity(name.upper(), path, max_images=300)
    if not res:
        # Nothing usable was found for this dataset.
        continue

    all_results[name] = res[0]
    all_itas[name] = res[2]

## 5. Visualization

In [None]:
def plot_ita_distribution(ita_dict):
    """Plot a kernel-density estimate of the ITA values of each dataset.

    One filled density curve is drawn per dataset over the ITA range
    [-100, 100], with dashed reference lines at the "Very Light" (55) and
    "Dark" (10) category boundaries.

    Args:
        ita_dict: Mapping from dataset name to a sequence of ITA values.
            Datasets with fewer than two values, or whose values are all
            identical, are skipped because a Gaussian KDE cannot be fitted
            to them.
    """
    # Imported here because scipy is not loaded by the notebook's import cell.
    import scipy.stats as stats

    plt.figure(figsize=(10, 6))

    colors = {'vitonhd': 'blue', 'deepfashion1': 'red', 'dresscode': 'green'}
    xs = np.linspace(-100, 100, 200)

    for name, values in ita_dict.items():
        values = np.asarray(values, dtype=float)

        # gaussian_kde needs at least two samples with non-zero spread;
        # otherwise its covariance matrix is singular and it raises.
        if values.size < 2 or np.ptp(values) == 0:
            continue

        # Evaluate the KDE once and reuse it for both the line and the fill.
        ys = stats.gaussian_kde(values)(xs)
        color = colors.get(name, 'black')
        plt.plot(xs, ys, label=f"{name.upper()} (std={np.std(values):.1f})", color=color)
        plt.fill_between(xs, ys, alpha=0.2, color=color)

    plt.title("Skin Tone Distribution (ITA Values)")
    plt.xlabel("ITA Value (Lower=Darker, Higher=Lighter)")
    plt.ylabel("Density")
    plt.axvline(x=55, color='gray', linestyle='--', alpha=0.5, label='Very Light boundary')
    plt.axvline(x=10, color='gray', linestyle='--', alpha=0.5, label='Dark boundary')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

# Draw the plot only when at least one dataset produced ITA values.
if all_itas:
    plot_ita_distribution(all_itas)

In [None]:
# Visualizing Detection Quality
def show_skin_detection(dataset_path, num_samples=3):
    """Display original / mask / masked-image triptychs for random samples.

    Up to ``num_samples`` images are drawn at random from the first 100
    paths returned by ``get_image_paths``. Images that cannot be read are
    reported and skipped. Each remaining image gets one row of three panels:
    the original, the binary skin mask, and the original with non-skin
    pixels blacked out.

    Args:
        dataset_path: Root directory of the dataset.
        num_samples: Maximum number of images to show.
    """
    import random

    paths = get_image_paths(dataset_path, 100)
    if not paths:
        return

    samples = random.sample(paths, min(len(paths), num_samples))

    # Load everything first so the figure is sized for the images that
    # actually decoded; cv2.imread returns None instead of raising.
    images = []
    for p in samples:
        img_bgr = cv2.imread(str(p))
        if img_bgr is None:
            print(f"Skipping unreadable image: {p}")
            continue
        images.append(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

    if not images:
        return

    rows = len(images)
    plt.figure(figsize=(15, 5 * rows))

    for i, img in enumerate(images):
        mask = extract_skin_mask(img)

        # Keep only the pixels selected by the mask.
        masked = cv2.bitwise_and(img, img, mask=mask)

        panels = (
            (img, "Original", None),
            (mask, "Skin Mask", 'gray'),
            (masked, "Detected Skin", None),
        )
        for col, (panel, title, cmap) in enumerate(panels, start=1):
            plt.subplot(rows, 3, i * 3 + col)
            plt.imshow(panel, cmap=cmap)
            plt.title(title)
            plt.axis('off')

    plt.tight_layout()
    plt.show()

# Preview the detection quality on the first dataset listed in the config.
if config:
    first_name = next(iter(config))
    print(f"Visualizing detection for {first_name}...")
    show_skin_detection(config[first_name])

In [None]:
# Save Results
out_path = Path('/content/datasets/skin_diversity_results.json')
# Create the output directory when it is missing, so the write does not fail
# in a session where the dataset folder was never set up.
out_path.parent.mkdir(parents=True, exist_ok=True)
with open(out_path, 'w') as f:
    json.dump(all_results, f, indent=2)
print(f"Saved to {out_path}")