# Background Diversity Metrics
## Background Complexity & Uniformity Analysis

This notebook measures the diversity and complexity of image backgrounds using computationally efficient border analysis (assuming the subject is centered).

## 1. Setup and Installation

In [None]:
!pip install -q opencv-python-headless numpy matplotlib tqdm

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
import json

print(f"OpenCV: {cv2.__version__}")

## 2. Background Extraction (Heuristic)
For fashion datasets, the subject is usually centered. We analyze the **border regions** (margins) to characterize the background without needing heavy segmentation models.

In [None]:
def get_border_region(img, margin_percent=0.1):
    """Collect the pixels lying in the outer margin of an image.

    The margin is ``int(h * margin_percent)`` rows at the top and bottom and
    ``int(w * margin_percent)`` columns at the left and right, clamped to the
    image size. Corner pixels are counted once: they belong to the top and
    bottom strips, and the side strips skip those rows.

    Args:
        img: Image array of shape (H, W) or (H, W, C).
        margin_percent: Margin thickness as a fraction of each dimension.

    Returns:
        For an (H, W, C) image, an (N, C) array of border pixels; otherwise a
        flat 1-D array. Pixels are ordered top, bottom, left, right. The
        result is empty when the margin rounds down to zero pixels.
    """
    h, w = img.shape[:2]
    margin_h = max(0, min(int(h * margin_percent), h))
    margin_w = max(0, min(int(w * margin_percent), w))

    # Explicit end indices: a negative-index slice such as ``img[-margin_h:]``
    # would select the *whole* image when the margin is 0.
    top = img[:margin_h, :]
    bottom = img[h - margin_h:, :]
    # Side strips exclude the rows already covered by the top/bottom strips.
    left = img[margin_h:h - margin_h, :margin_w]
    right = img[margin_h:h - margin_h, w - margin_w:]

    strips = (top, bottom, left, right)
    if img.ndim == 3:
        # Use the real channel count so 4-channel (e.g. BGRA) images work too.
        channels = img.shape[2]
        return np.vstack([s.reshape(-1, channels) for s in strips])
    return np.concatenate([s.ravel() for s in strips])

## 3. Metrics
- **Background Entropy**: Color diversity of the background.
- **Edge Density**: Structural complexity (cluttered vs plain).
- **Uniformity Score**: Fraction of images whose border pixels have a low mean per-channel standard deviation, i.e. an effectively solid-color background.

In [None]:
def compute_entropy(pixels, bins=32):
    """Return the Shannon entropy (in nats) of the joint colour histogram.

    Each channel is quantised into ``bins`` equal-width bins over [0, 256) and
    the entropy of the resulting joint distribution is computed. A single
    solid colour gives 0.0; the maximum is ``log(number of occupied bins)``.

    Args:
        pixels: Array of shape (N, C) with one row per pixel, or a flat (N,)
            array of single-channel values.
        bins: Number of histogram bins per channel.

    Returns:
        The entropy as a float; 0.0 when there are no pixels or none fall
        inside the histogram range.
    """
    pixels = np.asarray(pixels)
    if pixels.size == 0:
        return 0.0
    if pixels.ndim == 1:
        # Treat a flat array as N single-channel samples.
        pixels = pixels.reshape(-1, 1)

    value_range = ((0, 256),) * pixels.shape[1]
    # Raw counts, not ``density=True``: a density is divided by the bin volume
    # and therefore does not sum to 1, which makes the entropy meaningless.
    counts, _ = np.histogramdd(pixels, bins=bins, range=value_range)
    total = counts.sum()
    if total == 0:
        return 0.0

    prob = counts[counts > 0] / total
    # Written as sum(p * log(1/p)) so a one-bin histogram yields +0.0, not -0.0.
    return float(np.sum(prob * np.log(1.0 / prob)))

def compute_edge_density(img_gray, margin_percent=0.1):
    """Return the fraction of border pixels that lie on a Canny edge.

    Edges are detected on the full image with Canny thresholds 50 and 150,
    then only the border region (see ``get_border_region``) is inspected.

    Args:
        img_gray: Single-channel image passed to ``cv2.Canny``.
        margin_percent: Border thickness as a fraction of each dimension.

    Returns:
        Edge pixels divided by border pixels, as a float; 0.0 when the border
        region contains no pixels.
    """
    edges = cv2.Canny(img_gray, 50, 150)
    border_edges = get_border_region(edges, margin_percent)

    # An empty border would otherwise divide by zero and yield NaN.
    if border_edges.size == 0:
        return 0.0

    return float(np.count_nonzero(border_edges) / border_edges.size)

def is_uniform(pixels, tolerance=10):
    """Tell whether the pixels form an effectively solid-colour background.

    The standard deviation is taken along axis 0 (per channel for an (N, C)
    array) and averaged; the background counts as uniform when that average
    is strictly below ``tolerance``.
    """
    return np.std(pixels, axis=0).mean() < tolerance

## 4. Evaluation Loop

In [None]:
# Built-in dataset roots, used when no config file is present.
config = dict(
    vitonhd='/content/datasets/vitonhd',
    deepfashion1='/content/datasets/deepfashion1',
    dresscode='/content/datasets/dresscode',
)

# A JSON config on disk, when it exists, replaces the defaults entirely.
config_path = Path('/content/datasets/dataset_config.json')
if config_path.exists():
    with config_path.open() as f:
        config = json.load(f)

def evaluate_background(dataset_name, dataset_path, max_images=300, seed=None):
    """Compute background metrics over a (sampled) set of dataset images.

    Every ``.jpg`` / ``.png`` file under ``dataset_path`` is collected
    recursively. If more than ``max_images`` are found, a random subset of
    that size is evaluated. For each readable image the border-region colour
    entropy, uniformity flag and edge density are computed.

    Args:
        dataset_name: Label used in printed output and in the result dict.
        dataset_path: Root directory to search for images.
        max_images: Maximum number of images to evaluate.
        seed: Optional seed for the sampling RNG; ``None`` (the default)
            gives a different sample on every call.

    Returns:
        A dict with keys ``dataset``, ``avg_bg_entropy``, ``avg_edge_density``
        and ``percent_uniform_bg`` (a fraction in [0, 1]), or ``None`` when no
        images are found or none could be processed.
    """
    import random  # kept local so this cell does not depend on the import cell

    print(f"\nEvaluating: {dataset_name}")
    root = Path(dataset_path)
    # Sorted so that a fixed seed selects the same files on every run
    # (the iteration order of rglob is not guaranteed).
    search = sorted(root.rglob('*.jpg')) + sorted(root.rglob('*.png'))

    if not search:
        return None

    if len(search) > max_images:
        search = random.Random(seed).sample(search, max_images)

    entropies = []
    edge_densities = []
    uniform_count = 0
    skipped = 0
    first_error = None

    for p in tqdm(search):
        try:
            img = cv2.imread(str(p))
            if img is None:
                # imread signals an unreadable file by returning None.
                skipped += 1
                continue

            # Color analysis
            img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            border_pixels = get_border_region(img_rgb)
            entropy = compute_entropy(border_pixels)
            uniform = bool(is_uniform(border_pixels))

            # Edge analysis
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            edge_density = compute_edge_density(img_gray)
        except Exception as e:
            # Best effort: one bad image must not abort the run, but the
            # failure is counted and reported instead of silently dropped.
            skipped += 1
            if first_error is None:
                first_error = f"{p}: {e}"
            continue

        # Record only after every metric succeeded so the lists stay in step.
        entropies.append(entropy)
        edge_densities.append(edge_density)
        if uniform:
            uniform_count += 1

    if skipped:
        print(f"  Skipped {skipped} of {len(search)} images")
        if first_error is not None:
            print(f"  First error: {first_error}")

    processed = len(entropies)
    if processed == 0:
        return None

    results = {
        'dataset': dataset_name,
        'avg_bg_entropy': float(np.mean(entropies)),
        'avg_edge_density': float(np.mean(edge_densities)),
        # Normalise by images actually processed, not by files sampled.
        'percent_uniform_bg': float(uniform_count / processed),
    }

    print(f"Results for {dataset_name}:")
    print(f"  - Avg Entropy: {results['avg_bg_entropy']:.4f}")
    print(f"  - Avg Edge Density: {results['avg_edge_density']:.4f}")
    print(f"  - Uniform Backgrounds: {results['percent_uniform_bg']:.2%}")

    return results

In [None]:
# Evaluate each supported dataset listed in the config; keep non-empty results.
all_results = {}
for name, path in config.items():
    if name not in ('vitonhd', 'deepfashion1', 'dresscode'):
        continue
    res = evaluate_background(name.upper(), path)
    if not res:
        continue
    all_results[name] = res

## 5. Visualization

In [None]:
if all_results:
    datasets = list(all_results)
    # Pull one list per metric, in dataset order.
    entropy, edges, uniform = (
        [all_results[d][key] for d in datasets]
        for key in ('avg_bg_entropy', 'avg_edge_density', 'percent_uniform_bg')
    )

    fig, ax = plt.subplots(1, 3, figsize=(18, 5))

    # One bar chart per metric: (values, bar colour, panel title).
    panels = (
        (entropy, 'salmon', "Background Color Entropy"),
        (edges, 'skyblue', "Background Edge Complexity"),
        (uniform, 'lightgreen', "Percentage of Uniform Backgrounds"),
    )
    for axis, (values, colour, title) in zip(ax, panels):
        axis.bar(datasets, values, color=colour)
        axis.set_title(title)

    # The uniformity metric is a fraction, so pin its axis to [0, 1].
    ax[2].set_ylim(0, 1)

    plt.show()

In [None]:
# Persist the collected metrics as JSON.
out_path = Path('/content/datasets/background_results.json')
# Create the target directory first; otherwise open() raises FileNotFoundError
# when the datasets folder has not been created yet.
out_path.parent.mkdir(parents=True, exist_ok=True)
with open(out_path, 'w') as f:
    json.dump(all_results, f, indent=2)
print(f"Saved to {out_path}")