# Dataset Download Notebook
## Download VITON-HD, DeepFashion1, and DressCode Test Images

This notebook downloads test images from virtual try-on datasets for metrics evaluation.

## 1. Setup and Configuration

In [None]:
# Install required packages
!pip install -q gdown huggingface_hub kaggle requests tqdm pillow

In [None]:
import os
import gdown
import zipfile
import requests
from tqdm import tqdm
from pathlib import Path
from huggingface_hub import hf_hub_download, snapshot_download
from PIL import Image
import matplotlib.pyplot as plt

# Root folder that holds one sub-folder per dataset
BASE_DIR = Path('/content/datasets')
os.makedirs(BASE_DIR, exist_ok=True)  # also creates missing parent folders

print(f"Datasets will be saved to: {BASE_DIR}")

## 2. Download VITON-HD Test Set

VITON-HD is a high-resolution virtual try-on dataset with 2032 test pairs.

In [None]:
def download_vitonhd():
    """Fetch the VITON-HD test split from the HuggingFace Hub.

    Files are written to ``BASE_DIR / 'vitonhd'`` (created when missing).
    A failed download is reported on stdout together with manual-download
    pointers; the exception is not re-raised.

    Returns:
        Path: the target directory, whether or not the download succeeded.
    """
    target_dir = BASE_DIR / 'vitonhd'
    target_dir.mkdir(parents=True, exist_ok=True)

    print("Downloading VITON-HD test set...")

    # Only files under test/ are requested; train/ is excluded explicitly.
    # NOTE(review): confirm that this repo id exists as a *dataset* repo and
    # really contains a test/ folder.
    hub_request = dict(
        repo_id="yisol/IDM-VTON",
        repo_type="dataset",
        local_dir=target_dir,
        allow_patterns=["test/*"],
        ignore_patterns=["train/*"],
    )

    try:
        # Try HuggingFace Hub first
        snapshot_download(**hub_request)
        print(f"VITON-HD downloaded to {target_dir}")
    except Exception as err:
        manual_hint = (
            f"HuggingFace download failed: {err}",
            "\nAlternative: Download manually from:",
            "https://github.com/shadow2496/VITON-HD",
            "Or use Google Drive link from the official repo",
        )
        for line in manual_hint:
            print(line)

    return target_dir

vitonhd_path = download_vitonhd()

## 3. Download DeepFashion1 Test Set

DeepFashion In-Shop Clothes Retrieval Benchmark.

In [None]:
def download_deepfashion1():
    """Download the DeepFashion1 evaluation subset from a HuggingFace mirror.

    Files are written to ``BASE_DIR / 'deepfashion1'`` (created when missing).
    When the automatic download fails, the error and the manual-download
    steps are printed; the exception is not re-raised.

    Returns:
        Path: the target directory, whether or not the download succeeded.
    """
    deepfashion_dir = BASE_DIR / 'deepfashion1'
    deepfashion_dir.mkdir(parents=True, exist_ok=True)

    print("Downloading DeepFashion1 subset...")

    # DeepFashion requires manual download from the official source,
    # so a public mirror is tried first.
    try:
        # Try HuggingFace mirror
        snapshot_download(
            repo_id="renovamen/DeepFashion-sampled",
            repo_type="dataset",
            local_dir=deepfashion_dir,
        )
        print(f"DeepFashion1 downloaded to {deepfashion_dir}")
    except Exception as e:
        separator = "=" * 60
        print(f"Auto download failed: {e}")
        print("\n" + separator)
        print("MANUAL DOWNLOAD REQUIRED:")
        print(separator)
        print("1. Go to: http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html")
        print("2. Request access to In-Shop Clothes Retrieval Benchmark")
        # Point at the directory actually used so the hint stays correct
        # when BASE_DIR is changed.
        print(f"3. Download and extract to: {deepfashion_dir}/")
        print(separator)

    return deepfashion_dir

deepfashion_path = download_deepfashion1()

## 4. Download DressCode Dataset (Men/Women)

DressCode dataset with upper-body, lower-body, and dresses categories.

In [None]:
def download_dresscode():
    """Download the DressCode test set from the HuggingFace Hub.

    Files are written to ``BASE_DIR / 'dresscode'`` (created when missing).
    When the download fails, the error and the manual-download options are
    printed; no automatic fallback is attempted and the exception is not
    re-raised.

    Returns:
        Path: the target directory, whether or not the download succeeded.
    """
    dresscode_dir = BASE_DIR / 'dresscode'
    dresscode_dir.mkdir(parents=True, exist_ok=True)

    print("Downloading DressCode dataset...")

    try:
        # Download from HuggingFace
        # NOTE(review): confirm that this repo id exists as a *dataset* repo
        # and really contains a DressCode/ folder.
        snapshot_download(
            repo_id="yisol/IDM-VTON",
            repo_type="dataset",
            local_dir=dresscode_dir,
            allow_patterns=["DressCode/*"],
        )
        print(f"DressCode downloaded to {dresscode_dir}")
    except Exception as e:
        separator = "=" * 60
        print(f"HuggingFace download failed: {e}")

        # There is no automatic fallback: only list the manual options
        # (the old "Trying alternative download..." message was misleading).
        print("\n" + separator)
        print("ALTERNATIVE DOWNLOAD OPTIONS:")
        print(separator)
        print("1. GitHub: https://github.com/aimagelab/dress-code")
        # Point at the directory actually used so the hint stays correct
        # when BASE_DIR is changed.
        print(f"2. Request access and download to: {dresscode_dir}/")
        print(separator)

    return dresscode_dir

dresscode_path = download_dresscode()

## 5. Alternative: Download Sample Subset for Quick Testing

If the full datasets are too large to work with, create a smaller random subset from the images that were already downloaded above.

In [None]:
def create_sample_subset(source_dir, sample_dir, num_samples=100, seed=None):
    """Copy a random sample of images from ``source_dir`` into ``sample_dir``.

    Images are found recursively by file extension (case-insensitive). Each
    sampled file keeps its path relative to ``source_dir``, so files with the
    same name in different sub-folders (e.g. ``image/0001.jpg`` and
    ``cloth/0001.jpg``) do not overwrite each other. Files located directly
    in ``source_dir`` end up directly in ``sample_dir``.

    Args:
        source_dir: Directory that is searched recursively for images.
        sample_dir: Destination directory; created when missing.
        num_samples: Maximum number of images to copy.
        seed: Optional seed for a reproducible selection. With ``None`` the
            module-level ``random`` generator is used.

    Returns:
        Path of ``sample_dir`` on success, ``None`` when ``source_dir`` does
        not exist or contains no images.
    """
    import shutil
    import random

    sample_dir = Path(sample_dir)
    sample_dir.mkdir(parents=True, exist_ok=True)

    # Find all image files
    source_path = Path(source_dir)
    if not source_path.exists():
        print(f"Source directory {source_dir} does not exist")
        return

    image_extensions = {'.jpg', '.jpeg', '.png', '.webp'}
    # One traversal instead of one per extension; sorted so that a fixed
    # seed always selects the same files.
    all_images = sorted(
        path for path in source_path.rglob('*')
        if path.is_file() and path.suffix.lower() in image_extensions
    )

    if len(all_images) == 0:
        print("No images found in source directory")
        return

    # Sample random images
    rng = random if seed is None else random.Random(seed)
    sampled = rng.sample(all_images, min(num_samples, len(all_images)))

    for img_path in tqdm(sampled, desc="Copying samples"):
        # Mirror the source layout to avoid file-name collisions.
        dest = sample_dir / img_path.relative_to(source_path)
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(img_path, dest)

    print(f"Created sample subset with {len(sampled)} images in {sample_dir}")
    return sample_dir

# Uncomment to create sample subsets
# create_sample_subset(vitonhd_path, BASE_DIR / 'samples' / 'vitonhd', 100)
# create_sample_subset(dresscode_path, BASE_DIR / 'samples' / 'dresscode', 100)

## 6. Verify Downloads and Visualize Samples

In [None]:
def count_images(directory):
    """Count image files below ``directory`` (recursively).

    A file counts as an image when its suffix is one of .jpg, .jpeg, .png or
    .webp, compared case-insensitively. Directories are never counted.

    Args:
        directory: Folder to scan (``str`` or ``Path``).

    Returns:
        int: Number of image files found; 0 when ``directory`` does not exist.
    """
    directory = Path(directory)
    if not directory.exists():
        return 0

    extensions = {'.jpg', '.jpeg', '.png', '.webp'}
    # Single traversal of the tree; lower-casing the suffix also catches
    # files such as IMG_0001.JPG.
    return sum(
        1
        for path in directory.rglob('*')
        if path.is_file() and path.suffix.lower() in extensions
    )

def show_sample_images(directory, title, num_samples=5):
    """Display up to ``num_samples`` images from ``directory`` in one row.

    Images are found recursively by file extension (case-insensitive). A
    short message is printed instead of a figure when the directory is
    missing or holds no images.

    Args:
        directory: Folder to search for images (``str`` or ``Path``).
        title: Figure title; also used as prefix of the status messages.
        num_samples: Maximum number of images to show.

    Returns:
        None
    """
    from itertools import islice

    directory = Path(directory)
    if not directory.exists():
        print(f"{title}: Directory not found")
        return

    extensions = {'.jpg', '.jpeg', '.png', '.webp'}
    # Lazy scan: stop walking the tree as soon as enough images are found
    # instead of listing every file of every extension first.
    candidates = (
        path for path in directory.rglob('*')
        if path.is_file() and path.suffix.lower() in extensions
    )
    images = list(islice(candidates, max(num_samples, 0)))

    if not images:
        print(f"{title}: No images found")
        return

    # squeeze=False always yields a 2-D array of Axes, so a single image
    # (one column) is handled like any other count.
    fig, axes = plt.subplots(1, len(images), figsize=(15, 4), squeeze=False)

    for ax, img_path in zip(axes[0], images):
        # Close the file handle once the pixels were handed to matplotlib.
        with Image.open(img_path) as img:
            ax.imshow(img)
        ax.set_title(img_path.name[:20], fontsize=8)
        ax.axis('off')

    plt.suptitle(title, fontsize=12)
    plt.tight_layout()
    plt.show()

# Print one image count per dataset, framed by horizontal rules
rule = "=" * 50
print(rule)
print("DATASET DOWNLOAD SUMMARY")
print(rule)
for dataset_label, dataset_dir in (
    ("VITON-HD", vitonhd_path),
    ("DeepFashion1", deepfashion_path),
    ("DressCode", dresscode_path),
):
    print(f"{dataset_label}: {count_images(dataset_dir)} images")
print(rule)

In [None]:
# Show one row of example images per dataset
for dataset_dir, dataset_label in (
    (vitonhd_path, "VITON-HD"),
    (deepfashion_path, "DeepFashion1"),
    (dresscode_path, "DressCode"),
):
    show_sample_images(dataset_dir, f"{dataset_label} Samples")

## 7. Export Paths for Other Notebooks

In [None]:
# Save dataset paths to a config file for use in metric notebooks
import json

# All values are stored as plain strings so the file is valid JSON.
config = {
    'base_dir': str(BASE_DIR),
    'vitonhd': str(vitonhd_path),
    'deepfashion1': str(deepfashion_path),
    'dresscode': str(dresscode_path),
}

config_path = BASE_DIR / 'dataset_config.json'
# Explicit encoding so the file does not depend on the platform default.
with open(config_path, 'w', encoding='utf-8') as f:
    json.dump(config, f, indent=2)

print(f"Dataset configuration saved to: {config_path}")
print("\nUse this config in other metric notebooks:")
print("```python")
print("import json")
# Print the real location of the file rather than a hard-coded copy of it,
# so the snippet stays correct when BASE_DIR is changed.
print(f"with open('{config_path}') as f:")
print("    config = json.load(f)")
print("```")

---
## Next Steps

After downloading datasets, proceed to the metric evaluation notebooks:
1. `02_body_pose_diversity.ipynb` - Pose entropy & pairwise diversity
2. `03_cloth_diversity.ipynb` - Cloth feature diversity
3. `04_body_shape_diversity.ipynb` - PCA body shape analysis
4. `05_skin_color_histogram.ipynb` - Skin tone distribution
5. `06_occlusion_handling.ipynb` - Occlusion detection
6. `07_background_diversity.ipynb` - Background analysis