# NamedCopies: Multi-View Pipelines from a Single Decode

`NamedCopies` duplicates a single decoded image batch into a named dict, bridging
single-output decoders (like `DecodeResizeCrop`) with `MultiCropPipeline`, which
applies separate transforms per view.

**Use case**: You want the same center crop, but with different downstream
transforms (e.g., different zoom levels, or one augmented view and one clean view).

```
JPEG bytes
  │
  └── DecodeResizeCrop(256, 224)     ← decode once
       │
       └── NamedCopies(['v1', 'v2'])  ← deep-copy into named dict
            │
            └── MultiCropPipeline({    ← per-view transforms
                 'v1': [zoom=1.0],
                 'v2': [zoom=0.5],
               })
```

In [None]:
# S3 URI of the remote dataset; passed to SlipstreamDataset as `remote_dir` below.
# NOTE(review): the path name suggests the ImageNet-1k validation split stored
# as JPEG bytes — confirm against the bucket contents.
LITDATA_VAL_PATH = "s3://visionlab-datasets/imagenet1k/pre-processed/s256-l512-jpgbytes-q100-streaming/val/"

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

from slipstream import (
    SlipstreamDataset,
    SlipstreamLoader,
    DecodeResizeCrop,
    DecodeCenterCrop,
    MultiCropPipeline,
    NamedCopies,
    ToTorchImage,
    Normalize,
    IMAGENET_MEAN,
    IMAGENET_STD,
)
from slipstream.transforms import RandomZoom, RandomHorizontalFlip

# Open the remote dataset used by every loader in this notebook.
# NOTE(review): decode_images=False presumably leaves images as encoded bytes so
# that the loader pipelines below perform the decoding — confirm.
dataset = SlipstreamDataset(
    remote_dir=LITDATA_VAL_PATH,
    decode_images=False,
)
print(f"Dataset: {len(dataset):,} samples")

## Helper: show multi-view batch

In [None]:
def show_views(batch, view_names, n=4, title=""):
    """Display named views side-by-side: one column per view, one row per sample.

    Handles both numpy HWC and torch CHW formats.

    Args:
        batch: Mapping from view name to a batch of images, indexable by
            sample along its first axis.
        view_names: Names of the views to plot, in left-to-right order.
        n: Number of samples (rows) to draw from each view.
        title: Optional figure title; omitted when empty.
    """
    num_views = len(view_names)
    # squeeze=False keeps `axes` two-dimensional for every grid shape. With the
    # default squeezing, a single column yields a 1-D array and a 1x1 grid
    # yields a bare Axes, and both break the `axes[row, col]` indexing below.
    fig, axes = plt.subplots(
        n, num_views, figsize=(3 * num_views, 3 * n), squeeze=False
    )

    for col, name in enumerate(view_names):
        imgs = batch[name]
        for row in range(n):
            img = imgs[row]
            if isinstance(img, torch.Tensor):
                if img.ndim == 3 and img.shape[0] in (1, 3):
                    img = img.permute(1, 2, 0)  # CHW -> HWC
                # uint8 tensors are taken to be 0-255 images (same assumption
                # as the numpy branch); without rescaling, the clamp below
                # would saturate almost every pixel to 1.0.
                is_uint8 = img.dtype == torch.uint8
                img = img.cpu().float()
                if is_uint8:
                    img = img / 255.0
                # Undo normalization if values look normalized
                if img.min() < 0:
                    mean = torch.tensor(IMAGENET_MEAN)
                    std = torch.tensor(IMAGENET_STD)
                    img = img * std + mean
                img = img.clamp(0, 1).numpy()
            else:
                # numpy HWC uint8
                img = img.astype(np.float32) / 255.0
            axes[row, col].imshow(img)
            axes[row, col].axis('off')
        # Label each column once, above its first row.
        axes[0, col].set_title(name, fontsize=12, fontweight='bold')

    if title:
        fig.suptitle(title, fontsize=14, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.show()

## 1. Basic: identical copies (numpy)

The simplest use: decode once, make named copies. Without `MultiCropPipeline`,
the copies are identical numpy arrays.

In [None]:
# Decode once, then expose the decoded batch under two names.
loader = SlipstreamLoader(
    dataset,
    batch_size=8,
    shuffle=False,
    pipelines={'image': [
        DecodeResizeCrop(resize_size=256, crop_size=224),
        # Without a MultiCropPipeline stage, the copies reach the batch untouched.
        NamedCopies(['view1', 'view2']),
    ]},
    # NOTE(review): presumably drops the 'path' field from each batch — confirm.
    exclude_fields=['path'],
    verbose=False,
)

# Only the first batch is needed for the demo; shut the loader down right after.
batch = next(iter(loader))
loader.shutdown()

# Each copy name is read back as its own key in the batch.
print("Batch keys:", list(batch.keys()))
print(f"view1: {type(batch['view1']).__name__} {batch['view1'].shape}")
print(f"view2: {type(batch['view2']).__name__} {batch['view2'].shape}")
# No per-view transform ran, so the two copies are expected to compare equal.
print(f"Identical: {np.array_equal(batch['view1'], batch['view2'])}")

show_views(batch, ['view1', 'view2'], n=4, title='Identical copies (numpy HWC)')

## 2. Different zoom levels per view

The main use case: decode once, then apply different `RandomZoom` transforms
per view via `MultiCropPipeline`. The `original` view uses `zoom=1.0` (unzoomed);
the `zoomed_in` view uses `zoom=0.5`.

In [None]:
# Device passed to every transform in this and the following cells.
DEVICE = 'cpu'

loader = SlipstreamLoader(
    dataset,
    batch_size=8,
    shuffle=False,
    pipelines={'image': [
        DecodeResizeCrop(resize_size=256, crop_size=224),
        # Copy names must match the keys of the MultiCropPipeline dict below.
        NamedCopies(['original', 'zoomed_in']),
        MultiCropPipeline({
            # NOTE(review): zoom=(lo, hi) with lo == hi and p=1.0 presumably makes
            # the zoom deterministic, and x=y=0.5 presumably centers it — confirm
            # against the RandomZoom documentation.
            'original': [
                ToTorchImage(device=DEVICE),
                RandomZoom(p=1.0, zoom=(1.0, 1.0), x=0.5, y=0.5, device=DEVICE),
            ],
            'zoomed_in': [
                ToTorchImage(device=DEVICE),
                RandomZoom(p=1.0, zoom=(0.5, 0.5), x=0.5, y=0.5, device=DEVICE),
            ],
        }),
    ]},
    exclude_fields=['path'],
    verbose=False,
)

# Only the first batch is needed for the demo; shut the loader down right after.
batch = next(iter(loader))
loader.shutdown()

print(f"original:   {batch['original'].shape} {batch['original'].dtype}")
print(f"zoomed_in: {batch['zoomed_in'].shape} {batch['zoomed_in'].dtype}")

show_views(batch, ['original', 'zoomed_in'], n=4,
           title='Same crop, different zoom levels')

## 3. Three views with graduated zoom

You can create any number of named copies. Here we show three zoom levels.

In [None]:
# Same pattern as the two-view case, extended to three named copies.
loader = SlipstreamLoader(
    dataset,
    batch_size=8,
    shuffle=False,
    pipelines={'image': [
        # NOTE(review): resize_size equals crop_size here, unlike the 256/224
        # used in the other cells — confirm this is intentional.
        DecodeResizeCrop(resize_size=224, crop_size=224),
        # Copy names must match the keys of the MultiCropPipeline dict below.
        NamedCopies(['zoom_100', 'zoom_75', 'zoom_50']),
        MultiCropPipeline({
            # Each view differs only in its fixed zoom value.
            'zoom_100': [
                ToTorchImage(device=DEVICE),
                RandomZoom(p=1.0, zoom=(1.0, 1.0), x=0.5, y=0.5, device=DEVICE),
            ],
            'zoom_75': [
                ToTorchImage(device=DEVICE),
                RandomZoom(p=1.0, zoom=(0.75, 0.75), x=0.5, y=0.5, device=DEVICE),
            ],
            'zoom_50': [
                ToTorchImage(device=DEVICE),
                RandomZoom(p=1.0, zoom=(0.5, 0.5), x=0.5, y=0.5, device=DEVICE),
            ],
        }),
    ]},
    exclude_fields=['path'],
    verbose=False,
)

# Only the first batch is needed for the demo; shut the loader down right after.
batch = next(iter(loader))
loader.shutdown()

show_views(batch, ['zoom_100', 'zoom_75', 'zoom_50'], n=4,
           title='Three zoom levels from one decode')

## 4. Augmented vs clean view

Another common pattern: one view gets augmentations (flip, zoom),
the other stays clean for visualization or as a reconstruction target.

In [None]:
# One untouched view and one augmented view, both from the same decode.
loader = SlipstreamLoader(
    dataset,
    batch_size=8,
    shuffle=False,
    pipelines={'image': [
        DecodeResizeCrop(resize_size=256, crop_size=224),
        # Copy names must match the keys of the MultiCropPipeline dict below.
        NamedCopies(['clean', 'augmented']),
        MultiCropPipeline({
            'clean': [
                ToTorchImage(device=DEVICE),
                # No augmentation — just convert to tensor
            ],
            'augmented': [
                ToTorchImage(device=DEVICE),
                # NOTE(review): p=1.0 presumably applies each transform to every
                # sample, which keeps the demo output deterministic — confirm.
                RandomHorizontalFlip(p=1.0, device=DEVICE),
                RandomZoom(p=1.0, zoom=(0.6, 0.6), x=0.5, y=0.5, device=DEVICE),
            ],
        }),
    ]},
    exclude_fields=['path'],
    verbose=False,
)

# Only the first batch is needed for the demo; shut the loader down right after.
batch = next(iter(loader))
loader.shutdown()

show_views(batch, ['clean', 'augmented'], n=4,
           title='Clean vs augmented views')

## 5. Full training pipeline with normalization

A realistic example: two normalized views ready for model input,
with different zoom levels for multi-scale consistency training.

In [None]:
# Training-style configuration: shuffled, seeded, and normalized per view.
loader = SlipstreamLoader(
    dataset,
    batch_size=16,
    shuffle=True,
    seed=43,
    pipelines={'image': [
        DecodeResizeCrop(resize_size=256, crop_size=224),
        # Copy names must match the keys of the MultiCropPipeline dict below.
        NamedCopies(['view1', 'view2']),
        MultiCropPipeline({
            # Both views end with the same Normalize step; only the zoom differs.
            'view1': [
                ToTorchImage(device=DEVICE),
                RandomZoom(p=1.0, zoom=(1.0, 1.0), x=0.5, y=0.5, device=DEVICE),
                Normalize(IMAGENET_MEAN, IMAGENET_STD, device=DEVICE),
            ],
            'view2': [
                ToTorchImage(device=DEVICE),
                RandomZoom(p=1.0, zoom=(0.5, 0.5), x=0.5, y=0.5, device=DEVICE),
                Normalize(IMAGENET_MEAN, IMAGENET_STD, device=DEVICE),
            ],
        }),
    ]},
    exclude_fields=['path'],
    verbose=False,
)

# Only the first batch is needed for the demo; shut the loader down right after.
batch = next(iter(loader))
loader.shutdown()

# Summary statistics of the normalized views, plus the label field of the batch.
print(f"view1: {batch['view1'].shape}, dtype={batch['view1'].dtype}")
print(f"  mean={batch['view1'].mean():.4f}, std={batch['view1'].std():.4f}")
print(f"view2: {batch['view2'].shape}, dtype={batch['view2'].dtype}")
print(f"  mean={batch['view2'].mean():.4f}, std={batch['view2'].std():.4f}")
print(f"label: {batch['label'].shape}")

# show_views undoes the ImageNet normalization when it sees negative values.
show_views(batch, ['view1', 'view2'], n=4,
           title='Training-ready normalized views (zoom 1.0 vs 0.5)')

## 6. Verifying copy independence

Each named copy is independent — modifying one does not affect the others.
This is important because some transforms mutate tensors in-place.

In [None]:
# NumPy check: zero one copy in place, then confirm via the printed means that
# the source array and the remaining copies are left untouched.
nc = NamedCopies(['a', 'b', 'c'])
# Uniform samples in [0, 1), so an unmodified array has a mean close to 0.5.
original = np.random.rand(4, 224, 224, 3).astype(np.float32)
copies = nc(original)

# Mutate copy 'a'
copies['a'][:] = 0.0

print("After zeroing copy 'a':")
print(f"  original unchanged: {original.mean():.4f} (should be ~0.5)")
print(f"  copy 'a' zeroed:    {copies['a'].mean():.4f} (should be 0.0)")
print(f"  copy 'b' unchanged: {copies['b'].mean():.4f} (should be ~0.5)")
print(f"  copy 'c' unchanged: {copies['c'].mean():.4f} (should be ~0.5)")

# Torch check: same idea with an in-place tensor op on one copy.
# Standard-normal samples, so an unmodified tensor has a mean close to 0.0.
t = torch.randn(4, 3, 224, 224)
t_copies = nc(t)
t_copies['a'].zero_()

# Plain string: there are no placeholders, so the f-prefix was unnecessary.
print("\nTorch tensor independence:")
print(f"  original mean: {t.mean():.4f} (should be ~0.0)")
print(f"  copy 'a' mean: {t_copies['a'].mean():.4f} (should be 0.0)")
print(f"  copy 'b' mean: {t_copies['b'].mean():.4f} (should be ~0.0, not exactly 0.0)")

## Summary

| Component | Role |
|-----------|------|
| `DecodeResizeCrop` / `DecodeCenterCrop` | Single decode + crop |
| `NamedCopies(['v1', 'v2', ...])` | Duplicate into named dict (deep copy) |
| `MultiCropPipeline({...})` | Apply per-view transforms |

**When to use `NamedCopies` vs `DecodeMultiRandomResizedCrop`:**

| Scenario | Use |
|----------|-----|
| Same center crop, different downstream transforms | `NamedCopies` |
| Different random crops per view (SSL) | `DecodeMultiRandomResizedCrop` |
| Different crop sizes per view (global/local) | `DecodeMultiRandomResizedCrop` |