# Day 2: Normalization and Standardization
## CV Bootcamp 2024

Scaling pixel values properly is fundamental to deep learning success.

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt

print("Imports successful")

## Why Normalization Matters

**Benefits:**
- Prevents gradient explosion/vanishing
- Speeds up convergence
- Reduces sensitivity to initialization
- Ensures numerical stability

In [None]:
# Create a synthetic test image filled with uniform random noise.
# np.random.randint excludes the upper bound, so 256 is needed for the
# value 255 to be reachable and the full 8-bit range [0, 255] to be covered.
image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)

# Round-trip through disk so the rest of the notebook works on an image
# loaded with cv2.imread. JPEG is lossy, so the pixels read back will not
# be identical to the generated ones.
cv2.imwrite('sample_norm.jpg', image)
image = cv2.imread('sample_norm.jpg')

print(f'Original range: {image.min()} to {image.max()}')
print(f'Original dtype: {image.dtype}')
print(f'Original shape: {image.shape}')

## 1. Min-Max Normalization

Scales values to the [0, 1] range:
$$\text{normalized} = \frac{x - x_{\min}}{x_{\max} - x_{\min}}$$

In [None]:
# Rescale the 8-bit pixel values into the unit interval [0, 1]
pixels_f32 = image.astype(np.float32)
normalized = pixels_f32 / 255.0

norm_lo, norm_hi = normalized.min(), normalized.max()
print(f'Normalized range: {norm_lo:.4f} to {norm_hi:.4f}')
print(f'Normalized dtype: {normalized.dtype}')

In [None]:
# General min-max formula: (x - min) / (max - min)
# Convert to float32 first so the arithmetic does not run on uint8 data and
# the result has the same dtype as the other normalized arrays.
image_f32 = image.astype(np.float32)
min_val = image_f32.min()
max_val = image_f32.max()
value_range = max_val - min_val

if value_range > 0:
    normalized_general = (image_f32 - min_val) / value_range
else:
    # A constant image has no range; map it to zeros instead of dividing by zero.
    normalized_general = np.zeros_like(image_f32)

print(f'General normalized range: {normalized_general.min():.4f} to {normalized_general.max():.4f}')

In [None]:
# Map [0, 255] onto [-1, 1] (an input range commonly used for GANs)
half_range = 127.5
scaled_to_two = image.astype(np.float32) / half_range
normalized_neg = scaled_to_two - 1.0

neg_lo, neg_hi = normalized_neg.min(), normalized_neg.max()
print(f'[-1, 1] range: {neg_lo:.4f} to {neg_hi:.4f}')

In [None]:
# Visualize normalization effect
# Map each normalized array back to 8-bit for display. Round before casting:
# astype(np.uint8) truncates, so float round-off (e.g. 199.99998) would
# otherwise shift a pixel down by one.
restored_unit = np.rint(normalized * 255).astype(np.uint8)
restored_neg = np.rint((normalized_neg + 1) * 127.5).astype(np.uint8)

panels = [
    ('Original [0-255]', image),
    ('Normalized [0-1]', restored_unit),
    ('Normalized [-1, 1]', restored_neg),
]

fig, axes = plt.subplots(1, len(panels), figsize=(15, 5))

for ax, (title, bgr_pixels) in zip(axes, panels):
    # The arrays are in OpenCV's BGR order; convert to RGB for matplotlib.
    ax.imshow(cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB))
    ax.set_title(title)
    ax.axis('off')

plt.tight_layout()
plt.show()

## 2. Z-Score Standardization

Transforms values to have mean 0 and standard deviation 1:
$$\text{standardized} = \frac{x - \mu}{\sigma}$$

In [None]:
# Work in float32 so the statistics are not computed on uint8 data
image_float = image.astype(np.float32)

# Reduce over height and width (axes 0 and 1), leaving one value per channel
spatial_axes = (0, 1)
mean = image_float.mean(axis=spatial_axes)
std = image_float.std(axis=spatial_axes)

print(f'Mean per channel (B, G, R): {mean}')
print(f'Std per channel (B, G, R): {std}')

In [None]:
# Per-channel z-score: subtract the channel mean, then divide by the channel std
centered = image_float - mean
standardized = centered / std

std_lo, std_hi = standardized.min(), standardized.max()
print(f'\nStandardized mean: {standardized.mean():.6f}')  # Expected to be close to 0
print(f'Standardized std: {standardized.std():.6f}')      # Expected to be close to 1
print(f'Standardized range: {std_lo:.2f} to {std_hi:.2f}')

## 3. ImageNet Standardization

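Most pretrained vision models were trained on inputs standardized with the ImageNet per-channel mean and standard deviation. For transfer learning, apply the same statistics the pretrained model was trained with — usually the ImageNet values below.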

In [None]:
# ImageNet per-channel statistics for images scaled to [0, 1] (RGB order!)
# Declared as float32 so that combining them with a float32 image keeps the
# result in float32 instead of silently upcasting it to float64.
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

print("ImageNet Mean (RGB):", IMAGENET_MEAN)
print("ImageNet Std (RGB):", IMAGENET_STD)

In [None]:
# The image is in OpenCV's BGR order; reorder to RGB to match the ImageNet statistics
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Step 1: scale pixel values into [0, 1]
rgb_f32 = image_rgb.astype(np.float32)
image_norm = rgb_f32 / 255.0

# Step 2: subtract the per-channel mean, then divide by the per-channel std
mean_removed = image_norm - IMAGENET_MEAN
image_standardized = mean_removed / IMAGENET_STD

inet_lo, inet_hi = image_standardized.min(), image_standardized.max()
print(f'ImageNet standardized mean: {image_standardized.mean():.6f}')
print(f'ImageNet standardized std: {image_standardized.std():.6f}')
print(f'ImageNet standardized range: {inet_lo:.2f} to {inet_hi:.2f}')

## 4. Preprocessing Pipeline Function

In [None]:
def preprocess_image(image_path, method='imagenet', resize=(224, 224)):
    """
    Load an image from disk and normalize it for model input.

    The image is read with OpenCV, converted from BGR to RGB, optionally
    resized, cast to float32 and then scaled according to ``method``.

    Args:
        image_path: Path to image
        method: Normalization to apply:
            'minmax'     - divide by 255, giving values in [0, 1]
            'minmax_neg' - divide by 127.5 and subtract 1, giving [-1, 1]
            'zscore'     - per-channel (x - mean) / std using this image's
                           own statistics; a channel with zero std is
                           mapped to all zeros
            'imagenet'   - scale to [0, 1], then standardize with the
                           ImageNet per-channel mean/std (RGB order)
        resize: Target size (width, height); pass None to keep the
            original size

    Returns:
        Preprocessed image as a float32 numpy array in RGB channel order

    Raises:
        ValueError: If the image cannot be loaded or ``method`` is unknown
    """
    # Load image (cv2.imread signals failure by returning None, not raising)
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not load image: {image_path}")

    # Convert BGR to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Resize
    if resize:
        image = cv2.resize(image, resize)

    # Convert to float
    image = image.astype(np.float32)

    # Apply normalization
    if method == 'minmax':
        image = image / 255.0

    elif method == 'minmax_neg':
        image = (image / 127.5) - 1.0

    elif method == 'zscore':
        mean = np.mean(image, axis=(0, 1))
        std = np.std(image, axis=(0, 1))
        # A flat channel has std == 0; divide by 1 there so the channel
        # becomes zeros instead of NaN/inf.
        safe_std = np.where(std > 0, std, np.float32(1.0))
        image = (image - mean) / safe_std

    elif method == 'imagenet':
        # Normalize to [0, 1] first
        image = image / 255.0
        # Apply ImageNet statistics. float32 constants keep the result in
        # float32, consistent with the other methods.
        IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
        IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
        image = (image - IMAGENET_MEAN) / IMAGENET_STD

    else:
        raise ValueError(f"Unknown method: {method}")

    return image

# Smoke-test the pipeline on the sample image
preprocessed = preprocess_image('sample_norm.jpg', method='imagenet')

pre_lo, pre_hi = preprocessed.min(), preprocessed.max()
print(f"Preprocessed shape: {preprocessed.shape}")
print(f"Preprocessed range: {pre_lo:.2f} to {pre_hi:.2f}")

## 5. Batch Preprocessing

In [None]:
def preprocess_batch(image_paths, method='imagenet', resize=(224, 224)):
    """
    Preprocess a batch of images.

    Each path is run through ``preprocess_image``. Paths that fail are
    reported with ``print`` and skipped, so the returned batch can contain
    fewer images than ``image_paths``.

    Args:
        image_paths: Iterable of image paths
        method: Normalization method passed to ``preprocess_image``
        resize: Target size (width, height) passed to ``preprocess_image``

    Returns:
        Batch as 4D numpy array: (batch_size, height, width, channels).
        When no image could be processed, an empty float32 array with a
        leading dimension of 0 is returned so the result is still 4D.
    """
    batch = []

    for path in image_paths:
        try:
            img = preprocess_image(path, method=method, resize=resize)
        except Exception as e:
            # Deliberate best-effort: one bad file should not abort the batch.
            print(f"Error processing {path}: {e}")
        else:
            batch.append(img)

    if not batch:
        # np.stack cannot stack an empty list; build the empty 4D batch
        # explicitly. resize is (width, height) while arrays are
        # (height, width, channels).
        height, width = (resize[1], resize[0]) if resize else (0, 0)
        return np.empty((0, height, width, 3), dtype=np.float32)

    # Stack into batch; np.stack requires every image to have the same shape
    return np.stack(batch)

# Reuse the sample image four times to stand in for a real batch
image_paths = ['sample_norm.jpg' for _ in range(4)]
batch = preprocess_batch(image_paths, method='imagenet')

batch_mean, batch_std = batch.mean(), batch.std()
print(f"Batch shape: {batch.shape}")  # (4, 224, 224, 3)
print(f"Batch mean: {batch_mean:.6f}")
print(f"Batch std: {batch_std:.6f}")

## Summary

You've learned:
- ✓ Min-Max normalization to [0, 1] or [-1, 1]
- ✓ Z-score standardization for mean=0, std=1
- ✓ ImageNet standardization for transfer learning
- ✓ Complete preprocessing pipelines
- ✓ Batch preprocessing for efficiency

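**Key Takeaway:** When using a pretrained model, preprocess inputs with the same statistics it was trained with — for most ImageNet-pretrained models, that means the ImageNet mean and std.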