# Day 1: NumPy Essentials for Computer Vision
## CV Bootcamp 2024

NumPy is the foundation of numerical computing in Python. Images are represented as NumPy arrays, making NumPy mastery essential for CV work.

## Why NumPy for Computer Vision?

- **Images = Arrays:** Grayscale (2D), Color (3D)
- **Speed:** C-optimized operations, 50-100x faster than Python loops
- **Vectorization:** Apply operations to entire arrays
- **Memory efficiency:** Handles large datasets
- **Ecosystem:** Integrates with OpenCV, PIL, scikit-image

In [1]:
import numpy as np
import time

print(f"NumPy version: {np.__version__}")

NumPy version: 2.2.5


## 1. Arrays, Shape, and Dimensions

Understanding array shapes is fundamental to working with images.

In [2]:
# Two small example arrays built from Python lists: a 1-D vector and a 3x3 matrix.
arr_1d = np.array([1, 2, 3, 4, 5])
arr_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# Report the matrix's basic attributes, one per line.
for label, value in (
    ("Shape", arr_2d.shape),       # (3, 3) - rows, columns
    ("Dimensions", arr_2d.ndim),   # 2 axes
    ("Size", arr_2d.size),         # 9 elements in total
    ("Data type", arr_2d.dtype),   # element type, e.g. int64
):
    print(f"{label}: {value}")

Shape: (3, 3)
Dimensions: 2
Size: 9
Data type: int64


### Image Array Representations

In [None]:
# A single-channel (grayscale) image is a 2-D array: (height, width).
gray_image = np.zeros(shape=(480, 640), dtype=np.uint8)
# A colour image adds a trailing channel axis: (height, width, channels).
color_image = np.zeros(shape=(480, 640, 3), dtype=np.uint8)
# A batch adds a leading axis: (batch_size, height, width, channels).
batch = np.zeros(shape=(32, 224, 224, 3), dtype=np.uint8)

print(f"Grayscale shape: {gray_image.shape}")  # (480, 640)
print(f"Color shape: {color_image.shape}")     # (480, 640, 3)
print(f"Batch shape: {batch.shape}")           # (32, 224, 224, 3)

## 2. Array Creation Methods

In [None]:
# Constant-filled arrays; the tuple argument is the shape to allocate.
zeros = np.zeros(shape=(3, 3))
ones = np.ones(shape=(2, 4))
full = np.full(shape=(3, 3), fill_value=7)  # every element set to 7

print(f"Zeros:\n {zeros}")
print(f"\nOnes:\n {ones}")
print(f"\nFull:\n {full}")

In [None]:
# 4x4 identity matrix: ones on the main diagonal, zeros everywhere else.
identity = np.eye(N=4)
print(f"Identity:\n {identity}")

In [None]:
# Draw three 3x3 samples from NumPy's global random state.
random_uniform = np.random.rand(3, 3)    # floats, uniform on [0, 1)
random_normal = np.random.randn(3, 3)    # floats, standard normal distribution
random_int = np.random.randint(low=0, high=255, size=(3, 3))  # ints in [0, 255); `high` is exclusive

for title, sample in (
    ("Random uniform:\n", random_uniform),
    ("\nRandom normal:\n", random_normal),
    ("\nRandom int:\n", random_int),
):
    print(title, sample)

In [None]:
# Evenly spaced values, specified by step (arange) or by count (linspace).
arange = np.arange(0, 10, 2)          # start, stop (exclusive), step -> [0 2 4 6 8]
linspace = np.linspace(0, 1, num=5)   # 5 points from 0 to 1, both ends included

print(f"Arange: {arange}")
print(f"Linspace: {linspace}")

In [None]:
# The *_like constructors take their shape and dtype from a template array.
arr = np.array([[1, 2], [3, 4]])
zeros_like, ones_like = np.zeros_like(arr), np.ones_like(arr)

print(f"Original:\n {arr}")
print(f"\nZeros like:\n {zeros_like}")
print(f"\nOnes like:\n {ones_like}")

## 3. Slicing and Indexing

In [None]:
# 3x4 example matrix (3 rows, 4 columns) for the indexing examples.
arr = np.array([
    [10, 20, 30, 40],
    [50, 60, 70, 80],
    [90, 100, 110, 120],
])

print(f"Original array:\n {arr}")

In [None]:
# Single-element access with (row, column) index pairs.
for row, col in (
    (0, 0),      # 10 - first element
    (2, 3),      # 120 - last element
    (-1, -1),    # 120 - negative indices count from the end
):
    print(f"arr[{row}, {col}] = {arr[row, col]}")

In [None]:
# A bare ":" selects every index along that axis.
first_row = arr[0, :]     # [10 20 30 40]
second_col = arr[:, 1]    # [20 60 100]
last_col = arr[:, -1]     # [40 80 120]

print(f"First row arr[0, :] = {first_row}")
print(f"Second column arr[:, 1] = {second_col}")
print(f"Last column arr[:, -1] = {last_col}")

In [None]:
# Rows 0-1 and columns 1-2; the stop index of each range is exclusive.
window = arr[0:2, 1:3]
print(f"Slice arr[0:2, 1:3]:\n {window}")
# Expected output:
# [[20 30]
#  [60 70]]

In [None]:
# A comparison yields a boolean array of the same shape; indexing with it
# keeps only the elements where the mask is True, as a 1-D array.
mask = arr > 50
selected = arr[mask]
print(f"Mask (arr > 50):\n {mask}")
print(f"\nValues where arr > 50: {selected}")

In [None]:
# Index lists are paired element-wise: this picks (0, 1) and (2, 3),
# not the 2x2 block spanned by those rows and columns.
rows, cols = [0, 2], [1, 3]
picked = arr[rows, cols]  # [20 120]
print(f"arr[{rows}, {cols}] = {picked}")

In [None]:
# For images - crop center region
# Random stand-in for a 480x640 3-channel 8-bit image. randint's `high` is
# exclusive, so 256 is needed for 255 to be a possible pixel value.
image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)
h, w = image.shape[:2]
crop_size = 200  # side length of the square crop; assumed <= min(h, w)

# Derive the end of each range from its start so the crop is exactly
# crop_size pixels wide and tall, including for odd crop sizes (using
# crop_size // 2 on both sides would lose one pixel).
top = (h - crop_size) // 2
left = (w - crop_size) // 2
center_crop = image[top:top + crop_size, left:left + crop_size]
print(f"Original: {image.shape}, Cropped: {center_crop.shape}")

## 4. Broadcasting

Broadcasting allows NumPy to work with arrays of different shapes during arithmetic operations.

**Broadcasting Rules:**
1. Compare dimensions from right to left
2. Dimensions must be equal, or one must be 1
3. Missing leading dimensions are treated as size 1 (the shorter shape is padded with 1s on the left)

In [None]:
# Scalar broadcasting: the scalar is combined with every element of the array.
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
result = arr + 10
print(f"Original:\n {arr}")
print(f"\nAfter adding 10:\n {result}")

In [None]:
# 1D to 2D broadcasting: shape (3,) is stretched against shape (2, 3),
# so the same three values are added to each row.
arr_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
arr_1d = np.array([10, 20, 30])

result = arr_2d + arr_1d
print(f"2D array:\n {arr_2d}")
print(f"\n1D array: {arr_1d}")
print(f"\nResult:\n {result}")

In [None]:
# Column broadcasting: shape (2, 1) is stretched against arr_2d along the
# column axis, so each row of arr_2d gets its own offset.
col_vector = np.array([[10], [20]])
result = arr_2d + col_vector
print(f"2D array:\n {arr_2d}")
print(f"\nColumn vector:\n {col_vector}")
print(f"\nResult:\n {result}")

In [None]:
# Image example - adjust RGB channels independently
# Random stand-in for a 100x100 3-channel 8-bit image. randint's `high` is
# exclusive, so 256 is needed for 255 to be a possible pixel value.
image = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
# One gain per channel; shape (3,) broadcasts against the trailing channel axis.
# The red/blue labels assume the channels are stored in R, G, B order.
channel_adjustments = np.array([1.2, 1.0, 0.8])  # Boost red, reduce blue
# The product is floating point and can exceed 255, so clip before the
# conversion back to uint8.
adjusted = np.clip(image * channel_adjustments, 0, 255).astype(np.uint8)

print(f"Original image shape: {image.shape}")
print(f"Adjustments: {channel_adjustments}")
print(f"Adjusted image shape: {adjusted.shape}")

## 5. Element-wise Operations

In [None]:
arr = np.array([1, 2, 3, 4, 5])

# Arithmetic with a scalar is applied to each element independently.
shifted = arr + 10
doubled = arr * 2
squared = arr ** 2
halved = arr / 2   # true division: the result is floating point

print(f"arr + 10 = {shifted}")
print(f"arr * 2 = {doubled}")
print(f"arr ** 2 = {squared}")
print(f"arr / 2 = {halved}")

In [None]:
# Array to array: operands of equal shape are combined position by position.
arr2 = np.array([5, 4, 3, 2, 1])
elementwise_sum = arr + arr2
elementwise_product = arr * arr2
print(f"arr + arr2 = {elementwise_sum}")
print(f"arr * arr2 = {elementwise_product}")

In [None]:
# Comparisons are element-wise too and produce boolean arrays.
greater_than_three = arr > 3
equal_to_three = arr == 3
print(f"arr > 3 = {greater_than_three}")
print(f"arr == 3 = {equal_to_three}")

In [None]:
# Mathematical functions: each one is applied to every element of the array.
for name, func in (
    ("sqrt", np.sqrt),
    ("exp", np.exp),
    ("log", np.log),
    ("sin", np.sin),
):
    print(f"{name}(arr) = {func(arr)}")

In [None]:
# Image brightness adjustment
# Random 100x100 8-bit grayscale stand-in with values in [0, 200).
image = np.random.randint(0, 200, (100, 100), dtype=np.uint8)

# uint8 arithmetic wraps around instead of saturating (e.g. 10 - 50 -> 216),
# and np.clip cannot undo a wrap that has already happened. Do the offset in
# a wider signed type, clip to the valid range, then convert back.
wide = image.astype(np.int16)
brighter = np.clip(wide + 50, 0, 255).astype(np.uint8)
darker = np.clip(wide - 50, 0, 255).astype(np.uint8)
# Multiplying by a float already promotes to floating point, so no wrap here.
contrast = np.clip(image * 1.5, 0, 255).astype(np.uint8)

print(f"Original range: {image.min()}-{image.max()}")
print(f"Brighter range: {brighter.min()}-{brighter.max()}")
print(f"Darker range: {darker.min()}-{darker.max()}")
print(f"Contrast range: {contrast.min()}-{contrast.max()}")

## 6. Reshaping and Flattening

In [None]:
# Reshape: same six elements, rearranged from shape (6,) to (2, 3).
arr = np.array([1, 2, 3, 4, 5, 6])
reshaped = arr.reshape((2, 3))
print(f"Original: {arr}")
print(f"\nReshaped (2, 3):\n {reshaped}")

In [None]:
# Automatic dimension inference: -1 asks NumPy to work out that axis from
# the element count, so 6 elements with 3 rows gives shape (3, 2).
auto = arr.reshape((3, -1))
inferred_shape = auto.shape
print(f"Auto reshape (3, -1) shape: {inferred_shape}")
print(auto)

In [None]:
# Flatten to 1D. flatten() always returns a copy; ravel() returns a view
# when the memory layout allows it and only copies otherwise.
flat, ravel = reshaped.flatten(), reshaped.ravel()
print(f"Flattened: {flat}")
print(f"Raveled: {ravel}")

In [None]:
# Transpose: .T reverses the axes, so rows become columns.
transposed = reshaped.T
print(f"Original:\n {reshaped}")
print(f"\nTransposed:\n {transposed}")

In [None]:
# Flatten for ML model input
# Random stand-in for a 28x28 8-bit grayscale image. randint's `high` is
# exclusive, so 256 is needed for 255 to be a possible pixel value.
mnist_image = np.random.randint(0, 256, (28, 28), dtype=np.uint8)
flat_image = mnist_image.flatten()  # (28, 28) -> (784,), as a copy
print(f"MNIST Image shape: {mnist_image.shape}, Flat: {flat_image.shape}")

In [None]:
# Add batch dimension: two equivalent ways to prepend a length-1 axis.
single_image = np.random.rand(224, 224, 3)
batched = single_image.reshape((1, *single_image.shape))    # explicit target shape
batched_expand = np.expand_dims(single_image, axis=0)       # insert a new axis at position 0

print(f"Single image: {single_image.shape}")
print(f"Batched: {batched.shape}")
print(f"Batched (expand_dims): {batched_expand.shape}")

In [None]:
# Rearrange dimensions (HWC to CHW for PyTorch): the new axis order is
# (old axis 2, old axis 0, old axis 1).
hwc_image = np.random.rand(224, 224, 3)
chw_image = np.transpose(hwc_image, axes=(2, 0, 1))
print(f"HWC: {hwc_image.shape}, CHW: {chw_image.shape}")

## 7. Vectorization vs Loops

Vectorization is crucial for performance in CV applications.

In [3]:
# Create test data: one million random floats to square in two different ways.
arr = np.random.rand(1000000)

# LOOP APPROACH (SLOW)
# The index-based loop is deliberately kept: it is the baseline being
# measured, with every element handled by the Python interpreter.
print("Testing loop vs vectorized operations...")
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations; time.time() can be too coarse to resolve the
# millisecond-scale vectorized run.
start = time.perf_counter()
result_loop = np.zeros_like(arr)
for i in range(len(arr)):
    result_loop[i] = arr[i] ** 2
loop_time = time.perf_counter() - start

# VECTORIZED APPROACH (FAST)
start = time.perf_counter()
result_vectorized = arr ** 2
vectorized_time = time.perf_counter() - start

print(f"\nLoop time: {loop_time:.4f}s")
print(f"Vectorized time: {vectorized_time:.4f}s")
# Guard the ratio: a zero timer reading would otherwise raise ZeroDivisionError.
if vectorized_time > 0:
    print(f"Speedup: {loop_time / vectorized_time:.1f}x faster!")
else:
    print("Speedup: vectorized run was too fast to measure")

# Verify both approaches agree (within floating-point tolerance)
print(f"Results match: {np.allclose(result_loop, result_vectorized)}")

Testing loop vs vectorized operations...

Loop time: 0.2919s
Vectorized time: 0.0047s
Speedup: 62.7x faster!
Results match: True


## 8. Useful NumPy Functions for Images

In [None]:
# Random stand-in for a 480x640 3-channel 8-bit image. randint's `high` is
# exclusive, so 256 is needed for 255 to be a possible pixel value (with 255
# the reported Max could never exceed 254).
image = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)

# Statistical operations over every pixel and channel at once
print(f"Mean: {image.mean():.2f}")
print(f"Std: {image.std():.2f}")
print(f"Min: {image.min()}, Max: {image.max()}")
print(f"Median: {np.median(image):.2f}")

In [None]:
# Per-channel statistics: reducing over axes 0 and 1 (height, width) leaves
# one value per channel.
spatial_axes = (0, 1)
channel_mean = image.mean(axis=spatial_axes)
channel_std = image.std(axis=spatial_axes)
print(f"Mean per channel (R, G, B): {channel_mean}")
print(f"Std per channel (R, G, B): {channel_std}")

In [None]:
# Clipping values: anything below 50 becomes 50, anything above 200 becomes 200.
lower_bound, upper_bound = 50, 200
clipped = np.clip(image, lower_bound, upper_bound)
print(f"Clipped range: {clipped.min()}-{clipped.max()}")

In [None]:
# Normalization (min-max): shift by the minimum and divide by the value
# range, so the smallest value maps to 0 and the largest to 1.
# NOTE(review): the divisor is zero when every pixel has the same value.
lowest, highest = image.min(), image.max()
normalized = (image - lowest) / (highest - lowest)
print(f"Normalized range: {normalized.min():.4f}-{normalized.max():.4f}")

In [None]:
# Standardization (zero mean, unit variance): subtract the global mean and
# divide by the global standard deviation.
mean_value, std_value = image.mean(), image.std()
standardized = (image - mean_value) / std_value
print(f"Standardized mean: {standardized.mean():.4f}")
print(f"Standardized std: {standardized.std():.4f}")

In [None]:
# Concatenation: an all-black and an all-white 100x100 3-channel image,
# joined along each of the three axes in turn.
image_shape = (100, 100, 3)
img1 = np.zeros(image_shape, dtype=np.uint8)
img2 = np.ones(image_shape, dtype=np.uint8) * 255

pair = [img1, img2]
hstack = np.hstack(pair)  # side by side, along axis 1: (100, 200, 3)
vstack = np.vstack(pair)  # top to bottom, along axis 0: (200, 100, 3)
dstack = np.dstack(pair)  # along the channel axis: (100, 100, 6)

print(f"HStack shape: {hstack.shape}")
print(f"VStack shape: {vstack.shape}")
print(f"DStack shape: {dstack.shape}")

In [None]:
# Stacking for batches: unlike the h/v/dstack helpers, stack() joins the
# inputs along a NEW leading axis, giving (3, 100, 100, 3).
batch = np.stack((img1, img2, img1), axis=0)
print(f"Batch shape: {batch.shape}")

In [None]:
# Splitting arrays: cut the batch into 3 equal parts along axis 0. Each
# part keeps the split axis, now with length 1.
split_images = np.split(batch, indices_or_sections=3, axis=0)
first_part = split_images[0]
print(f"Split count: {len(split_images)}")
print(f"Each image shape: {first_part.shape}")

In [None]:
# Where (conditional selection): binary threshold at 127.
mask = image > 127
# Passing uint8 scalars keeps the result an 8-bit array; with plain Python
# ints (255, 0) np.where returns the default integer dtype instead.
output = np.where(mask, np.uint8(255), np.uint8(0))  # White where >127, black otherwise
print(f"Output shape: {output.shape}")
print(f"Unique values: {np.unique(output)}")

## Summary

You've learned:
- ✓ Array shapes and dimensions for images
- ✓ Multiple ways to create arrays
- ✓ Indexing and slicing techniques
- ✓ Broadcasting rules
- ✓ Element-wise operations
- ✓ Reshaping and dimension manipulation
- ✓ Vectorization for 50-100x speedup!
- ✓ Essential NumPy functions for image processing

**Key Takeaway:** Always use vectorized operations instead of loops for array processing!