# Examples of SIMD Processing 

In [1]:
import numpy as np
import time

### Addition of Arrays

In [3]:
# Create two large arrays
N = 10**6
a = np.random.rand(N)
b = np.random.rand(N)

# Pure Python loop (no SIMD): one interpreted iteration per element.
# time.perf_counter() replaces time.time() because it is the monotonic,
# high-resolution clock meant for measuring short intervals; time.time()
# is a wall clock that can be coarse and can jump.
start = time.perf_counter()
c = [a[i] + b[i] for i in range(N)]
end = time.perf_counter()
print(f"Pure Python loop time: {end - start:.6f} seconds")

# NumPy array addition (with SIMD): one vectorized call over the whole array.
# Note that `c` is rebound here from the list above to a NumPy array.
start = time.perf_counter()
c = a + b
end = time.perf_counter()
print(f"NumPy (SIMD) time: {end - start:.6f} seconds")

Pure Python loop time: 0.518719 seconds
NumPy (SIMD) time: 0.028724 seconds


### Trigonometric Function (e.g., Sine)

In [5]:
# Define a large array
angles = np.random.rand(10_000_000) * 2 * np.pi

# Without SIMD (using Python loops): np.sin is called once per element.
# time.perf_counter() replaces time.time() because it is the monotonic,
# high-resolution clock meant for measuring elapsed intervals.
start = time.perf_counter()
sines = np.zeros_like(angles)
for i in range(len(angles)):
    sines[i] = np.sin(angles[i])
end = time.perf_counter()
print(f"Without SIMD: {end - start:.5f} seconds")

# With SIMD (using NumPy): a single np.sin call over the whole array
start = time.perf_counter()
sines = np.sin(angles)
end = time.perf_counter()
print(f"With SIMD: {end - start:.5f} seconds")

Without SIMD: 19.74245 seconds
With SIMD: 0.24002 seconds


### Matrix Multiplication

In [11]:
# Two square random matrices, `size` rows by `size` columns each
size = 500
matrix_a, matrix_b = np.random.rand(size, size), np.random.rand(size, size)

# Without SIMD (manual matrix multiplication)
def manual_matrix_multiply(a, b):
    """Multiply two 2-D arrays with an explicit triple Python loop.

    Parameters
    ----------
    a : numpy.ndarray
        Left operand of shape (n, m).
    b : numpy.ndarray
        Right operand of shape (m, p).

    Returns
    -------
    numpy.ndarray
        The (n, p) product, accumulated in the float array created by
        ``np.zeros``.

    Raises
    ------
    ValueError
        If either operand is not 2-D, or if the inner dimensions differ.
        Without this check a `b` with more rows than `a` has columns would
        silently produce a wrong product.
    """
    if a.ndim != 2 or b.ndim != 2:
        raise ValueError(
            f"both operands must be 2-D, got {a.ndim}-D and {b.ndim}-D"
        )
    n_rows, inner = a.shape
    inner_b, n_cols = b.shape
    if inner != inner_b:
        raise ValueError(
            f"shapes {a.shape} and {b.shape} are not aligned: "
            f"{inner} (dim 1) != {inner_b} (dim 0)"
        )

    result = np.zeros((n_rows, n_cols))
    for i in range(n_rows):
        for j in range(n_cols):
            # Dot product of row i of `a` with column j of `b`
            for k in range(inner):
                result[i, j] += a[i, k] * b[k, j]
    return result

# time.perf_counter() replaces time.time() because it is the monotonic,
# high-resolution clock meant for measuring elapsed intervals; the NumPy
# measurement below is short enough for clock resolution to matter.
start = time.perf_counter()
manual_result = manual_matrix_multiply(matrix_a, matrix_b)
end = time.perf_counter()
print(f"Without SIMD: {end - start:.2f} seconds")

# With SIMD (NumPy's dot product)
start = time.perf_counter()
numpy_result = np.dot(matrix_a, matrix_b)
end = time.perf_counter()
print(f"With SIMD: {end - start:.2f} seconds")


Without SIMD: 136.50 seconds
With SIMD: 0.02 seconds


### Image Convolution

In [20]:
from scipy.signal import convolve2d

# Random 1000x1000 image plus a 3x3 kernel
image = np.random.rand(1000, 1000)
# Edge detection kernel: the row [1, 0, -1] repeated three times
kernel = np.array([[1, 0, -1]] * 3)

# Manual convolution
def manual_convolve2d(image, kernel):
    """Compute the 'valid' 2-D convolution of `image` with `kernel` using Python loops.

    The kernel is flipped along both axes before the sliding multiply-and-sum.
    That flip is what makes this a convolution rather than a cross-correlation,
    so the result matches ``scipy.signal.convolve2d(image, kernel, mode='valid')``.

    Parameters
    ----------
    image : numpy.ndarray
        2-D input array of shape (H, W).
    kernel : numpy.ndarray
        2-D kernel of shape (kh, kw).

    Returns
    -------
    numpy.ndarray
        Float array of shape (H - kh + 1, W - kw + 1).
    """
    image_height, image_width = image.shape
    kernel_height, kernel_width = kernel.shape
    # Flip once, outside the loops; sliding the unflipped kernel would compute
    # cross-correlation instead.
    flipped_kernel = kernel[::-1, ::-1]
    output = np.zeros((image_height - kernel_height + 1, image_width - kernel_width + 1))
    for i in range(output.shape[0]):
        for j in range(output.shape[1]):
            window = image[i:i+kernel_height, j:j+kernel_width]
            output[i, j] = np.sum(window * flipped_kernel)
    return output

# time.perf_counter() replaces time.time() because it is the monotonic,
# high-resolution clock meant for measuring elapsed intervals.
start = time.perf_counter()
manual_conv = manual_convolve2d(image, kernel)
end = time.perf_counter()
print(f"Without SIMD (Manual Convolution): {end - start:.2f} seconds")

# Same operation through SciPy's compiled implementation
start = time.perf_counter()
simd_conv = convolve2d(image, kernel, mode='valid')
end = time.perf_counter()
print(f"With SIMD (Scipy Convolution): {end - start:.2f} seconds")

Without SIMD (Manual Convolution): 22.82 seconds
With SIMD (Scipy Convolution): 0.11 seconds


### Bitwise Operations

In [18]:
# Two large int32 arrays of 10 million values each, drawn from [0, 2**16)
array_a = np.random.randint(low=0, high=1 << 16, size=10**7, dtype=np.int32)
array_b = np.random.randint(low=0, high=1 << 16, size=10**7, dtype=np.int32)

# Without SIMD (manual bitwise AND)
def manual_bitwise_and(a, b):
    """Return a list holding ``a[i] & b[i]`` for every index ``i`` of ``a``.

    Elements are combined one at a time in a Python loop. The number of
    results is ``len(a)``, and ``b`` is indexed at the same positions.
    """
    anded = []
    for idx in range(len(a)):
        anded.append(a[idx] & b[idx])
    return anded

# time.perf_counter() replaces time.time() because it is the monotonic,
# high-resolution clock meant for measuring elapsed intervals; the vectorized
# measurement below is short enough for clock resolution to matter.
start = time.perf_counter()
manual_result = manual_bitwise_and(array_a, array_b)
end = time.perf_counter()
print(f"Without SIMD (Manual Bitwise AND): {end - start:.2f} seconds")

# With SIMD (NumPy's bitwise_and)
start = time.perf_counter()
simd_result = np.bitwise_and(array_a, array_b)
end = time.perf_counter()
print(f"With SIMD (Vectorized Bitwise AND): {end - start:.2f} seconds")

Without SIMD (Manual Bitwise AND): 5.25 seconds
With SIMD (Vectorized Bitwise AND): 0.04 seconds


In [24]:
!pip install opencv-python

Collecting opencv-python
  Downloading opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/38.8 MB 435.7 kB/s eta 0:01:30
   ---------------------------------------- 0.1/38.8 MB 751.6 kB/s eta 0:00:52
   ---------------------------------------- 0.2/38.8 MB 1.4 MB/s eta 0:00:29
    --------------------------------------- 0.5/38.8 MB 2.5 MB/s eta 0:00:16
   - -------------------------------------- 1.1/38.8 MB 4.5 MB/s eta 0:00:09
   -- ------------------------------------- 2.4/38.8 MB 8.2 MB/s eta 0:00:05
   --- ------------------------------------ 2.9/38.8 MB 8.9 MB/s eta 0:00:05
   ---- ----------------------------------- 4.0/38.8 MB 10.3 MB/s eta 0:00:04
   ----- ---------------------------------- 5.4/38.8 MB 12.4 MB/s

## Real-Time Image Processing with SIMD

In [None]:
import cv2

# Load an image from a file
# NOTE(review): this is an absolute, machine-specific path; point it at a
# local image file before running on another machine.
image_path = 'C:/Users/tabendarkar/OneDrive - California State University Chico/Desktop/OIP.jpg'
image = cv2.imread(image_path)

# cv2.imread reports failure by returning None instead of raising, so fail
# here with a clear message rather than deep inside the blur functions.
if image is None:
    raise FileNotFoundError(f"Could not read image file: {image_path}")

# Function to apply Gaussian blur manually (without SIMD)
def manual_gaussian_blur(image, kernel_size=5, sigma=1.0):
    """Blur `image` with a normalized Gaussian kernel using explicit Python loops.

    Parameters
    ----------
    image : numpy.ndarray
        Either a 2-D (H, W) single-channel image or a 3-D (H, W, C) image with
        any number of channels (for example the 3-channel array returned by
        ``cv2.imread``).
    kernel_size : int, default 5
        Side length of the square Gaussian kernel.
    sigma : float, default 1.0
        Standard deviation of the Gaussian.

    Returns
    -------
    numpy.ndarray
        Array with the same shape and dtype as `image`. Borders are computed
        against zero padding. For integer dtypes each value is rounded to the
        nearest integer before being stored; plain truncation would turn a
        result such as 199.99999999 into 199.

    Raises
    ------
    ValueError
        If `image` is None (what ``cv2.imread`` returns for an unreadable
        file) or is neither 2-D nor 3-D.
    """
    if image is None:
        raise ValueError("image is None; was the file read successfully?")
    if image.ndim not in (2, 3):
        raise ValueError(f"expected a 2-D or 3-D image, got {image.ndim}-D")

    # Create Gaussian kernel, centred on index (kernel_size - 1) // 2
    center = (kernel_size - 1) // 2

    def gaussian(x, y):
        return (1 / (2 * np.pi * sigma ** 2)) * np.exp(
            -((x - center) ** 2 + (y - center) ** 2) / (2 * sigma ** 2)
        )

    kernel = np.fromfunction(gaussian, (kernel_size, kernel_size))
    kernel /= np.sum(kernel)  # Normalize the kernel so its weights sum to 1

    # Treat a grayscale image as a one-channel colour image so a single loop
    # nest handles both layouts.
    is_grayscale = image.ndim == 2
    pixels = image[:, :, np.newaxis] if is_grayscale else image

    # Apply convolution manually: zero-pad rows and columns, never channels
    pad = kernel_size // 2
    padded_image = np.pad(pixels, ((pad, pad), (pad, pad), (0, 0)), mode='constant')
    result = np.zeros_like(pixels)
    needs_rounding = result.dtype.kind in 'iu'  # signed / unsigned integer output

    for i in range(pixels.shape[0]):
        for j in range(pixels.shape[1]):
            for c in range(pixels.shape[2]):  # every channel, however many there are
                value = np.sum(padded_image[i:i+kernel_size, j:j+kernel_size, c] * kernel)
                result[i, j, c] = round(float(value)) if needs_rounding else value
    return result[:, :, 0] if is_grayscale else result

# Function to apply Gaussian blur using SIMD (OpenCV's optimized filter2D)
def simd_gaussian_blur(image, kernel_size=5, sigma=1.0):
    """Blur `image` by passing a normalized Gaussian kernel to ``cv2.filter2D``.

    The kernel is a `kernel_size` x `kernel_size` Gaussian with standard
    deviation `sigma`, centred on index ``(kernel_size - 1) // 2`` and scaled
    so its weights sum to 1. ``filter2D`` is called with ddepth ``-1``.
    """
    center = (kernel_size - 1) // 2

    def gaussian(x, y):
        return (1 / (2 * np.pi * sigma ** 2)) * np.exp(
            -((x - center) ** 2 + (y - center) ** 2) / (2 * sigma ** 2)
        )

    weights = np.fromfunction(gaussian, (kernel_size, kernel_size))
    weights /= np.sum(weights)  # Normalize the kernel
    # OpenCV optimized filter (SIMD under the hood)
    return cv2.filter2D(image, -1, weights)

# Measure time for manual Gaussian blur (Without SIMD)
# time.perf_counter() replaces time.time() because it is the monotonic,
# high-resolution clock meant for measuring elapsed intervals; the OpenCV
# call below finishes in milliseconds, where clock resolution matters.
start = time.perf_counter()
manual_blurred_image = manual_gaussian_blur(image)
end = time.perf_counter()
print(f"Time taken for manual Gaussian blur (Without SIMD): {end - start:.4f} seconds")

# Measure time for SIMD-based Gaussian blur (Using OpenCV optimized function)
start = time.perf_counter()
simd_blurred_image = simd_gaussian_blur(image)
end = time.perf_counter()
print(f"Time taken for SIMD-based Gaussian blur (With SIMD): {end - start:.4f} seconds")

# Display the images (optional)
# The windows stay open until a key is pressed, then all of them are closed.
cv2.imshow('Original Image', image)
cv2.imshow('Manual Gaussian Blur', manual_blurred_image)
cv2.imshow('SIMD Gaussian Blur', simd_blurred_image)
cv2.waitKey(0)
cv2.destroyAllWindows()


Time taken for manual Gaussian blur (Without SIMD): 6.8529 seconds
Time taken for SIMD-based Gaussian blur (With SIMD): 0.0030 seconds
