# Coding Practice Session 6
## Vectorization

In [1]:
import numpy as np
import time

In [2]:
# Arithmetic operators on NumPy arrays are applied element by element,
# so `a + b` adds matching positions without an explicit loop.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

a + b

array([5, 7, 9])

In [None]:
# Baseline: square each number with an explicit Python loop,
# collecting the results in a new list.
numbers = [1, 2, 3, 4, 5]
squared = []

for number in numbers:
    squared.append(number**2)

squared

[1, 4, 9, 16, 25]

In [None]:
# Same computation, vectorized: `**` is applied to every element of the
# array in one expression, with no Python-level loop.
numbers = np.array([1, 2, 3, 4, 5])
squared = numbers**2
squared

array([ 1,  4,  9, 16, 25])

### Performance Comparison

In [None]:
# pure python loop
def square_loop(numbers: np.ndarray) -> list[int | float]:
    squared = []
    for number in numbers:
        squared.append(number**2)
    return squared

In [None]:
# vectorized operation with NumPy
def square_vectorized(numbers: np.ndarray) -> np.ndarray:
    """Square every element of ``numbers`` with a single array expression.

    Args:
        numbers: Array of values to square.

    Returns:
        The element-wise squares of ``numbers``.
    """
    squared = numbers**2
    return squared

In [7]:
# Seeded generator: every "Restart & Run All" benchmarks the same one
# million uniform samples from [0, 1), so runs are comparable.
rng = np.random.default_rng(seed=42)
numbers = rng.random(1_000_000)

In [8]:
# execution time for python loop
# (a single run, timed with time.perf_counter; the result is kept in
# `squared_loop` and the elapsed seconds in `loop_time`)
start_time = time.perf_counter()
squared_loop = square_loop(numbers)
end_time = time.perf_counter()
loop_time = end_time - start_time

In [9]:
# execution time for vectorized operation
# The result gets its own name (`squared_vectorized`). Assigning it back to
# `square_vectorized` would replace the function with an array, and running
# this cell a second time would then fail with
# "TypeError: 'numpy.ndarray' object is not callable".
start_time = time.perf_counter()
squared_vectorized = square_vectorized(numbers)
end_time = time.perf_counter()
vectorized_time = end_time - start_time

In [10]:
# Report both measurements in seconds, rounded to three decimal places.
print(f"Python loop time: {loop_time:.3f} seconds")
print(f"Vectorized time: {vectorized_time:.3f} seconds")

Python loop time: 0.062 seconds
Vectorized time: 0.003 seconds


### Writing Vectorized Code

In [None]:
def euclidean_distance(point1, point2):
    """Compute the Euclidean distance between points or batches of points.

    Coordinates are read along the last axis, so the same expression handles
    a single pair of points (1-D inputs) and stacks of points (2-D or
    higher). The inputs are broadcast against each other, which also allows
    comparing one point against a whole batch.

    Parameters
    ----------
    point1, point2 : array_like
        Point coordinates. Lists and tuples are accepted as well as arrays;
        the shapes must be broadcast-compatible.

    Returns
    -------
    np.float64 or np.ndarray
        A scalar for 1-D inputs; otherwise an array with the last axis
        reduced, i.e. one distance per point.
    """
    difference = np.asarray(point1) - np.asarray(point2)
    # Summing over the last axis covers the single-point and the batched
    # case alike, so no branching on `ndim` is required.
    return np.sqrt(np.sum(difference**2, axis=-1))

In [41]:
# One pair of points given as 1-D arrays; the result is a single scalar
# distance.
p1 = np.array([5, 6])
p2 = np.array([10, 2])

euclidean_distance(p1, p2)

np.float64(6.4031242374328485)

In [42]:
# Three pairs of points, one point per row; the result holds one distance
# per row.
p1_array = np.array([[1, 3], [5, 2], [7, 8]])
p2_array = np.array([[2, 5], [6, 1], [4, 9]])

euclidean_distance(p1_array, p2_array)

array([2.23606798, 1.41421356, 3.16227766])

#### Using `np.vectorize`

In [43]:
def square_or_cube(num):
    """Return ``num`` squared if it is even, otherwise ``num`` cubed."""
    exponent = 2 if num % 2 == 0 else 3
    return num ** exponent

In [47]:
# Wrap the scalar function so that it can be called on whole arrays.
square_or_cube_vectorized = np.vectorize(square_or_cube)

In [48]:
# Applied to an array: even entries come back squared, odd entries cubed.
arr = np.array([1, 2, 3, 4, 5, 6])
square_or_cube_vectorized(arr)

array([  1,   4,  27,  16, 125,  36])

**Note**: `np.vectorize` takes a function that operates on scalar values and returns a new function that can operate on NumPy arrays. However, it is a convenience, not a performance tool: it is essentially a Python loop in disguise, so it does not give the speed-up of rewriting the original function with true array operations.