# ⚡ 06 - NumPy Optimization & Performance

In this notebook, we’ll explore why **NumPy is fast**, and how to optimize code using **vectorization, broadcasting, and memory-efficient techniques**.

---

## 1. Imports
```python
import numpy as np


In [None]:
import numpy as np
import time

N = 10_000_000
a = list(range(N))

start = time.time()
sum_loop = 0
for i in a:
    sum_loop += i
end = time.time()
print("Python loop sum:", sum_loop, "Time:", end - start)

arr = np.arange(N)
start = time.time()
sum_numpy = arr.sum()
end = time.time()
print("NumPy sum:", sum_numpy, "Time:", end - start)


Python loop sum: 49999995000000 Time: 0.745593786239624
NumPy sum: -2014260032 Time: 0.005000114440917969


In [2]:
N = 1_000_000
x = np.random.rand(N)

squares_loop = [i**2 for i in x]

squares_vec = x**2

In [3]:
X = np.random.randn(5, 3)
print("Original matrix:\n", X)

X_norm_loop = np.zeros_like(X)
for i in range(X.shape[0]):
    X_norm_loop[i, :] = X[i, :] / np.linalg.norm(X[i, :])

row_norms = np.linalg.norm(X, axis=1, keepdims=True)
X_norm_vec = X / row_norms

print("\nNormalized (vectorized):\n", X_norm_vec)


Original matrix:
 [[-0.09034884 -1.09383384  1.65070673]
 [ 0.2708016  -0.74554702  0.02502504]
 [ 0.88482834  1.30986285  1.21464809]
 [-0.53281872  0.0080556   1.31313637]
 [-0.977813   -0.09587635 -1.14969611]]

Normalized (vectorized):
 [[-0.04557805 -0.55180353  0.83272775]
 [ 0.34123208 -0.93945     0.03153359]
 [ 0.44385728  0.65706775  0.60930508]
 [-0.37598152  0.0056844   0.92660973]
 [-0.64656489 -0.06339687 -0.76022015]]


In [4]:
x = np.random.rand(1000)

def loop_sum(x):
    total = 0
    for i in x:
        total += i
    return total

%timeit loop_sum(x)
%timeit np.sum(x)


79.3 µs ± 6.29 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
2.56 µs ± 83.7 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [None]:
a_float32 = np.ones((1000, 1000), dtype=np.float32)
a_float64 = np.ones((1000, 1000), dtype=np.float64)

print("Float32 size:", a_float32.nbytes, "bytes")
print("Float64 size:", a_float64.nbytes, "bytes")

In [5]:
np.random.seed(42)
X = 2 * np.random.rand(1000, 1)
y = 4 + 3 * X + np.random.randn(1000, 1)

X_b = np.c_[np.ones((1000, 1)), X]

theta = np.random.randn(2, 1)
lr = 0.1
epochs = 1000

for epoch in range(epochs):
    gradients = np.zeros_like(theta)
    for i in range(len(X_b)):
        xi = X_b[i:i+1]
        yi = y[i:i+1]
        gradients += xi.T @ (xi @ theta - yi)
    gradients /= len(X_b)
    theta -= lr * gradients

print("Theta (loop version):", theta.ravel())

theta_vec = np.random.randn(2, 1)

for epoch in range(epochs):
    gradients = X_b.T @ (X_b @ theta_vec - y) / len(X_b)
    theta_vec -= lr * gradients

print("Theta (vectorized):", theta_vec.ravel())

Theta (loop version): [4.17478007 2.92260758]
Theta (vectorized): [4.17478002 2.92260762]
