**Comprehensive NumPy Operations Guide**

In [5]:
import time

import numpy as np

## 1. **Reshaping and Transposing**

In [None]:
### Reshaping
# reshape() lays the same elements out under a new shape; the total number of
# elements (the product of the dimensions) has to stay the same.

arr = np.arange(1, 7)  # 1-D array holding 1..6
arr

array([1, 2, 3, 4, 5, 6])

In [None]:
# 1D to 2D: the 6 elements are laid out row by row (2 * 3 = 6)
arr_2d = arr.reshape(2, 3)  # 2 rows, 3 columns
arr_2d

array([[1, 2, 3],
       [4, 5, 6]])

In [11]:
# 1D to 3D: 2 blocks, each with 3 rows and 1 column (2 * 3 * 1 = 6 elements)
arr_3d = arr.reshape(2, 3, 1)  # depth=2, rows=3, cols=1
arr_3d

array([[[1],
        [2],
        [3]],

       [[4],
        [5],
        [6]]])

In [20]:
# Using -1 instead of the row or column count in reshape (NumPy infers that dimension)
# Note: only the last expression in the cell is displayed as output.
arr.reshape(3, -1)  # 3 rows, NumPy calculates 2 columns
arr.reshape(-1, 2)  # NumPy calculates 3 rows, 2 columns

array([[1, 2],
       [3, 4],
       [5, 6]])

In [None]:
# Flatten to 1D
# Uses arr_2d (shape (2, 3)) and arr_3d (shape (2, 3, 1)) defined in the cells above.
arr_2d.flatten()     # [1, 2, 3, 4, 5, 6] - always returns a copy
arr_3d.ravel()       # [1, 2, 3, 4, 5, 6] - returns a view when possible (faster)

array([1, 2, 3, 4, 5, 6])

In [None]:
### Transposing
# Transposing reverses the order of the axes (for 2D: rows become columns).
# Under the hood: both `.T` and `.transpose()` return views, not copies - only
# the stride information (how NumPy steps through memory) changes.

# 2D transpose
arr = np.arange(1, 7).reshape(2, 3)  # [[1, 2, 3],
                                     #  [4, 5, 6]]

arr.T       # [[1, 4],
            #  [2, 5],
            #  [3, 6]]

array([[1, 4],
       [2, 5],
       [3, 6]])

In [32]:
# Alternative spelling: called with no arguments, transpose() reverses the axes
arr.transpose()     # Same as .T

array([[1, 4],
       [2, 5],
       [3, 6]])

In [34]:
# For 3D and higher - specify axis order
# NOTE(review): no random seed is set in this cell, so the values presumably differ on every run
arr_3d = np.random.rand(2, 3, 4)  # shape (2, 3, 4)
arr_3d


array([[[0.47558631, 0.42700972, 0.98053344, 0.533045  ],
        [0.210123  , 0.86524091, 0.57560792, 0.47836423],
        [0.79259575, 0.52696505, 0.88383289, 0.25305597]],

       [[0.1072678 , 0.11313986, 0.18974639, 0.28260553],
        [0.9526364 , 0.65822285, 0.00701839, 0.32565982],
        [0.69686186, 0.17538082, 0.73358882, 0.78207957]]])

In [None]:
arr_3d.transpose(2, 0, 1)         # shape (4, 2, 3)
# The arguments name, for each new axis, the old axis it is taken from:
# new axis 0 <- old axis 2, new axis 1 <- old axis 0, new axis 2 <- old axis 1

array([[[0.47558631, 0.210123  , 0.79259575],
        [0.1072678 , 0.9526364 , 0.69686186]],

       [[0.42700972, 0.86524091, 0.52696505],
        [0.11313986, 0.65822285, 0.17538082]],

       [[0.98053344, 0.57560792, 0.88383289],
        [0.18974639, 0.00701839, 0.73358882]],

       [[0.533045  , 0.47836423, 0.25305597],
        [0.28260553, 0.32565982, 0.78207957]]])

## 2. **Broadcasting**
Broadcasting allows operations between arrays of different shapes without explicit loops.

### Broadcasting Rules:
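1. Compare dimensions from right to left
2. Dimensions are compatible if:
   - They're equal, OR
   - One of them is 1
3. If one array has fewer dimensions, its shape is padded with 1s on the left (e.g. a `(3, 1)` array is treated as `(1, 3, 1)` next to a 3-D array)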
### Under the hood:
 NumPy doesn't actually copy the smaller array. It uses **stride tricks** to virtually repeat elements without using extra memory. This is why broadcasting is so efficient!

In [36]:
# Scalar broadcasting (simplest case)
arr = np.arange(1, 5)  # [1, 2, 3, 4]
arr + 10  # [11, 12, 13, 14] - the scalar is applied to every element

array([11, 12, 13, 14])

In [39]:
# 1D and 2D
arr_2d = np.arange(1, 10).reshape(3, 3)  # 3x3 grid holding 1..9, filled row by row
arr_2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [40]:
arr_1d = np.arange(10, 40, 10)  # [10, 20, 30]
arr_1d

array([10, 20, 30])

In [42]:
# arr_1d has shape (3,), so it lines up with the last axis of the (3, 3) arr_2d
arr_2d + arr_1d  # Adds [10, 20, 30] to each row
# [[11, 22, 33],
#  [14, 25, 36],
#  [17, 28, 39]]

array([[11, 22, 33],
       [14, 25, 36],
       [17, 28, 39]])

In [43]:
# Column-wise operation (need to reshape)
# Reshaping to (3, 1) turns the values into a column vector, so value i is
# added to every element of row i.
col_arr = np.array([100, 200, 300]).reshape(3, 1)
arr_2d + col_arr  # Adds [100, 200, 300] to each column
# [[101, 102, 103],
#  [204, 205, 206],
#  [307, 308, 309]]

array([[101, 102, 103],
       [204, 205, 206],
       [307, 308, 309]])

In [45]:
# Complex example: operands with a different number of dimensions
a = np.array([[[1, 2]]])      # shape (1, 1, 2)
b = np.array([[3], [4], [5]]) # shape (3, 1), treated as (1, 3, 1) when broadcasting
result = a + b                 # shape (1, 3, 2)
assert result.shape == (1, 3, 2)  # pins the shape claimed in the comments
result
# Dimensions, aligned from the right: (1, 1, 2) + (1, 3, 1) -> (1, 3, 2) where:
# - 1 is common (b's missing leading dimension is padded with 1)
# - 3 comes from b's first dimension
# - 2 comes from a's last dimension

array([[[4, 5],
        [5, 6],
        [6, 7]]])

In [48]:
### Broadcasting Visualization:

# Shape (3, 1) + Shape (1, 4) → Shape (3, 4)
a = np.arange(1, 4).reshape(3, 1)        # column vector: [[1], [2], [3]]
b = np.arange(10, 50, 10).reshape(1, 4)  # row vector: [[10, 20, 30, 40]]
result = a + b                           # each value of a is paired with each value of b
result
# [[11, 21, 31, 41],
#  [12, 22, 32, 42],
#  [13, 23, 33, 43]]

array([[11, 21, 31, 41],
       [12, 22, 32, 42],
       [13, 23, 33, 43]])

## 3. **Element-wise Operations**
These operations apply to corresponding elements without explicit loops.
NumPy's vectorized operations are **10-100x faster** than Python loops because:
- Implemented in C
- Uses SIMD (Single Instruction Multiple Data) instructions
- Eliminates Python interpreter overhead


In [52]:
# Two equally shaped operands for the element-wise examples below
a = np.arange(1, 5)        # [1, 2, 3, 4]
b = np.arange(10, 50, 10)  # [10, 20, 30, 40]

In [56]:
# Arithmetic operators work element by element
a + b      # [11, 22, 33, 44]

array([11, 22, 33, 44])

In [57]:
# Element-wise subtraction
a - b      # [-9, -18, -27, -36]

array([ -9, -18, -27, -36])

In [58]:
# Only the last expression (a ** 2) is displayed as the cell output
a * b      # [10, 40, 90, 160]
a / b      # [0.1, 0.1, 0.1, 0.1]
a ** 2     # [1, 4, 9, 16]

array([ 1,  4,  9, 16])

In [59]:
# Comparison (element-wise, returns a boolean array of the same shape)
a > 2      # [False, False, True, True]


array([False, False,  True,  True])

In [60]:
# Element-wise equality between the two arrays
a == b     # [False, False, False, False]

array([False, False, False, False])

In [61]:
# Mathematical functions (applied element-wise; only the last result is displayed)
np.sin(a)   # sine
np.log(a)   # natural logarithm
np.exp(a)   # e raised to each element
np.sqrt(a)  # square root

array([1.        , 1.41421356, 1.73205081, 2.        ])

In [63]:
# For 2D
matrix = np.arange(1, 5).reshape(2, 2)  # [[1, 2], [3, 4]]
matrix ** 2  # [[1, 4], [9, 16]] - squares each element, NOT a matrix power!
np.square(matrix)  # function form of the same element-wise square

array([[ 1,  4],
       [ 9, 16]])

## 4. **Aggregations**

In [None]:
# Input for the aggregation examples: the integers 1..6
arr = np.arange(1, 7)
arr

array([1, 2, 3, 4, 5, 6])

In [None]:
# Common aggregations (only the last expression is displayed as the cell output)
np.sum(arr)      # 21
np.mean(arr)     # 3.5
np.std(arr)      # 1.707... (standard deviation; population form, ddof=0 by default)
np.var(arr)      # 2.916... (variance; population form, ddof=0 by default)
np.min(arr)      # 1
np.max(arr)      # 6
np.median(arr)   # 3.5
np.argmin(arr)   # 0 (index of min)
np.argmax(arr)   # 5 (index of max)

In [None]:
### Multi-dimensional Aggregations (IMPORTANT!)


arr_2d = np.arange(1, 7).reshape(2, 3)  # [[1, 2, 3],
                                        #  [4, 5, 6]]

# Whole-array aggregate
arr_2d.sum()   # 21

# Aggregate along a single axis
arr_2d.sum(axis=0)   # [5, 7, 9] - one total per column
arr_2d.sum(axis=1)   # [6, 15] - one total per row

# Rule of thumb: the axis you name is the one that "collapses"
# axis=0 -> the rows are combined, one value per column remains
# axis=1 -> the columns are combined, one value per row remains

# Several axes at once
arr_3d = np.random.random((2, 3, 4))
arr_3d.sum(axis=(0, 2))  # collapses the first and last axes

# Keep the collapsed axis with length 1
arr_2d.sum(axis=1, keepdims=True)  # shape (2, 1) instead of (2,)
# [[6],
#  [15]]

In [None]:
### Cumulative Operations


arr = np.arange(1, 5)  # [1, 2, 3, 4]

arr.cumsum()   # [1, 3, 6, 10] - running total
arr.cumprod()  # [1, 2, 6, 24] - running product

## 5. **Dot Products and Matrix Multiplication**

In [None]:
## Dot Product (1D)

a = np.arange(1, 4)  # [1, 2, 3]
b = np.arange(4, 7)  # [4, 5, 6]

# Dot product: 1*4 + 2*5 + 3*6 = 32
a.dot(b)  # 32
a_dot_prod_b = a @ b  # 32 (the @ operator needs Python 3.5+)
a_dot_prod_b


np.int64(32)

In [None]:
### Matrix Multiplication (2D)

# Shapes: A is (2, 3), B is (3, 2), so A @ B is (2, 2)
A = np.arange(1, 7).reshape(2, 3)   # [[1, 2, 3],
                                    #  [4, 5, 6]]

B = np.arange(7, 13).reshape(3, 2)  # [[7, 8],
                                    #  [9, 10],
                                    #  [11, 12]]

In [70]:
# Matrix multiplication (NOT element-wise!)
result = np.dot(A, B)  # or A @ B
result
# [[58, 64],
#  [139, 154]]

# Calculation (row of A times column of B):
# result[0,0] = 1*7 + 2*9 + 3*11 = 58
# result[0,1] = 1*8 + 2*10 + 3*12 = 64
# result[1,0] = 4*7 + 5*9 + 6*11 = 139
# result[1,1] = 4*8 + 5*10 + 6*12 = 154

array([[ 58,  64],
       [139, 154]])

In [None]:
# Element-wise multiplication (Hadamard product)
C = np.arange(1, 5).reshape(2, 2)  # [[1, 2], [3, 4]]
D = np.arange(5, 9).reshape(2, 2)  # [[5, 6], [7, 8]]
C * D  # [[5, 12], [21, 32]] - each entry is C[i, j] * D[i, j]

**Key difference:**
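- `*` → element-wise multiplication
- `@` or `np.dot()` → matrix multiplication (equivalent for 1-D and 2-D arrays)
- `np.matmul()` → same as `@` (preferred for matrices; for arrays with more than two dimensions `np.dot()` follows different rules, so use `@`/`np.matmul()` for batches)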

In [None]:
### Higher Dimensions

# Batch matrix multiplication
A = np.random.random((10, 3, 4))  # stack of 10 matrices, each (3, 4)
B = np.random.random((10, 4, 5))  # stack of 10 matrices, each (4, 5)
result = np.matmul(A, B)          # shape (10, 3, 5)
result
# Multiplies corresponding pairs: A[i] @ B[i]

## 6. **Linear Algebra Operations**
NumPy's `linalg` module provides comprehensive linear algebra functionality.

In [None]:
A = np.array([[1, 2],
              [3, 4]])

# Matrix operations
np.linalg.det(A)         # Determinant: -2.0 (up to floating-point rounding)
np.linalg.inv(A)         # Inverse matrix
np.linalg.matrix_rank(A) # Rank: 2

# Eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(A)

# Solving linear systems: Ax = b
b = np.array([5, 6])
x = np.linalg.solve(A, b)  # Solves Ax = b for x
assert np.allclose(A @ x, b)  # sanity check: the solution satisfies the system

# Matrix decompositions
# SVD (Singular Value Decomposition)
U, S, Vt = np.linalg.svd(A)

# QR decomposition
Q, R = np.linalg.qr(A)

# Cholesky decomposition (for positive definite matrices)
symmetric = np.array([[4, 2], [2, 3]])
L = np.linalg.cholesky(symmetric)

# Norms
np.linalg.norm(A)            # Frobenius norm (default)
np.linalg.norm(A, ord=2)     # Spectral norm
np.linalg.norm(A, ord='fro') # Frobenius norm (explicit)

# Matrix power
np.linalg.matrix_power(A, 3)  # A @ A @ A

# Practical example: least squares regression
# (written as a comment; a bare string literal here would be a discarded expression)

# Finding best-fit line: y = mx + b
X = np.array([[1, 1], [1, 2], [1, 3], [1, 4]])  # [1, x] for each point
y = np.array([2, 4, 5, 8])

# Solve: X @ [b, m] = y  (lstsq returns a tuple; element 0 is the solution)
coefficients = np.linalg.lstsq(X, y, rcond=None)[0]
# coefficients[0] is intercept, coefficients[1] is slope

## 7. **Vectorization (Avoiding Loops)**
Vectorization is the **most important** optimization technique in NumPy.

In [73]:

### Bad vs Good Examples

arr = np.random.rand(1000000)

# BAD: Python-level loop (SLOW) - one interpreter round-trip per element.
# time.perf_counter() is used for the measurements because it is a monotonic,
# high-resolution clock meant for timing short intervals (time.time() is wall-clock).
start = time.perf_counter()
loop_result = np.zeros_like(arr)
for i in range(len(arr)):
    loop_result[i] = arr[i] ** 2 + 2 * arr[i] + 1
print(f"Loop time: {time.perf_counter() - start:.4f}s")


# GOOD: Vectorized (FAST) - the whole expression is evaluated array-at-a-time
start = time.perf_counter()
result = arr ** 2 + 2 * arr + 1
print(f"Vectorized time: {time.perf_counter() - start:.4f}s")
# Typically 50-100x faster!

# The two versions must agree, otherwise the timing comparison is meaningless
assert np.allclose(loop_result, result)

### Practical Vectorization Examples

# **Example 1: Distance calculation**

# Euclidean distance between corresponding rows of two point sets
N_POINTS = 1000  # single source of truth for the sample size
points1 = np.random.rand(N_POINTS, 3)  # N_POINTS points in 3D
points2 = np.random.rand(N_POINTS, 3)

# Loop version (avoid!) - kept under its own name so it can be compared below
loop_distances = np.zeros(len(points1))
for i, (p, q) in enumerate(zip(points1, points2)):
    loop_distances[i] = np.sqrt(np.sum((p - q) ** 2))

# Vectorized version: subtract all rows at once, then sum the squares along each row
distances = np.sqrt(np.sum((points1 - points2) ** 2, axis=1))

# Both approaches must give the same distances
assert np.allclose(loop_distances, distances)


# **Example 2: Conditional operations**

arr = np.array([1, -2, 3, -4, 5, -6])

# Bad: element-by-element loop with a branch
result = np.zeros_like(arr)
for idx, value in enumerate(arr):
    result[idx] = value ** 2 if value > 0 else value

# Good: np.where takes the second argument where the mask is True, the third elsewhere
result = np.where(arr > 0, arr ** 2, arr)
# [1, -2, 9, -4, 25, -6]

# **Example 3: Moving averages**

# Moving average over a sliding window of 3 values
data = np.arange(1, 10)  # [1, 2, ..., 9]

# Vectorized using convolution with a uniform kernel
window_size = 3
kernel = np.ones(window_size) / window_size  # equal weight for every value in the window
moving_avg = np.convolve(data, kernel, mode='valid')  # 'valid' keeps only full windows
# [2., 3., 4., 5., 6., 7., 8.]


### Advanced Vectorization Techniques

# Using `np.vectorize` (when you must use a Python function)

def custom_func(x):
    """Map a scalar: negatives -> -x, values below 5 -> x ** 2, everything else -> x + 10."""
    if x < 0:
        return -x
    if x < 5:
        return x ** 2
    return x + 10

# Wrap the scalar function so it can be called on whole arrays
vectorized_func = np.vectorize(custom_func)
arr = np.array([-2, 3, 7, -5])
result = vectorized_func(arr)  # [2, 9, 17, 5]

# Note: np.vectorize is syntactic sugar, not as fast as true vectorization

# Using `np.apply_along_axis`

# Apply function along specific axis
def normalize_row(row):
    """Divide a 1-D array by the sum of its entries."""
    total = np.sum(row)
    return row / total

matrix = np.arange(1, 7).reshape(2, 3)  # [[1, 2, 3],
                                        #  [4, 5, 6]]

normalized = np.apply_along_axis(normalize_row, axis=1, arr=matrix)
# Each row sums to 1


Loop time: 0.4926s
Vectorized time: 0.0066s
