# 📚 Python for ML/AI - Part 3: NumPy

## The Foundation of Machine Learning in Python

NumPy provides fast array operations implemented in C. It's the basis for:
- Pandas (data manipulation)
- Matplotlib (visualization)
- Scikit-learn (ML algorithms)
- PyTorch/TensorFlow (deep learning)

---


In [None]:
# NumPy is imported under the alias `np`, the convention used throughout
# the scientific Python ecosystem.
import numpy as np

# Report the installed release so printed results can be tied to a version.
numpy_version = np.__version__
print("NumPy version:", numpy_version)


## 1. Creating Arrays


In [None]:
# Build arrays from plain Python lists: a flat list becomes a 1D array,
# a list of equal-length lists becomes a 2D array.
flat_values = [1, 2, 3, 4, 5]
nested_rows = [
    [1, 2, 3],
    [4, 5, 6],
]
arr1d = np.array(flat_values)
arr2d = np.array(nested_rows)

print(f"1D array: {arr1d}")
print(f"2D array:\n {arr2d}")

# .shape is a tuple holding the length of each axis.
print("\nShape of 1D:", arr1d.shape)  # (5,)
print("Shape of 2D:", arr2d.shape)    # (2, 3) -> 2 rows, 3 columns


In [None]:
# Common array creation functions.
# Each helper takes the desired shape as a (rows, columns) tuple.
zeros = np.zeros((3, 4))         # 3x4 matrix of zeros
ones = np.ones((2, 3))           # 2x3 matrix of ones
identity = np.eye(3)             # 3x3 identity matrix (ones on the diagonal)
full = np.full((2, 3), 7)        # 2x3 matrix filled with the constant 7

print("Zeros (3x4):\n", zeros)
print("\nOnes (2x3):\n", ones)
print("\nIdentity (3x3):\n", identity)
print("\nFull of 7s (2x3):\n", full)


In [None]:
# Range-like arrays
arange = np.arange(0, 10, 2)       # start, stop (excluded), step -- like range()
linspace = np.linspace(0, 1, 5)    # 5 evenly spaced numbers from 0 to 1 (both ends included)

print("arange(0, 10, 2):", arange)
print("linspace(0, 1, 5):", linspace)

# Random arrays (very common in ML for initialization!)
np.random.seed(42)  # Fix the global seed so every run prints the same numbers
random_uniform = np.random.rand(3, 3)           # Uniform on [0, 1)
random_normal = np.random.randn(3, 3)           # Normal distribution (mean=0, std=1)
random_int = np.random.randint(0, 10, (3, 3))   # Integers in [0, 10): the upper bound is excluded

print("\nRandom uniform:\n", random_uniform)
print("\nRandom normal:\n", random_normal)
print("\nRandom integers:\n", random_int)


## 2. Array Properties


In [None]:
# A 3x4 integer matrix used to inspect the basic array attributes.
arr = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])

print(f"Array:\n {arr}")
print("\nShape:", arr.shape)      # (3, 4) -> 3 rows, 4 columns
print("Dimensions:", arr.ndim)    # 2 axes
print("Size:", arr.size)          # 12 elements in total
# The default integer type is platform dependent: int64 on most systems;
# Windows builds of NumPy older than 2.0 default to int32.
print("Data type:", arr.dtype)


## 3. Vectorized Operations (No Loops!)

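This is what makes NumPy fast! In plain Python (as in C) you would write an explicit loop over the elements. In NumPy, a single expression applies to the entire array, and the loop runs inside NumPy's compiled C code.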


In [None]:
# Two equal-length vectors; each operator below pairs up matching positions.
a = np.array([1, 2, 3, 4])
b = np.array([10, 20, 30, 40])

# None of these needs an explicit loop in your own code.
print("a =", a)
print("b =", b)
print("a + b =", a + b)      # element-wise addition
print("a * b =", a * b)      # element-wise multiplication
print("a ** 2 =", a ** 2)    # every element squared
print("a / b =", a / b)      # element-wise true division (float result)


In [None]:
# NumPy's math functions are applied to every element of the array at once.
x = np.array([0, np.pi / 4, np.pi / 2, np.pi])
print(f"x = {x}")

# Each entry pairs a display label with the already-evaluated result.
for label, values in (
    ("sin(x)", np.sin(x)),
    ("cos(x)", np.cos(x)),
    ("exp(x)", np.exp(x)),
    ("log(x+1)", np.log(x + 1)),   # shifted by 1 because log(0) is -inf
    ("sqrt(x)", np.sqrt(x)),
):
    print(f"{label} = {values}")


## 4. Indexing and Slicing


In [None]:
# 2D indexing uses matrix[row, column]; a bare ":" selects a whole axis.
matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print(f"Matrix:\n {matrix}")

single_element = matrix[0, 1]        # row 0, column 1 -> 2
second_row = matrix[1, :]            # every column of row 1
third_column = matrix[:, 2]          # every row of column 2
upper_right_block = matrix[0:2, 1:3]  # rows 0-1, columns 1-2 -> 2x2 submatrix

print("\nElement [0,1]:", single_element)
print("Row 1:", second_row)
print("Column 2:", third_column)
print("Submatrix [0:2, 1:3]:", upper_right_block, sep="\n")


### Boolean Indexing (Filtering Data)


In [None]:
# Boolean indexing: a comparison produces a True/False mask, and indexing
# with that mask keeps only the elements where it is True.
arr = np.array([1, 5, 2, 8, 3, 9, 4, 7])

above_five_mask = arr > 5
even_mask = arr % 2 == 0

print("Array:", arr)
print("arr > 5:", above_five_mask)            # the mask itself
print("Elements > 5:", arr[above_five_mask])  # values selected by the mask
print("Even elements:", arr[even_mask])


## 5. Reshaping Arrays (Critical for ML!)


In [None]:
# Reshaping rearranges the same 12 values into a new shape.
arr = np.arange(12)  # 0, 1, 2, ..., 11
print("Original shape:", arr.shape)  # (12,)

# Explicit target shape: 3 rows by 4 columns.
matrix = arr.reshape((3, 4))
print("\nReshaped to (3, 4):", matrix, sep="\n")

# -1 asks NumPy to infer that dimension: 12 elements / 4 rows = 3 columns.
matrix2 = arr.reshape((4, -1))
print("\nReshaped to (4, -1):", matrix2, sep="\n")


In [None]:
# Transposing swaps the axes: element [i, j] moves to [j, i].
matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
transposed = matrix.transpose()

print("Original (2x3):", matrix, sep="\n")
print("\nTransposed (3x2):", transposed, sep="\n")


## 6. Aggregation and Statistics


In [None]:
# Basic statistics
# Seed the global generator so this cell prints the same numbers on every
# run, regardless of which cells were executed before it.
np.random.seed(42)
data = np.random.randn(1000)  # 1000 random numbers from normal distribution

# With 1000 samples the mean lands near 0 and the std near 1, not exactly on them.
print(f"Mean: {np.mean(data):.4f}")
print(f"Std: {np.std(data):.4f}")
print(f"Min: {np.min(data):.4f}")
print(f"Max: {np.max(data):.4f}")
print(f"Median: {np.median(data):.4f}")
print(f"Sum: {np.sum(data):.4f}")


In [None]:
# Aggregating along an axis collapses that axis:
#   axis=0 reduces over the rows    -> one result per column
#   axis=1 reduces over the columns -> one result per row
matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

grand_total = matrix.sum()
column_sums = matrix.sum(axis=0)
row_sums = matrix.sum(axis=1)
column_means = matrix.mean(axis=0)

print(f"Matrix:\n {matrix}")
print("\nSum all:", grand_total)
print("Sum each column (axis=0):", column_sums)  # [12, 15, 18]
print("Sum each row (axis=1):", row_sums)        # [6, 15, 24]
print("Mean each column:", column_means)


## 7. Matrix Operations (Linear Algebra)


In [None]:
# Matrix multiplication is the core operation of neural networks.
# Note that `*` is NOT matrix multiplication: it multiplies matching elements.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

elementwise_product = A * B     # multiplies entries position by position
matmul_product = A @ B          # rows of A combined with columns of B
dot_product = np.dot(A, B)      # same result as @ for 2D arrays

print(f"A:\n {A}")
print(f"B:\n {B}")
print(f"\nElement-wise (A * B):\n {elementwise_product}")
print(f"\nMatrix multiply (A @ B):\n {matmul_product}")
print(f"\nMatrix multiply (np.dot):\n {dot_product}")


In [None]:
# Linear algebra operations
A = np.array([[1, 2], [3, 4]])

print("Matrix A:\n", A)
print(f"\nDeterminant: {np.linalg.det(A):.4f}")  # 1*4 - 2*3 = -2
print(f"\nInverse:\n{np.linalg.inv(A)}")

# Eigen-decomposition (used in PCA, etc.)
# Column i of `eigenvectors` belongs to eigenvalues[i], i.e.
# A @ eigenvectors[:, i] == eigenvalues[i] * eigenvectors[:, i]
eigenvalues, eigenvectors = np.linalg.eig(A)
print(f"\nEigenvalues: {eigenvalues}")
print(f"\nEigenvectors (one per column):\n{eigenvectors}")


## 8. Broadcasting

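NumPy automatically stretches ("broadcasts") the smaller array across the larger one when their shapes are compatible, so no manual tiling or looping is needed. This is how normalization works!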


In [None]:
# Broadcasting: a (3,) row combined with a (3, 3) matrix is applied to
# every row of the matrix.
matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
row = np.array([10, 20, 30])

# `row` is added to each of the three matrix rows in turn.
broadcast_sum = matrix + row

print(f"Matrix:\n {matrix}")
print("\nRow:", row)
print(f"\nMatrix + Row (broadcasting):\n {broadcast_sum}")


In [None]:
# Practical ML example: z-score normalization.
# Each feature (column) is rescaled to mean 0 and standard deviation 1.

# Simulated dataset: 100 samples (rows) x 3 features (columns).
np.random.seed(42)
data = np.random.rand(100, 3) * 100  # uniform values in [0, 100)

# Per-feature statistics: axis=0 reduces over the samples, leaving one
# value per column.
mean = data.mean(axis=0)
std = data.std(axis=0)

print(f"Original data (first 5 rows):\n {data[:5]}")
print("\nMean per feature:", mean)
print("Std per feature:", std)

# Broadcasting stretches the (3,) statistics across all 100 rows.
normalized = (data - mean) / std

normalized_mean = normalized.mean(axis=0)
normalized_std = normalized.std(axis=0)
print("\nNormalized mean:", normalized_mean)  # ~0, up to floating-point rounding
print("Normalized std:", normalized_std)      # ~1


## ✏️ Exercise: Mini ML Pipeline

Implement the prediction step of a simple linear regression using NumPy:
1. Generate synthetic data
2. Implement the formula: y = Xw + b
3. Calculate predictions
4. Measure the error with Mean Squared Error (MSE)


In [None]:
# Synthetic regression data: targets follow y = Xw + b plus Gaussian noise,
# so the "true" parameters are known in advance.
np.random.seed(42)

# Ground-truth parameters used to generate the targets.
true_b = 4.0                     # bias
true_w = np.array([2.5, -1.3])   # one weight per feature

# Draw the features first and the noise second; the order of the random
# calls determines the generated values.
n_samples, n_features = 100, 2
X = np.random.randn(n_samples, n_features)
noise = 0.5 * np.random.randn(n_samples)

# Linear model plus noise.
y = X @ true_w + true_b + noise

print("X shape:", X.shape)
print("y shape:", y.shape)
print("First 5 X values:", X[:5], sep="\n")
print("First 5 y values:", y[:5])


In [None]:
# YOUR TURN: Implement prediction
# Given weights w and bias b, calculate predictions: y_pred = X @ w + b
# (X and y are the arrays built in the data-generation cell above; run it first.)

w = np.array([2.0, -1.0])  # Initial guess for weights (one entry per feature column of X)
b = 3.0                     # Initial guess for bias (a scalar, broadcast over all samples)

# Calculate predictions
# y_pred = ...  # YOUR CODE HERE

# Calculate Mean Squared Error: MSE = mean((y - y_pred)^2)
# mse = ...  # YOUR CODE HERE

# Uncomment the lines below once y_pred and mse are defined.
# print(f"Predictions (first 5): {y_pred[:5]}")
# print(f"Actual (first 5): {y[:5]}")
# print(f"Mean Squared Error: {mse:.4f}")


---
## 📝 NumPy Cheat Sheet

```python
# Creation
np.array([1,2,3])       # From list
np.zeros((m,n))         # m×n zeros
np.ones((m,n))          # m×n ones
np.eye(n)               # n×n identity
np.random.rand(m,n)     # Random [0,1)
np.random.randn(m,n)    # Random normal
np.arange(start,stop,step)
np.linspace(start,stop,n)

# Properties
arr.shape               # Dimensions
arr.dtype               # Data type
arr.size                # Total elements

# Reshaping
arr.reshape(m,n)        # New shape
arr.T                   # Transpose
arr.flatten()           # To 1D

# Operations
arr + arr, arr * arr    # Element-wise
arr @ arr               # Matrix multiply
np.dot(a, b)            # Dot product

# Statistics
np.mean(arr, axis=0)    # 2D: axis=0 -> per column, axis=1 -> per row
np.std(arr, axis=0)
np.sum(arr, axis=0)
np.min(arr), np.max(arr)

# Indexing
arr[i, j]               # Single element
arr[i, :]               # Row i
arr[:, j]               # Column j
arr[arr > x]            # Boolean filter
```

---

## ➡️ Next Steps

Continue to **04_pandas_basics.ipynb** to learn data manipulation with Pandas!
