In [None]:
import numpy as np
from scipy.linalg import null_space
from sklearn.decomposition import PCA
np.set_printoptions(precision=4, suppress=True)

# 📘 Linear Algebra for Data Science

Comprehensive notes covering all essential linear algebra concepts required for data science, with detailed explanations, mathematical examples, and Python code.

## 1. Scalars, Vectors, and Matrices

### 🔹 Scalars

A scalar is a single number, representing magnitude without direction. Scalars can be real or complex numbers and are used to scale vectors or matrices in operations like multiplication.

* **Example**: $x = 5$ (a real number), or $z = 3 + 4i$ (a complex number).
* **Application**: Scalars are used in data science for scaling features (e.g., normalization) or adjusting model parameters (e.g., learning rate in gradient descent).

### 🔹 Vectors

A vector is an ordered array of numbers, representing a point or direction in space. Vectors can be row or column vectors, and their dimension is the number of components.

* **Row Vector**: $[1, 2, 3]$
* **Column Vector**:
  $$
  \begin{bmatrix}
  1 \\
  2 \\
  3
  \end{bmatrix}
  $$
* **Geometric Interpretation**: In 2D or 3D, vectors represent points or directions (e.g., a displacement).
* **Application**: Vectors represent data points (e.g., feature vectors in machine learning) or model weights.

In [None]:
# Two 3-component vectors used for the basic vector operations below.
v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])
# Component-wise sum of the two vectors.
print("Vector Addition:", np.add(v1, v2))
# Every component of v1 multiplied by the scalar 2.
print("Scalar Multiplication (2 * v1):", np.multiply(2, v1))
# Inner product: sum of the component-wise products.
print("Dot Product:", v1 @ v2)

### 🔹 Matrices

A matrix is a 2D array of numbers, used to represent linear transformations or datasets. An $m \times n$ matrix has $m$ rows and $n$ columns.

* **Example**:
  $$
  A = \begin{bmatrix}1 & 2 \\ 3 & 4\end{bmatrix}
  $$
* **Application**: Matrices represent datasets (rows as samples, columns as features) or transformations (e.g., rotation, scaling).

In [None]:
# Stack the two rows (1, 2) and (3, 4) into the 2x2 example matrix.
A = np.vstack(([1, 2], [3, 4]))
print("Matrix A:\n", A)

### 🔹 Tensors

Tensors generalize scalars (0D), vectors (1D), and matrices (2D) to higher dimensions. A 3D tensor might represent a collection of matrices (e.g., the color channels of an RGB image).

* **Example**: A 3D tensor for an RGB image has dimensions (height, width, channels).
* **Application**: Tensors are central to deep learning (e.g., TensorFlow, PyTorch).

In [None]:
# Use a seeded generator so the notes print the same tensor on every run;
# np.random.default_rng is NumPy's recommended replacement for the legacy
# np.random.rand interface.
rng = np.random.default_rng(seed=0)
# Shape (2, 3, 4): two stacked 3x4 matrices with entries drawn from [0, 1).
tensor = rng.random((2, 3, 4))
print("3D Tensor:\n", tensor)

## 2. Matrix Operations

### 🔹 Addition & Subtraction

Matrices of the same size can be added or subtracted element-wise.

* **Example**:
  $$
  A = \begin{bmatrix}1 & 2 \\ 3 & 4\end{bmatrix},\quad B = \begin{bmatrix}5 & 6 \\ 7 & 8\end{bmatrix}
  $$
  $$
  A + B = \begin{bmatrix}6 & 8 \\ 10 & 12\end{bmatrix}
  $$
* **Application**: Used in combining datasets or updating weights in neural networks.

In [None]:
# Operands of the addition example; both are 2x2 so they can be added
# entry by entry.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
print("A + B:\n", np.add(A, B))

### 🔹 Scalar Multiplication

Multiply each element of a matrix by a scalar.

* **Example**:
  $$
  2A = \begin{bmatrix}2 & 4 \\ 6 & 8\end{bmatrix}
  $$
* **Application**: Feature scaling or adjusting learning rates.

In [None]:
# Scale every entry of A (defined in the addition cell) by the scalar 2.
print("2 * A:\n", np.multiply(2, A))

### 🔹 Matrix Multiplication

Matrix multiplication involves dot products of rows and columns. For $A$ ($m \times n$) and $B$ ($n \times p$), the result is $m \times p$.

* **Example**:
  $$
  A = \begin{bmatrix}1 & 2 \\ 3 & 4\end{bmatrix},\quad B = \begin{bmatrix}2 & 0 \\ 1 & 2\end{bmatrix}
  $$
  $$
  AB = \begin{bmatrix}(1*2 + 2*1) & (1*0 + 2*2) \\ (3*2 + 4*1) & (3*0 + 4*2)\end{bmatrix} = \begin{bmatrix}4 & 4 \\ 10 & 8\end{bmatrix}
  $$
* **Application**: Neural network layer computations, transformations.

In [None]:
# Operands of the worked example in this section. B is defined here because
# the B left over from the addition cell ([[5, 6], [7, 8]]) would not
# reproduce the documented product [[4, 4], [10, 8]].
A = np.array([[1, 2], [3, 4]])
B = np.array([[2, 0], [1, 2]])
# np.dot on 2-D arrays is the matrix product (rows of A dotted with columns
# of B), not the element-wise product that `A * B` computes on NumPy arrays.
print("A * B:\n", np.dot(A, B))

### 🔹 Transpose

The transpose flips rows and columns.

* **Example**:
  $$
  A^T = \begin{bmatrix}1 & 3 \\ 2 & 4\end{bmatrix}
  $$
* **Application**: Used in covariance matrices or gradient computations.

In [None]:
# Swap the rows and columns of A (the 2x2 matrix from the earlier cells).
print("Transpose of A:\n", np.transpose(A))

In [None]:
# Linear transformation example: rotate the unit vector on the x-axis
# counter-clockwise by theta using the 2-D rotation matrix
# [[cos, -sin], [sin, cos]].
theta = np.pi / 4  # 45 degrees
cos_t, sin_t = np.cos(theta), np.sin(theta)
R = np.array([[cos_t, -sin_t], [sin_t, cos_t]])
v = np.array([1, 0])
# Applying the transformation is a matrix-vector product.
rotated_v = R @ v
print("Rotated Vector:", rotated_v)

## 4. Systems of Linear Equations

Represent systems as $A\vec{x} = \vec{b}$.

* **Example**:
  $$
  \begin{aligned}
  2x + 3y &= 5 \\
  4x + y &= 6
  \end{aligned}
  $$
  Matrix form:
  $$
  A = \begin{bmatrix}2 & 3 \\ 4 & 1\end{bmatrix},\quad \vec{x} = \begin{bmatrix}x \\ y\end{bmatrix},\quad \vec{b} = \begin{bmatrix}5 \\ 6\end{bmatrix}
  $$
* **Solution**: Solve using inverse or numerical methods.
* **Application**: Solving for model parameters in linear regression.

In [None]:
# Right-hand side and coefficient matrix of the system
#   2x + 3y = 5
#   4x +  y = 6
b = np.array([5, 6])
A = np.array([[2, 3], [4, 1]])
# Solve A @ x = b for the unknown vector (x, y).
x = np.linalg.solve(A, b)
print("Solution (x, y):", x)

## 5. Matrix Inverse and Determinant

### 🔹 Inverse

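The inverse $A^{-1}$ of a square matrix $A$ satisfies $AA^{-1} = A^{-1}A = I$; it exists only when $\det(A) \neq 0$.

* **Formula** (for a $2 \times 2$ matrix with $ad - bc \neq 0$):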
  $$
  A = \begin{bmatrix}a & b \\ c & d\end{bmatrix},\quad A^{-1} = \frac{1}{ad - bc} \begin{bmatrix}d & -b \\ -c & a\end{bmatrix}
  $$
* **Application**: Solving linear systems, least squares.

In [None]:
# Invert the 2x2 example matrix. Its determinant is 1*4 - 2*3 = -2, which is
# non-zero, so the inverse exists.
A = np.array([[1, 2], [3, 4]])
A_inv = np.linalg.inv(A)
print("Inverse of A:\n", A_inv)

### 🔹 Determinant

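The determinant is the signed factor by which a linear transformation scales area (2D) or volume (3D); a matrix is invertible exactly when its determinant is non-zero.

* **Formula** (for the $2 \times 2$ matrix $A = \begin{bmatrix}a & b \\ c & d\end{bmatrix}$):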
  $$
  \text{det}(A) = ad - bc
  $$
* **Application**: Checking if a matrix is invertible (det ≠ 0).

In [None]:
# When the cells are run in order, A is still [[1, 2], [3, 4]] from the
# inverse cell, whose exact determinant is 1*4 - 2*3 = -2. The result is a
# float, so the printed value may differ from -2 in the last digits.
det_A = np.linalg.det(A)
print("Determinant of A:", det_A)

## 6. Rank and Linear Independence

### 🔹 Rank

The rank is the number of linearly independent rows or columns, indicating the dimension of the column/row space.

* **Application**: Determines solvability of linear systems.
* **Note**: The matrix $A = \begin{bmatrix}1 & 2 \\ 2 & 4\end{bmatrix}$ is singular (rank 1), so it is not invertible. This is intentional to demonstrate rank computation.

In [None]:
# The second row is twice the first, so only one row is linearly independent.
A = np.array([[1, 2], [2, 4]])
# matrix_rank counts the independent rows/columns numerically.
rank = np.linalg.matrix_rank(A)
print("Rank of A:", rank)

### 🔹 Linear Independence

Vectors are linearly independent if the only linear combination of them that equals the zero vector is the trivial one (all coefficients zero).

* **Example**:
  $$
  c_1\vec{v}_1 + c_2\vec{v}_2 = 0 \Rightarrow c_1 = c_2 = 0
  $$
* **Application**: Feature selection in machine learning.

## 7. Eigenvalues and Eigenvectors

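Eigenvectors are non-zero vectors that a transformation only scales (it never turns them off their own line): $A\vec{v} = \lambda\vec{v}$, where the scale factor $\lambda$ is the corresponding eigenvalue.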

* **Example**:
  $$
  A = \begin{bmatrix}2 & 1 \\ 1 & 2\end{bmatrix}
  $$
  Solve $\text{det}(A - \lambda I) = 0$:
  $$
  \text{det}\begin{bmatrix}2-\lambda & 1 \\ 1 & 2-\lambda\end{bmatrix} = (2-\lambda)^2 - 1 = \lambda^2 - 4\lambda + 3 = 0
  $$
  Eigenvalues: $\lambda = 1, 3$.

* **Application**: PCA, stability analysis.

In [17]:
# Symmetric 2x2 matrix from the worked example (eigenvalues 1 and 3).
A = np.array([[2, 1], [1, 2]])
# eig returns the eigenvalues and a matrix whose columns are the matching
# unit-length eigenvectors: column i belongs to eigenvalues[i]. The order of
# the eigenvalues is not sorted.
eigenvalues, eigenvectors = np.linalg.eig(A)
print("Eigenvalues:", eigenvalues)
print("Eigenvectors:\n", eigenvectors)

Eigenvalues: [3. 1.]
Eigenvectors:
 [[ 0.70710678 -0.70710678]
 [ 0.70710678  0.70710678]]


## 8. Orthogonality and Orthonormality

### 🔹 Orthogonal Vectors

Vectors are orthogonal if their dot product is zero.

* **Example**: $\vec{a} \cdot \vec{b} = 0$

### 🔹 Orthonormal Vectors

Orthogonal vectors with unit norm.

* **Application**: Simplifies computations in PCA, QR decomposition.

In [None]:
# The two standard basis vectors of the plane.
v1 = np.array([1, 0])
v2 = np.array([0, 1])
# A zero inner product means the vectors are orthogonal.
print("Dot Product (Orthogonal):", v1 @ v2)

## 9. Projections

Projection of $\vec{a}$ onto $\vec{b}$ finds the component of $\vec{a}$ in the direction of $\vec{b}$.

* **Formula**:
  $$
  \text{proj}_b a = \frac{\vec{a} \cdot \vec{b}}{\vec{b} \cdot \vec{b}} \vec{b}
  $$
* **Application**: Feature extraction, regression.

In [None]:
a = np.array([1, 2])
b = np.array([1, 0])
# Scalar coefficient (a . b) / (b . b): how far along b the projection reaches.
coeff = (a @ b) / (b @ b)
# The projection is that multiple of b.
proj = coeff * b
print("Projection of a onto b:", proj)

## 10. Norms and Distance

### 🔹 L1 Norm

Sum of absolute values.

* **Formula**:
  $$
  \|\vec{x}\|_1 = |x_1| + |x_2| + \dots + |x_n|
  $$
* **Application**: L1 regularization (Lasso) encourages sparse models; as a loss, the L1 norm is robust to outliers.

### 🔹 L2 Norm

Euclidean length of a vector; the Euclidean distance between two points is the L2 norm of their difference.

* **Formula**:
  $$
  \|\vec{x}\|_2 = \sqrt{x_1^2 + x_2^2 + \dots + x_n^2}
  $$
* **Application**: Distance in clustering.

### 🔹 Cosine Similarity

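Measures the cosine of the angle between two vectors, ignoring their magnitudes: 1 means the same direction, 0 orthogonal, and -1 opposite directions.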

* **Formula**:
  $$
  \cos(\theta) = \frac{\vec{a} \cdot \vec{b}}{\|\vec{a}\| \|\vec{b}\|}
  $$
* **Application**: Text similarity in NLP.

In [None]:
v = np.array([1, 2, 3])
# Euclidean length; computed once and reused for the cosine similarity.
l2_norm = np.linalg.norm(v)
print("L1 Norm:", np.linalg.norm(v, ord=1))
print("L2 Norm:", l2_norm)
# cos(theta) = (a . b) / (|a| |b|) with a = b = v, so the denominator is |v|^2.
cos_sim = (v @ v) / (l2_norm ** 2)
print("Cosine Similarity (v with itself):", cos_sim)

## 11. Matrix Decompositions

### 🔹 LU Decomposition

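Decomposes $A = LU$ (L lower triangular, U upper triangular). In practice rows are reordered for numerical stability, giving $PA = LU$ with a permutation matrix $P$.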

* **Application**: Solving linear systems efficiently.

### 🔹 QR Decomposition

Decomposes $A = QR$ (Q orthogonal, R upper triangular).

* **Application**: Least squares, eigenvalue computation.

### 🔹 SVD (Singular Value Decomposition)

Decomposes $A = U\Sigma V^T$, where $U$ and $V$ are orthogonal and $\Sigma$ is a (rectangular) diagonal matrix holding the non-negative singular values.

* **Application**: PCA, image compression.

In [None]:
A = np.array([[1, 2], [3, 4]])
# svd returns the three factors of A = U @ diag(S) @ Vt. S is a 1-D array of
# singular values (not the diagonal matrix), and Vt is already V transposed.
U, S, Vt = np.linalg.svd(A)
print("U:\n", U)
print("Singular Values:", S)
print("Vt:\n", Vt)

## 12. Principal Component Analysis (PCA)

PCA reduces dimensionality by projecting data onto principal components (eigenvectors of covariance matrix).

* **Steps**:
  1. Center the data ($X - \mu$).
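  2. Compute the covariance matrix of the centered data $X_c = X - \mu$: $\Sigma = \frac{1}{n-1} X_c^T X_c$ (the sample covariance, which scikit-learn uses; dividing by $n$ instead gives the population covariance).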
  3. Compute eigenvectors/values.
  4. Project data onto top eigenvectors.

* **Application**: Feature reduction, visualization.

In [None]:
# Three samples (rows) with two features (columns).
X = np.array([[1, 2], [3, 4], [5, 6]])
# Keep only the first principal component.
pca = PCA(n_components=1)
# fit_transform fits the model on X and returns X projected onto that
# component, so the result has one column per sample row.
X_reduced = pca.fit_transform(X)
print("Reduced Data:\n", X_reduced)

## 13. Applications in Machine Learning

* **Linear Regression**: Solve $\hat{\beta} = (X^TX)^{-1}X^Ty$
* **PCA**: Dimensionality reduction
* **Neural Networks**: Matrix multiplication for layer outputs
* **Clustering**: Norms for distance metrics

In [None]:
# Design matrix: the leading column of ones is the bias (intercept) term and
# the second column is the single feature.
X = np.array([[1, 1], [1, 2], [1, 3]])  # Include bias term
y = np.array([2, 4, 5])
# Solve the normal equations (X^T X) beta = X^T y as a linear system instead
# of forming the explicit inverse: np.linalg.solve is cheaper and numerically
# more stable than inv(X^T X) @ X^T @ y, and gives the same least-squares fit.
beta = np.linalg.solve(X.T @ X, X.T @ y)
print("Coefficients:", beta)

## 14. Basis and Dimension

* **Basis**: A set of linearly independent vectors that span a space.
* **Dimension**: Number of vectors in the basis.
* **Example in $\mathbb{R}^2$**:
  $$
  \left\{ \begin{bmatrix}1 \\ 0\end{bmatrix}, \begin{bmatrix}0 \\ 1\end{bmatrix} \right\}
  $$
* **Application**: Defines the structure of vector spaces in machine learning.

## 15. Null Space and Column Space

* **Null Space**: Vectors $\vec{x}$ where $A\vec{x} = 0$.
* **Column Space**: Span of $A$'s columns.
* **Application**: Understanding solution sets in linear systems.

In [None]:
# The second row is twice the first, so A is singular and A @ x = 0 has
# non-zero solutions.
A = np.array([[1, 2], [2, 4]])
# null_space returns a basis of those solutions, one basis vector per column.
ns = null_space(A)
print("Null Space:\n", ns)

## 16. Change of Basis

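Express a vector in a new basis whose basis vectors are the columns of $B$: since $x = B\,[x]_B$, the new coordinates are obtained by applying $B^{-1}$.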

* **Formula**:
  $$
  [x]_B = B^{-1}x
  $$
* **Application**: Simplifies computations in PCA.

In [None]:
# The columns of B are the new basis vectors (1, 0) and (1, 1).
B = np.array([[1, 1], [0, 1]])
x = np.array([2, 3])
# The new coordinates c satisfy B @ c = x. Solving that system gives
# B^{-1} x without forming the inverse explicitly, which is cheaper and
# numerically more stable.
x_B = np.linalg.solve(B, x)
print("Coordinates in new basis:", x_B)

## 17. Gram-Schmidt Process

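Turns a set of linearly independent vectors into an orthogonal set that spans the same subspace; normalizing each resulting vector gives an orthonormal set.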

* **Formula**:
  $$
  u_1 = v_1, \quad u_2 = v_2 - \frac{v_2 \cdot u_1}{u_1 \cdot u_1} u_1
  $$
* **Application**: QR decomposition, orthogonal bases.

In [None]:
def gram_schmidt(V, tol=1e-12):
    """Orthonormalize the columns of ``V`` with the Gram-Schmidt process.

    Each column has its components along the previously processed columns
    removed and is then scaled to unit length.

    Args:
        V: Array-like of shape (n, k) whose columns are the vectors to
            orthonormalize. It is not modified.
        tol: Relative threshold. A column whose remainder after removing the
            earlier directions has norm at most ``tol`` times its original
            norm is treated as linearly dependent on the earlier columns.

    Returns:
        A float array with the same shape as ``V`` whose columns are
        orthonormal and span the same space as the columns of ``V``.

    Raises:
        ValueError: If ``V`` is not 2-D, or if a column is zero or linearly
            dependent on the columns before it (no unit vector can be
            produced for it).
    """
    U = np.array(V, dtype=float)  # one float copy; the caller's V is untouched
    if U.ndim != 2:
        raise ValueError("V must be a 2-D array whose columns are the vectors")
    for i in range(U.shape[1]):
        original_norm = np.linalg.norm(U[:, i])
        for j in range(i):
            # U[:, j] already has unit length, so the projection coefficient
            # is the plain dot product (dividing by u_j . u_j is redundant).
            U[:, i] -= np.dot(U[:, i], U[:, j]) * U[:, j]
        remainder_norm = np.linalg.norm(U[:, i])
        # Normalizing a (near-)zero remainder would yield NaNs or amplified
        # rounding noise instead of a meaningful direction.
        if remainder_norm <= tol * original_norm:
            raise ValueError(
                f"column {i} is zero or linearly dependent on the previous columns"
            )
        U[:, i] /= remainder_norm  # Normalize
    return U

# The vectors (1, 1) and (0, 1) are written as rows and then transposed so
# that each one becomes a column, the layout gram_schmidt works on.
V = np.transpose(np.array([[1, 1], [0, 1]]))
U = gram_schmidt(V)
print("Orthonormalized Vectors:\n", U)

## 18. Positive Definite Matrices

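A symmetric matrix $A$ is positive definite if $\vec{x}^T A \vec{x} > 0$ for all $\vec{x} \neq 0$; equivalently, all of its eigenvalues are positive.

* **Application**: A positive definite Hessian means the objective is locally strictly convex, so a critical point is a strict local minimum and Newton’s method takes descent steps there.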

In [None]:
A = np.array([[2, 1], [1, 2]])
# The "all eigenvalues > 0" test characterises positive definiteness only for
# symmetric matrices, so check symmetry explicitly.
is_symmetric = np.allclose(A, A.T)
# eigvalsh is the eigenvalue routine for symmetric/Hermitian matrices: it
# always returns real eigenvalues (in ascending order), so the comparison
# below cannot hit complex values.
eigenvalues = np.linalg.eigvalsh(A)
is_positive_definite = bool(is_symmetric and np.all(eigenvalues > 0))
print("Positive Definite (all eigenvalues > 0):", is_positive_definite)

## 19. Trace of a Matrix

Sum of diagonal elements.

* **Formula**:
  $$
  \text{Tr}(A) = \sum_i a_{ii}
  $$
* **Application**: Used in loss functions, optimization.

In [None]:
A = np.array([[1, 2], [3, 4]])
# Sum of the main-diagonal entries: 1 + 4.
trace = A.trace()
print("Trace of A:", trace)

## 20. Block Matrices

Partitioned matrices for efficient computation.

* **Example**:
  $$
  \begin{bmatrix}A & B \\ C & D\end{bmatrix}
  $$
* **Application**: Parallel computing, structured data.

In [None]:
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
# Assemble the 4x4 matrix [[A, B], [B, A]]: glue each block row side by
# side, then stack the two block rows on top of each other.
top_row = np.hstack([A, B])
bottom_row = np.hstack([B, A])
block_matrix = np.vstack([top_row, bottom_row])
print("Block Matrix:\n", block_matrix)

## 21. Moore-Penrose Pseudo-Inverse

Generalizes the inverse to non-square or singular matrices.

* **Formula** (valid when $A$ has full column rank):
  $$
  A^+ = (A^TA)^{-1}A^T
  $$
* **Application**: Least squares solutions.

In [None]:
# A is 3x2 (not square), so it has no ordinary inverse.
A = np.array([[1, 2], [3, 4], [5, 6]])
# pinv computes the Moore-Penrose pseudo-inverse; for this 3x2 input the
# result is 2x3.
A_pinv = np.linalg.pinv(A)
print("Pseudo-Inverse of A:\n", A_pinv)