# Linear Algebra: Practical Applications in ML

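This notebook generates visualizations for neural networks (a forward pass, activation functions, and an architecture diagram), attention mechanisms, and vectorized operations such as cosine similarity and regularized linear regression.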

In [None]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
plt.style.use('seaborn-v0_8-whitegrid')

## Neural Network Forward Pass

In [None]:
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + e^(-x)), evaluated without overflow.

    The textbook form takes ``np.exp(-x)``, which overflows (and emits a
    RuntimeWarning) for large negative ``x``.  Here the exponential is only
    ever taken of a non-positive number, so it can never overflow.

    Args:
        x: Real scalar or array-like.

    Returns:
        Element-wise sigmoid of ``x``: a NumPy scalar for scalar input,
        otherwise an array with the same shape as ``x``.
    """
    x = np.asarray(x)
    # decay = e^(-|x|) is at most 1, whatever the magnitude of x.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^(-x)).  x < 0: the algebraically equal e^x / (1 + e^x).
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar; arrays pass through.
    return result[()]

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def forward_layer(x, W, b, activation='relu'):
    """Run one dense layer: the affine map ``W @ x + b``, then a nonlinearity.

    ``activation`` picks ``'relu'`` or ``'sigmoid'``; any other value returns
    the pre-activation unchanged (a purely linear layer).
    """
    pre_activation = W @ x + b
    if activation == 'relu':
        activated = relu(pre_activation)
    elif activation == 'sigmoid':
        activated = sigmoid(pre_activation)
    else:
        activated = pre_activation
    return activated

# Example: a 4 -> 8 -> 6 -> 2 fully connected network
np.random.seed(42)

# Weight matrices are (outputs, inputs) and are drawn in layer order so the
# seeded random stream is consumed the same way on every run.
W1 = 0.5 * np.random.randn(8, 4)
W2 = 0.5 * np.random.randn(6, 8)
W3 = 0.5 * np.random.randn(2, 6)

# All biases start at zero
b1, b2, b3 = np.zeros(8), np.zeros(6), np.zeros(2)

# Push one 4-feature input through the stack: ReLU, ReLU, then sigmoid
x = np.array([1.0, 2.0, 3.0, 4.0])
h1 = forward_layer(x, W1, b1, activation='relu')
h2 = forward_layer(h1, W2, b2, activation='relu')
y = forward_layer(h2, W3, b3, activation='sigmoid')

# Report how the vector size changes from layer to layer
print(f"Input shape: {x.shape}")
print(f"Hidden layer 1 shape: {h1.shape}")
print(f"Hidden layer 2 shape: {h2.shape}")
print(f"Output shape: {y.shape}")
print(f"Output: {y}")

## Visualize Activation Functions

In [None]:
x = np.linspace(-5, 5, 200)

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Dashed gray reference lines share one look across all panels
guide_style = dict(color='gray', linestyle='--', alpha=0.5)

# ReLU and sigmoid use the same layout: curve, two guides, labels, faint grid.
# Each entry: (axes, function, line format, horizontal guide, y label, title)
curve_panels = [
    (axes[0], relu, 'b-', 0, 'ReLU(x)', 'ReLU: max(0, x)'),
    (axes[1], sigmoid, 'r-', 0.5, 'sigmoid(x)', 'Sigmoid: 1/(1+e^(-x))'),
]
for panel, fn, fmt, midline, y_label, title in curve_panels:
    panel.plot(x, fn(x), fmt, linewidth=2)
    panel.axhline(y=midline, **guide_style)
    panel.axvline(x=0, **guide_style)
    panel.set_xlabel('x')
    panel.set_ylabel(y_label)
    panel.set_title(title)
    panel.grid(True, alpha=0.3)

# Two-class softmax: sweep the first logit while the second stays at 0
z1 = np.linspace(-3, 3, 200)
z2 = 0
exp_z1 = np.exp(z1)
softmax_probs = exp_z1 / (exp_z1 + np.exp(z2))

softmax_panel = axes[2]
softmax_panel.plot(z1, softmax_probs, 'g-', linewidth=2, label='P(class 1)')
softmax_panel.plot(z1, 1 - softmax_probs, 'purple', linewidth=2, label='P(class 2)')
softmax_panel.axhline(y=0.5, **guide_style)
softmax_panel.set_xlabel('z1 (z2=0)')
softmax_panel.set_ylabel('Probability')
softmax_panel.set_title('Softmax (2 classes)')
softmax_panel.legend()
softmax_panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('../../static/img/post/linear-algebra/activation-functions.png', dpi=150, bbox_inches='tight')
plt.show()

## Attention Mechanism Visualization

In [None]:
def softmax(z):
    """Softmax along the last axis, shifted by the maximum for stability."""
    # Subtracting the per-row maximum keeps every exponent <= 0.
    peak = np.max(z, axis=-1, keepdims=True)
    unnormalized = np.exp(z - peak)
    return unnormalized / unnormalized.sum(axis=-1, keepdims=True)

def scaled_dot_product_attention(Q, K, V):
    """
    Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.

    Q: (..., n_queries, d_k)
    K: (..., n_keys, d_k)
    V: (..., n_keys, d_v)

    Any leading axes are treated as batch/head dimensions, so the plain
    2-D case (seq_len, d_k) works exactly as before.

    Returns (output, attention_weights) with shapes
    (..., n_queries, d_v) and (..., n_queries, n_keys).
    """
    d_k = Q.shape[-1]

    # Swap only the last two axes of K. K.T would reverse every axis and
    # scramble the leading batch dimensions of stacked inputs.
    keys_transposed = np.swapaxes(K, -1, -2)

    # Dividing by sqrt(d_k) keeps the score magnitude independent of d_k
    scores = Q @ keys_transposed / np.sqrt(d_k)

    # Each query's weights over the keys sum to 1 (softmax over the last axis)
    attention_weights = softmax(scores)

    # Weighted sum of values
    output = attention_weights @ V

    return output, attention_weights

# Example: attention weights for randomly drawn queries, keys and values
np.random.seed(123)
seq_len = 6
d_k = 8

# Q, K and V are independent standard-normal matrices, drawn in that order
Q, K, V = (np.random.randn(seq_len, d_k) for _ in range(3))

output, weights = scaled_dot_product_attention(Q, K, V)

# Heatmap: one row per query, one column per key
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(weights, cmap='Blues', aspect='auto')

ax.set_title('Attention Weights Matrix')
ax.set_xlabel('Key position')
ax.set_ylabel('Query position')
positions = range(seq_len)
ax.set_xticks(positions)
ax.set_yticks(positions)

# Colorbar
cbar = plt.colorbar(im, ax=ax)
cbar.set_label('Attention Weight')

# Print each weight in its cell; switch to white text on the darker cells
for (i, j), weight in np.ndenumerate(weights):
    text = ax.text(j, i, f'{weight:.2f}', ha='center', va='center',
                   color='black' if weight < 0.5 else 'white')

plt.tight_layout()
plt.savefig('../../static/img/post/linear-algebra/attention-weights.png', dpi=150, bbox_inches='tight')
plt.show()

## Cosine Similarity Visualization

In [None]:
def cosine_similarity(u, v):
    """Cosine similarity between two vectors.

    Args:
        u, v: 1-D vectors of equal length.

    Returns:
        ``dot(u, v) / (|u| * |v|)``.  If either vector has zero length its
        direction is undefined, so 0.0 is returned rather than dividing by
        zero and getting NaN.
    """
    norm_u = np.linalg.norm(u)
    norm_v = np.linalg.norm(v)
    # Guard the 0/0 case: a zero vector is treated as similar to nothing.
    if norm_u == 0 or norm_v == 0:
        return 0.0
    return np.dot(u, v) / (norm_u * norm_v)

def cosine_similarity_matrix(X):
    """Pairwise cosine similarity matrix.

    Args:
        X: 2-D array with one vector per row, shape (n, d).

    Returns:
        (n, n) array whose entry [i, j] is the cosine similarity of rows i
        and j.  An all-zero row has similarity 0 with every row (itself
        included) instead of producing NaN.
    """
    X = np.asarray(X)
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    # Dividing a zero row by 1 leaves it zero, which avoids 0/0 -> NaN.
    safe_norms = np.where(norms == 0, 1.0, norms)
    X_normalized = X / safe_norms
    # Dot products of unit-length rows are exactly the cosines.
    return X_normalized @ X_normalized.T

# Example: hand-written 4-dimensional word embeddings
embeddings = {
    'king': np.array([0.5, 0.3, 0.8, 0.1]),
    'queen': np.array([0.5, 0.3, 0.7, 0.2]),
    'man': np.array([0.2, 0.8, 0.3, 0.1]),
    'woman': np.array([0.2, 0.8, 0.2, 0.2]),
    'apple': np.array([0.9, 0.1, 0.1, 0.9]),
}

# Stack the vectors row by row, in the same order as the word list
words = list(embeddings)
X = np.array(list(embeddings.values()))
sim_matrix = cosine_similarity_matrix(X)

# Heatmap with the color scale pinned to [0, 1]
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(sim_matrix, cmap='RdYlGn', vmin=0, vmax=1, aspect='auto')

tick_positions = range(len(words))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(words)
ax.set_yticklabels(words)
ax.set_title('Word Embedding Cosine Similarity')

# Colorbar
cbar = plt.colorbar(im, ax=ax)
cbar.set_label('Cosine Similarity')

# Print each similarity in its cell
for (i, j), similarity in np.ndenumerate(sim_matrix):
    text = ax.text(j, i, f'{similarity:.2f}',
                   ha='center', va='center', color='black')

plt.tight_layout()
plt.savefig('../../static/img/post/linear-algebra/cosine-similarity.png', dpi=150, bbox_inches='tight')
plt.show()

## Linear Regression Visualization

In [None]:
def linear_regression(X, y, regularization=0, *, penalize_intercept=True):
    """Solve linear regression with optional L2 regularization.

    Solves the normal equations ``(Xb^T Xb + lambda * P) w = Xb^T y``, where
    ``Xb`` is ``X`` with a leading column of ones for the bias term and ``P``
    is the identity (optionally with the bias entry zeroed).

    Args:
        X: Features, shape (n_samples,) or (n_samples, n_features).
        y: Targets, shape (n_samples,).
        regularization: L2 strength lambda; values <= 0 apply no penalty.
        penalize_intercept: If True (the default, matching the historical
            behavior) the bias weight is shrunk along with the slopes.  Pass
            False for conventional ridge regression, which leaves the
            intercept unpenalized.

    Returns:
        Weight vector with the bias first: ``[bias, w_1, ..., w_n]``.

    Raises:
        numpy.linalg.LinAlgError: If the system matrix is singular.
    """
    X_bias = np.column_stack([np.ones(len(X)), X])
    n_features = X_bias.shape[1]
    XtX = X_bias.T @ X_bias
    if regularization > 0:
        penalty = np.eye(n_features)
        if not penalize_intercept:
            # Column 0 is the all-ones bias column; exempt it from shrinkage.
            penalty[0, 0] = 0.0
        XtX = XtX + regularization * penalty
    Xty = X_bias.T @ y
    # Solve the linear system directly rather than forming an explicit inverse.
    weights = np.linalg.solve(XtX, Xty)
    return weights

# Synthetic data: a noisy straight line y = 2.0 + 1.5 x
np.random.seed(42)
X = 2 * np.random.randn(50, 1)
true_w = np.array([2.0, 1.5])  # [bias, slope]
noise = np.random.randn(50) * 0.8
y = true_w[0] + true_w[1] * X.ravel() + noise

# Unregularized fit
weights = linear_regression(X, y)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Every fitted line is evaluated on the same grid spanning the data range
x_line = np.linspace(X.min(), X.max(), 100)

# Left panel: the data and the least-squares line
ax = axes[0]
ax.scatter(X, y, alpha=0.6, s=50, label='Data')
y_line = weights[0] + weights[1] * x_line
ax.plot(x_line, y_line, 'r-', linewidth=2, label=f'Fit: y = {weights[0]:.2f} + {weights[1]:.2f}x')

# Right panel: refit with increasing L2 strength, one color per lambda
ax = axes[1]
ax.scatter(X, y, alpha=0.3, s=30, color='gray')

regularizations = [0, 0.1, 1, 10]
colors = plt.cm.viridis(np.linspace(0, 0.8, len(regularizations)))
for reg, color in zip(regularizations, colors):
    w = linear_regression(X, y, regularization=reg)
    y_pred = w[0] + w[1] * x_line
    ax.plot(x_line, y_pred, color=color, linewidth=2, label=f'lambda={reg}')

# Shared finishing touches for both panels
panel_titles = ('Linear Regression via Normal Equations', 'Effect of L2 Regularization')
for ax, panel_title in zip(axes, panel_titles):
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('../../static/img/post/linear-algebra/linear-regression-regularization.png', dpi=150, bbox_inches='tight')
plt.show()

## Neural Network Architecture Diagram

In [None]:
def draw_neural_net(ax, layer_sizes):
    """Draw a fully connected feed-forward network diagram on ``ax``.

    Layers are spread left to right over x in [0, 1]; the neurons of each
    layer are spread evenly over y in [0.1, 0.9].  Every neuron is joined to
    every neuron of the following layer.  Each layer gets a name below it
    ('Input', 'Hidden k', 'Output') and its neuron count above it.

    Args:
        ax: Matplotlib axes to draw on.  Axis limits and aspect are left to
            the caller.
        layer_sizes: Neuron count per layer, input layer first.
    """
    n_layers = len(layer_sizes)
    radius = 0.03

    layer_positions = np.linspace(0, 1, n_layers)
    neuron_heights = [np.linspace(0.1, 0.9, n_neurons) for n_neurons in layer_sizes]

    for i, (x, y_positions) in enumerate(zip(layer_positions, neuron_heights)):
        # Draw neurons
        for y in y_positions:
            # Use facecolor/edgecolor rather than color=: `color` overrides
            # both, which would silently discard the black outline.
            circle = plt.Circle((x, y), radius, facecolor='steelblue',
                                edgecolor='black', linewidth=1.5)
            ax.add_patch(circle)

        # Draw connections to next layer, from circle edge to circle edge
        if i < n_layers - 1:
            next_x = layer_positions[i + 1]
            for y1 in y_positions:
                for y2 in neuron_heights[i + 1]:
                    ax.plot([x + radius, next_x - radius], [y1, y2],
                            'gray', alpha=0.3, linewidth=0.5)

    # Labels: built from the layer count so the last layer is always
    # 'Output' and networks of any depth are fully labelled.  zip() stops at
    # the shorter sequence, so a single-layer net is labelled 'Input' only.
    hidden_labels = [f'Hidden {k}' for k in range(1, n_layers - 1)]
    labels = ['Input', *hidden_labels, 'Output']
    for i, (x, label) in enumerate(zip(layer_positions, labels)):
        ax.text(x, -0.05, label, ha='center', fontsize=10)
        ax.text(x, 0.95, f'({layer_sizes[i]})', ha='center', fontsize=9, color='gray')

fig, ax = plt.subplots(figsize=(10, 6))

# Same 4 -> 8 -> 6 -> 2 architecture as the forward-pass example
draw_neural_net(ax, layer_sizes=[4, 8, 6, 2])

# Pad the unit square so the layer labels fit, then hide the axis frame
ax.set_xlim(-0.1, 1.1)
ax.set_ylim(-0.15, 1.05)
ax.set_aspect('equal')
ax.axis('off')
ax.set_title('Neural Network: Matrix Multiplication Chain', fontsize=12, pad=20)

# The whole network as one nested expression, positioned in axes coordinates
# (0.5 = horizontally centred, negative y = below the axes)
equation = r'$y = \sigma(W_3 \cdot \text{ReLU}(W_2 \cdot \text{ReLU}(W_1 \cdot x + b_1) + b_2) + b_3)$'
ax.text(0.5, -0.12, equation, ha='center', fontsize=11, transform=ax.transAxes)

plt.tight_layout()
plt.savefig('../../static/img/post/linear-algebra/neural-network-diagram.png', dpi=150, bbox_inches='tight')
plt.show()