In [None]:
import numpy as np
from broadcasting_exercises.tests import (  # noqa: F401
    test_ex1,
    test_ex2,
    test_ex3,
    test_ex4,
    test_ex5,
    test_ex6,
    test_ex7,
    test_ex8,
)

# Exercise 1: Column standardize a dataset

Given a dataset represented as a 2D NumPy array, standardize each column to have a mean of 0 and a standard deviation of 1. This process is known as z-score normalization.

$$
Z = \frac{X - \mu}{\sigma}
$$

In [None]:
def col_standardize(X: np.ndarray) -> np.ndarray:
    """
    Standardize each column of the dataset X to have mean 0 and standard deviation 1.

    Exercise stub: the body is intentionally left for the reader to implement.
    The intended result is the column-wise z-score ``Z = (X - mu) / sigma``,
    where ``mu`` and ``sigma`` are the per-column mean and standard deviation
    of X, broadcast back across the rows.

    Parameters
    ----------
    X : np.ndarray
        A 2D NumPy array where each column represents a feature and each row represents a sample.

    Returns
    -------
    Z : np.ndarray
        A 2D NumPy array of the same shape as X, where each column has been standardized.

    Raises
    ------
    NotImplementedError
        Always, until the exercise has been implemented.
    """
    raise NotImplementedError

In [None]:
# test_ex1(col_standardize)

# Exercise 2: Outer Sum

Use broadcasting to compute an "outer sum". Given two vectors $a$ and $b$ of shapes $(N,)$ and $(M,)$ respectively, construct $X$ of shape $(N, M)$ such that `X[i, j] = a[i] + b[j]`

In [None]:
def outer_sum(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """
    Computes the outer sum matrix X such that X[i, j] = a[i] + b[j]

    Exercise stub: the body is intentionally left for the reader to implement
    using broadcasting.

    Parameters
    ----------
    a : np.ndarray
        First vector input, shape (N,).
    b : np.ndarray
        Second vector input, shape (M,).

    Returns
    -------
    X : np.ndarray
        2d outer sum matrix of shape (N, M).

    Raises
    ------
    NotImplementedError
        Always, until the exercise has been implemented.
    """
    raise NotImplementedError

In [None]:
# test_ex2(outer_sum)

# Exercise 3: Row-wise gathering

Given a real-valued matrix `X` with shape $(N, K)$ and an **integer** valued vector `idx` of shape $(N,)$ whose values lie in $[0, K)$ (i.e. valid column indices $0, \dots, K-1$), for each row `i` of `X` we want to return the value at column `idx[i]`.

Example:

```
X = [[1, 2, 3],
     [4, 5, 6],
     [7, 8, 9]]
idx = [0, 1, 2]

expected_output = [1, 5, 9]
```

In [None]:
def row_gather(X: np.ndarray, idx: np.ndarray) -> np.ndarray:
    """
    Retrieves the idx[i]-th column of X for each row i, i.e. Z[i] = X[i, idx[i]].

    Exercise stub: the body is intentionally left for the reader to implement.

    Parameters
    ----------
    X : np.ndarray
        Matrix of input values, shape (N, K).
    idx : np.ndarray
        Integer array of shape (N,) holding, for each row, the column index
        to be gathered. Entries must be valid column indices of X.

    Returns
    -------
    Z : np.ndarray
        Values of X requested by idx, shape (N,).

    Raises
    ------
    NotImplementedError
        Always, until the exercise has been implemented.
    """
    raise NotImplementedError

In [None]:
# test_ex3(row_gather)

# Exercise 4: Scatter-add per row

Given:
- `X` : Input matrix, shape $(N, D)$, real valued
- `idx` : Index matrix, shape $(N, K)$, integer valued with entries in $[0, D)$
- `values` : Values to add to `X`, shape $(N, K)$

**increment** each index position by the associated values, such that:

```
out[i, idx[i, k]] += values[i, k]
```

In [None]:
def row_scatter_add(X: np.ndarray, idx: np.ndarray, values: np.ndarray) -> np.ndarray:
    """
    Scatter-add values into X at positions specified by idx for each row.

    For each row i and column k, increments the output at position (i, idx[i, k])
    by values[i, k].

    Exercise stub: the body is intentionally left for the reader to implement.

    NOTE(review): the per-(i, k) increment rule implies that when idx repeats a
    column within a row, every contribution should accumulate -- confirm
    against the expectations of test_ex4.

    Parameters
    ----------
    X : np.ndarray
        Input matrix of shape (N, D) to scatter into.
    idx : np.ndarray
        Integer index array of shape (N, K) with values in [0, D).
    values : np.ndarray
        Values to add, shape (N, K).

    Returns
    -------
    out : np.ndarray
        Copy of X with values added at the specified indices, shape (N, D).

    Raises
    ------
    NotImplementedError
        Always, until the exercise has been implemented.
    """
    raise NotImplementedError

In [None]:
# test_ex4(row_scatter_add)

# Exercise 5: One-Hot Encoding

Given an input array `idx` with shape $(N,)$ and integer values in `[0, 1, ..., K-1]`, create a boolean matrix $X$ of shape $(N, K)$. Each row should have a single `1` and zeros everywhere else. The `1` should be in the column associated with that row's value in `idx`.

Example:
```
idx = [1, 1, 2, 0, 3]
output: [[0, 1, 0, 0],
         [0, 1, 0, 0],
         [0, 0, 1, 0],
         [1, 0, 0, 0],
         [0, 0, 0, 1]]
```

In [11]:
def one_hot(idx: np.ndarray) -> np.ndarray:
    """
    Create a one-hot indicator matrix (also called a "dummy variable matrix")

    Exercise stub: the body is intentionally left for the reader to implement.

    Parameters
    ----------
    idx : np.ndarray
        Integer valued array of class labels, shape (N,).

    Returns
    -------
    out : np.ndarray
        One-hot encoded matrix with one row per entry of idx and columns equal
        to the number of classes. Each row has a single 1 value in the column
        corresponding to its class label, and 0 elsewhere.

    Raises
    ------
    NotImplementedError
        Always, until the exercise has been implemented.
    """
    raise NotImplementedError

In [None]:
# test_ex5(one_hot)

# Exercise 6: Pairwise squared distances

In many statistical and machine learning workflows, we need to compare every observation in one dataset to every observation in another dataset. A classic building block is the matrix of pairwise squared Euclidean distances.

Given two input matrices:

- `X` with shape `(N, D)` representing `N` points in `D` dimensions
- `Y` with shape `(M, D)` representing `M` points in `D` dimensions

construct an output matrix `dist2` with shape `(N, M)` where:

$$
 \text{dist2}[i, j] = \sum_{d=0}^{D-1} (X[i, d] - Y[j, d])^2
$$

The purpose of this exercise is to practice broadcasting by *introducing singleton axes*, so that subtraction produces an `(N, M, D)` array of differences, and then reducing over the last axis.

In [None]:
def pairwise_sq_dist(X: np.ndarray, Y: np.ndarray) -> np.ndarray:
    """
    Compute the pairwise squared Euclidean distance matrix between rows of X and rows of Y.

    Exercise stub: the body is intentionally left for the reader to implement.
    The intended approach is to introduce singleton axes so that the
    subtraction broadcasts to an (N, M, D) array of differences, then square
    and reduce over the last axis.

    Parameters
    ----------
    X : np.ndarray
        Array of shape (N, D) representing N points in D dimensions.
    Y : np.ndarray
        Array of shape (M, D) representing M points in D dimensions.

    Returns
    -------
    dist2 : np.ndarray
        Array of shape (N, M) where dist2[i, j] = sum_d (X[i, d] - Y[j, d])**2.

    Raises
    ------
    NotImplementedError
        Always, until the exercise has been implemented.
    """
    raise NotImplementedError

In [None]:
# test_ex6(pairwise_sq_dist)

# Exercise 7: Batched gather along last axis

This is a very common "indexing with broadcasting" pattern. You have a 3D tensor of values, and for each `(b, t)` position you want to select a single entry along the last axis.

Given:
 - `X` with shape `(B, T, K)`
 - `choice` with shape `(B, T)` containing integers in `[0, K)`

 return an array `out` of shape `(B, T)` such that:

$$
 \text{out}[b, t] = X[b, t, \text{choice}[b, t]]
$$

The key idea is that `choice` provides an index into the last dimension, *but* you must pair it with indices for the first two dimensions that broadcast to the same `(B, T)` shape so that everything lines up.

In [12]:
def gather_last_axis(X: np.ndarray, choice: np.ndarray) -> np.ndarray:
    """
    Gather values from the last axis of X according to per-position indices.

    Exercise stub: the body is intentionally left for the reader to implement.
    The intended result satisfies out[b, t] = X[b, t, choice[b, t]].

    Parameters
    ----------
    X : np.ndarray
        Input array of shape (B, T, K).
    choice : np.ndarray
        Integer array of shape (B, T) with values in [0, K). For each (b, t), selects
        X[b, t, choice[b, t]].

    Returns
    -------
    out : np.ndarray
        Array of shape (B, T) containing the gathered values.

    Raises
    ------
    NotImplementedError
        Always, until the exercise has been implemented.
    """
    raise NotImplementedError

In [None]:
# test_ex7(gather_last_axis)

# Exercise 8: Stable softmax along an axis

Softmax converts real-valued "logits" into probabilities along a chosen axis. It is used everywhere: categorical likelihoods, attention mechanisms, mixture responsibilities, etc.

For an input array `logits`, softmax along an axis is defined as:

$$
\text{softmax}(x)_i = \frac{\exp(x_i)}{\sum_j \exp(x_j)}
$$

However, computing `exp` directly can overflow for large values. The standard trick is to subtract the maximum value along the softmax axis before exponentiating:

$$
\text{softmax}(x) = \frac{\exp(x - \max(x))}{\sum_j \exp(x - \max(x))}
$$

This exercise is primarily about broadcasting and `keepdims=True`. When you compute `max` and `sum` along an axis, you want the result to broadcast back across that axis when subtracting / dividing.

In [13]:
def softmax(logits: np.ndarray, axis: int = -1) -> np.ndarray:
    """
    Compute the numerically-stable softmax of an array along a given axis.

    Exercise stub: the body is intentionally left for the reader to implement.
    The intended approach subtracts the maximum along `axis` before
    exponentiating (to avoid overflow in `exp`) and then divides by the sum
    along `axis`; both reductions should keep the reduced axis so the results
    broadcast back against `logits`.

    Parameters
    ----------
    logits : np.ndarray
        Input array of real-valued logits. Can have any shape.
    axis : int, default -1
        Axis along which to compute the softmax.

    Returns
    -------
    probs : np.ndarray
        Array of the same shape as logits containing softmax probabilities along `axis`.
        Values are non-negative and sum to 1 along the specified axis.

    Raises
    ------
    NotImplementedError
        Always, until the exercise has been implemented.
    """
    raise NotImplementedError

In [14]:
# test_ex8(softmax)