In [133]:
import numpy as np

# NumPy Aggregations

It’s very common to want to aggregate along a row or column.  
By default, every NumPy aggregation function returns the aggregate of the entire array; pass the `axis` argument to aggregate along a single dimension instead.


In [134]:
def array_info(array: np.ndarray) -> None:
    """Print a short summary of ``array`` to standard output.

    The summary lists, one item per line, the number of dimensions, the
    shape, the total element count and the dtype, followed by the array's
    printed values and a trailing blank line.

    Args:
        array: The NumPy array to describe.

    Returns:
        None. The function only prints.
    """
    report = (
        f"ndim: {array.ndim}",
        f"shape: {array.shape}",
        f"size: {array.size}",
        f"dtype: {array.dtype}",
        # Values start on their own line; the extra "\n" leaves a blank line after them.
        f"values:\n{array}\n",
    )
    for entry in report:
        print(entry)

In [135]:
# Sample matrix with 3 rows and 2 columns, used by the axis examples.
x = np.array([[1, 2], [5, 3], [4, 6]])

array_info(x)

ndim: 2
shape: (3, 2)
size: 6
dtype: int32
values:
[[1 2]
 [5 3]
 [4 6]]



![](../media/np_matrix_aggregation_row.png)

_The axis parameter indicates which axis gets collapsed._


In [136]:
# axis=0 collapses the rows, leaving one maximum per column.
np.max(x, axis=0)

array([5, 6])

In [137]:
# axis=1 collapses the columns, leaving one maximum per row.
np.max(x, axis=1)

array([2, 5, 6])

In [138]:
# No axis given: every element of x is added into a single scalar.
np.sum(x)

21

In [139]:
# axis=0 collapses the rows: one sum per column.
np.sum(x, axis=0)

array([10, 11])

In [140]:
# axis=1 collapses the columns: one sum per row.
np.sum(x, axis=1)

array([ 3,  8, 10])

In [141]:
# Product of all six elements: 1 * 2 * 5 * 3 * 4 * 6.
np.prod(x)

720

In [142]:
# 100,000 random floats used as the input for the timing comparisons.
# NOTE(review): no seed is set, so the min/max values printed later differ on
# every run; consider np.random.default_rng(<seed>).random(100_000).
big_array = np.random.rand(100_000)

In [143]:
# Baseline: Python's built-in sum applied to the NumPy array.
%timeit sum(big_array)

4.93 ms ± 230 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [145]:
# Same reduction through NumPy's own sum, for comparison with the built-in.
%timeit np.sum(big_array)

29 µs ± 518 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [None]:
# Speedup of np.sum over the built-in sum, from the timings measured above:
# 4.93 ms (built-in) converted to µs, divided by 29 µs (NumPy).
round(4.93 * 1000 / 29, 1)

170.0

## Minimum and Maximum


In [None]:
# Python's built-in min also accepts a NumPy array.
min(big_array)

1.354237572837036e-05

In [None]:
# Python's built-in max also accepts a NumPy array.
max(big_array)

0.9999990120313899

In [None]:
# NumPy's min over the whole array; same value as the built-in min.
np.min(big_array)

1.354237572837036e-05

In [None]:
# NumPy's max over the whole array; same value as the built-in max.
np.max(big_array)

0.9999990120313899

In [146]:
# Baseline: Python's built-in min applied to the NumPy array.
%timeit min(big_array)

3.23 ms ± 84.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [147]:
# Same reduction through NumPy's own min, for comparison with the built-in.
%timeit np.min(big_array)

17.8 µs ± 513 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [None]:
# Speedup of np.min over the built-in min, from the timings measured above:
# 3.23 ms (built-in) converted to µs, divided by 17.8 µs (NumPy).
round(3.23 * 1000 / 17.8, 1)

181.5

### Multi-dimensional aggregates


In [None]:
# 3x3 matrix of random floats for the multi-dimensional examples.
# NOTE(review): unseeded, so the values in the recorded outputs will not
# reproduce on a fresh run.
M = np.random.random(size=(3, 3))

In [None]:
# Display the matrix through the cell's last expression instead of print().
M

array([[0.53031352, 0.73691596, 0.23933497],
       [0.81544938, 0.41063763, 0.37707666],
       [0.72059189, 0.69629163, 0.37351208]])

In [None]:
# Method form: sums all nine elements of M.
M.sum()

4.900123723373526

In [None]:
# Function form of the same aggregation; gives the same result as M.sum().
np.sum(M)

4.900123723373526

In [None]:
# axis=0 collapses the rows: the minimum of each column.
M.min(axis=0)

array([0.53031352, 0.41063763, 0.23933497])

In [None]:
# axis=1 collapses the columns: the maximum of each row.
M.max(axis=1)

array([0.73691596, 0.81544938, 0.72059189])

### Other aggregation functions


#### Statistics

| Name    | Description                                              |
| ------- | -------------------------------------------------------- |
| median  | Compute the median along the specified axis.             |
| average | Compute the weighted average along the specified axis.   |
| mean    | Compute the arithmetic mean along the specified axis.    |
| std     | Compute the standard deviation along the specified axis. |
| var     | Compute the variance along the specified axis.           |


In [None]:
# 1-D sample for the statistics functions; this rebinds x, replacing the
# 2-D array used in the earlier cells.
x = np.array([1, 2, 3, 4])

In [None]:
# Arithmetic mean: (1 + 2 + 3 + 4) / 4.
np.mean(x)

2.5

In [None]:
# Population standard deviation (NumPy's default ddof=0), i.e. sqrt(1.25).
np.std(x)

1.118033988749895

In [None]:
# Population variance: the mean squared deviation from the mean 2.5.
np.var(x)

1.25

#### Sums, products, differences

| Name    | Description                                                   |
| ------- | ------------------------------------------------------------- |
| prod    | Return the product of array elements over a given axis.       |
| sum     | Sum of array elements over a given axis.                      |
| cumprod | Return the cumulative product of elements along a given axis. |
| cumsum  | Return the cumulative sum of the elements along a given axis. |
| diff    | Calculate the n-th discrete difference along the given axis.  |


In [None]:
# 1-D sample containing one negative element; rebinds x once more.
x = np.array([1, 2, 3, -4])

In [None]:
# Product of all elements: 1 * 2 * 3 * -4.
np.prod(x)

-24

In [None]:
# x is 1-D, so axis=0 is its only axis and the result matches np.prod(x).
np.prod(x, axis=0)

-24

In [None]:
# Running product; the last entry equals np.prod(x).
np.cumprod(x)

array([  1,   2,   6, -24])

In [None]:
# Running sum: 1, 1+2, 1+2+3, 1+2+3-4.
np.cumsum(x)

array([1, 3, 6, 2])

In [None]:
# First-order differences between neighbours: x[i+1] - x[i].
np.diff(x)

array([ 1,  1, -7])