# Numpy

Use numpy because:

- it's compact
- it's fast
- it matches the way we think about math

# Why not just use Python?

![Floating Point](data/img/C-Python-Float.png)

![List](data/img/Python-List-Float.png)

![Numpy Array](data/img/Numpy-Array.png)

In [None]:
!pip install numpy

# "How fast is it?"

In [None]:
import numpy as np

np.random.seed(0)

def reciprocate(values):
    """Return a new array holding ``1.0 / x`` for each ``x`` in ``values``.

    The work is done one element at a time in a plain Python loop; the
    notebook times this against the vectorized expression ``1.0 / xs``.

    Parameters
    ----------
    values : sequence
        Anything that supports ``len()`` and iteration.

    Returns
    -------
    numpy.ndarray
        1-D array with ``len(values)`` entries, allocated by ``np.empty``
        with its default dtype.
    """
    n = len(values)
    # np.empty skips initialisation; every slot is overwritten below.
    out = np.empty(n)
    for idx, value in zip(range(n), values):
        out[idx] = 1.0 / value
    return out

reciprocate(np.random.randint(1, 10, 10))

In [None]:
xs = np.random.randint(1, 10, 1_000_000)

In [None]:
%timeit reciprocate(xs)

In [None]:
%timeit 1.0 / xs

# How compact is it?

In [None]:
!pip install pympler

In [None]:
from pympler.asizeof import asizeof

In [None]:
asizeof(1.1)

In [None]:
asizeof([])

In [None]:
np_array_size = asizeof(xs)
np_array_size

In [None]:
import random
py_list_size = asizeof([random.random() for i in range(1_000_000)])

In [None]:
py_list_size

In [None]:
py_list_size / np_array_size

In [None]:
from collections import deque, namedtuple

deck = deque([])
Point = namedtuple('Point', 'x y')

In [None]:
asizeof([])

In [None]:
asizeof(())

In [None]:
asizeof((1,2))

In [None]:
asizeof(Point(1,2))

In [None]:
asizeof(deck)

In [None]:
asizeof({})

In [None]:
asizeof(set())

In [None]:
import array

In [None]:
array?

# "Rounding"

In [None]:
x = 1.234
np.trunc(x)

In [None]:
np.trunc(-x)

In [None]:
np.floor(x)

In [None]:
np.floor(-x)

In [None]:
np.ceil(x)

In [None]:
np.ceil(-x)

In [None]:
np.round(x)

In [None]:
np.round(-x)

In [None]:
np.round([1.5, 2.5])

In [None]:
[round(1.5), round(2.5)]

# Creating Numpy arrays

In [None]:
xs = np.array(range(5))
xs, xs.dtype

In [None]:
xs = np.array([1,2,3,4,3.14])
xs, xs.dtype

In [None]:
xs = np.array(range(5), dtype=float)
xs, xs.dtype

In [None]:
xs = np.array(range(5), dtype=np.int8)
xs, xs.dtype

In [None]:
# np.intp is the pointer-sized signed integer type. The old alias np.int0
# was deprecated in NumPy 1.24 and removed in NumPy 2.0.
xs = np.array([0, 0, 1, 1], dtype=np.intp)
xs, xs.dtype

In [None]:
%timeit np.empty(1_000_000)

In [None]:
%timeit np.zeros(1_000_000)

Multi-dimensional arrays

In [None]:
np.array([
    [1,2,3],
    [4,5,6]
])

Quick array creation convenience methods

In [None]:
np.zeros(20, dtype=np.int8)

In [None]:
# np.longdouble is the portable spelling of the platform's extended-precision
# float; np.float128 only exists where C long double is 128 bits wide.
np.ones((3, 4), dtype=np.longdouble)

In [None]:
# np.longdouble is the portable spelling of the platform's extended-precision
# float; np.float128 only exists where C long double is 128 bits wide.
np.ones((3, 4, 5), dtype=np.longdouble)

In [None]:
xs = np.full((3,4), np.pi)
xs

In [None]:
xs.shape

In [None]:
len(xs)

# Creating ranges of array values

In [None]:
range(10)

In [None]:
list(range(10))

In [None]:
np.arange(10)

In [None]:
np.arange(10.0)

In [None]:
np.arange(10, 20)

In [None]:
np.arange(10, 50, 5)

In [None]:
np.arange(10, 50, 5.5)

In [None]:
np.linspace(0, 10, 3)

In [None]:
np.linspace(0, 10, 20)

# The `r_` and `c_` helpers

In [None]:
np.r_

In [None]:
np.r_[:5]

In [None]:
np.r_[:5, 200, :5]

In [None]:
np.r_[[1,2,3], [10, 11], 5:1:-0.3]

In [None]:
np.r_[0:10:3]

In [None]:
np.r_[0:10:3j]   # abusing indexing _and_ complex numbers now

In [None]:
(np.r_[0:10:20j] == np.linspace(0, 10, 20)).all()

In [None]:
np.r_[0:10:20]

In [None]:
np.c_[:10, :100:10j]

In [None]:
np.c_[:10]

Fast generation of random values

In [None]:
np.random.random((4,4))

In [None]:
np.random.normal(10, 2, (3,3))

In [None]:
np.random.randn(10)

In [None]:
np.eye(5)

# Converting array types

In [None]:
xs = np.r_[:10:20j]
xs

In [None]:
np.round(xs).astype(int)

# Numpy indexing and slicing

In [None]:
xs = np.random.randint(1, 10, (4, 4))
xs

In [None]:
xs[1, 1]

In [None]:
xs[1]

In [None]:
xs[:,1]

In [None]:
xs[1:3,1:3]

In [None]:
xs[[1,2], 2]

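Passing a list for every index works a little differently: the lists are paired up element-wise, so you get individual elements rather than a sub-grid.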

In [None]:
xs[[1,2], [3,2]]

In [None]:
xs

In [None]:
xs[1, 3], xs[2, 2]

In [None]:
xs.reshape(16)

In [None]:
xs.ravel()

In [None]:
np.r_[:9].reshape((3,3))

Sometimes you need to take a 1-D array and make a column vector out of it:

In [None]:
xs = np.r_[:10]
xs

In [None]:
xs[:, None]    

In [None]:
xs[:, np.newaxis]

In [None]:
print(np.newaxis)

In [None]:
np.c_[xs]

# Filtering

In [None]:
xs = np.random.randint(0, 5, 10)

In [None]:
xs

In [None]:
xs.nonzero()

In [None]:
xs[xs.nonzero()]

In [None]:
xs[xs != 0]

In [None]:
xs < 3

In [None]:
xs[xs < 3]

In [None]:
xs = np.random.randint(0, 5, (3, 3))
xs

In [None]:
xs.nonzero()

In [None]:
np.array(xs.nonzero())

In [None]:
xs[xs.nonzero()]

Maybe this looks a little cleaner?

In [None]:
np.transpose(xs.nonzero())

In [None]:
xs[xs.nonzero()] = 100
xs

In [None]:
xs = np.random.random((4,4))
xs

In [None]:
xs[xs > 0.7] = 0.7
xs

# Array views

In [None]:
xs = np.r_[:16].reshape((4,4))
xs

In [None]:
xs_view = xs[1:3,1:3]
xs_view

In [None]:
xs_view[0,0] = 100
xs_view

In [None]:
xs

In [None]:
xs_view_copy = xs_view.copy()

In [None]:
xs_view_copy[0, 0] = 200
xs_view_copy

In [None]:
xs

In [None]:
xs_view

# Numpy universal functions (ufuncs)

Most `np.ndarray` methods also exist as functions in the numpy namespace.

They typically work equally well on scalars, numpy arrays, and Python sequences.

In [None]:
np.multiply(2, 3)  # scalar / scalar

In [None]:
np.multiply(np.r_[:10], 20) # array / scalar

In [None]:
np.multiply(np.r_[:10], [4] * 10)  # array / list

In [None]:
np.divide(1, 0)

In [None]:
np.divide([1], [0])

In [None]:
np.divide(1, 0) - np.divide(1, 0)

In [None]:
1 / np.zeros(10)

In [None]:
1 // np.r_[0]

In [None]:
2 ** np.r_[:9].reshape((3,3))

See https://numpy.org/doc/stable/reference/ufuncs.html for the full list of ufuncs.

Generally, operators delegate to ufuncs if at least one side of the operation is a numpy type.

# Aggregation

We can apply ufuncs as reduction operators:

In [None]:
xs

In [None]:
np.add.reduce(xs)

In [None]:
np.add.reduce(xs, axis=1)

In [None]:
np.multiply.reduce(xs)

In [None]:
np.multiply.reduce(xs, axis=1)

In [None]:
np.sum(xs)

In [None]:
np.mean(xs)

In [None]:
np.mean(xs, axis=0)

In [None]:
np.mean(xs, axis=1)

# Build your own ufunc

Although this does _not_ give you "compiled C" performance, you can get numpy's broadcasting rules for your own functions using `np.vectorize`.

In [None]:
def saturating_adder(maxval):
    """Build a vectorized addition whose results never exceed ``maxval``.

    Parameters
    ----------
    maxval : number
        Upper bound applied to every sum.

    Returns
    -------
    numpy.vectorize
        Callable taking ``(x, y)``; any element-wise sum larger than
        ``maxval`` is replaced by ``maxval``.
    """
    def add(x, y):
        # Scalar kernel: np.vectorize applies it to each pair of elements.
        return min(x + y, maxval)

    saturating = np.vectorize(add)
    return saturating
    

In [None]:
myadd = saturating_adder(10)

In [None]:
myadd(2, 9)

In [None]:
myadd([2,3,4,5], 6)

In [None]:
myadd([2,3,4,5], np.r_[5:9])

Open the [numpy lab][numpy-lab]

[numpy-lab]: ./numpy-lab.ipynb