In [1]:
"""
Notebook based largely from the Alan Turing Institute Research Software Engineering with Python
https://alan-turing-institute.github.io/rse-course/html/module03_research_data_in_python/03_05_advanced_numpy.html
"""
import numpy as np

In [2]:
# elementwise array operations
np.arange(5) * np.arange(5)

array([ 0,  1,  4,  9, 16])

In [3]:
# elementwise multiply - not matrix
np.ones([3, 3]) * np.ones([3, 3])

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

# Numpy Broadcasting
If an array has dimension 1 the data is repeated to match the other.

In [4]:
col = np.arange(10).reshape([10, 1])
col

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [5]:
row = col.T
row

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [6]:
col.shape # "Column vector"

(10, 1)

In [7]:
row.shape # "Row vector"

(1, 10)

In [8]:
# If one vector has dimension 1 then data is repeated to match
assert np.alltrue((row + col) == (col + row))
row + col

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11],
       [ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
       [ 4,  5,  6,  7,  8,  9, 10, 11, 12, 13],
       [ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14],
       [ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15],
       [ 7,  8,  9, 10, 11, 12, 13, 14, 15, 16],
       [ 8,  9, 10, 11, 12, 13, 14, 15, 16, 17],
       [ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]])

In [9]:
# More than one unit dimension
x = np.array([1, 2]).reshape(1, 2)
x

array([[1, 2]])

In [10]:
y = np.array([3, 4, 5]).reshape(3, 1)
y

array([[3],
       [4],
       [5]])

In [11]:
# Both axes are expanded before the addition
result = x + y
print(result.shape)
result


(3, 2)


array([[4, 5],
       [5, 6],
       [6, 7]])

In [12]:
# np.newaxis allows indexing to create new one-long dimensions on the fly
x = np.arange(10).reshape(2, 5)
y = np.arange(8).reshape(2, 2, 2)

In [13]:
x

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [14]:
y

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [15]:
x_dash = x[:, :, np.newaxis, np.newaxis]
print(f"x.shape {x.shape}")
print(f"x_dash.shape {x_dash.shape}")

x.shape (2, 5)
x_dash.shape (2, 5, 1, 1)


In [16]:
x2 = x[:, np.newaxis, :, np.newaxis, np.newaxis]
print(f"x2.shape {x2.shape}")

x2.shape (2, 1, 5, 1, 1)


In [17]:
y_dash = y[:, np.newaxis, :, :]
print(f"y_dash.shape {y_dash}")

y_dash.shape [[[[0 1]
   [2 3]]]


 [[[4 5]
   [6 7]]]]


In [18]:
res = x_dash * y_dash
print(f"res.shape {res.shape}")

res.shape (2, 5, 2, 2)


In [19]:
np.sum(res)

830

In [20]:
threebythree = np.arange(9).reshape(3, 3)
threebythree

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [21]:
threebythree[:, np.newaxis, :]

array([[[0, 1, 2]],

       [[3, 4, 5]],

       [[6, 7, 8]]])

# Dot products

In [22]:
a = np.arange(9).reshape(3, 3)
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [23]:
b = np.arange(3, 12).reshape(3, 3)
b

array([[ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [24]:
# elementwise
a * b

array([[ 0,  4, 10],
       [18, 28, 40],
       [54, 70, 88]])

In [25]:
# dot product using broadcasting
(a[:, :, np.newaxis] * b[np.newaxis, : , :]).sum(1)

array([[ 24,  27,  30],
       [ 78,  90, 102],
       [132, 153, 174]])

In [26]:
a @ b # python 3.5 or later

array([[ 24,  27,  30],
       [ 78,  90, 102],
       [132, 153, 174]])

In [27]:
np.dot(a, b)

array([[ 24,  27,  30],
       [ 78,  90, 102],
       [132, 153, 174]])

In [28]:
# Record Arrays
x = np.arange(50).reshape([10, 5])
record_x = x.view(
    dtype={"names": ["col1", "col2", "another", "more", "last"],
           "formats": [int] * 5}
)

In [29]:
record_x

array([[( 0,  1,  2,  3,  4)],
       [( 5,  6,  7,  8,  9)],
       [(10, 11, 12, 13, 14)],
       [(15, 16, 17, 18, 19)],
       [(20, 21, 22, 23, 24)],
       [(25, 26, 27, 28, 29)],
       [(30, 31, 32, 33, 34)],
       [(35, 36, 37, 38, 39)],
       [(40, 41, 42, 43, 44)],
       [(45, 46, 47, 48, 49)]],
      dtype=[('col1', '<i8'), ('col2', '<i8'), ('another', '<i8'), ('more', '<i8'), ('last', '<i8')])

In [30]:
record_x["col1"]

array([[ 0],
       [ 5],
       [10],
       [15],
       [20],
       [25],
       [30],
       [35],
       [40],
       [45]])

In [31]:
# Logical arrays, masking, and selection

x = np.zeros([3, 4])
x

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [32]:
y = np.arange(-1, 2)[:, np.newaxis] * np.arange(-2, 2)[np.newaxis, :]
y

array([[ 2,  1,  0, -1],
       [ 0,  0,  0,  0],
       [-2, -1,  0,  1]])

In [33]:
y_is_one = y == 1
y_is_one

array([[False,  True, False, False],
       [False, False, False, False],
       [False, False, False,  True]])

In [34]:
aresame = x == y
aresame

array([[False, False,  True, False],
       [ True,  True,  True,  True],
       [False, False,  True, False]])

In [35]:
y[np.logical_not(aresame)]

array([ 2,  1, -1, -2, -1,  1])

In [36]:
y[aresame] = 5

In [37]:
y

array([[ 2,  1,  5, -1],
       [ 5,  5,  5,  5],
       [-2, -1,  5,  1]])

In [38]:
# Numpy memory

In [39]:
x = list(range(5))
y = x[:]

In [40]:
y[2] = 0
x

[0, 1, 2, 3, 4]

In [41]:
# In numpy changing a copy changes the original array (reference)
x = np.arange(5)
y = x[:]
y[2] = 0
x

array([0, 1, 0, 3, 4])