## NumPy Data Structures

### Arrays with Python Lists

In [82]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]  # vector of numbers

In [83]:
m = [v, v, v]  # matrix of numbers; every row is a reference to the same list v, not a copy
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [84]:
m[1]

[0.5, 0.75, 1.0, 1.5, 2.0]

In [85]:
m[1][0]

0.5

In [86]:
# Two short rows: the first holds floats, the second holds ints.
v1, v2 = [0.5, 1.5], [1, 2]
m = [v1, v2]  # 2 x 2 "matrix" expressed as a list of rows
c = [m] * 2  # cube of numbers: the same matrix object referenced twice
c

[[[0.5, 1.5], [1, 2]], [[0.5, 1.5], [1, 2]]]

In [87]:
c[1][1][0]  # second matrix, its second row, first element

1

In [88]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]
m = [v] * 3  # three references to the very same list object v
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [89]:
v[0] = 'Python'  # mutating v in place shows up in every row of m, since each row is v itself
m

[['Python', 0.75, 1.0, 1.5, 2.0],
 ['Python', 0.75, 1.0, 1.5, 2.0],
 ['Python', 0.75, 1.0, 1.5, 2.0]]

In [90]:
from copy import deepcopy
v = [0.5, 0.75, 1.0, 1.5, 2.0]
# Give every row its own deep copy of v. The shorter 3 * [deepcopy(v)] would
# repeat ONE copied list three times: the rows would be detached from v but
# still be the same object, so changing one row would change all of them.
m = [deepcopy(v) for _ in range(3)]
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [91]:
v[0] = 'Python'  # m was built from a deep copy of v, so this change does not reach it
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

### Regular NumPy Arrays

In [92]:
import numpy as np

In [93]:
a = np.array([0, 0.5, 1.0, 1.5, 2.0])  # the integer 0 is stored as a float like the other values
type(a)

numpy.ndarray

In [94]:
a[:2]  # indexing as with list objects in 1 dimension

array([ 0. ,  0.5])

In [95]:
a.sum()  # sum of all elements

5.0

In [96]:
a.std()  # population standard deviation (divides by N; NumPy's default is ddof=0)

0.70710678118654757

In [97]:
a.cumsum()  # running cumulative sum

array([ 0. ,  0.5,  1.5,  3. ,  5. ])

In [98]:
a * 2  # element-wise: every entry is multiplied by the scalar

array([ 0.,  1.,  2.,  3.,  4.])

In [99]:
a ** 2  # element-wise square

array([ 0.  ,  0.25,  1.  ,  2.25,  4.  ])

In [100]:
np.sqrt(a)  # element-wise square root

array([ 0.        ,  0.70710678,  1.        ,  1.22474487,  1.41421356])

In [101]:
b = np.array([a, a * 2])  # 2 x 5 array: a as the first row, its double as the second
b

array([[ 0. ,  0.5,  1. ,  1.5,  2. ],
       [ 0. ,  1. ,  2. ,  3. ,  4. ]])

In [102]:
b[0]  # first row

array([ 0. ,  0.5,  1. ,  1.5,  2. ])

In [103]:
b[0, 2]  # third element of first row

1.0

In [104]:
b.sum()  # no axis given: sum over all elements of both rows

15.0

In [105]:
b.sum(axis=0)
  # sum along axis 0, i.e. column-wise sum

array([ 0. ,  1.5,  3. ,  4.5,  6. ])

In [106]:
b.sum(axis=1)
  # sum along axis 1, i.e. row-wise sum

array([  5.,  10.])

In [107]:
# 'i' is the type code of the C int, spelled out here as np.intc.
c = np.zeros(shape=(2, 3, 4), dtype=np.intc, order='C')  # also: np.ones()
c

array([[[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]],

       [[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]]], dtype=int32)

In [108]:
# np.longdouble is the platform's extended-precision float. Where a 16-byte
# float exists it is the type the code 'f16' names (displayed as float128),
# but unlike 'f16' it does not raise TypeError on platforms that lack one.
d = np.ones_like(c, dtype=np.longdouble, order='C')  # also: np.zeros_like()
d

array([[[ 1.0,  1.0,  1.0,  1.0],
        [ 1.0,  1.0,  1.0,  1.0],
        [ 1.0,  1.0,  1.0,  1.0]],

       [[ 1.0,  1.0,  1.0,  1.0],
        [ 1.0,  1.0,  1.0,  1.0],
        [ 1.0,  1.0,  1.0,  1.0]]], dtype=float128)

In [109]:
import random
I = 5000  # number of rows and of columns of the random matrices built below

In [110]:
%time mat = [[random.gauss(0, 1) for j in range(I)] for i in range(I)]
  # a nested list comprehension: I rows of I draws each, one random.gauss(0, 1) call per element

CPU times: user 18.1 s, sys: 380 ms, total: 18.5 s
Wall time: 18.6 s


In [111]:
%time mat = np.random.standard_normal((I, I))
  # the same I x I shape, produced by a single NumPy call; mat is now an ndarray

CPU times: user 1.15 s, sys: 160 ms, total: 1.31 s
Wall time: 1.31 s


In [112]:
%time mat.sum()

CPU times: user 16.2 ms, sys: 1.39 ms, total: 17.5 ms
Wall time: 15.5 ms


-2428.7146035744513

### Structured Arrays

In [113]:
# Record layout: one entry per field, given as (name, type code[, sub-array length]).
dt = np.dtype([
    ('Name', 'S10'),            # byte string of at most 10 bytes
    ('Age', 'i4'),              # 4-byte integer
    ('Height', 'f'),            # single-precision float
    ('Children/Pets', 'i4', 2)  # two 4-byte integers per record
])
# One tuple per record, values in the field order declared above.
s = np.array(
    [
        ('Smith', 45, 1.83, (0, 1)),
        ('Jones', 53, 1.72, (2, 2)),
    ],
    dtype=dt,
)
s

array([(b'Smith', 45,  1.83000004, [0, 1]),
       (b'Jones', 53,  1.72000003, [2, 2])],
      dtype=[('Name', 'S10'), ('Age', '<i4'), ('Height', '<f4'), ('Children/Pets', '<i4', (2,))])

In [114]:
s['Name']  # select one field across all records by its name

array([b'Smith', b'Jones'],
      dtype='|S10')

In [115]:
s['Height'].mean()  # Height is stored as a 4-byte float, so the mean carries single-precision rounding

1.7750001

In [116]:
s[1]['Age']  # 'Age' field of the second record

53

## Vectorization of Code

### Basic Vectorization

In [117]:
r = np.random.standard_normal((4, 3))
s = np.random.standard_normal((4, 3))  # rebinds s, which held the structured array until now

In [118]:
r + s

array([[-2.29239914, -1.45265499,  0.76943704],
       [-1.30340201,  0.36292379, -0.40236305],
       [-0.99103905, -1.31596832, -1.19018574],
       [-0.02089955,  0.14173371,  0.21600032]])

In [119]:
2 * r + 3  # the scalars 2 and 3 are applied to every element of r

array([[ 1.34372288,  3.15555248,  3.9816459 ],
       [ 0.86055109,  3.74903593,  2.48469897],
       [ 2.65591344,  1.47885806,  1.316848  ],
       [ 2.05312193,  5.03535432,  3.68703385]])

In [120]:
s = np.random.standard_normal(3)  # shape (3,): one value per column of r
r + s  # broadcasting: s is added to each of the four rows of r

array([[-2.39772948,  0.82536573,  0.44468374],
       [-2.63931537,  1.12210745, -0.30378972],
       [-1.7416342 , -0.01298148, -0.88771521],
       [-2.04302995,  1.76526665,  0.29737772]])

In [121]:
# causes intentional error: shapes (4, 3) and (4,) cannot be broadcast,
# because the trailing dimensions 3 and 4 do not match
# s = np.random.standard_normal(4)
# r + s

In [122]:
# r.transpose() has shape (3, 4): it would broadcast with an s of shape (4,),
# but not with the s of shape (3,) defined above, hence disabled
# r.transpose() + s

In [123]:
np.shape(r.T)

(3, 4)

In [124]:
def f(x):
    """Evaluate the linear function 3 * x + 5.

    Only multiplication and addition are applied to ``x``, so it may be a
    plain number or a NumPy array; an array is transformed element-wise.
    """
    slope, intercept = 3, 5
    return slope * x + intercept

In [125]:
f(0.5)  # float object

6.5

In [126]:
f(r)  # NumPy array

array([[ 2.51558433,  5.23332872,  6.47246885],
       [ 1.79082663,  6.1235539 ,  4.22704846],
       [ 4.48387017,  2.7182871 ,  2.47527199],
       [ 3.5796829 ,  8.05303149,  6.03055078]])

In [127]:
# causes intentional error: math.sin() works on a single number,
# not on a whole ndarray such as r
# import math
# math.sin(r)

In [128]:
np.sin(r)  # array as input

array([[-0.73667385,  0.07769785,  0.47135185],
       [-0.87706818,  0.36582395, -0.25480932],
       [-0.17119582, -0.68933519, -0.74569412],
       [-0.45594974,  0.85089003,  0.33680062]])

In [129]:
np.sin(np.pi)  # float as input

1.2246467991473532e-16

### Memory Layout

In [130]:
x = np.random.standard_normal((5, 10000000))
y = 2 * x + 3  # linear equation y = a * x + b
# Stack x and y into one (2, 5, 10000000) array, twice: same values, different memory order.
C = np.array((x, y), order='C')  # C order: row-major, the last index varies fastest in memory
F = np.array((x, y), order='F')  # F order: Fortran/column-major, the first index varies fastest
x = 0.0; y = 0.0  # memory clean-up: rebinding the names drops these references to the big arrays

In [131]:
C[:2].round(2)  # C has two entries along axis 0, so C[:2] is the whole array, rounded to 2 decimals

array([[[-1.43,  0.73, -0.4 , ...,  0.17,  0.47,  1.01],
        [-0.68,  0.57,  0.42, ...,  1.79, -0.11,  1.2 ],
        [-0.68,  0.56,  0.95, ..., -1.57,  1.93, -1.43],
        [-0.93,  0.94, -0.72, ..., -0.09,  0.32,  0.11],
        [ 0.24, -0.26,  0.71, ..., -0.46,  0.49,  1.17]],

       [[ 0.13,  4.47,  2.19, ...,  3.34,  3.94,  5.02],
        [ 1.64,  4.15,  3.85, ...,  6.59,  2.79,  5.4 ],
        [ 1.63,  4.12,  4.9 , ..., -0.13,  6.87,  0.13],
        [ 1.14,  4.88,  1.56, ...,  2.82,  3.64,  3.21],
        [ 3.49,  2.49,  4.42, ...,  2.08,  3.98,  5.35]]])

In [132]:
%timeit C.sum()

48.1 ms ± 1.65 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [133]:
%timeit F.sum()

60.6 ms ± 17.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [134]:
%timeit C[0].sum(axis=0)

97.5 ms ± 4.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [135]:
%timeit C[0].sum(axis=1)

25.3 ms ± 845 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [136]:
%timeit F.sum(axis=0)

1.08 s ± 50.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [137]:
%timeit F.sum(axis=1)

2.25 s ± 92.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [138]:
F = 0.0; C = 0.0  # memory clean-up: rebind both names so they no longer reference the large arrays