# Numerical Computing with NumPy

## Arrays of Data

### Arrays with Python Lists

In [1]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]

In [4]:
# All three rows are references to the same list object v, not copies.
m = [v, v, v]
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [5]:
m[1]

[0.5, 0.75, 1.0, 1.5, 2.0]

In [6]:
m[1][0]

0.5

In [7]:
# Two short rows form a 2x2 "matrix"; two references to that matrix
# form a 2x2x2 "cube".
v1, v2 = [0.5, 1.5], [1, 2]
m = [v1, v2]
c = 2 * [m]  # both entries reference the same m, exactly like [m, m]
c

[[[0.5, 1.5], [1, 2]], [[0.5, 1.5], [1, 2]]]

In [8]:
c[1][1][0]

1

In [9]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]
# Every row is the very same list object v (references, not copies).
m = [v for _ in range(3)]
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [10]:
v[0] = 'Python'
m

[['Python', 0.75, 1.0, 1.5, 2.0],
 ['Python', 0.75, 1.0, 1.5, 2.0],
 ['Python', 0.75, 1.0, 1.5, 2.0]]

In [11]:
from copy import deepcopy
v = [0.5, 0.75, 1.0, 1.5, 2.0]
# Make one independent copy of v per row. `3 * [deepcopy(v)]` would copy v
# only once and repeat that single copy, leaving the three rows aliased to
# each other (changing m[0] would also change m[1] and m[2]).
m = [deepcopy(v) for _ in range(3)]
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

In [12]:
v[0] = 'Python'
m

[[0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0],
 [0.5, 0.75, 1.0, 1.5, 2.0]]

### The Python array Class

In [2]:
v = [0.5, 0.75, 1.0, 1.5, 2.0]

In [5]:
import array
# Type code 'f' stores the items as single-precision (C float) values.
a = array.array('f', v)
a

array('f', [0.5, 0.75, 1.0, 1.5, 2.0])

In [6]:
a.append(0.5)
a

array('f', [0.5, 0.75, 1.0, 1.5, 2.0, 0.5])

In [7]:
a.extend([5.0, 6.75])
a

array('f', [0.5, 0.75, 1.0, 1.5, 2.0, 0.5, 5.0, 6.75])

In [8]:
2 * a

array('f', [0.5, 0.75, 1.0, 1.5, 2.0, 0.5, 5.0, 6.75, 0.5, 0.75, 1.0, 1.5, 2.0, 0.5, 5.0, 6.75])

In [9]:
# A typed array rejects items of the wrong type. The error is caught and
# printed so the notebook still runs from top to bottom.
try:
    a.append('string')
except TypeError as err:
    print('TypeError:', err)

TypeError: must be real number, not str

In [10]:
a.tolist()

[0.5, 0.75, 1.0, 1.5, 2.0, 0.5, 5.0, 6.75]

In [11]:
# Explicit open/close variant: try/finally guarantees that the file handle
# is released even if tofile() raises.
f = open('array.apy', 'wb')
try:
    a.tofile(f)
finally:
    f.close()

In [12]:
with open('array.apy', 'wb') as f:
    a.tofile(f)

In [16]:
!ls -n arr*

-rw-r--r--  1 501  20  32  1 17 14:32 array.apy


In [17]:
b = array.array('f')


In [18]:
with open('array.apy', 'rb') as f:
    b.fromfile(f, 5)

In [19]:
b

array('f', [0.5, 0.75, 1.0, 1.5, 2.0])

In [20]:
b = array.array('d')

In [21]:
# The file was written from an array with type code 'f'; b has type code 'd',
# so the raw bytes are reinterpreted as doubles, hence the meaningless
# values displayed afterwards.
with open('array.apy', 'rb') as f:
    b.fromfile(f, 2)

In [22]:
b

array('d', [0.0004882813645963324, 0.12500002956949174])

## Regular NumPy Arrays

### The Basics

In [23]:
import numpy as np

In [24]:
a = np.array([0, 0.5, 1.0, 1.5, 2.0])
a

array([0. , 0.5, 1. , 1.5, 2. ])

In [25]:
type(a)

numpy.ndarray

In [26]:
a = np.array(['a', 'b', 'c'])
a

array(['a', 'b', 'c'], dtype='<U1')

In [27]:
a = np.arange(2, 20, 2)
a

array([ 2,  4,  6,  8, 10, 12, 14, 16, 18])

In [29]:
a = np.arange(8, dtype=np.float64)
a

array([0., 1., 2., 3., 4., 5., 6., 7.])

In [30]:
a[5:]

array([5., 6., 7.])

In [31]:
a[:2]

array([0., 1.])

In [32]:
a.sum()

28.0

In [33]:
a.std()

2.29128784747792

In [34]:
a.cumsum()

array([ 0.,  1.,  3.,  6., 10., 15., 21., 28.])

In [35]:
l = [0., 0.5, 1.5, 3., 5.]
# Multiplying a list by an integer repeats it; the elements are not scaled.
2 * l

[0.0, 0.5, 1.5, 3.0, 5.0, 0.0, 0.5, 1.5, 3.0, 5.0]

In [36]:
a

array([0., 1., 2., 3., 4., 5., 6., 7.])

In [37]:
2 * a

array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14.])

In [38]:
a ** 2

array([ 0.,  1.,  4.,  9., 16., 25., 36., 49.])

In [39]:
2 ** a

array([  1.,   2.,   4.,   8.,  16.,  32.,  64., 128.])

In [40]:
a ** a

array([1.00000e+00, 1.00000e+00, 4.00000e+00, 2.70000e+01, 2.56000e+02,
       3.12500e+03, 4.66560e+04, 8.23543e+05])

In [41]:
np.exp(a)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03])

In [42]:
np.sqrt(a)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131])

In [43]:
np.sqrt(2.5)

1.5811388300841898

In [44]:
import math

In [45]:
math.sqrt(2.5)

1.5811388300841898

In [46]:
# math.sqrt() only accepts scalars, so passing the whole ndarray fails.
# The error is caught and printed so the notebook still runs from top to
# bottom.
try:
    math.sqrt(a)
except TypeError as err:
    print('TypeError:', err)

TypeError: only length-1 arrays can be converted to Python scalars

In [47]:
%timeit np.sqrt(2.5)

624 ns ± 3.1 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [48]:
%timeit math.sqrt(2.5)

64.4 ns ± 0.287 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


### Multiple Dimensions

In [49]:
b = np.array([a, a * 2])
b

array([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
       [ 0.,  2.,  4.,  6.,  8., 10., 12., 14.]])

In [50]:
b[0]

array([0., 1., 2., 3., 4., 5., 6., 7.])

In [51]:
b[0, 2]

2.0

In [53]:
b[:, 1]

array([1., 2.])

In [54]:
b.sum()

84.0

In [55]:
b.sum(axis=0)

array([ 0.,  3.,  6.,  9., 12., 15., 18., 21.])

In [56]:
b.sum(axis=1)

array([28., 56.])

In [65]:
# 2x3 array of zeros holding C ints, laid out in row-major (C) order.
c = np.zeros(shape=(2, 3), dtype=np.intc, order='C')
c

array([[0, 0, 0],
       [0, 0, 0]], dtype=int32)

In [66]:
# 2x3x4 array of ones holding C ints, laid out in row-major (C) order.
c = np.ones(shape=(2, 3, 4), dtype=np.intc, order='C')
c

array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]], dtype=int32)

In [67]:
# The shape is taken from c, while dtype and memory order are overridden.
# 'f16' requests a 16-byte floating-point type (displayed as float128).
# NOTE(review): extended-precision floats are platform-dependent; confirm
# that 'f16' is available on the target platform.
d = np.zeros_like(c, dtype='f16', order='C')
d

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]], dtype=float128)

In [68]:
d = np.ones_like(c, dtype='f16', order='C')
d

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]], dtype=float128)

In [72]:
# np.empty() allocates memory without initializing it, so the values shown
# are arbitrary leftovers and can differ from run to run.
e = np.empty((2, 3, 2), dtype='f', order='C')
e

array([[[0.000000e+00, 0.000000e+00],
        [4.172325e-08, 1.525000e+00],
        [0.000000e+00, 1.625000e+00]],

       [[0.000000e+00, 1.750000e+00],
        [0.000000e+00, 1.812500e+00],
        [0.000000e+00, 1.875000e+00]]], dtype=float32)

In [73]:
f = np.empty_like(c)
f

array([[[1065353216, 1065353216, 1065353216, 1065353216],
        [1065353216, 1065353216, 1065353216, 1065353216],
        [1065353216, 1065353216, 1065353216, 1065353216]],

       [[1065353216, 1065353216, 1065353216, 1065353216],
        [1065353216, 1065353216, 1065353216, 1065353216],
        [1065353216, 1065353216, 1065353216, 1065353216]]], dtype=int32)

In [74]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [75]:
# 12 evenly spaced numbers from 5 to 15, both end points included
g = np.linspace(start=5, stop=15, num=12)
g

array([ 5.        ,  5.90909091,  6.81818182,  7.72727273,  8.63636364,
        9.54545455, 10.45454545, 11.36363636, 12.27272727, 13.18181818,
       14.09090909, 15.        ])

### Metainformation

In [76]:
g.size

12

In [77]:
g.itemsize

8

In [78]:
g.ndim

1

In [79]:
g.shape

(12,)

In [80]:
g.dtype

dtype('float64')

In [81]:
g.nbytes

96

### Reshaping and Resizing

In [82]:
g = np.arange(15)
g

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [83]:
g.shape

(15,)

In [84]:
np.shape(g)

(15,)

In [85]:
g.reshape((3, 5))   # g.reshape(3, 5) also works

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [86]:
h = g.reshape((5, 3))
h

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [87]:
h.T

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

In [88]:
h.transpose()

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

In [89]:
g

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [90]:
np.resize(g, (3, 1))

array([[0],
       [1],
       [2]])

In [91]:
np.resize(g, (2, 5))

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [92]:
n = np.resize(g, (5, 4))
n

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14,  0],
       [ 1,  2,  3,  4]])

In [93]:
h

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [94]:
np.hstack((h, 2 * h))

array([[ 0,  1,  2,  0,  2,  4],
       [ 3,  4,  5,  6,  8, 10],
       [ 6,  7,  8, 12, 14, 16],
       [ 9, 10, 11, 18, 20, 22],
       [12, 13, 14, 24, 26, 28]])

In [95]:
np.vstack((h, 0.5 * h))

array([[ 0. ,  1. ,  2. ],
       [ 3. ,  4. ,  5. ],
       [ 6. ,  7. ,  8. ],
       [ 9. , 10. , 11. ],
       [12. , 13. , 14. ],
       [ 0. ,  0.5,  1. ],
       [ 1.5,  2. ,  2.5],
       [ 3. ,  3.5,  4. ],
       [ 4.5,  5. ,  5.5],
       [ 6. ,  6.5,  7. ]])

In [96]:
h

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [97]:
h.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [98]:
h.flatten(order='C')

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [99]:
h.flatten(order='F')

array([ 0,  3,  6,  9, 12,  1,  4,  7, 10, 13,  2,  5,  8, 11, 14])

In [100]:
for i in h.flat:
    print(i, end=',')

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,

In [101]:
for i in h.ravel(order='C'):
    print(i, end=',')

0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,

In [102]:
for i in h.ravel(order='F'):
    print(i, end=',')

0,3,6,9,12,1,4,7,10,13,2,5,8,11,14,

### Boolean Arrays

In [103]:
h

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [104]:
h > 8

array([[False, False, False],
       [False, False, False],
       [False, False, False],
       [ True,  True,  True],
       [ True,  True,  True]])

In [105]:
h <= 7

array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True, False],
       [False, False, False],
       [False, False, False]])

In [106]:
h == 5

array([[False, False, False],
       [False, False,  True],
       [False, False, False],
       [False, False, False],
       [False, False, False]])

In [108]:
(h == 5).astype(int)

array([[0, 0, 0],
       [0, 0, 1],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [109]:
(h > 4) & (h <= 12)

array([[False, False, False],
       [False, False,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True, False, False]])

In [110]:
# Parenthesize the combined mask before casting: without the outer
# parentheses, .astype(int) applies only to (h <= 12) and the result is an
# integer array merely as a side effect of mixing bool and int operands.
((h > 4) & (h <= 12)).astype(int)

array([[0, 0, 0],
       [0, 0, 1],
       [1, 1, 1],
       [1, 1, 1],
       [1, 0, 0]])

In [111]:
h[h > 8]

array([ 9, 10, 11, 12, 13, 14])

In [112]:
h[(h > 4) & (h <= 12)]

array([ 5,  6,  7,  8,  9, 10, 11, 12])

In [113]:
# The index must stay a boolean array for element-wise selection. Casting one
# side to int, as in h[(h > 4) | (h <= 12).astype(int)], yields an integer
# array, which NumPy treats as integer indexing rather than as a mask.
h[(h > 4) | (h <= 12)]

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [114]:
np.where(h > 7, 1, 0)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [115]:
np.where(h % 2 == 0, 'even', 'odd')

array([['even', 'odd', 'even'],
       ['odd', 'even', 'odd'],
       ['even', 'odd', 'even'],
       ['odd', 'even', 'odd'],
       ['even', 'odd', 'even']], dtype='<U4')

In [116]:
np.where(h <= 7, h * 2, h / 2)

array([[ 0. ,  2. ,  4. ],
       [ 6. ,  8. , 10. ],
       [12. , 14. ,  4. ],
       [ 4.5,  5. ,  5.5],
       [ 6. ,  6.5,  7. ]])

### Speed Comparison

In [117]:
import random
I = 5000

In [118]:
%time mat = [[random.gauss(0, 1) for j in range(I)] for i in range(I)]

CPU times: user 12.1 s, sys: 328 ms, total: 12.4 s
Wall time: 12.4 s


In [119]:
mat[0][:5]

[1.293906073796224,
 -0.6310412855665405,
 -0.6169456245650886,
 -0.7488295371968878,
 -1.6075421242048433]

In [120]:
%time sum([sum(l) for l in mat])

CPU times: user 112 ms, sys: 2.34 ms, total: 115 ms
Wall time: 113 ms


-4237.003095844221

In [121]:
import sys
# Total size in bytes of the row list objects. sys.getsizeof() reports only
# each list's own footprint, not the float objects it references.
sum(sys.getsizeof(row) for row in mat)

209400000

In [122]:
%time mat = np.random.standard_normal((I, I))

CPU times: user 979 ms, sys: 348 ms, total: 1.33 s
Wall time: 1.33 s


In [123]:
%time mat.sum()

CPU times: user 23.4 ms, sys: 6.76 ms, total: 30.1 ms
Wall time: 28.3 ms


20349.110983175826

In [124]:
mat.nbytes

200000000

In [125]:
sys.getsizeof(mat)

200000128

### Structured NumPy Arrays

In [126]:
# Record layout given as a list of (name, format[, shape]) tuples.
dt = np.dtype([
    ('Name', 'S10'),             # byte string, up to 10 bytes
    ('Age', 'i4'),               # 4-byte integer
    ('Height', 'f'),             # single-precision float
    ('Children/Pets', 'i4', 2),  # two 4-byte integers per record
])

In [127]:
dt

dtype([('Name', 'S10'), ('Age', '<i4'), ('Height', '<f4'), ('Children/Pets', '<i4', (2,))])

In [128]:
# Same record layout in dictionary form, with one format per field name.
# The formats are listed explicitly instead of being split out of a single
# string, so the pairing with 'names' is visible at a glance.
dt = np.dtype({
    'names': ['Name', 'Age', 'Height', 'Children/Pets'],
    'formats': ['O', 'int', 'float', 'int,int'],  # 'int,int' = two-int sub-record
})

In [129]:
dt

dtype([('Name', 'O'), ('Age', '<i8'), ('Height', '<f8'), ('Children/Pets', [('f0', '<i8'), ('f1', '<i8')])])

In [130]:
s = np.array([('Smith', 45, 1.83, (0, 1)),
              ('Jones', 53, 1.72, (2, 2))], dtype=dt)

In [131]:
s

array([('Smith', 45, 1.83, (0, 1)), ('Jones', 53, 1.72, (2, 2))],
      dtype=[('Name', 'O'), ('Age', '<i8'), ('Height', '<f8'), ('Children/Pets', [('f0', '<i8'), ('f1', '<i8')])])

In [132]:
type(s)

numpy.ndarray

In [133]:
s['Name']

array(['Smith', 'Jones'], dtype=object)

In [134]:
s['Height'].mean()

1.775

In [135]:
s[0]

('Smith', 45, 1.83, (0, 1))

In [137]:
s[1]['Age']

53