# Appendix - A

# A.1 ndarray Object Internals

In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
np.ones((10, 5)).shape

(10, 5)

In [3]:
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

### NumPy Data Type Hierarchy

In [4]:
ints = np.ones(10, dtype=np.uint16)

In [5]:
floats = np.ones(10, dtype=np.float32)

In [6]:
np.issubdtype(ints.dtype, np.integer)

True

In [7]:
np.issubdtype(floats.dtype, np.floating)

True

In [8]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [9]:
np.issubdtype(ints.dtype, np.number)

True

# A.2 Advanced Array Manipulation

In [10]:
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [11]:
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [12]:
al = np.arange(12)
al

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [13]:
al.reshape((4, 3), order='C')

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [14]:
al.reshape((4, 3), order='F')

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [15]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [16]:
arr = np.arange(15)
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [17]:
other_arr = np.ones((3, 5))

In [18]:
other_arr.shape

(3, 5)

In [19]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [20]:
arr = np.arange(15).reshape((5, 3))
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [21]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [22]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [23]:
arr = np.arange(12).reshape((3, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [24]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [25]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

### Concatenating & Splitting Arrays

In [26]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])

In [27]:
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

In [28]:
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [29]:
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [30]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [31]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [32]:
# arr = rng.standard_normal((5, 2))
# arr

In [33]:
first, second, third = np.split(arr, [1, 3])
first

array([[0, 1, 2, 3]])

In [34]:
second

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [35]:
third

array([], shape=(0, 4), dtype=int64)

##### Stacking Helpers: r_ and c_

In [36]:
rng = np.random.default_rng(12345)

In [37]:
arr = np.arange(6)

In [38]:
arr1 = arr.reshape((3, 2))

In [39]:
arr2 = rng.standard_normal((3, 2))

In [40]:
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [-1.42382504,  1.26372846],
       [-0.87066174, -0.25917323],
       [-0.07534331, -0.74088465]])

In [41]:
# Disabled cell. If run, np.c_ would attach `arr` (the 1-D np.arange(6)) as a
# third column next to the (6, 2) row-stacked array shown above.
# np.c_[np.r_[arr1, arr2], arr]

In [42]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

### Repeating Elements: tile & repeat

In [43]:
arr = np.arange(3)

In [44]:
arr

array([0, 1, 2])

In [45]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [46]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [47]:
arr = rng.standard_normal((2, 2))
arr

array([[-1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306]])

In [48]:
arr.repeat(2, axis=0)

array([[-1.3677927 ,  0.6488928 ],
       [-1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306],
       [ 0.36105811, -1.95286306]])

In [49]:
arr.repeat([2, 3], axis=0)

array([[-1.3677927 ,  0.6488928 ],
       [-1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306],
       [ 0.36105811, -1.95286306],
       [ 0.36105811, -1.95286306]])

In [50]:
arr.repeat([2, 3], axis=1)

array([[-1.3677927 , -1.3677927 ,  0.6488928 ,  0.6488928 ,  0.6488928 ],
       [ 0.36105811,  0.36105811, -1.95286306, -1.95286306, -1.95286306]])

In [51]:
arr 

array([[-1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306]])

In [52]:
np.tile(arr, 2)

array([[-1.3677927 ,  0.6488928 , -1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306,  0.36105811, -1.95286306]])

In [53]:
arr

array([[-1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306]])

In [54]:
np.tile(arr, (2, 1))

array([[-1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306],
       [-1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306]])

In [55]:
np.tile(arr, (3, 2))

array([[-1.3677927 ,  0.6488928 , -1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306,  0.36105811, -1.95286306],
       [-1.3677927 ,  0.6488928 , -1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306,  0.36105811, -1.95286306],
       [-1.3677927 ,  0.6488928 , -1.3677927 ,  0.6488928 ],
       [ 0.36105811, -1.95286306,  0.36105811, -1.95286306]])

### Fancy Indexing Equivalents: take & put

In [56]:
arr = np.arange(10) * 100

In [57]:
inds = [7, 1, 2, 6]

In [58]:
arr[inds]

array([700, 100, 200, 600])

In [59]:
arr.take(inds)

array([700, 100, 200, 600])

In [60]:
arr.put(inds, 42)

In [61]:
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [62]:
arr.put(inds, [40, 41, 42, 43])

In [63]:
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [64]:
inds = [2, 0, 2, 1]

In [65]:
arr = rng.standard_normal((2, 4))
arr

array([[ 2.34740965,  0.96849691, -0.75938718,  0.90219827],
       [-0.46695317, -0.06068952,  0.78884434, -1.25666813]])

In [66]:
arr.take(inds, axis=1)

array([[-0.75938718,  2.34740965, -0.75938718,  0.96849691],
       [ 0.78884434, -0.46695317,  0.78884434, -0.06068952]])

# A.3 Broadcasting

In [67]:
arr = np.arange(5)

In [68]:
arr

array([0, 1, 2, 3, 4])

In [69]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [70]:
arr = rng.standard_normal((4, 3))

In [71]:
arr.mean(0)

array([0.00907054, 1.11903055, 0.24381842])

In [72]:
demeaned = arr - arr.mean(0)
demeaned

array([[ 0.56678698,  0.27994844,  1.07847964],
       [-0.30876905, -0.21611121, -1.86540115],
       [-0.1672598 , -0.66954662, -1.58741949],
       [-0.09075813,  0.60570938,  2.37434101]])

In [73]:
demeaned.mean(0)

array([3.46944695e-18, 5.55111512e-17, 0.00000000e+00])

In [74]:
arr

array([[ 0.57585751,  1.39897899,  1.32229806],
       [-0.29969852,  0.90291934, -1.62158273],
       [-0.15818926,  0.44948393, -1.34360107],
       [-0.08168759,  1.72473993,  2.61815943]])

In [75]:
row_means = arr.mean(1)

In [76]:
row_means.shape

(4,)

In [77]:
row_means.reshape((4, 1))

array([[ 1.09904486],
       [-0.33945397],
       [-0.3507688 ],
       [ 1.42040392]])

In [78]:
demeaned = arr - row_means.reshape((4, 1))

In [79]:
demeaned.mean(1)

array([ 1.11022302e-16, -7.40148683e-17,  7.40148683e-17, -7.40148683e-17])

### Broadcasting over Other Axes

In [81]:
# Disabled cell: `arr - arr.mean(1)` raises
# "ValueError: operands could not be broadcast together with shapes (4,3) (4,)"
# because the row means have shape (4,), not (4, 1). The next cell reshapes
# them to (4, 1) so the subtraction broadcasts across the columns.
# arr - arr.mean(1)

In [82]:
arr - arr.mean(1).reshape((4, 1))

array([[-0.52318734,  0.29993414,  0.2232532 ],
       [ 0.03975545,  1.24237331, -1.28212876],
       [ 0.19257954,  0.80025273, -0.99283227],
       [-1.50209151,  0.30433601,  1.1977555 ]])

In [83]:
arr = np.zeros((4, 4))

In [84]:
arr_3d = arr[:, np.newaxis, :]

In [85]:
arr_3d.shape

(4, 1, 4)

In [86]:
arr_1d = rng.standard_normal(3)

In [87]:
arr_1d[:, np.newaxis]

array([[ 0.77736134],
       [ 0.8286332 ],
       [-0.95898831]])

In [89]:
arr_1d[np.newaxis, :]

array([[ 0.77736134,  0.8286332 , -0.95898831]])

In [90]:
arr = rng.standard_normal((3, 4, 5))

In [92]:
depth_means = arr.mean(2)
depth_means

array([[-0.39739088,  0.09619143,  0.04314136,  0.27468984],
       [-0.18852342, -0.20137996, -0.57324159, -0.54671393],
       [ 0.11832783, -0.63005577,  0.09723001,  0.59537117]])

In [93]:
depth_means.shape

(3, 4)

In [94]:
demeaned = arr - depth_means[:, :, np.newaxis]

In [95]:
demeaned.mean(2)

array([[ 3.33066907e-17,  0.00000000e+00,  4.44089210e-17,
        -1.11022302e-17],
       [ 8.88178420e-17, -1.66533454e-17,  2.22044605e-17,
        -4.44089210e-17],
       [-2.22044605e-17, -4.44089210e-17,  4.44089210e-17,
         6.66133815e-17]])

In [96]:
def mean_axis(arr, axis=0):
    """Subtract the mean taken along ``axis`` from ``arr``.

    Parameters
    ----------
    arr : numpy.ndarray
        Array to demean.
    axis : int, default 0
        Axis along which the means are computed; negative values count
        from the last axis.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``arr`` whose mean along ``axis``
        is zero up to floating-point error.
    """
    means = arr.mean(axis)

    # Build an index that re-inserts the reduced axis with length 1, e.g.
    # [:, np.newaxis, :] for axis=1 of a 3-D array, so `means` broadcasts
    # against `arr`.
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    # Multidimensional indexing needs a tuple; indexing with a list of
    # slices/None is rejected by current NumPy versions.
    return arr - means[tuple(indexer)]

### Setting Array Values by Broadcasting

In [97]:
arr = np.zeros((4, 3))

In [98]:
arr[:] = 5

In [99]:
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [100]:
col = np.array([1.28, -0.42, 0.44, 1.6])

In [101]:
arr[:] = col[:, np.newaxis]

In [102]:
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [103]:
arr[:2] = [[-1.37], [0.509]]
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

# A.4 Advanced ufunc Usage

### ufunc Instance Methods

In [104]:
arr = np.arange(10)

In [105]:
np.add.reduce(arr)

45

In [106]:
arr.sum()

45

In [107]:
my_rng = np.random.default_rng(12346)

In [108]:
arr = my_rng.standard_normal((5, 5))
arr

array([[-0.903889  ,  0.15713146,  0.89761199, -0.76219554, -0.17625556],
       [ 0.05303172, -1.62844028, -0.17753333,  1.96360352,  1.78125478],
       [-0.87971984, -1.69847913, -1.81891091,  0.11895453, -0.44409513],
       [ 0.76911421, -0.03433778,  0.39252776,  0.75891811, -0.07045967],
       [ 1.04984775,  1.02967072, -0.42005533,  0.78626627,  0.96124929]])

In [109]:
arr[::2].sort(1)

In [110]:
arr[:, :-1] < arr[:, 1:]

array([[ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True]])

In [111]:
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

array([ True, False,  True, False,  True])

In [112]:
arr = np.arange(15).reshape((3, 5))

In [113]:
np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [114]:
arr = np.arange(3).repeat([1, 2, 2])
arr

array([0, 1, 1, 2, 2])

In [115]:
np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [116]:
x, y = rng.standard_normal((3, 4)), rng.standard_normal(5)

In [117]:
result = np.subtract.outer(x, y)

In [118]:
result.shape

(3, 4, 5)

In [119]:
arr = np.arange(10)

In [120]:
np.add.reduceat(arr, [0, 5, 8])

array([10, 18, 17])

In [121]:
arr = np.multiply.outer(np.arange(4), np.arange(5))

In [122]:
arr

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [123]:
np.add.reduceat(arr, [0, 2, 4], axis=1)

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]])

### Writing New ufuncs in Python

In [124]:
def add_elements(x, y):
    """Return ``x + y``.

    Scalar kernel that the following cells wrap with ``np.frompyfunc`` and
    ``np.vectorize`` to build element-wise array functions.
    """
    total = x + y
    return total

In [126]:
add_them = np.frompyfunc(add_elements, 2, 1)

In [127]:
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [128]:
add_them = np.vectorize(add_elements, otypes=[np.float64])

In [129]:
add_them(np.arange(8), np.arange(8))

array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14.])

In [130]:
arr = rng.standard_normal(10000)

In [131]:
%timeit add_them(arr, arr)

1.27 ms ± 59.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


# A.5 Structured & Record Arrays

In [132]:
dtype = [('x', np.float64), ('y', np.int32)]

In [133]:
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)

In [134]:
sarr

array([(1.5       ,  6), (3.14159265, -2)],
      dtype=[('x', '<f8'), ('y', '<i4')])

In [135]:
sarr[0]

(1.5, 6)

In [136]:
sarr[0]['y']

6

In [137]:
sarr['x']

array([1.5       , 3.14159265])

### Nested Data Types & Multidimensional Fields

In [138]:
dtype = [('x', np.int64, 3), ('y', np.int32)]

In [139]:
arr = np.zeros(4, dtype=dtype)

In [140]:
arr

array([([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0)],
      dtype=[('x', '<i8', (3,)), ('y', '<i4')])

In [141]:
arr[0]['x']

array([0, 0, 0])

In [142]:
arr['x']

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [143]:
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]

In [144]:
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)

In [145]:
data['x']

array([(1., 2.), (3., 4.)], dtype=[('a', '<f8'), ('b', '<f4')])

In [146]:
data['y']

array([5, 6], dtype=int32)

In [148]:
data['x']['a']

array([1., 3.])

# A.6 More About Sorting

In [149]:
arr = rng.standard_normal(6)

In [150]:
arr.sort()

In [151]:
arr

array([-0.79895215, -0.43801258, -0.14255446,  0.53570375,  0.91629925,
        1.12396392])

In [152]:
arr = rng.standard_normal((3, 5))

In [153]:
arr

array([[ 0.59193226, -1.11848035, -1.79377628,  0.0275181 , -0.52183471],
       [-0.47451353, -0.16487175, -0.93190851,  0.03003636, -1.15533009],
       [-1.19558568,  0.46914309, -0.35978023,  1.03591571,  0.22672235]])

In [154]:
arr[:, 0].sort()

In [155]:
arr

array([[-1.19558568, -1.11848035, -1.79377628,  0.0275181 , -0.52183471],
       [-0.47451353, -0.16487175, -0.93190851,  0.03003636, -1.15533009],
       [ 0.59193226,  0.46914309, -0.35978023,  1.03591571,  0.22672235]])

In [156]:
arr = rng.standard_normal(5)

In [157]:
arr

array([-0.74479832, -0.59313793, -1.055009  , -0.06826983,  0.45795065])

In [158]:
np.sort(arr)

array([-1.055009  , -0.74479832, -0.59313793, -0.06826983,  0.45795065])

In [159]:
arr

array([-0.74479832, -0.59313793, -1.055009  , -0.06826983,  0.45795065])

In [160]:
arr = rng.standard_normal((3, 5))
arr

array([[-0.07002967,  0.14616063, -0.9943506 ,  1.14361991,  0.5025894 ],
       [ 0.89812774, -1.17039613, -0.26857518, -0.79595321,  1.45222994],
       [-0.25351976,  2.11834357,  0.36340947, -0.62451153,  1.12789103]])

In [161]:
arr.sort(axis=1)

In [162]:
arr

array([[-0.9943506 , -0.07002967,  0.14616063,  0.5025894 ,  1.14361991],
       [-1.17039613, -0.79595321, -0.26857518,  0.89812774,  1.45222994],
       [-0.62451153, -0.25351976,  0.36340947,  1.12789103,  2.11834357]])

In [163]:
arr[:, ::-1]

array([[ 1.14361991,  0.5025894 ,  0.14616063, -0.07002967, -0.9943506 ],
       [ 1.45222994,  0.89812774, -0.26857518, -0.79595321, -1.17039613],
       [ 2.11834357,  1.12789103,  0.36340947, -0.25351976, -0.62451153]])

### Indirect Sorts: argsort & lexsort

In [165]:
values = np.array([5, 0, 1, 3, 2])

In [166]:
indexer = values.argsort()
indexer

array([1, 2, 4, 3, 0])

In [167]:
values[indexer]

array([0, 1, 2, 3, 5])

In [168]:
arr = rng.standard_normal((3, 5))

In [169]:
arr[0] = values

In [170]:
arr

array([[ 5.        ,  0.        ,  1.        ,  3.        ,  2.        ],
       [-1.08719255, -2.15177088, -0.62873036, -1.31990725,  0.08304824],
       [ 0.20170741,  0.91728215,  0.17643823, -0.29339201,  1.16213389]])

In [171]:
arr[:, arr[0].argsort()]

array([[ 0.        ,  1.        ,  2.        ,  3.        ,  5.        ],
       [-2.15177088, -0.62873036,  0.08304824, -1.31990725, -1.08719255],
       [ 0.91728215,  0.17643823,  1.16213389, -0.29339201,  0.20170741]])

In [172]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])

last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])

sorter = np.lexsort((first_name, last_name))

In [173]:
sorter

array([1, 2, 3, 0, 4])

In [174]:
list(zip(last_name[sorter], first_name[sorter]))

[('Arnold', 'Jane'),
 ('Arnold', 'Steve'),
 ('Jones', 'Bill'),
 ('Jones', 'Bob'),
 ('Walters', 'Barbara')]

### Alternative Sort Algorithms

In [175]:
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])

In [176]:
key = np.array([2, 2, 1, 1, 1])

In [177]:
indexer = key.argsort(kind='mergesort')
indexer

array([2, 3, 4, 0, 1])

In [178]:
values.take(indexer)

array(['1:first', '1:second', '1:third', '2:first', '2:second'],
      dtype='<U8')

### Partially Sorting Arrays

In [179]:
rng = np.random.default_rng(12345)

In [180]:
arr = rng.standard_normal(20)

In [181]:
arr

array([-1.42382504,  1.26372846, -0.87066174, -0.25917323, -0.07534331,
       -0.74088465, -1.3677927 ,  0.6488928 ,  0.36105811, -1.95286306,
        2.34740965,  0.96849691, -0.75938718,  0.90219827, -0.46695317,
       -0.06068952,  0.78884434, -1.25666813,  0.57585751,  1.39897899])

In [182]:
np.partition(arr, 3)

array([-1.95286306, -1.42382504, -1.3677927 , -1.25666813, -0.87066174,
       -0.75938718, -0.74088465, -0.06068952,  0.36105811, -0.07534331,
       -0.25917323, -0.46695317,  0.57585751,  0.90219827,  0.96849691,
        0.6488928 ,  0.78884434,  1.26372846,  1.39897899,  2.34740965])

In [183]:
indeces = np.argpartition(arr, 3)

In [184]:
indeces

array([ 9,  0,  6, 17,  2, 12,  5, 15,  8,  4,  3, 14, 18, 13, 11,  7, 16,
        1, 19, 10])

In [185]:
arr.take(indeces)

array([-1.95286306, -1.42382504, -1.3677927 , -1.25666813, -0.87066174,
       -0.75938718, -0.74088465, -0.06068952,  0.36105811, -0.07534331,
       -0.25917323, -0.46695317,  0.57585751,  0.90219827,  0.96849691,
        0.6488928 ,  0.78884434,  1.26372846,  1.39897899,  2.34740965])

### numpy.searchsorted: Finding Elements in a Sorted Array

In [186]:
arr = np.array([0, 1, 7, 12, 15])

In [187]:
arr.searchsorted(9)

3

In [189]:
arr.searchsorted([0, 8, 11, 16])

array([0, 3, 3, 5])

In [190]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])

In [191]:
arr.searchsorted([0, 1])

array([0, 3])

In [192]:
arr.searchsorted([0, 1], side='right')

array([3, 7])

In [193]:
data = np.floor(rng.uniform(0, 10000, size=50))

In [194]:
bins = np.array([0, 100, 1000, 5000, 10000])

In [195]:
data

array([ 815., 1598., 3401., 4651., 2664., 8157., 1932., 1294.,  916.,
       5985., 8547., 6016., 9319., 7247., 8605., 9293., 5461., 9376.,
       4949., 2737., 4517., 6650., 3308., 9034., 2570., 3398., 2588.,
       3554.,   50., 6286., 2823.,  680., 6168., 1763., 3043., 4408.,
       1502., 2179., 4743., 4763., 2552., 2975., 2790., 2605., 4827.,
       2119., 4956., 2462., 8384., 1801.])

In [196]:
labels = bins.searchsorted(data)
labels

array([2, 3, 3, 3, 3, 4, 3, 3, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4,
       3, 4, 3, 3, 3, 3, 1, 4, 3, 2, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 4, 3])

In [197]:
pd.Series(data).groupby(labels).mean()

1      50.000000
2     803.666667
3    3079.741935
4    7635.200000
dtype: float64

# A.7 Writing Fast NumPy Functions with Numba

In [198]:
def mean_distance(x, y):
    """Return the mean of the element-wise differences ``x[i] - y[i]``.

    Written as an explicit Python loop on purpose: this is the slow
    baseline that the vectorized and Numba-compiled versions are timed
    against in the following cells.

    Only the first ``len(x)`` elements of ``y`` are read. An empty ``x``
    leads to a division by zero.
    """
    total = 0.0
    n_terms = 0
    for idx in range(len(x)):
        total += x[idx] - y[idx]
        n_terms += 1
    return total / n_terms

In [199]:
x = rng.standard_normal(10_000_000)

In [200]:
y = rng.standard_normal(10_000_000)

In [202]:
%timeit mean_distance(x, y)

3.05 s ± 207 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [205]:
%timeit (x - y).mean()

29.4 ms ± 1.37 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [206]:
import numba as nb

In [207]:
numba_mean_distance = nb.jit(mean_distance)

In [208]:
@nb.jit
def numba_mean_distance(x, y):
    """Numba-compiled mean of the element-wise differences ``x[i] - y[i]``.

    Same explicit loop as the pure-Python ``mean_distance``, declared with
    the ``@nb.jit`` decorator instead of wrapping the existing function.
    """
    total = 0.0
    n_terms = 0
    for idx in range(len(x)):
        total += x[idx] - y[idx]
        n_terms += 1
    return total / n_terms

In [209]:
%timeit numba_mean_distance(x, y)

15.2 ms ± 2.07 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [210]:
from numba import float64, njit

In [211]:
@njit(float64(float64[:], float64[:]))
def mean_distance(x, y):
    """Mean of ``x - y``, compiled with an explicit Numba signature.

    The decorator declares two 1-D float64 arrays as inputs and a float64
    scalar as the return value. Note that this rebinds the name of the
    pure-Python ``mean_distance`` defined earlier in the notebook.
    """
    diff = x - y
    return diff.mean()

### Creating Custom numpy.ufunc Objects with Numba

In [212]:
from numba import vectorize

In [213]:
@vectorize
def nb_add(x, y):
    """Scalar addition kernel that Numba's ``vectorize`` turns into a ufunc.

    The resulting object is applied element-wise to arrays and exposes
    ufunc methods such as ``accumulate``, as the following cells show.
    """
    total = x + y
    return total

In [214]:
x = np.arange(10)

In [215]:
nb_add(x, x)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [216]:
nb_add.accumulate(x, 0)

array([ 0,  1,  3,  6, 10, 15, 21, 28, 36, 45])

# A.8 Advanced Array Input & Output

### Memory-Mapped Files

In [217]:
mmap = np.memmap('mymmap', dtype='float64', mode='w+',
                shape=(10000, 10000))

In [218]:
mmap

memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [221]:
section = mmap[:5]

In [222]:
section[:] = rng.standard_normal((5, 10000))

In [223]:
mmap.flush()

In [224]:
mmap

memmap([[-1.0312376 ,  0.84463936, -0.95301878, ...,  0.81398931,
          2.84675841, -1.62866647],
        [ 0.10610041, -1.24085227,  0.66336087, ..., -0.87528712,
          1.47565557,  1.28685327],
        [ 0.17789502, -0.52750372,  0.87546288, ...,  0.36024551,
         -1.22331971,  0.11125982],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]])

In [225]:
del mmap

In [226]:
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))

In [227]:
mmap

memmap([[-1.0312376 ,  0.84463936, -0.95301878, ...,  0.81398931,
          2.84675841, -1.62866647],
        [ 0.10610041, -1.24085227,  0.66336087, ..., -0.87528712,
          1.47565557,  1.28685327],
        [ 0.17789502, -0.52750372,  0.87546288, ...,  0.36024551,
         -1.22331971,  0.11125982],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]])

In [228]:
%xdel mmap

In [229]:
# Remove the backing file created for the memory-map demo. Pure Python so the
# cleanup also works where the `rm` shell command is unavailable;
# missing_ok=True keeps a re-run from raising if the file is already gone.
Path('mymmap').unlink(missing_ok=True)

# A.9 Performance Tips

In [230]:
arr_c = np.ones((100, 10000), order='C')

In [231]:
arr_f = np.ones((100, 10000), order='F')

In [232]:
arr_c.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [233]:
arr_f.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [234]:
arr_f.flags.f_contiguous

True

In [235]:
%timeit arr_c.sum(1)

549 µs ± 31.3 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [236]:
%timeit arr_f.sum(1)

535 µs ± 7.67 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [237]:
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [238]:
arr_c[:50].flags.contiguous

True

In [239]:
arr_c[:, :50].flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False