# **Appendix A**
# **Advanced NumPy**

## **A.1 ndarray Object Internals**

In [1]:
import numpy as np

In [2]:
np.ones((10, 5)).shape

(10, 5)

In [3]:
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

### **NumPy dtype Hierarchy**

In [4]:
 ints = np.ones(10, dtype=np.uint16)

In [5]:
floats = np.ones(10, dtype=np.float32)

In [6]:
np.issubdtype(ints.dtype, np.integer)

True

In [7]:
np.issubdtype(floats.dtype, np.floating)

True

In [8]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [9]:
np.issubdtype(ints.dtype, np.number)

True

## **A.2 Advanced Array Manipulation**

### **Reshaping Arrays**

In [10]:
arr = np.arange(8)

In [11]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [12]:
 arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [13]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [14]:
arr = np.arange(15)

In [15]:
 arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [16]:
other_arr = np.ones((3, 5))

In [17]:
other_arr.shape

(3, 5)

In [18]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [19]:
arr = np.arange(15).reshape((5, 3))

In [20]:
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [21]:
 arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [22]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### **C Versus Fortran Order**

In [23]:
arr = np.arange(12).reshape((3, 4))

In [24]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [25]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [26]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

### **Concatenating and Splitting Arrays**

In [27]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])

In [28]:
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

In [29]:
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [30]:
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [31]:
 np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [32]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [33]:
arr = np.random.randn(5, 2)

In [34]:
arr

array([[ 1.30041719, -0.27499055],
       [-0.58957826, -0.08072925],
       [ 0.23948665,  0.01483662],
       [ 0.2790396 ,  0.18264819],
       [-2.22819342,  0.88738445]])

In [35]:
first, second, third = np.split(arr, [1, 3])

In [36]:
first

array([[ 1.30041719, -0.27499055]])

In [37]:
second

array([[-0.58957826, -0.08072925],
       [ 0.23948665,  0.01483662]])

In [38]:
third

array([[ 0.2790396 ,  0.18264819],
       [-2.22819342,  0.88738445]])

#### **Stacking helpers: r_ and c_**

In [39]:
arr = np.arange(6)

In [40]:
arr1 = arr.reshape((3, 2))

In [41]:
arr2 = np.random.randn(3, 2)

In [42]:
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [-0.79460283,  2.10896611],
       [ 0.84512253,  1.25242212],
       [-0.02600928,  0.35517503]])

In [43]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.        ,  1.        ,  0.        ],
       [ 2.        ,  3.        ,  1.        ],
       [ 4.        ,  5.        ,  2.        ],
       [-0.79460283,  2.10896611,  3.        ],
       [ 0.84512253,  1.25242212,  4.        ],
       [-0.02600928,  0.35517503,  5.        ]])

In [44]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

### **Repeating Elements: tile and repeat**

In [45]:
 arr = np.arange(3)

In [46]:
arr

array([0, 1, 2])

In [47]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [48]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [49]:
arr = np.random.randn(2, 2)

In [50]:
arr

array([[-0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915]])

In [51]:
arr.repeat(2, axis=0)

array([[-0.53841517, -0.67326066],
       [-0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915],
       [ 0.25616479, -1.26391915]])

In [52]:
arr.repeat([2, 3], axis=0)

array([[-0.53841517, -0.67326066],
       [-0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915],
       [ 0.25616479, -1.26391915],
       [ 0.25616479, -1.26391915]])

In [53]:
arr.repeat([2, 3], axis=1)

array([[-0.53841517, -0.53841517, -0.67326066, -0.67326066, -0.67326066],
       [ 0.25616479,  0.25616479, -1.26391915, -1.26391915, -1.26391915]])

In [54]:
arr

array([[-0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915]])

In [55]:
np.tile(arr, 2)

array([[-0.53841517, -0.67326066, -0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915,  0.25616479, -1.26391915]])

In [56]:
arr

array([[-0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915]])

In [57]:
np.tile(arr, (2, 1))

array([[-0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915],
       [-0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915]])

In [58]:
np.tile(arr, (3, 2))

array([[-0.53841517, -0.67326066, -0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915,  0.25616479, -1.26391915],
       [-0.53841517, -0.67326066, -0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915,  0.25616479, -1.26391915],
       [-0.53841517, -0.67326066, -0.53841517, -0.67326066],
       [ 0.25616479, -1.26391915,  0.25616479, -1.26391915]])

### **Fancy Indexing Equivalents: take and put**

In [59]:
arr = np.arange(10) * 100

In [60]:
 inds = [7, 1, 2, 6]

In [61]:
arr[inds]

array([700, 100, 200, 600])

In [62]:
arr.take(inds)

array([700, 100, 200, 600])

In [63]:
arr.put(inds, 42)

In [64]:
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [65]:
arr.put(inds, [40, 41, 42, 43])

In [66]:
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [67]:
inds = [2, 0, 2, 1]

In [68]:
arr = np.random.randn(2, 4)

In [69]:
arr

array([[-1.4476918 ,  1.36755487, -1.62080299, -2.08909156],
       [ 0.33654083, -0.82242127,  0.0170637 ,  1.58507766]])

In [70]:
arr.take(inds, axis=1)

array([[-1.62080299, -1.4476918 , -1.62080299,  1.36755487],
       [ 0.0170637 ,  0.33654083,  0.0170637 , -0.82242127]])

## **A.3 Broadcasting**

In [71]:
arr = np.arange(5)

In [72]:
arr

array([0, 1, 2, 3, 4])

In [73]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [74]:
arr = np.random.randn(4, 3)

In [75]:
arr.mean(0)

array([-0.72846737,  0.39449394,  0.43728762])

In [76]:
demeaned = arr - arr.mean(0)

In [77]:
demeaned

array([[ 0.3073032 , -1.01072905,  0.43095219],
       [-0.74716978, -0.78451499,  0.23654417],
       [ 1.50733225,  1.8963852 , -0.08999249],
       [-1.06746567, -0.10114117, -0.57750387]])

In [78]:
demeaned.mean(0)

array([0.00000000e+00, 5.55111512e-17, 0.00000000e+00])

In [79]:
arr

array([[-0.42116417, -0.61623511,  0.86823981],
       [-1.47563715, -0.39002104,  0.6738318 ],
       [ 0.77886488,  2.29087914,  0.34729514],
       [-1.79593304,  0.29335278, -0.14021625]])

In [80]:
row_means = arr.mean(1)

In [81]:
row_means.shape

(4,)

In [82]:
 row_means.reshape((4, 1))

array([[-0.05638649],
       [-0.39727546],
       [ 1.13901305],
       [-0.54759884]])

In [83]:
demeaned = arr - row_means.reshape((4, 1))

In [84]:
demeaned.mean(1)

array([-3.70074342e-17, -7.40148683e-17, -3.70074342e-17,  0.00000000e+00])

### **Broadcasting Over Other Axes**

In [85]:
arr - arr.mean(1)

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [None]:
arr - arr.mean(1).reshape((4, 1))

In [None]:
arr = np.zeros((4, 4))

In [None]:
arr_3d = arr[:, np.newaxis, :]

In [None]:
arr_3d.shape

In [None]:
arr_1d = np.random.normal(size=3)

In [None]:
arr_1d[:, np.newaxis]

In [None]:
arr_1d[np.newaxis, :]

In [None]:
arr = np.random.randn(3, 4, 5)

In [None]:
depth_means = arr.mean(2)

In [None]:
depth_means

In [None]:
depth_means.shape

In [None]:
demeaned = arr - depth_means[:, :, np.newaxis]

In [None]:
demeaned.mean(2)

In [None]:
def demean_axis(arr, axis=0):
    """Subtract from ``arr`` its mean taken along ``axis``.

    Parameters
    ----------
    arr : array with ``.mean(axis)`` and ``.ndim`` (e.g. ``numpy.ndarray``)
        Data to center.
    axis : int, default 0
        Axis along which the mean is computed and removed.

    Returns
    -------
    Array with the same shape as ``arr`` whose mean along ``axis`` is
    (up to floating-point error) zero.
    """
    means = arr.mean(axis)
    # This generalizes things like [:, :, np.newaxis] to N dimensions
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    # The index must be a tuple: a *list* of slices/None is treated as a
    # fancy index by current NumPy and raises instead of inserting the axis.
    return arr - means[tuple(indexer)]

### **Setting Array Values by Broadcasting**

In [None]:
arr = np.zeros((4, 3))

In [None]:
arr[:] = 5

In [None]:
arr

In [None]:
col = np.array([1.28, -0.42, 0.44, 1.6])

In [None]:
arr[:] = col[:, np.newaxis]

In [None]:
arr

In [None]:
arr[:2] = [[-1.37], [0.509]]

In [None]:
arr

## **A.4 Advanced ufunc Usage**

### **ufunc Instance Methods**

In [None]:
arr = np.arange(10)

In [None]:
np.add.reduce(arr)

In [None]:
arr.sum()

In [None]:
np.random.seed(12346) # for reproducibility

In [None]:
arr = np.random.randn(5, 5)

In [None]:
arr[::2].sort(1) # sort a few rows

In [None]:
arr[:, :-1] < arr[:, 1:]

In [None]:
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

In [None]:
arr = np.arange(15).reshape((3, 5))

In [None]:
np.add.accumulate(arr, axis=1)

In [None]:
arr = np.arange(3).repeat([1, 2, 2])

In [None]:
arr

In [None]:
np.multiply.outer(arr, np.arange(5))

In [None]:
x, y = np.random.randn(3, 4), np.random.randn(5)

In [None]:
result = np.subtract.outer(x, y)

In [None]:
 result.shape

In [None]:
arr = np.arange(10)

In [None]:
np.add.reduceat(arr, [0, 5, 8])

In [None]:
arr = np.multiply.outer(np.arange(4), np.arange(5))

In [None]:
arr

In [None]:
np.add.reduceat(arr, [0, 2, 4], axis=1)

### **Writing New ufuncs in Python**

In [None]:
def add_elements(x, y):
    """Return ``x + y``, i.e. whatever the ``+`` operator yields for the pair."""
    total = x + y
    return total

In [None]:
add_them = np.frompyfunc(add_elements, 2, 1)

In [None]:
add_them(np.arange(8), np.arange(8))

In [None]:
add_them = np.vectorize(add_elements, otypes=[np.float64])

In [None]:
add_them(np.arange(8), np.arange(8))

In [None]:
arr = np.random.randn(10000)

In [None]:
%timeit add_them(arr, arr)

In [None]:
%timeit np.add(arr, arr)

## **A.5 Structured and Record Arrays**

In [None]:
dtype = [('x', np.float64), ('y', np.int32)]

In [None]:
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)

In [None]:
sarr

In [None]:
sarr[0]

In [None]:
sarr[0]['y']

In [None]:
sarr['x']

### **Nested dtypes and Multidimensional Fields**

In [None]:
dtype = [('x', np.int64, 3), ('y', np.int32)]

In [None]:
arr = np.zeros(4, dtype=dtype)

In [None]:
arr

In [None]:
arr[0]['x']

In [None]:
arr['x']

In [None]:
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]

In [None]:
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)

In [None]:
data['x']

In [None]:
data['y']

In [None]:
data['x']['a']

## **A.6 More About Sorting**

In [None]:
arr = np.random.randn(6)

In [None]:
 arr.sort()

In [None]:
arr

In [None]:
arr = np.random.randn(3, 5)

In [None]:
arr

In [None]:
arr[:, 0].sort() # Sort first column values in-place

In [None]:
arr

In [None]:
arr = np.random.randn(5)

In [None]:
arr

In [None]:
np.sort(arr)

In [None]:
arr

In [None]:
arr = np.random.randn(3, 5)

In [None]:
arr

In [None]:
arr.sort(axis=1)

In [None]:
arr

In [None]:
arr[:, ::-1]

### **Indirect Sorts: argsort and lexsort**

In [None]:
values = np.array([5, 0, 1, 3, 2])

In [None]:
indexer = values.argsort()

In [None]:
indexer

In [None]:
values[indexer]

In [None]:
arr = np.random.randn(3, 5)

In [None]:
arr[0] = values

In [None]:
arr

In [None]:
 arr[:, arr[0].argsort()]

In [None]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])

In [None]:
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])

In [None]:
sorter = np.lexsort((first_name, last_name))

In [None]:
sorter

In [None]:
# Materialize the pairs: on Python 3 a bare zip(...) displays only a lazy
# <zip object>, not the (last_name, first_name) tuples in sorted order.
list(zip(last_name[sorter], first_name[sorter]))

### **Alternative Sort Algorithms**

In [None]:
values = np.array(['2:first', '2:second', '1:first', '1:second',
                   '1:third'])

In [None]:
key = np.array([2, 2, 1, 1, 1])

In [None]:
indexer = key.argsort(kind='mergesort')

In [None]:
indexer

In [None]:
values.take(indexer)

### **Partially Sorting Arrays**

In [None]:
np.random.seed(12345)

In [None]:
arr = np.random.randn(20)

In [None]:
arr

In [None]:
indices = np.argpartition(arr, 3)

In [None]:
indices

In [None]:
arr.take(indices)

### **numpy.searchsorted: Finding Elements in a Sorted Array**

In [None]:
arr = np.array([0, 1, 7, 12, 15])

In [None]:
arr.searchsorted(9)

In [None]:
arr.searchsorted([0, 8, 11, 16])

In [None]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])

In [None]:
arr.searchsorted([0, 1])

In [None]:
arr.searchsorted([0, 1], side='right')

In [None]:
data = np.floor(np.random.uniform(0, 10000, size=50))

In [None]:
bins = np.array([0, 100, 1000, 5000, 10000])

In [None]:
data

In [None]:
labels = bins.searchsorted(data)

In [None]:
labels

In [None]:
import pandas as pd

In [None]:
pd.Series(data).groupby(labels).mean()

## **A.7 Writing Fast NumPy Functions with Numba**

In [None]:
import numpy as np

In [None]:
def mean_distance(x, y):
    """Return the average of the signed differences ``x[i] - y[i]``.

    Iterates over ``range(len(x))``, so ``y`` must be indexable at every
    position of ``x``. An empty ``x`` leads to a division by zero.
    """
    n_items = len(x)
    total = 0.0
    for idx in range(n_items):
        total = total + (x[idx] - y[idx])
    # Exactly one term is accumulated per index, so the count is len(x).
    return total / n_items

In [None]:
x = np.random.randn(10000000)

In [None]:
y = np.random.randn(10000000)

In [None]:
%timeit mean_distance(x, y)

In [None]:
%timeit (x - y).mean()

In [None]:
import numba as nb

In [None]:
numba_mean_distance = nb.jit(mean_distance)

In [None]:
@nb.jit
def mean_distance(x, y):
    """Return the average of the signed differences ``x[i] - y[i]``.

    Same loop as the plain-Python ``mean_distance``, here decorated with
    ``nb.jit``. Defining it rebinds the name ``mean_distance``.
    """
    n_items = len(x)
    total = 0.0
    for idx in range(n_items):
        total = total + (x[idx] - y[idx])
    # Exactly one term is accumulated per index, so the count is len(x).
    return total / n_items

In [None]:
%timeit numba_mean_distance(x, y)

In [None]:
from numba import float64, njit
@njit(float64(float64[:], float64[:]))
def mean_distance(x, y):
    """Return the mean of ``x - y``.

    The decorator declares the signature explicitly: two ``float64[:]``
    arguments and a ``float64`` return value.
    """
    diff = x - y
    return diff.mean()

### **Creating Custom numpy.ufunc Objects with Numba**

In [None]:
from numba import vectorize

In [None]:
@vectorize
def nb_add(x, y):
    """Return ``x + y``; the function is wrapped by numba's ``vectorize`` decorator."""
    return x + y

In [None]:
x = np.arange(10)

In [None]:
nb_add(x, x)

In [None]:
nb_add.accumulate(x, 0)

## **A.8 Advanced Array Input and Output**

In [213]:
mmap = np.memmap('mymmap', dtype='float64', mode='w+',
         shape=(10000, 10000))

In [214]:
mmap

memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [215]:
section = mmap[:5]

In [216]:
section[:] = np.random.randn(5, 10000)

In [217]:
mmap.flush()

In [218]:
mmap

memmap([[ 1.37140985,  0.93127837,  0.60573747, ..., -0.62115557,
         -0.46780136,  0.47874865],
        [ 0.42296545,  0.83060431,  0.69976547, ...,  1.28831447,
          0.58858679, -1.42755372],
        [ 2.16005954, -1.24616489,  2.44470054, ...,  0.86866129,
          0.28019716,  2.13008671],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]])

In [220]:
del mmap

In [221]:
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))

In [222]:
mmap

memmap([[ 1.37140985,  0.93127837,  0.60573747, ..., -0.62115557,
         -0.46780136,  0.47874865],
        [ 0.42296545,  0.83060431,  0.69976547, ...,  1.28831447,
          0.58858679, -1.42755372],
        [ 2.16005954, -1.24616489,  2.44470054, ...,  0.86866129,
          0.28019716,  2.13008671],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]])

## **A.9 Performance Tips**

### **The Importance of Contiguous Memory**

In [223]:
arr_c = np.ones((1000, 1000), order='C')

In [224]:
arr_f = np.ones((1000, 1000), order='F')

In [225]:
arr_c.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [226]:
arr_f.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [227]:
arr_f.flags.f_contiguous

True

In [228]:
%timeit arr_c.sum(1)

862 µs ± 107 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [229]:
%timeit arr_f.sum(1)

1.21 ms ± 327 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [230]:
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [231]:
arr_c[:50].flags.contiguous

True

In [232]:
arr_c[:, :50].flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False