# Apêndice A - NumPy avançado

In [1]:
import numpy as np


In [2]:
rng = np.random.default_rng(seed=12345)
np.set_printoptions(precision=4, suppress=True)


## A.1 Organização interna do objeto ndarray

In [3]:
np.ones((10, 5)).shape


(10, 5)

In [4]:
np.ones((3, 4, 5), dtype=np.float64).strides


(160, 40, 8)

### Hierarquia de tipos de dados do NumPy

In [5]:
ints = np.ones(10, dtype=np.uint16)


In [6]:
floats = np.ones(10, dtype=np.float32)


In [7]:
np.issubdtype(ints.dtype, np.integer)


True

In [8]:
np.issubdtype(floats.dtype, np.floating)


True

In [9]:
# Verificando as classes-pais de um tipo de dado específico
np.float64.mro()


[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [10]:
np.issubdtype(ints.dtype, np.number)


True

## A.2 Manipulação avançada de arrays

### Redefinição do formato de arrays

In [11]:
arr = np.arange(8)


In [12]:
arr


array([0, 1, 2, 3, 4, 5, 6, 7])

In [13]:
arr.reshape((4, 2))


array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [14]:
arr.reshape((4, 2)).reshape((2, 4))


array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [15]:
arr = np.arange(15)


In [16]:
arr.reshape((5, -1))


array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [17]:
other_arr = np.ones((3, 5))


In [18]:
other_arr.shape


(3, 5)

In [19]:
arr.reshape(other_arr.shape)


array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [20]:
# Linearização (flattening ou raveling)
arr = np.arange(15).reshape((5, 3))


In [21]:
arr


array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [22]:
arr.ravel()


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [23]:
# flatten se comporta como ravel exceto por sempre retornar uma cópia dos dados
arr.flatten()


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### Ordem C versus ordem FORTRAN

In [24]:
arr = np.arange(12).reshape((3, 4))


In [25]:
arr


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [26]:
arr.ravel()


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [27]:
arr.ravel("F")


array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

### Concatenação e divisão de arrays

In [28]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])


In [29]:
arr2 = np.array([[7, 8, 9], [10, 11, 12]])


In [30]:
np.concatenate([arr1, arr2], axis=0)


array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [31]:
np.concatenate([arr1, arr2], axis=1)


array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [32]:
# Utilizando vstack e hstack
np.vstack((arr1, arr2))


array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [33]:
np.hstack((arr1, arr2))


array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [34]:
# Método split
arr = rng.standard_normal((5, 2))


In [35]:
arr


array([[-1.4238,  1.2637],
       [-0.8707, -0.2592],
       [-0.0753, -0.7409],
       [-1.3678,  0.6489],
       [ 0.3611, -1.9529]])

In [36]:
first, second, third = np.split(arr, [1, 3])


In [37]:
first


array([[-1.4238,  1.2637]])

In [38]:
second


array([[-0.8707, -0.2592],
       [-0.0753, -0.7409]])

In [39]:
third


array([[-1.3678,  0.6489],
       [ 0.3611, -1.9529]])

#### Auxiliares de empilhamento: r_ e c_

In [40]:
arr = np.arange(6)


In [41]:
arr1 = arr.reshape((3, 2))


In [42]:
arr2 = rng.standard_normal((3, 2))


In [43]:
np.r_[arr1, arr2]


array([[ 0.    ,  1.    ],
       [ 2.    ,  3.    ],
       [ 4.    ,  5.    ],
       [ 2.3474,  0.9685],
       [-0.7594,  0.9022],
       [-0.467 , -0.0607]])

In [44]:
np.c_[np.r_[arr1, arr2], arr]


array([[ 0.    ,  1.    ,  0.    ],
       [ 2.    ,  3.    ,  1.    ],
       [ 4.    ,  5.    ,  2.    ],
       [ 2.3474,  0.9685,  3.    ],
       [-0.7594,  0.9022,  4.    ],
       [-0.467 , -0.0607,  5.    ]])

In [45]:
np.c_[1:6, -10:-5]


array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

### Repetição de elementos: tile e repeat

In [46]:
# repeat
arr = np.arange(3)


In [47]:
arr


array([0, 1, 2])

In [48]:
arr.repeat(3)


array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [49]:
arr.repeat([2, 3, 4])


array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [50]:
arr = rng.standard_normal((2, 2))


In [51]:
arr


array([[ 0.7888, -1.2567],
       [ 0.5759,  1.399 ]])

In [52]:
arr.repeat(2, axis=0)


array([[ 0.7888, -1.2567],
       [ 0.7888, -1.2567],
       [ 0.5759,  1.399 ],
       [ 0.5759,  1.399 ]])

In [53]:
arr.repeat([2, 3], axis=0)


array([[ 0.7888, -1.2567],
       [ 0.7888, -1.2567],
       [ 0.5759,  1.399 ],
       [ 0.5759,  1.399 ],
       [ 0.5759,  1.399 ]])

In [54]:
arr.repeat([2, 3], axis=1)


array([[ 0.7888,  0.7888, -1.2567, -1.2567, -1.2567],
       [ 0.5759,  0.5759,  1.399 ,  1.399 ,  1.399 ]])

In [55]:
# tile
arr


array([[ 0.7888, -1.2567],
       [ 0.5759,  1.399 ]])

In [56]:
np.tile(arr, 2)


array([[ 0.7888, -1.2567,  0.7888, -1.2567],
       [ 0.5759,  1.399 ,  0.5759,  1.399 ]])

In [57]:
arr


array([[ 0.7888, -1.2567],
       [ 0.5759,  1.399 ]])

In [58]:
np.tile(arr, (2, 1))


array([[ 0.7888, -1.2567],
       [ 0.5759,  1.399 ],
       [ 0.7888, -1.2567],
       [ 0.5759,  1.399 ]])

In [59]:
np.tile(arr, (3, 2))


array([[ 0.7888, -1.2567,  0.7888, -1.2567],
       [ 0.5759,  1.399 ,  0.5759,  1.399 ],
       [ 0.7888, -1.2567,  0.7888, -1.2567],
       [ 0.5759,  1.399 ,  0.5759,  1.399 ],
       [ 0.7888, -1.2567,  0.7888, -1.2567],
       [ 0.5759,  1.399 ,  0.5759,  1.399 ]])

### Equivalentes à indexação sofisticada: take e put

In [60]:
arr = np.arange(10) * 100


In [61]:
inds = [7, 1, 2, 6]


In [62]:
arr[inds]


array([700, 100, 200, 600])

In [63]:
arr.take(inds)


array([700, 100, 200, 600])

In [64]:
arr.put(inds, 42)


In [65]:
arr


array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [66]:
arr.put(inds, [40, 41, 42, 43])


In [67]:
arr


array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [68]:
inds = [2, 0, 2, 1]


In [69]:
arr = rng.standard_normal((2, 4))


In [70]:
arr


array([[ 1.3223, -0.2997,  0.9029, -1.6216],
       [-0.1582,  0.4495, -1.3436, -0.0817]])

In [71]:
arr.take(inds, axis=1)


array([[ 0.9029,  1.3223,  0.9029, -0.2997],
       [-1.3436, -0.1582, -1.3436,  0.4495]])

## A.3 Broadcasting

In [72]:
arr = np.arange(5)


In [73]:
arr


array([0, 1, 2, 3, 4])

In [74]:
arr * 4


array([ 0,  4,  8, 12, 16])

In [75]:
arr = rng.standard_normal((4, 3))


In [76]:
arr.mean(0)


array([0.1206, 0.243 , 0.1444])

In [77]:
demeaned = arr - arr.mean(0)


In [78]:
demeaned


array([[ 1.6042,  2.3751,  0.633 ],
       [ 0.7081, -1.202 , -1.3538],
       [-1.5329,  0.2985,  0.6076],
       [-0.7793, -1.4717,  0.1132]])

In [79]:
demeaned.mean(0)


array([ 0., -0.,  0.])

In [80]:
arr


array([[ 1.7247,  2.6182,  0.7774],
       [ 0.8286, -0.959 , -1.2094],
       [-1.4123,  0.5415,  0.7519],
       [-0.6588, -1.2287,  0.2576]])

In [81]:
row_means = arr.mean(1)


In [82]:
row_means.shape


(4,)

In [83]:
row_means.reshape((4, 1))


array([[ 1.7068],
       [-0.4466],
       [-0.0396],
       [-0.5433]])

In [84]:
demeaned = arr - row_means.reshape((4, 1))


In [85]:
demeaned.mean(1)


array([-0.,  0.,  0.,  0.])

### Broadcasting em outros eixos

In [86]:
arr - arr.mean(1)


ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [87]:
arr - arr.mean(1).reshape((4, 1))


array([[ 0.018 ,  0.9114, -0.9294],
       [ 1.2752, -0.5124, -0.7628],
       [-1.3727,  0.5811,  0.7915],
       [-0.1155, -0.6854,  0.8009]])

In [88]:
# Utilizando np.newaxis para inserir um novo eixo
arr = np.zeros((4, 4))


In [89]:
arr_3d = arr[:, np.newaxis, :]


In [90]:
arr_3d.shape


(4, 1, 4)

In [91]:
arr_1d = rng.standard_normal(3)


In [92]:
arr_1d[:, np.newaxis]


array([[ 0.3129],
       [-0.1308],
       [ 1.27  ]])

In [93]:
arr_1d[np.newaxis, :]


array([[ 0.3129, -0.1308,  1.27  ]])

In [94]:
arr = rng.standard_normal((3, 4, 5))


In [95]:
depth_means = arr.mean(2)


In [96]:
depth_means


array([[ 0.0431,  0.2747, -0.1885, -0.2014],
       [-0.5732, -0.5467,  0.1183, -0.6301],
       [ 0.0972,  0.5954,  0.0331, -0.6002]])

In [97]:
depth_means.shape


(3, 4)

In [98]:
demeaned = arr - depth_means[:, :, np.newaxis]


In [99]:
demeaned.mean(2)


array([[ 0., -0.,  0., -0.],
       [ 0., -0., -0., -0.],
       [ 0.,  0.,  0.,  0.]])

### Definição de valores de arrays por broadcasting

In [100]:
arr = np.zeros((4, 3))


In [101]:
arr[:] = 5


In [102]:
arr


array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [103]:
col = np.array([1.28, -0.42, 0.44, 1.6])


In [104]:
arr[:] = col[:, np.newaxis]


In [105]:
arr


array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [106]:
arr[:2] = [[-1.37], [0.509]]


In [107]:
arr


array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

## A.4 Uso avançado das ufuncs

### Métodos de instância das ufuncs

In [108]:
arr = np.arange(10)


In [109]:
np.add.reduce(arr)


np.int64(45)

In [110]:
arr.sum()


np.int64(45)

In [111]:
my_rng = np.random.default_rng(12346)


In [112]:
arr = my_rng.standard_normal((5, 5))


In [113]:
arr


array([[-0.9039,  0.1571,  0.8976, -0.7622, -0.1763],
       [ 0.053 , -1.6284, -0.1775,  1.9636,  1.7813],
       [-0.8797, -1.6985, -1.8189,  0.119 , -0.4441],
       [ 0.7691, -0.0343,  0.3925,  0.7589, -0.0705],
       [ 1.0498,  1.0297, -0.4201,  0.7863,  0.9612]])

In [114]:
arr[::2].sort(1)


In [115]:
arr[:, :-1] < arr[:, 1:]


array([[ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True]])

In [116]:
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)


array([ True, False,  True, False,  True])

In [117]:
arr = np.arange(15).reshape((3, 5))


In [118]:
np.add.accumulate(arr, axis=1)


array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [119]:
arr = np.arange(3).repeat([1, 2, 2])


In [120]:
arr


array([0, 1, 1, 2, 2])

In [121]:
np.multiply.outer(arr, np.arange(5))


array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [122]:
x, y = rng.standard_normal((3, 4)), rng.standard_normal(5)


In [123]:
result = np.subtract.outer(x, y)


In [124]:
result.shape


(3, 4, 5)

In [125]:
arr = np.arange(10)


In [126]:
# Os resultados de reduceat são as reduções (aqui, as somas) executadas em 
# arr[0:5], arr[5:8] e arr[8:]
np.add.reduceat(arr, [0, 5, 8])



array([10, 18, 17])

In [127]:
arr = np.multiply.outer(np.arange(4), np.arange(5))


In [128]:
arr


array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [129]:
np.add.reduceat(arr, [0, 2, 4], axis=1)


array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]])

### Criação de novas ufuncs em Python

In [130]:
def add_elements(x, y):
    """Return ``x + y`` for a single pair of operands.

    Kept as a plain scalar function so it can be wrapped into an
    element-wise callable (e.g. with ``np.frompyfunc`` or ``np.vectorize``).
    """
    total = x + y
    return total


In [131]:
add_them = np.frompyfunc(add_elements, 2, 1)


In [132]:
add_them(np.arange(8), np.arange(8))


array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [133]:
add_them = np.vectorize(add_elements, otypes=[np.float64])


In [134]:
add_them(np.arange(8), np.arange(8))


array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14.])

In [135]:
arr = rng.standard_normal(10000)


In [136]:
%timeit add_them(arr, arr)


3.88 ms ± 737 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [137]:
%timeit np.add(arr, arr)


7.25 µs ± 2.6 µs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


## A.5 Arrays estruturados e de registros

In [140]:
dtype = [("x", np.float64), ("y", np.int32)]


In [141]:
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)


In [None]:
sarr


array([(1.5   ,  6), (3.1416, -2)], dtype=[('x', '<f8'), ('y', '<i4')])

In [None]:
sarr[0]


np.void((1.5, 6), dtype=[('x', '<f8'), ('y', '<i4')])

In [None]:
sarr[0]["y"]


np.int32(6)

In [None]:
sarr["x"]


array([1.5   , 3.1416])

### Tipos de dados aninhados e campos multidimensionais

In [146]:
dtype = [("x", np.int64, 3), ("y", np.int32)]


In [147]:
arr = np.zeros(4, dtype=dtype)


In [None]:
arr


array([([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0)],
      dtype=[('x', '<i8', (3,)), ('y', '<i4')])

In [None]:
arr[0]["x"]


array([0, 0, 0])

In [None]:
arr["x"]


array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [151]:
dtype = [("x", [("a", "f8"), ("b", "f4")]), ("y", np.int32)]


In [152]:
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)


In [None]:
data["x"]


array([(1., 2.), (3., 4.)], dtype=[('a', '<f8'), ('b', '<f4')])

In [None]:
data["y"]


array([5, 6], dtype=int32)

In [157]:
data["x"]["a"]


array([1., 3.])

## A.6 Mais detalhes sobre ordenação

In [None]:
arr = rng.standard_normal(6)


In [None]:
arr.sort()


In [None]:
arr


array([-1.1553, -0.9319, -0.5218, -0.4745, -0.1649,  0.03  ])

In [161]:
arr = rng.standard_normal((3, 5))


In [None]:
arr


array([[-1.1956,  0.4691, -0.3598,  1.0359,  0.2267],
       [-0.7448, -0.5931, -1.055 , -0.0683,  0.458 ],
       [-0.07  ,  0.1462, -0.9944,  1.1436,  0.5026]])

In [None]:
arr[:, 0].sort()  # Ordena os valores da primeira coluna diretamente no local


In [None]:
arr


array([[-1.1956,  0.4691, -0.3598,  1.0359,  0.2267],
       [-0.7448, -0.5931, -1.055 , -0.0683,  0.458 ],
       [-0.07  ,  0.1462, -0.9944,  1.1436,  0.5026]])

In [None]:
# Método np.sort
arr = rng.standard_normal(5)


In [None]:
arr


array([ 0.8981, -1.1704, -0.2686, -0.796 ,  1.4522])

In [None]:
np.sort(arr)  # Cria uma nova cópia ordenada de um array


array([-1.1704, -0.796 , -0.2686,  0.8981,  1.4522])

In [None]:
arr


array([ 0.8981, -1.1704, -0.2686, -0.796 ,  1.4522])

In [None]:
arr = rng.standard_normal((3, 5))


In [None]:
arr


array([[-0.2535,  2.1183,  0.3634, -0.6245,  1.1279],
       [ 1.6164, -0.2287, -0.6201, -0.1143, -1.2067],
       [-1.0872, -2.1518, -0.6287, -1.3199,  0.083 ]])

In [None]:
arr.sort(axis=1)


In [None]:
arr


array([[-0.6245, -0.2535,  0.3634,  1.1279,  2.1183],
       [-1.2067, -0.6201, -0.2287, -0.1143,  1.6164],
       [-2.1518, -1.3199, -1.0872, -0.6287,  0.083 ]])

In [173]:
arr[:, ::-1]  # Retorna um ndarray na ordem inversa


array([[ 2.1183,  1.1279,  0.3634, -0.2535, -0.6245],
       [ 1.6164, -0.1143, -0.2287, -0.6201, -1.2067],
       [ 0.083 , -0.6287, -1.0872, -1.3199, -2.1518]])

### Ordenações indiretas: argsort e lexsort

In [None]:
values = np.array([5, 0, 1, 3, 2])


In [None]:
# Array de índices inteiros que informa como reorganizar os dados para
# que fiquem ordenados
indexer = values.argsort()


In [None]:
indexer


array([1, 2, 4, 3, 0])

In [None]:
values[indexer]


array([0, 1, 2, 3, 5])

In [None]:
arr = rng.standard_normal((3, 5))


In [None]:
arr[0] = values


In [None]:
arr


array([[ 5.    ,  0.    ,  1.    ,  3.    ,  2.    ],
       [-0.7503, -2.1268, -1.391 , -0.4922,  0.4505],
       [ 0.8926, -1.0479,  0.9553,  0.2936,  0.5379]])

In [181]:
arr[:, arr[0].argsort()]


array([[ 0.    ,  1.    ,  2.    ,  3.    ,  5.    ],
       [-2.1268, -1.391 ,  0.4505, -0.4922, -0.7503],
       [-1.0479,  0.9553,  0.5379,  0.2936,  0.8926]])

In [183]:
first_name = np.array(["Bob", "Jane", "Steve", "Bill", "Barbara"])


In [184]:
last_name = np.array(["Jones", "Arnold", "Arnold", "Jones", "Walters"])


In [None]:
# Executa uma ordenação indireta lexicográfica em vários arrays de chaves
# A ordem na qual as chaves são utilizadas para ordenar os dados começa com o
# último array passado
sorter = np.lexsort((first_name, last_name))


In [None]:
sorter


array([1, 2, 3, 0, 4])

In [187]:
list(zip(last_name[sorter], first_name[sorter]))


[(np.str_('Arnold'), np.str_('Jane')),
 (np.str_('Arnold'), np.str_('Steve')),
 (np.str_('Jones'), np.str_('Bill')),
 (np.str_('Jones'), np.str_('Bob')),
 (np.str_('Walters'), np.str_('Barbara'))]

### Algoritmos de ordenação alternativos

In [188]:
values = np.array(["2:first", "2:second", "1:first", "1:second", "1:third"])


In [189]:
key = np.array([2, 2, 1, 1, 1])


In [190]:
indexer = key.argsort(kind="mergesort")


In [None]:
indexer


array([2, 3, 4, 0, 1])

In [None]:
values.take(indexer)


array(['1:first', '1:second', '1:third', '2:first', '2:second'],
      dtype='<U8')

### Ordenação parcial de arrays

In [193]:
rng = np.random.default_rng(12345)


In [None]:
arr = rng.standard_normal(20)


In [None]:
arr


array([-1.4238,  1.2637, -0.8707, -0.2592, -0.0753, -0.7409, -1.3678,
        0.6489,  0.3611, -1.9529,  2.3474,  0.9685, -0.7594,  0.9022,
       -0.467 , -0.0607,  0.7888, -1.2567,  0.5759,  1.399 ])

In [196]:
np.partition(arr, 3)


array([-1.9529, -1.4238, -1.3678, -1.2567, -0.8707, -0.7594, -0.7409,
       -0.467 , -0.2592, -0.0753, -0.0607,  0.3611,  0.5759,  0.6489,
        0.7888,  0.9022,  0.9685,  1.2637,  1.399 ,  2.3474])

In [197]:
indices = np.argpartition(arr, 3)


In [None]:
indices


array([ 9,  0,  6, 17,  2, 12,  5, 14,  3,  4, 15,  8, 18,  7, 16, 13, 11,
        1, 19, 10])

In [None]:
arr.take(indices)


array([-1.9529, -1.4238, -1.3678, -1.2567, -0.8707, -0.7594, -0.7409,
       -0.467 , -0.2592, -0.0753, -0.0607,  0.3611,  0.5759,  0.6489,
        0.7888,  0.9022,  0.9685,  1.2637,  1.399 ,  2.3474])

### numpy.searchsorted: encontrando elementos em um array ordenado

In [None]:
arr = np.array([0, 1, 7, 12, 15])


In [None]:
arr.searchsorted(9)


np.int64(3)

In [202]:
arr.searchsorted([0, 8, 11, 16])


array([0, 3, 3, 5])

In [203]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])


In [204]:
arr.searchsorted([0, 1])


array([0, 3])

In [None]:
arr.searchsorted([0, 1], side="right")


array([3, 7])

In [206]:
data = np.floor(rng.uniform(0, 10000, size=50))


In [207]:
bins = np.array([0, 100, 1000, 5000, 10000])


In [None]:
data


array([ 815., 1598., 3401., 4651., 2664., 8157., 1932., 1294.,  916.,
       5985., 8547., 6016., 9319., 7247., 8605., 9293., 5461., 9376.,
       4949., 2737., 4517., 6650., 3308., 9034., 2570., 3398., 2588.,
       3554.,   50., 6286., 2823.,  680., 6168., 1763., 3043., 4408.,
       1502., 2179., 4743., 4763., 2552., 2975., 2790., 2605., 4827.,
       2119., 4956., 2462., 8384., 1801.])

In [209]:
labels = bins.searchsorted(data)


In [None]:
labels


array([2, 3, 3, 3, 3, 4, 3, 3, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4,
       3, 4, 3, 3, 3, 3, 1, 4, 3, 2, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 4, 3])

In [212]:
import pandas as pd

pd.Series(data).groupby(labels).mean()


1      50.000000
2     803.666667
3    3079.741935
4    7635.200000
dtype: float64

## A.7 Criação de funções rápidas do NumPy com o Numba

In [213]:
def mean_distance(x, y):
    """Return the mean of the element-wise differences ``x[i] - y[i]``.

    The loop runs over ``len(x)`` positions, so only the first ``len(x)``
    entries of ``y`` are read. In plain Python an empty ``x`` ends in
    ``0.0 / 0`` and therefore raises ``ZeroDivisionError``.

    Written as an explicit index loop on purpose: it is the loop-based
    counterpart of the vectorized ``(x - y).mean()``.
    """
    n_items = len(x)
    total = 0.0
    for idx in range(n_items):
        total += x[idx] - y[idx]
    # One term was accumulated per index, so the term count equals n_items.
    return total / n_items


In [None]:
x = rng.standard_normal(10_000_000)


In [None]:
y = rng.standard_normal(10_000_000)


In [None]:
%timeit mean_distance(x, y)


6.52 s ± 109 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%timeit (x - y).mean()


58.6 ms ± 10.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
import numba as nb


In [None]:
numba_mean_distance = nb.jit(mean_distance)


Esse código poderia ter sido escrito usando um decorador:
```python
@nb.jit
def mean_distance(x, y):
    nx = len(x)
    result = 0.0
    count = 0
    for i in range(nx):
        result += x[i] - y[i]
        count += 1
    return result / count
```

In [None]:
%timeit numba_mean_distance(x, y)


18.1 ms ± 171 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


A função <code>jit</code> do Numba tem uma opção <code>nopython=True</code> que
restringe o código permitido a um código Python que possa ser compilado para
LLVM sem nenhuma chamada à API C do Python. <code>jit(nopython=True)</code> tem
um alias mais curto, <code>numba.njit</code>.

No exemplo anterior, o código poderia ser escrito dessa maneira:
```python
from numba import float64, njit

@njit(float64(float64[:], float64[:]))
def mean_distance(x, y):
    return (x - y).mean()
```

### Criação de objetos numpy.ufunc personalizados com o Numba

In [221]:
from numba import vectorize


@vectorize
def nb_add(x, y):
    """Scalar kernel returning ``x + y``; wrapped by the ``vectorize`` decorator."""
    total = x + y
    return total


In [None]:
x = np.arange(10)


In [223]:
nb_add(x, x)


array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [None]:
nb_add.accumulate(x, 0)


array([ 0,  1,  3,  6, 10, 15, 21, 28, 36, 45])

## A.8 Operações avançadas de entrada e saída com arrays

### Arquivos mapeados em memória

In [225]:
mmap = np.memmap("mymap", dtype="float64", mode="w+", shape=(10000, 10000))


In [None]:
mmap


memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], shape=(10000, 10000))

In [None]:
section = mmap[:5]


In [None]:
section[:] = rng.standard_normal((5, 10000))


In [None]:
mmap.flush()


In [None]:
mmap


memmap([[ 0.8726, -0.439 ,  0.361 , ..., -0.5822,  1.773 , -0.0615],
        [ 0.1979,  2.5442,  0.9167, ...,  0.1741,  0.8364, -0.6414],
        [-0.8671,  1.3508,  0.0072, ...,  0.0877,  1.7929, -2.0398],
        ...,
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ]],
       shape=(10000, 10000))

In [None]:
del mmap


In [232]:
mmap = np.memmap("mymap", dtype="float64", shape=(10000, 10000))


In [None]:
mmap


memmap([[ 0.8726, -0.439 ,  0.361 , ..., -0.5822,  1.773 , -0.0615],
        [ 0.1979,  2.5442,  0.9167, ...,  0.1741,  0.8364, -0.6414],
        [-0.8671,  1.3508,  0.0072, ...,  0.0877,  1.7929, -2.0398],
        ...,
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ]],
       shape=(10000, 10000))

In [None]:
%xdel mmap


In [1]:
# Remove the memory-mapped demo file created earlier in this section.
# pathlib is used instead of a shell command because `del` exists only in the
# Windows shell, which would leave the file behind on Linux/macOS.
from pathlib import Path

try:
    # missing_ok=True keeps the cell re-runnable once the file is already gone.
    Path("mymap").unlink(missing_ok=True)
except OSError as exc:
    # Best-effort, like the shell command it replaces: report instead of raising
    # (e.g. Windows refuses to delete a file that is still memory-mapped).
    print(f"Could not remove 'mymap': {exc}")
