In [199]:
#! ipython suppress id=e926030638df4e2f922f33c9c27afc51
#%pushd book-materials
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_columns = 20
pd.options.display.max_rows = 20
pd.options.display.max_colwidth = 80
np.set_printoptions(precision=4, suppress=True)

In [3]:
#! ipython id=d620c53910a240e69ece77ec9bd156f6
rng = np.random.default_rng(seed=12345)

In [4]:
#! ipython id=b4eb8d5574a54518973f910d623bf5e8
np.ones((10, 5)).shape

(10, 5)

In [5]:
#! ipython id=7bf22ceac5c94f9bba173f578a933d8a
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

In [6]:
#! ipython id=7d56951cbc81495790806d99f88db611
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
np.issubdtype(ints.dtype, np.integer)


True

In [7]:
np.issubdtype(floats.dtype, np.floating)

True

In [8]:
#! ipython id=7c790b51cfef43789583e81d44f667eb
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [9]:
#! ipython id=802b4959858f42f68fdd8223778c6461
np.issubdtype(ints.dtype, np.number)

True

In [17]:
#! ipython id=ce8ef0b9be0541258afa90e9a306b2d5
arr = np.arange(12)
arr


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [18]:
arr.reshape((4, 3))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [20]:
#! ipython id=7f3a7b2404af44ecb287104171672800
arr.reshape((4, 3)).reshape((3, 4))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [21]:
#! ipython id=a22eb31894eb44e9af770cafd2dc82f8
arr = np.arange(15)
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [23]:
#! ipython id=3c3a3b489e7c4e77b4405c114b61cebc
other_arr = np.ones((3, 5))
other_arr.shape


(3, 5)

In [24]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [25]:
#! ipython id=e9ffffb7cf6d4703bba796bee68d0a55
arr = np.arange(15).reshape((5, 3))
arr


array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [26]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [27]:
#! ipython id=07195f02b51e408cb21a126d3001d130
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [28]:
#! ipython id=bc83d3159fff4c5da487f3981f727c07
arr = np.arange(12).reshape((3, 4))
arr


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [29]:
arr.ravel()


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [30]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

In [31]:
#! ipython id=c0bea878f4d74ef9b5bce94b88b90b3b
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
np.concatenate([arr1, arr2], axis=0)


array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [32]:
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [33]:
#! ipython id=33efa139bb234d10817538b49b41fc02
np.vstack((arr1, arr2))


array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [34]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [35]:
#! ipython id=6e069f660a754dab80caf4ea98d8161f
arr = rng.standard_normal((5, 2))
arr


array([[-1.4238,  1.2637],
       [-0.8707, -0.2592],
       [-0.0753, -0.7409],
       [-1.3678,  0.6489],
       [ 0.3611, -1.9529]])

In [36]:
first, second, third = np.split(arr, [1, 3])
first


array([[-1.4238,  1.2637]])

In [37]:
second


array([[-0.8707, -0.2592],
       [-0.0753, -0.7409]])

In [38]:
third

array([[-1.3678,  0.6489],
       [ 0.3611, -1.9529]])

In [39]:
#! ipython id=988edd872ce94799b21ff68a2aedc0cb
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = rng.standard_normal((3, 2))
np.r_[arr1, arr2]


array([[ 0.    ,  1.    ],
       [ 2.    ,  3.    ],
       [ 4.    ,  5.    ],
       [ 2.3474,  0.9685],
       [-0.7594,  0.9022],
       [-0.467 , -0.0607]])

In [40]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.    ,  1.    ,  0.    ],
       [ 2.    ,  3.    ,  1.    ],
       [ 4.    ,  5.    ,  2.    ],
       [ 2.3474,  0.9685,  3.    ],
       [-0.7594,  0.9022,  4.    ],
       [-0.467 , -0.0607,  5.    ]])

In [41]:
#! ipython id=9756c6c66e71442cbf99da0bc5723058
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

In [42]:
#! ipython id=cebbe60be04246d6b5c099fb47cfe223
arr = np.arange(3)
arr


array([0, 1, 2])

In [43]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [53]:
#! ipython id=5ed5884236d343a4b896eaefe3282631
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [48]:
#! ipython id=1ec3cee8c380412692d75b15c8a95f04
arr = rng.standard_normal((2, 2))
arr
arr.repeat(2, axis=0)

array([[ 1.3223, -0.2997],
       [ 1.3223, -0.2997],
       [ 0.9029, -1.6216],
       [ 0.9029, -1.6216]])

In [23]:
#! ipython id=edf606b3fc364243bfb27eae78a59308
arr.repeat([2, 3], axis=0)
arr.repeat([2, 3], axis=1)

In [55]:
#! ipython id=e8829e7ccec642d7980149682a573d36
arr


array([[ 1.3223, -0.2997],
       [ 0.9029, -1.6216]])

In [56]:
np.tile(arr, 2)

array([[ 1.3223, -0.2997,  1.3223, -0.2997],
       [ 0.9029, -1.6216,  0.9029, -1.6216]])

In [57]:
#! ipython id=7edcdb77a9834138a2e1b2f21ec85248
arr


array([[ 1.3223, -0.2997],
       [ 0.9029, -1.6216]])

In [58]:
np.tile(arr, (2, 1))


array([[ 1.3223, -0.2997],
       [ 0.9029, -1.6216],
       [ 1.3223, -0.2997],
       [ 0.9029, -1.6216]])

In [59]:
np.tile(arr, (3, 2))

array([[ 1.3223, -0.2997,  1.3223, -0.2997],
       [ 0.9029, -1.6216,  0.9029, -1.6216],
       [ 1.3223, -0.2997,  1.3223, -0.2997],
       [ 0.9029, -1.6216,  0.9029, -1.6216],
       [ 1.3223, -0.2997,  1.3223, -0.2997],
       [ 0.9029, -1.6216,  0.9029, -1.6216]])

In [60]:
#! ipython id=b69d0b1b35f94e5f93d50b9e91a51d82
arr = np.arange(10) * 100
inds = [7, 1, 2, 6]
arr[inds]

array([700, 100, 200, 600])

In [61]:
#! ipython id=3078f81f1f1245438754131cfda83a71
arr.take(inds)
arr.put(inds, 42)
arr


array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [62]:
arr.put(inds, [40, 41, 42, 43])
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [63]:
#! ipython id=ec4ebd6e59a842789ade7673186355fa
inds = [2, 0, 2, 1]
arr = rng.standard_normal((2, 4))
arr


array([[-0.1582,  0.4495, -1.3436, -0.0817],
       [ 1.7247,  2.6182,  0.7774,  0.8286]])

In [64]:
arr.take(inds, axis=1)

array([[-1.3436, -0.1582, -1.3436,  0.4495],
       [ 0.7774,  1.7247,  0.7774,  2.6182]])

In [65]:
#! ipython id=c490df60ace6471fa7a275199f6648f4
arr = np.arange(5)
arr



array([0, 1, 2, 3, 4])

In [66]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [70]:
#! ipython id=b415c720eee543a183697f250dfc43eb
arr = rng.standard_normal((4, 3))
arr

array([[-0.0662, -1.1082,  0.136 ],
       [ 1.3471,  0.0611,  0.0709],
       [ 0.4337,  0.2775,  0.5303],
       [ 0.5367,  0.6184, -0.795 ]])

In [76]:
arr[0]



array([-0.0662, -1.1082,  0.136 ])

In [77]:
arr.mean(0)

array([ 0.5628, -0.0378, -0.0145])

In [87]:
(-1.1082+0.136*-0.0662)/3
-0.0662- 0.5628-1.0384000000000002

-1.6674000000000002

In [86]:
-0.0662-1.1082+0.136 

-1.0384000000000002

In [None]:
 0.3937, 1.7263, 0.1633],
[-0.4384, -1.9878, -0.9839],
[-0.468 , 0.9426, -0.3891],
[ 0.5126, -0.6811, 1.2097bb
 

In [105]:
 
(1.7263 - 1.9878 + 0.9426 - 0.6811)/2

-5.551115123125783e-17

In [68]:
demeaned = arr - arr.mean(0)
demeaned


array([[-0.5148, -1.4769, -0.9495],
       [ 0.9858,  0.4844, -0.196 ],
       [-0.7844, -0.01  ,  0.7757],
       [ 0.3134,  1.0025,  0.3698]])

In [94]:
demeaned.mean(0)

array([ 0.,  0., -0.])

In [106]:
#! ipython id=24389415f5f24b43ab770324a9c9ff5f
arr


array([[-0.0662, -1.1082,  0.136 ],
       [ 1.3471,  0.0611,  0.0709],
       [ 0.4337,  0.2775,  0.5303],
       [ 0.5367,  0.6184, -0.795 ]])

In [107]:
row_means = arr.mean(1)
row_means.shape


(4,)

In [108]:
row_means.reshape((4, 1))


array([[-0.3461],
       [ 0.493 ],
       [ 0.4138],
       [ 0.12  ]])

In [112]:
arr

array([[-0.0662, -1.1082,  0.136 ],
       [ 1.3471,  0.0611,  0.0709],
       [ 0.4337,  0.2775,  0.5303],
       [ 0.5367,  0.6184, -0.795 ]])

In [122]:
demeaned = arr - row_means.reshape((4, 1))
demeaned.mean(1)

array([ 0., -0., -0.,  0.])

In [123]:
#! ipython allow_exceptions id=2b090f7325404c93acd6bf0357012ef2
#:arr - arr.mean(1)
(-0.0662+0.3461)/2.0

0.13995000000000002

In [117]:
 0.493-1.3471

-0.8541

In [119]:
0.4337-0.4138

0.019899999999999973

In [120]:
0.5367-0.12

0.41669999999999996

In [124]:
arr- arr.mean(1)

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [125]:
#! ipython id=3274f631f8944b27849e55022d15e856
arr - arr.mean(1).reshape((4, 1))

array([[ 0.28  , -0.7621,  0.4821],
       [ 0.854 , -0.4319, -0.4221],
       [ 0.0199, -0.1363,  0.1165],
       [ 0.4167,  0.4983, -0.915 ]])

In [126]:
#! ipython id=22c9e0de0f1a42d1acbfcc8a63d6a4d6
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :]
arr_3d.shape


(4, 1, 4)

In [127]:
arr_1d = rng.standard_normal(3)
arr_1d[:, np.newaxis]


array([[ 0.3   ],
       [-1.6027],
       [ 0.2668]])

In [128]:
arr_1d[np.newaxis, :]

array([[ 0.3   , -1.6027,  0.2668]])

In [129]:
#! ipython id=bc26b50944e54b2cba705986b6a9079f
arr = rng.standard_normal((3, 4, 5))
depth_means = arr.mean(2)
depth_means


array([[-0.2352, -0.4627, -0.5699, -0.1517],
       [-0.3032,  0.0831,  0.6504, -0.3646],
       [-0.1944, -0.2532,  0.807 ,  0.707 ]])

In [131]:
depth_means.shape


(3, 4)

In [132]:
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned.mean(2)

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [-0., -0.,  0.,  0.]])

In [133]:
#! ipython id=29bae95d1b0842c0b457dda156c6bfc0
arr = np.zeros((4, 3))
arr[:] = 5
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [134]:
#! ipython id=ac69aca59c94463d9b6df157d9b521ac
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:, np.newaxis]
arr


array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [135]:
arr[:2] = [[-1.37], [0.509]]
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

In [136]:
#! ipython id=14ab862de0b54de6a88bcee07cc87d2b
arr = np.arange(10)
np.add.reduce(arr)


45

In [137]:
arr.sum()

45

In [138]:
#! ipython id=6a6774bb315f4e68920057ba3e4313b5
my_rng = np.random.default_rng(12346)  # for reproducibility
arr = my_rng.standard_normal((5, 5))
arr
arr[::2].sort(1) # sort a few rows
arr[:, :-1] < arr[:, 1:]


array([[ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True]])

In [139]:
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

array([ True, False,  True, False,  True])

In [140]:
#! ipython id=492174bca93b477b813a9387627386bf
arr = np.arange(15).reshape((3, 5))
np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [142]:
#! ipython id=a611264ed18b446aa35e1f93fd9c3852
arr = np.arange(3).repeat([1, 2, 2])
arr


array([0, 1, 1, 2, 2])

In [143]:
np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [144]:
#! ipython id=83b253bbc5b8420b9caadd08be216a15
x, y = rng.standard_normal((3, 4)), rng.standard_normal(5)
result = np.subtract.outer(x, y)
result.shape

(3, 4, 5)

In [148]:
#! ipython id=9d967075734a4f12a98072f21ccc2f50
arr = np.arange(10)
np.add.reduceat(arr, [0, 5, 8])

array([10, 18, 17])

In [150]:
#! ipython id=2f1bc012f36d4b79b680ae14feb2f5f6
arr = np.multiply.outer(np.arange(4), np.arange(5))
arr


array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [151]:
np.add.reduceat(arr, [0, 2, 4], axis=1)

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]])

In [212]:
#! ipython id=4be00eeb2796461996ebf7fb17d7d9ef
def add_elements(x, y):
    """Return ``x + y`` for one pair of operands.

    Kept as a plain Python function so it can be wrapped element-wise
    (it is passed to ``np.frompyfunc`` and ``np.vectorize`` below).
    """
    total = x + y
    return total
add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [213]:
#! ipython id=1141b1d6ac6c46509bb1ae3ced6bab2d
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them(np.arange(8), np.arange(8))

array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14.])

In [214]:
#! ipython id=837c3af6451b473db934f3feda79f443
arr = rng.standard_normal(10000)
%timeit add_them(arr, arr)


3.99 ms ± 439 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [215]:
%timeit np.add(arr, arr)

16.4 µs ± 4.52 µs per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [216]:
#! ipython id=c0a9418af68f422db17fc103bba4be52
dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr

array([(1.5   ,  6), (3.1416, -2)], dtype=[('x', '<f8'), ('y', '<i4')])

In [217]:
#! ipython id=38a27fd8a31f42eb81fd1f2f7c75690c
sarr[0]


(1.5, 6)

In [218]:
sarr[0]['y']

6

In [219]:
#! ipython id=4cca2ce62d114eb68b1b45f7f254c533
sarr['x']

array([1.5   , 3.1416])

In [220]:
#! ipython id=e9302f10530049b6a23c7af55f170a39
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr

array([([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0)],
      dtype=[('x', '<i8', (3,)), ('y', '<i4')])

In [221]:
#! ipython id=514ec71c8e9349dead3b18e382625f21
arr[0]['x']

array([0, 0, 0], dtype=int64)

In [222]:
#! ipython id=4daaa32c58614b8d98c4c8cedba3cea5
arr['x']

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]], dtype=int64)

In [223]:
#! ipython id=7bd40164ecdd459c8adc144cb17b9467
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
data['x']


array([(1., 2.), (3., 4.)], dtype=[('a', '<f8'), ('b', '<f4')])

In [224]:
data['y']


array([5, 6])

In [225]:
data['x']['a']

array([1., 3.])

In [226]:
#! ipython id=73efe3c6188e410da27ab48c9a7f53c3
arr = rng.standard_normal(6)
arr.sort()
arr

array([-0.6491, -0.318 , -0.1524,  0.1914,  0.2231,  1.1239])

In [228]:
#! ipython id=69b9121e120542c980b2881fd318539a
arr = rng.standard_normal((3, 5))
arr


array([[ 0.2449,  0.5181, -0.362 ,  0.9837,  1.9402],
       [ 1.2896,  2.2866,  1.3905, -1.965 ,  0.2028],
       [ 0.5111,  1.3789, -1.1615,  0.5708, -0.3207]])

In [229]:
arr[:, 0].sort()  # Sort first column values in place
arr

array([[ 0.2449,  0.5181, -0.362 ,  0.9837,  1.9402],
       [ 0.5111,  2.2866,  1.3905, -1.965 ,  0.2028],
       [ 1.2896,  1.3789, -1.1615,  0.5708, -0.3207]])

In [232]:
#! ipython id=bbf95e7e69c84b5d9217178b033279ff
arr = rng.standard_normal(5)
arr


array([-0.5277,  0.2118,  0.7759, -1.1482, -0.8598])

In [233]:
np.sort(arr)
arr

array([-0.5277,  0.2118,  0.7759, -1.1482, -0.8598])

In [235]:
#! ipython id=60280db86cd24fdeb3b58b93d5382366
arr = rng.standard_normal((3, 5))
arr


array([[ 0.298 , -1.5969, -1.1784,  0.4493,  0.3132],
       [ 0.424 ,  0.3859,  0.1126, -1.5391,  1.5474],
       [-0.4331,  1.1221, -1.0296,  1.4132, -0.7234]])

In [236]:
arr.sort(axis=1)
arr

array([[-1.5969, -1.1784,  0.298 ,  0.3132,  0.4493],
       [-1.5391,  0.1126,  0.3859,  0.424 ,  1.5474],
       [-1.0296, -0.7234, -0.4331,  1.1221,  1.4132]])

In [239]:
#! ipython id=4b269d24749c43d09afe58fad81e2242
arr[:, ::-1]

array([[ 0.4493,  0.3132,  0.298 , -1.1784, -1.5969],
       [ 1.5474,  0.424 ,  0.3859,  0.1126, -1.5391],
       [ 1.4132,  1.1221, -0.4331, -0.7234, -1.0296]])

In [237]:
#! ipython id=06130b05850e48d68bc1ede1f371fea1
values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
indexer


array([1, 2, 4, 3, 0], dtype=int64)

In [238]:
values[indexer]

array([0, 1, 2, 3, 5])

In [240]:
#! ipython id=e12e6b522b5b4519ad20abcdd428099d
arr = rng.standard_normal((3, 5))
arr[0] = values
arr


array([[ 5.    ,  0.    ,  1.    ,  3.    ,  2.    ],
       [ 2.1872,  1.8951, -0.2469, -0.888 ,  1.8771],
       [ 0.0151,  1.0777,  0.5006, -1.1561,  1.4865]])

In [241]:
arr[:, arr[0].argsort()]

array([[ 0.    ,  1.    ,  2.    ,  3.    ,  5.    ],
       [ 1.8951, -0.2469,  1.8771, -0.888 ,  2.1872],
       [ 1.0777,  0.5006,  1.4865, -1.1561,  0.0151]])

In [242]:
#! ipython id=74b0473e78574cd39056cd680c9dd72d
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
sorter = np.lexsort((first_name, last_name))
sorter


array([1, 2, 3, 0, 4], dtype=int64)

In [243]:
list(zip(last_name[sorter], first_name[sorter]))

[('Arnold', 'Jane'),
 ('Arnold', 'Steve'),
 ('Jones', 'Bill'),
 ('Jones', 'Bob'),
 ('Walters', 'Barbara')]

In [244]:
#! ipython id=d84e962e207145bba4e631af61a21c58
values = np.array(['2:first', '2:second', '1:first', '1:second',
                   '1:third'])
key = np.array([2, 2, 1, 1, 1])
indexer = key.argsort(kind='mergesort')
indexer


array([2, 3, 4, 0, 1], dtype=int64)

In [245]:
values.take(indexer)

array(['1:first', '1:second', '1:third', '2:first', '2:second'],
      dtype='<U8')

In [246]:
#! ipython id=2bedabe6402f4a6fbb99a44b2814267c
rng = np.random.default_rng(12345)
arr = rng.standard_normal(20)
arr


array([-1.4238,  1.2637, -0.8707, -0.2592, -0.0753, -0.7409, -1.3678,
        0.6489,  0.3611, -1.9529,  2.3474,  0.9685, -0.7594,  0.9022,
       -0.467 , -0.0607,  0.7888, -1.2567,  0.5759,  1.399 ])

In [182]:
np.partition(arr, 3)

array([-1.9529, -1.4238, -1.3678, -1.2567, -0.8707, -0.7594, -0.7409,
       -0.0607,  0.3611, -0.0753, -0.2592, -0.467 ,  0.5759,  0.9022,
        0.9685,  0.6489,  0.7888,  1.2637,  1.399 ,  2.3474])

In [247]:
#! ipython id=c5cc9ec52b18419d910da3cbb6a65e37
indices = np.argpartition(arr, 3)
indices


array([ 9,  0,  6, 17,  2, 12,  5, 15,  8,  4,  3, 14, 18, 13, 11,  7, 16,
        1, 19, 10], dtype=int64)

In [211]:
arr.take(indices)

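array([-1.9529, -1.4238, -1.3678, -1.2567, -0.8707, -0.7594, -0.7409,
       -0.0607,  0.3611, -0.0753, -0.2592, -0.467 ,  0.5759,  0.9022,
        0.9685,  0.6489,  0.7888,  1.2637,  1.399 ,  2.3474])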

In [248]:
#! ipython id=6792cdc9a8b941598bfeae97a3c8fe53
arr = np.array([0, 1, 7, 12, 15])
arr.searchsorted(9)

3

In [249]:
#! ipython id=0c0cc89abad747b38fd20a51eb8ecbc9
arr.searchsorted([0, 8, 11, 16])

array([0, 3, 3, 5], dtype=int64)

In [250]:
#! ipython id=1d38f21b28f84a539dab05393eef74d6
arr = np.array([0, 0, 0, 1, 1, 1, 1])
arr.searchsorted([0, 1])


array([0, 3], dtype=int64)

In [251]:
arr.searchsorted([0, 1], side='right')

array([3, 7], dtype=int64)

In [252]:
#! ipython id=0916802173d441698ea70fa6267d1679
data = np.floor(rng.uniform(0, 10000, size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data

array([ 815., 1598., 3401., 4651., 2664., 8157., 1932., 1294.,  916.,
       5985., 8547., 6016., 9319., 7247., 8605., 9293., 5461., 9376.,
       4949., 2737., 4517., 6650., 3308., 9034., 2570., 3398., 2588.,
       3554.,   50., 6286., 2823.,  680., 6168., 1763., 3043., 4408.,
       1502., 2179., 4743., 4763., 2552., 2975., 2790., 2605., 4827.,
       2119., 4956., 2462., 8384., 1801.])

In [253]:
#! ipython id=eff843c29e2b4f25bc876cde836ea118
labels = bins.searchsorted(data)
labels

array([2, 3, 3, 3, 3, 4, 3, 3, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4,
       3, 4, 3, 3, 3, 3, 1, 4, 3, 2, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 4, 3], dtype=int64)

In [254]:
#! ipython id=fce3b76453ea4db391c4cd582a341b05
pd.Series(data).groupby(labels).mean()

1      50.000000
2     803.666667
3    3079.741935
4    7635.200000
dtype: float64

In [264]:
#! ipython verbatim id=103dc5eb76d24212aa0b6132d0340ab3
import numpy as np

def mean_distance(x, y):
    """Average the signed element-wise differences ``x[i] - y[i]``.

    A deliberately plain Python loop (no vectorization), used as the slow
    baseline in the timing comparison that follows.

    Only the first ``len(x)`` positions are visited, so ``y`` must be
    indexable at least that far. An empty ``x`` raises ``ZeroDivisionError``.
    """
    n_items = len(x)
    total = 0.0
    idx = 0
    # Accumulate left to right so the floating-point summation order is fixed.
    while idx < n_items:
        total += x[idx] - y[idx]
        idx += 1
    return total / n_items

In [266]:
x = np.random.randn(10000000)

In [267]:
 y = np.random.randn(10000000)

In [268]:
 %timeit mean_distance(x, y)

7.14 s ± 272 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [269]:
%timeit (x - y).mean()

120 ms ± 4.66 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [273]:
#! ipython id=93268b6cec1a4b6683f36f96b26794cc
mmap = np.memmap('mymmap2', dtype='float64', mode='w+', shape=(10000, 10000))
mmap

memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [274]:
#! ipython id=c5a4b74fb3fd4d0d8171d9fbeb646e8f
section = mmap[:5]

In [276]:
#! ipython id=9cdaeb21f7e2482e85247400525f7530
section[:] = rng.standard_normal((5, 10000))
mmap.flush()
mmap
del mmap

In [277]:
#! ipython id=004c3e0df5d54c7eb5e9666df4735017
mmap = np.memmap('mymmap2', dtype='float64', shape=(10000, 10000))
mmap

memmap([[ 0.7436, -0.2108,  2.7479, ...,  0.3312, -0.5861,  0.3686],
        [-1.2265,  0.0992, -0.4602, ...,  0.1742, -0.2856,  0.205 ],
        [ 0.875 ,  2.0989,  0.3264, ...,  0.2909,  0.1255,  0.3439],
        ...,
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ]])

In [278]:
#! ipython id=031548779f4a45b8b9ab9b0d7f47485d
%xdel mmap
!rm mymmap

rm: cannot remove 'mymmap': Device or resource busy


In [279]:
#! ipython id=05f2436e37a144048f7a6b2f923e712e
arr_c = np.ones((100, 10000), order='C')
arr_f = np.ones((100, 10000), order='F')
arr_c.flags


  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [280]:
arr_f.flags


  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [281]:
arr_f.flags.f_contiguous

True

In [282]:
#! ipython id=b886f529c4c74829ad119f83e5ccdeb9
%timeit arr_c.sum(1)


2.71 ms ± 101 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [283]:
%timeit arr_f.sum(1)

1.78 ms ± 37.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [284]:
#! ipython id=6712242905fd4c97938998ffa4cc7b9b
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [285]:
#! ipython id=06dd1d79515047beb5a8a4895ce17aed
arr_c[:50].flags.contiguous


True

In [286]:
arr_c[:, :50].flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [82]:
#! ipython suppress id=2ccd538ad8c64c5198411a19b44fabf4
%xdel arr_c
%xdel arr_f

In [83]:
#! ipython suppress id=0affe014fbd34cde9a3769dd8c94fdc6
# The matching `%pushd book-materials` in the setup cell is commented out, so
# `%popd` would run against an empty directory stack and fail; keep the pair
# in sync (re-enable both together if the working directory should change).
#%popd

In [84]:
#! ipython suppress id=eb88d21e8e59441e884dda4884f3d0f8
pd.options.display.max_rows = PREVIOUS_MAX_ROWS