# Advanced NumPy

In [1]:
import numpy as np
import pandas as pd
np.random.seed(12345)
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_rows = 20
np.set_printoptions(precision=4, suppress=True)

## ndarray Object Internals

In [2]:
np.ones((10, 5)).shape

(10, 5)

In [3]:
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

### NumPy dtype Hierarchy

In [4]:
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
np.issubdtype(ints.dtype, np.integer)
np.issubdtype(floats.dtype, np.floating)

True

In [5]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [6]:
np.issubdtype(ints.dtype, np.number)

True

## Advanced Array Manipulation

### Reshaping Arrays

In [7]:
arr = np.arange(8)
arr
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [8]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [9]:
arr = np.arange(15)
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [10]:
other_arr = np.ones((3, 5))
print("-----------other_arr---------")
print(other_arr)
print(other_arr.shape)
print("-------arr---------")
print(arr)
arr.reshape(other_arr.shape)

-----------other_arr---------
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
(3, 5)
-------arr---------
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14]


array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

The opposite operation of reshape from one-dimensional to a higher dimension is
typically known as **flattening** or **raveling**:

In [11]:
arr = np.arange(15).reshape((5, 3))
print(arr)
print("---------------")
arr.ravel() #back to be flatten by ravel(), same as flatten()

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]
---------------


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [12]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

### C Versus Fortran Order

In [13]:
arr = np.arange(12).reshape((3, 4))
arr


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [14]:
print(arr.ravel()) #row major
arr.ravel('F') #column major

[ 0  1  2  3  4  5  6  7  8  9 10 11]


array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

### Concatenating and Splitting Arrays

In [15]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])  #(2,3)
print(arr1)
print("---------")
arr2 = np.array([[7, 8, 9], [10, 11, 12]]) #(2,3)
print(arr2)
print("===========================")

print("concatenate axis=0 : ")
print(np.concatenate([arr1, arr2], axis=0)) #
print("concatenate axis=1 : ")
print(np.concatenate([arr1, arr2], axis=1))

print("concatenate axis=1 : ")
print(np.concatenate([arr1, np.arange(2).reshape(2,1)], axis=1))

[[1 2 3]
 [4 5 6]]
---------
[[ 7  8  9]
 [10 11 12]]
concatenate axis=0 : 
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
concatenate axis=1 : 
[[ 1  2  3  7  8  9]
 [ 4  5  6 10 11 12]]
concatenate axis=1 : 
[[1 2 3 0]
 [4 5 6 1]]


In [16]:
print(np.vstack((arr1, arr2)))  #np.concatenate([arr1, arr2], axis=0)
print("-------------------------")
print(np.hstack((arr1, arr2)))  #np.concatenate([arr1, arr2], axis=1)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
-------------------------
[[ 1  2  3  7  8  9]
 [ 4  5  6 10 11 12]]


[split , on the other hand, slices apart an array into multiple arrays along an axis:](https://www.tutorialsandyou.com/python/numpy-split-in-python-76.html)
* The code np.split(x, 2) has splitted Numpy array x into two equal sub-arrays- [1, 2] and [3, 4]
* The code np.split(y, [2, 3]) has splitted array y into three sub-arrays- y[:2], y[2:3] and y[3:]

In [17]:
print ('Split the array at positions indicated in 1-D array:' )
arr = np.random.randn(5, 2)
print(arr)

print("==========================")
first, second, third = np.split(arr, [5, 3]) #[:5], [5:3], [3:]
print("-----first----------")
print(first)
print("-----second----------")
print(second)
print("-----third----------")
print(third)

print("==========================")
first, second, third = np.split(arr, [2, 3]) #[:2], [2:3], [3:]
print("-----first----------")
print(first)
print("-----second----------")
print(second)
print("-----third----------")
print(third)
print("==========================")
first, second, third = np.split(arr, [1, 2]) #[:1], [1:2] [2:]
print("-----first----------")
print(first)
print("-----second----------")
print(second)
print("-----third----------")
print(third)


Split the array at positions indicated in 1-D array:
[[-0.2047  0.4789]
 [-0.5194 -0.5557]
 [ 1.9658  1.3934]
 [ 0.0929  0.2817]
 [ 0.769   1.2464]]
-----first----------
[[-0.2047  0.4789]
 [-0.5194 -0.5557]
 [ 1.9658  1.3934]
 [ 0.0929  0.2817]
 [ 0.769   1.2464]]
-----second----------
[]
-----third----------
[[0.0929 0.2817]
 [0.769  1.2464]]
-----first----------
[[-0.2047  0.4789]
 [-0.5194 -0.5557]]
-----second----------
[[1.9658 1.3934]]
-----third----------
[[0.0929 0.2817]
 [0.769  1.2464]]
-----first----------
[[-0.2047  0.4789]]
-----second----------
[[-0.5194 -0.5557]]
-----third----------
[[1.9658 1.3934]
 [0.0929 0.2817]
 [0.769  1.2464]]


In [18]:
#numpy.split(ary, indices_or_sections, axis)

#indices_or_sections : 
# Can be an integer, indicating the number of equal sized subarrays 
# to be created from the input array. If this parameter is a 1-D array, 
# the entries indicate the points at which a new subarray is to be created.

# axis : default is 0 (row , below direction)
import numpy as np 
a = np.arange(9) 

print ('First array:')
print (a )


print ('Split the array in 3 equal-sized subarrays:' )
b = np.split(a,3) 
print( b )
 

print ('Split the array at positions indicated in 1-D array:' )
b = np.split(a,[4,7]) #[:4], [4:7], [7:]
print (b) 

First array:
[0 1 2 3 4 5 6 7 8]
Split the array in 3 equal-sized subarrays:
[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8])]
Split the array at positions indicated in 1-D array:
[array([0, 1, 2, 3]), array([4, 5, 6]), array([7, 8])]


#### Stacking helpers: r_ and c_

In [19]:
arr = np.arange(6)

print("------=======np.r_--------")
print("--np.r_[arr1, arr2]")

arr1 = arr.reshape((3, 2))
print(arr1)
print("--------------")
arr2 = np.random.randn(3, 2)
print(arr2)
print("------=======np.r_--------")
print("--np.r_[arr1, arr2]")
print(np.r_[arr1, arr2])  #.r_  row base concatenate axis=0
print("--concatenate((a,b)), axis = 0)")
print(np.concatenate((arr1, arr2), axis=0))
print("--concatenate([a,b], axis = 0)")
print(np.concatenate([arr1, arr2], axis=0))
print("--------=======np.c_-----------")
print(np.c_[np.r_[arr1, arr2], arr])  #.c_ column base concatenate axis=1
print("---- ---- ---")
# print("--np.concatenate( [np.concatenate([arr1, arr2], axis=0), arr], axis=1 ) ") #error 2d vs 1d
# print(np.concatenate( [np.concatenate([arr1, arr2], axis=0), arr], axis=1 ) )
#TypeError: concatenate() got multiple values for argument 'axis'
# print(np.concatenate( np.concatenate([arr1, arr2], axis=0), arr, axis=1 ) ) S
temp1 = np.concatenate([arr1, arr2], axis=0)
print(temp1.shape)
print(temp1)
print('********')
print(arr.shape)
print(arr)
print("-------")
# ValueError: all the input arrays must have same number of dimensions,
# but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
print(np.concatenate( [temp1, arr.reshape(6,1)], axis=1 ) ) 
print("-------")
# np.hstack((temp1,arr)) #error 2d vs. 1d
np.hstack((temp1,arr.reshape(6,1)))
print(np.r_[arr1, arr2])  #.r_  row base concatenate axis=0
print("--concatenate((a,b)), axis = 0)")
print(np.concatenate((arr1, arr2), axis=0))
print("--concatenate([a,b], axis = 0)")
print(np.concatenate([arr1, arr2], axis=0))
print("--------=======np.c_-----------")
print(np.c_[np.r_[arr1, arr2], arr])  #.c_ column base concatenate axis=1
print("---- ---- ---")
# print("--np.concatenate( [np.concatenate([arr1, arr2], axis=0), arr], axis=1 ) ") #error 2d vs 1d
# print(np.concatenate( [np.concatenate([arr1, arr2], axis=0), arr], axis=1 ) )
#TypeError: concatenate() got multiple values for argument 'axis'
# print(np.concatenate( np.concatenate([arr1, arr2], axis=0), arr, axis=1 ) ) S
temp1 = np.concatenate([arr1, arr2], axis=0)
print(temp1.shape)
print(temp1)
print('********')
print(arr.shape)
print(arr)
print("-------")
# ValueError: all the input arrays must have same number of dimensions,
# but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
print(np.concatenate( [temp1, arr.reshape(6,1)], axis=1 ) ) 
print("-------")
# np.hstack((temp1,arr)) #error 2d vs. 1d
np.hstack((temp1,arr.reshape(6,1)))

--np.r_[arr1, arr2]
[[0 1]
 [2 3]
 [4 5]]
--------------
[[ 1.0072 -1.2962]
 [ 0.275   0.2289]
 [ 1.3529  0.8864]]
--np.r_[arr1, arr2]
[[ 0.      1.    ]
 [ 2.      3.    ]
 [ 4.      5.    ]
 [ 1.0072 -1.2962]
 [ 0.275   0.2289]
 [ 1.3529  0.8864]]
--concatenate((a,b)), axis = 0)
[[ 0.      1.    ]
 [ 2.      3.    ]
 [ 4.      5.    ]
 [ 1.0072 -1.2962]
 [ 0.275   0.2289]
 [ 1.3529  0.8864]]
--concatenate([a,b], axis = 0)
[[ 0.      1.    ]
 [ 2.      3.    ]
 [ 4.      5.    ]
 [ 1.0072 -1.2962]
 [ 0.275   0.2289]
 [ 1.3529  0.8864]]
[[ 0.      1.      0.    ]
 [ 2.      3.      1.    ]
 [ 4.      5.      2.    ]
 [ 1.0072 -1.2962  3.    ]
 [ 0.275   0.2289  4.    ]
 [ 1.3529  0.8864  5.    ]]
---- ---- ---
(6, 2)
[[ 0.      1.    ]
 [ 2.      3.    ]
 [ 4.      5.    ]
 [ 1.0072 -1.2962]
 [ 0.275   0.2289]
 [ 1.3529  0.8864]]
********
(6,)
[0 1 2 3 4 5]
-------
[[ 0.      1.      0.    ]
 [ 2.      3.      1.    ]
 [ 4.      5.      2.    ]
 [ 1.0072 -1.2962  3.    ]
 [ 0.275   0.2

array([[ 0.    ,  1.    ,  0.    ],
       [ 2.    ,  3.    ,  1.    ],
       [ 4.    ,  5.    ,  2.    ],
       [ 1.0072, -1.2962,  3.    ],
       [ 0.275 ,  0.2289,  4.    ],
       [ 1.3529,  0.8864,  5.    ]])

In [20]:
a = np.arange(1,6)
print(a)
b= np.arange(-10, -5)
print(b)
print("------np.r_[a,b]-------")
print(np.r_[a,b])
np.c_[a, b]

[1 2 3 4 5]
[-10  -9  -8  -7  -6]
------np.r_[a,b]-------
[  1   2   3   4   5 -10  -9  -8  -7  -6]


array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

In [21]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

### Repeating Elements: tile and repeat

In [22]:
arr = np.arange(3)
print(arr)
arr.repeat(3)

[0 1 2]


array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [23]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [24]:
arr = np.random.randn(2, 2)
print(arr)
print("--------------")
arr.repeat(2, axis=0)

[[-2.0016 -0.3718]
 [ 1.669  -0.4386]]
--------------


array([[-2.0016, -0.3718],
       [-2.0016, -0.3718],
       [ 1.669 , -0.4386],
       [ 1.669 , -0.4386]])

In [25]:
print(arr.repeat([2, 3], axis=0))  #first row 2 times, second row 3 times
arr.repeat([2, 3], axis=1) #1sst column 2times, 2nd column 3times

[[-2.0016 -0.3718]
 [-2.0016 -0.3718]
 [ 1.669  -0.4386]
 [ 1.669  -0.4386]
 [ 1.669  -0.4386]]


array([[-2.0016, -2.0016, -0.3718, -0.3718, -0.3718],
       [ 1.669 ,  1.669 , -0.4386, -0.4386, -0.4386]])

In [26]:
arr
print(np.tile(arr, 2)) #whole arr repeat column wise 2 times
print("---------------")
print(arr.repeat(2, axis=1)) # each colum repeat colum wise 2 times

[[-2.0016 -0.3718 -2.0016 -0.3718]
 [ 1.669  -0.4386  1.669  -0.4386]]
---------------
[[-2.0016 -2.0016 -0.3718 -0.3718]
 [ 1.669   1.669  -0.4386 -0.4386]]


In [27]:
arr
print(np.tile(arr, (2, 1)))
print("------------------------------")
np.tile(arr, (3, 2))#whole arr repeat row wise 3times , whole arr repeat column wise 2 times

[[-2.0016 -0.3718]
 [ 1.669  -0.4386]
 [-2.0016 -0.3718]
 [ 1.669  -0.4386]]
------------------------------


array([[-2.0016, -0.3718, -2.0016, -0.3718],
       [ 1.669 , -0.4386,  1.669 , -0.4386],
       [-2.0016, -0.3718, -2.0016, -0.3718],
       [ 1.669 , -0.4386,  1.669 , -0.4386],
       [-2.0016, -0.3718, -2.0016, -0.3718],
       [ 1.669 , -0.4386,  1.669 , -0.4386]])

### Fancy Indexing Equivalents: take and put

In [28]:
arr = np.arange(10) * 100
print(arr)
print("--------------------")
inds = [7, 1, 2, 6]  #reorder arr according to [index 7th element, 1st, 2nd...]
arr[inds]

[  0 100 200 300 400 500 600 700 800 900]
--------------------


array([700, 100, 200, 600])

In [29]:
############ ? #############
# put: Replaces specified elements of an array with given values.
print(arr)
print("--")
print(arr.take(inds))
print(arr[inds])
print(inds)
print("---------------")
arr.put(inds, 42) #replace all inds' element values with 42
print("*replace inds 7's value 700 to 42, 1's 100 to 42, 2's 200 to 42...from arr ")
print(arr)
arr.put(inds, [40, 41, 42, 43])
arr

[  0 100 200 300 400 500 600 700 800 900]
--
[700 100 200 600]
[700 100 200 600]
[7, 1, 2, 6]
---------------
*replace inds 7's value 700 to 42, 1's 100 to 42, 2's 200 to 42...from arr 
[  0  42  42 300 400 500  42  42 800 900]


array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [30]:
inds = [2, 0, 2, 1]
arr = np.random.randn(2, 4)
print(arr)
arr.take(inds, axis=1) #column-wise reorder element of index 2-> first, index 0-> second...

[[-0.5397  0.477   3.2489 -1.0212]
 [-0.5771  0.1241  0.3026  0.5238]]


array([[ 3.2489, -0.5397,  3.2489,  0.477 ],
       [ 0.3026, -0.5771,  0.3026,  0.1241]])

## Broadcasting

In [31]:
arr = np.arange(5)
arr
arr * 4

array([ 0,  4,  8, 12, 16])

In [32]:
arr = np.random.randn(4, 3)
print(arr)
print("----arr.mean(0): row-wise(top-to-bottom): ttl 3----")
print(arr.mean(0))
print("-------arr-arr.mean(0)-------------")
demeaned = arr - arr.mean(0)
print(demeaned)
print("----------------------------")
print(demeaned.mean(0))

print("============")
sum(arr -arr.mean(0)) / len(arr[:,0])

[[ 0.0009  1.3438 -0.7135]
 [-0.8312 -2.3702 -1.8608]
 [-0.8608  0.5601 -1.2659]
 [ 0.1198 -1.0635  0.3329]]
----arr.mean(0): row-wise(top-to-bottom): ttl 3----
[-0.3928 -0.3824 -0.8768]
-------arr-arr.mean(0)-------------
[[ 0.3937  1.7263  0.1633]
 [-0.4384 -1.9878 -0.9839]
 [-0.468   0.9426 -0.3891]
 [ 0.5126 -0.6811  1.2097]]
----------------------------
[-0.  0. -0.]


array([-0.,  0., -0.])

In [40]:
#test 
arr[:,1].sum()/len(arr) #2nd column sum/2nd column length -> 2nd column mean  

-0.38244725384202977

In [41]:
print(round(sum(arr[:,0])/len(arr[:,0]), 4))
sum(arr)/len(arr[:,0])

-0.3928


array([-0.3928, -0.3824, -0.8768])

In [45]:
 np.testing.assert_equal(round(sum(arr[:,0])/len(arr[:,0]), 4), round(arr.mean(0)[0],4)) #equal as -0.3928

In [42]:
arr[:,0].sum()/len(arr[:,0])

-0.39278588371244455

In [53]:
print(arr)
print("----columnwise (left to right) mean: ttl 4-----------")
row_means = arr.mean(1) #columnwise (left to right)
print(" --1st col mean, 2nd , 3rd, 4th")
print(row_means)
print(row_means.shape)
print(row_means.reshape((4, 1)))
print("-----------------")
print("arr.shape : ", arr.shape)
print("row_means.reshape((4, 1) : ", row_means.reshape((4, 1)).shape)

print("----------demeaned:(4,3) - (4,1)---------")
demeaned = arr - row_means.reshape((4, 1)) #(4,3) - (4,1)
print(demeaned)
print("------demeaned.mean(1)-------")
print(demeaned.mean(1))
demeaned.mean(1).reshape(4,1)

[[ 0.0009  1.3438 -0.7135]
 [-0.8312 -2.3702 -1.8608]
 [-0.8608  0.5601 -1.2659]
 [ 0.1198 -1.0635  0.3329]]
----columnwise (left to right) mean: ttl 4-----------
 --1st col mean, 2nd , 3rd, 4th
[ 0.2104 -1.6874 -0.5222 -0.2036]
(4,)
[[ 0.2104]
 [-1.6874]
 [-0.5222]
 [-0.2036]]
-----------------
arr.shape :  (4, 3)
row_means.reshape((4, 1) :  (4, 1)
----------demeaned:(4,3) - (4,1)---------
[[-0.2095  1.1334 -0.9239]
 [ 0.8562 -0.6828 -0.1734]
 [-0.3386  1.0823 -0.7438]
 [ 0.3234 -0.8599  0.5365]]
------demeaned.mean(1)-------
[ 0. -0.  0.  0.]


array([[ 0.],
       [-0.],
       [ 0.],
       [ 0.]])

In [75]:
def column_stack(x):
    return np.column_stack(x)

In [76]:
def row_stack(x):
    return np.row_stack(x)

In [77]:
def vstack(x):
    return np.vstack(x)

In [78]:
def hstack(x):
    return np.hstack(x)

In [91]:
a = np.array((1,2,3))
b = np.array((2,3,4))
c = np.arange(2*3).reshape(2,3)
d = np.random.randint(2, size=(2,3))

In [92]:
column_stack((a,b))

array([[1, 2],
       [2, 3],
       [3, 4]])

In [93]:
print("--------c---------")
print(c)
print("----------d-----------")
print(d)

--------c---------
[[0 1 2]
 [3 4 5]]
----------d-----------
[[0 0 1]
 [1 1 1]]


In [94]:
column_stack((c,d))

array([[0, 1, 2, 0, 0, 1],
       [3, 4, 5, 1, 1, 1]])

In [96]:
row_stack((c,d))

array([[0, 1, 2],
       [3, 4, 5],
       [0, 0, 1],
       [1, 1, 1]])

In [98]:
vstack((c,d))

array([[0, 1, 2],
       [3, 4, 5],
       [0, 0, 1],
       [1, 1, 1]])

In [99]:
hstack((c,d))

array([[0, 1, 2, 0, 0, 1],
       [3, 4, 5, 1, 1, 1]])

In [117]:
np.c_[a, b]

array([[1, 2],
       [2, 3],
       [3, 4]])

In [120]:
np.r_[a,b]

array([1, 2, 3, 2, 3, 4])

In [118]:
np.c_[c,d]

array([[0, 1, 2, 0, 0, 1],
       [3, 4, 5, 1, 1, 1]])

In [121]:
np.r_[c,d]

array([[0, 1, 2],
       [3, 4, 5],
       [0, 0, 1],
       [1, 1, 1]])

### Broadcasting Over Other Axes

In [107]:
print(arr.shape)
print(arr.mean(1).shape)

(4, 3)
(4,)


In [108]:
# arr - arr.mean(1)#ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [109]:
arr - arr.mean(1).reshape((4, 1))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [110]:
sum(arr - arr.mean(1).reshape((4, 1))) #default row-wise(top-down) sume

array([0., 0., 0.])

In [104]:
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :]
arr_3d.shape
arr_1d = np.random.normal(size=3)
arr_1d[:, np.newaxis]
arr_1d[np.newaxis, :]

array([[0.3313, 1.3497, 0.0699]])

In [None]:
arr = np.random.randn(3, 4, 5)
print(arr)
print("--------------")
depth_means = arr.mean(2)
print(depth_means)
print(depth_means.shape)
print("-----====--------")
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned.mean(2)

In [None]:
arr = np.random.randn(3, 4, 5) #row:3, column:4 , depth:5
print(arr)
print("------arr.mean(2) -> depth:5's mean --------")
depth_means = arr.mean(2)
print(depth_means)
print(depth_means.shape)
print("-----====--------")
print(arr.shape)
print(depth_means.shape)
demeaned = arr - depth_means[:, :, np.newaxis] # convert 2d to 3d by [:,:,np.newaxis]
demeaned.mean(2)

```python
def demean_axis(arr, axis=0):
    means = arr.mean(axis)

    # This generalizes things like [:, :, np.newaxis] to N dimensions
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    return arr - means[indexer]
```

### Setting Array Values by Broadcasting

In [122]:
#setting values via array indexing.
arr = np.zeros((4, 3))
arr[:] = 5
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [123]:
arr = np.zeros((4, 3))
arr[:,:] = 5
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [124]:
## Set into column: row number must be matched, rows are replaced.

# if we had a one-dimensional array of values we wanted 
# to set into the columns of the array, 
# we can do that as long as the shape is compatible:
col = np.array([1.28, -0.42, 0.44, 1.6])
print("** col, ", col.shape); print(col)
print("*** arr, ",arr.shape); print(arr)
print("----------------------------------")
print("* col[:,np.newaxis].shape :",col[:,np.newaxis].shape)
print(col[:,np.newaxis])
print("-----arr[:] = col[:, np.newaxis]----------")
arr[:] = col[:, np.newaxis] #set into the column of array:for all columns, all rows are replaced
print(arr)
arr[:2] = [[-1.37], [0.509]] #for all columns, row1 and row 2 are replaced with-1.37 and 0.509 
arr

** col,  (4,)
[ 1.28 -0.42  0.44  1.6 ]
*** arr,  (4, 3)
[[5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]]
----------------------------------
* col[:,np.newaxis].shape : (4, 1)
[[ 1.28]
 [-0.42]
 [ 0.44]
 [ 1.6 ]]
-----arr[:] = col[:, np.newaxis]----------
[[ 1.28  1.28  1.28]
 [-0.42 -0.42 -0.42]
 [ 0.44  0.44  0.44]
 [ 1.6   1.6   1.6 ]]


array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

In [125]:
## columns number must be matched: columns are replaced
# arr = np.arange(12).reshape(3,4)
arr = np.random.randn(3, 4)
row = np.array([1.28, -0.42, 0.44, 1.6])
print("** row, ", row.shape); print(row)
print("*** arr, ",arr.shape); print(arr)
print("----------------------------------")
print("* row[np.newaxis,:].shape :",row[np.newaxis,:].shape)
print(row[np.newaxis,:])
print("-----arr[:] = row[np.newaxis, :]----------")
arr[:] = row[np.newaxis,:] #set into the row of array:for all rows, all columns are replaced
print(arr)
arr[:2] = [[-1.37], [0.509]] #for all row, col 1 and col 2 are replaced with-1.37 and 0.509 
arr

** row,  (4,)
[ 1.28 -0.42  0.44  1.6 ]
*** arr,  (3, 4)
[[ 0.2467 -0.0119  1.0048  1.3272]
 [-0.9193 -1.5491  0.0222  0.7584]
 [-0.6605  0.8626 -0.01    0.05  ]]
----------------------------------
* row[np.newaxis,:].shape : (1, 4)
[[ 1.28 -0.42  0.44  1.6 ]]
-----arr[:] = row[np.newaxis, :]----------
[[ 1.28 -0.42  0.44  1.6 ]
 [ 1.28 -0.42  0.44  1.6 ]
 [ 1.28 -0.42  0.44  1.6 ]]


array([[-1.37 , -1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509,  0.509],
       [ 1.28 , -0.42 ,  0.44 ,  1.6  ]])

## Advanced ufunc Usage

### ufunc Instance Methods

In [126]:
# reduce takes a single array and aggregates its values, optionally along an axis,
arr = np.arange(10)
print(np.add.reduce(arr))
arr.sum()

45


45

In [127]:
np.random.seed(12346)  # for reproducibility
arr = np.random.randn(5, 5)
print(arr)
print("------ arr[::2]----") #from start, to end, increment 2
print(arr[::2])
print("------ arr[::1]----") #increment 1
print(arr[::1])
print("------ arr[::3]----") #increment 3
print(arr[::3])

print("arr---")
print(arr)
print("************")
print(arr[::2])

print("-----after arr[::2].sort(1)-for every other row(increment 2)-(default is row),column wise(left to right) sort----")
arr[::2].sort(1) # sort only even row, sorting method: column-wise(left to right) sort.
print(arr)
print("======arr[:, :-1]========")
print(arr[:, :-1])
print("---- arr[:, 1:]------")
print( arr[:, 1:])
print("---------arr[:, :-1] < arr[:, 1:]------")
print(arr[:, :-1] < arr[:, 1:])

np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1) #column-wise, from left to right 'np.logical_and' op

[[-0.09    0.7594  0.7483 -0.9815  0.3658]
 [-0.3154 -0.8661  0.0279 -0.4556 -1.6019]
 [ 0.2483 -0.3215 -0.8487  0.0005 -0.5465]
 [ 0.2539  1.9368 -0.7995 -0.5692  0.0489]
 [-0.6491 -0.4795 -0.9535  1.4225  0.1754]]
------ arr[::2]----
[[-0.09    0.7594  0.7483 -0.9815  0.3658]
 [ 0.2483 -0.3215 -0.8487  0.0005 -0.5465]
 [-0.6491 -0.4795 -0.9535  1.4225  0.1754]]
------ arr[::1]----
[[-0.09    0.7594  0.7483 -0.9815  0.3658]
 [-0.3154 -0.8661  0.0279 -0.4556 -1.6019]
 [ 0.2483 -0.3215 -0.8487  0.0005 -0.5465]
 [ 0.2539  1.9368 -0.7995 -0.5692  0.0489]
 [-0.6491 -0.4795 -0.9535  1.4225  0.1754]]
------ arr[::3]----
[[-0.09    0.7594  0.7483 -0.9815  0.3658]
 [ 0.2539  1.9368 -0.7995 -0.5692  0.0489]]
arr---
[[-0.09    0.7594  0.7483 -0.9815  0.3658]
 [-0.3154 -0.8661  0.0279 -0.4556 -1.6019]
 [ 0.2483 -0.3215 -0.8487  0.0005 -0.5465]
 [ 0.2539  1.9368 -0.7995 -0.5692  0.0489]
 [-0.6491 -0.4795 -0.9535  1.4225  0.1754]]
************
[[-0.09    0.7594  0.7483 -0.9815  0.3658]
 [ 0.2483 -0

array([ True, False,  True, False,  True])

In [128]:
print(arr)
print("======arr[:, :-2]========") #all row, colunm is from start to (-2: from rear 2nd -> this is not incl)
print(arr[:, :-2])
print("======arr[::-2]========")# increment -2 row-wise(default)-every other row (from rear)
print(arr[::-2])
print("---- arr[:, 2:]------") #all row, column is from 2 row to the end.
print( arr[:, 2:])

[[-0.9815 -0.09    0.3658  0.7483  0.7594]
 [-0.3154 -0.8661  0.0279 -0.4556 -1.6019]
 [-0.8487 -0.5465 -0.3215  0.0005  0.2483]
 [ 0.2539  1.9368 -0.7995 -0.5692  0.0489]
 [-0.9535 -0.6491 -0.4795  0.1754  1.4225]]
[[-0.9815 -0.09    0.3658]
 [-0.3154 -0.8661  0.0279]
 [-0.8487 -0.5465 -0.3215]
 [ 0.2539  1.9368 -0.7995]
 [-0.9535 -0.6491 -0.4795]]
[[-0.9535 -0.6491 -0.4795  0.1754  1.4225]
 [-0.8487 -0.5465 -0.3215  0.0005  0.2483]
 [-0.9815 -0.09    0.3658  0.7483  0.7594]]
---- arr[:, 2:]------
[[ 0.3658  0.7483  0.7594]
 [ 0.0279 -0.4556 -1.6019]
 [-0.3215  0.0005  0.2483]
 [-0.7995 -0.5692  0.0489]
 [-0.4795  0.1754  1.4225]]


In [None]:
#It produces an array of the same size 
# with the intermediate “accumulated” values:
arr = np.arange(15).reshape((3, 5))
print(arr)
print("-----------------")
np.add.accumulate(arr, axis=1)

In [None]:
arr = np.arange(3).repeat([1, 2, 2]) # 0-> once, 1 -> 2 times, 2 -> 2 times
print(arr.shape)
print(arr)
print("******")
print((np.arange(5).shape))
print(np.arange(5))
print("-------np.multiply.outer-------------")
print(np.multiply.outer(arr, np.arange(5)))
print("shape : ",np.multiply.outer(arr, np.arange(5)).shape)
print("-------np.multiply-------------")
print(np.multiply(arr, np.arange(5)))
print("--------arr * np.arange(5)-----------")
print(arr * np.arange(5))
print("--------np.dot------------")
np.dot(arr, np.arange(5))

In [None]:
x, y = np.random.randn(3, 4), np.random.randn(5)
print(x)
print(x.shape)
print("---------------")
print(y)
print(y.shape)
print("---------------")
result = np.subtract.outer(x, y)
print(result)
result.shape

In [None]:
arr = np.arange(10)
print(arr)
np.add.reduce(arr)

In [41]:
arr = np.arange(10)
np.add.reduceat(arr, [0])

array([45])

In [42]:
arr = np.arange(10)
np.add.reduceat(arr, [0, 2])

array([ 1, 44])

In [43]:
arr = np.arange(10)
np.add.reduceat(arr, [0, 1])

array([ 0, 45])

In [44]:
arr = np.arange(10)
np.add.reduceat(arr, [2, 4])

array([ 5, 39])

In [45]:
arr = np.arange(10)
np.add.reduceat(arr, [0, 4])

array([ 6, 39])

In [46]:
arr = np.arange(10)
np.add.reduceat(arr, [0, 5])

array([10, 35])

In [47]:
arr = np.arange(10)
np.add.reduceat(arr, [0, 5, 8])

array([10, 18, 17])

In [48]:
arr = np.multiply.outer(np.arange(4), np.arange(5))
arr
np.add.reduceat(arr, [0, 2, 4], axis=1)

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]])

### Writing New ufuncs in Python

In [49]:
def add_elements(x, y):
    return x + y
add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [50]:
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them(np.arange(8), np.arange(8))

array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14.])

In [51]:
arr = np.random.randn(10000)
%timeit add_them(arr, arr)
%timeit np.add(arr, arr)

1.37 ms ± 15 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
4.11 µs ± 22.2 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Structured and Record Arrays

In [52]:
dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr

array([(1.5   ,  6), (3.1416, -2)], dtype=[('x', '<f8'), ('y', '<i4')])

In [53]:
sarr[0]
sarr[0]['y']

6

In [54]:
sarr['x']

array([1.5   , 3.1416])

### Nested dtypes and Multidimensional Fields

In [55]:
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr

array([([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0)],
      dtype=[('x', '<i8', (3,)), ('y', '<i4')])

In [56]:
arr[0]['x']

array([0, 0, 0])

In [57]:
arr['x']

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [58]:
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
data['x']
data['y']
data['x']['a']

array([1., 3.])

### Why Use Structured Arrays?

## More About Sorting

In [59]:
arr = np.random.randn(6)
arr.sort()
arr

array([-1.0054, -0.4969, -0.2703, -0.2165,  0.51  ,  0.5927])

In [60]:
arr = np.random.randn(3, 5)
arr
arr[:, 0].sort()  # Sort first column values in-place
arr

array([[-0.9784,  0.6754, -0.044 , -0.6036, -0.3693],
       [-0.6212,  1.1781, -2.8685,  0.108 ,  0.6046],
       [ 1.0304,  2.0542, -1.2113, -1.0375,  0.7093]])

In [61]:
arr = np.random.randn(5)
arr
np.sort(arr)
arr

array([-0.4803, -0.8566,  1.1659,  0.2808, -0.8511])

In [62]:
arr = np.random.randn(3, 5)
arr
arr.sort(axis=1)
arr

array([[-1.1091, -0.66  , -0.3381, -0.2201,  0.6632],
       [-0.7161, -0.1332, -0.1168,  0.0229,  0.635 ],
       [-1.8294, -1.7927, -0.1485,  0.035 ,  0.05  ]])

In [63]:
arr[:, ::-1]

array([[ 0.6632, -0.2201, -0.3381, -0.66  , -1.1091],
       [ 0.635 ,  0.0229, -0.1168, -0.1332, -0.7161],
       [ 0.05  ,  0.035 , -0.1485, -1.7927, -1.8294]])

### Indirect Sorts: argsort and lexsort

In [64]:
values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
indexer
values[indexer]

array([0, 1, 2, 3, 5])

In [65]:
arr = np.random.randn(3, 5)
arr[0] = values
arr
arr[:, arr[0].argsort()]

array([[ 0.    ,  1.    ,  2.    ,  3.    ,  5.    ],
       [ 0.5303,  1.0183,  0.8767,  0.1939,  0.0494],
       [-1.4071, -0.4736,  1.4226, -0.853 ,  0.2835]])

In [66]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
sorter = np.lexsort((first_name, last_name))
sorter
zip(last_name[sorter], first_name[sorter])

<zip at 0x7feb424006e0>

### Alternative Sort Algorithms

In [67]:
values = np.array(['2:first', '2:second', '1:first', '1:second',
                   '1:third'])
key = np.array([2, 2, 1, 1, 1])
indexer = key.argsort(kind='mergesort')
indexer
values.take(indexer)

array(['1:first', '1:second', '1:third', '2:first', '2:second'],
      dtype='<U8')

### Partially Sorting Arrays

In [68]:
np.random.seed(12345)
arr = np.random.randn(20)
arr
np.partition(arr, 3)

array([-2.0016, -1.2962, -0.5557, -0.5194, -0.3718, -0.4386, -0.2047,
        0.2817,  0.769 ,  0.4789,  1.0072,  0.0929,  0.275 ,  0.2289,
        1.3529,  0.8864,  1.3934,  1.9658,  1.669 ,  1.2464])

In [69]:
indices = np.argpartition(arr, 3)
indices
arr.take(indices)

array([-2.0016, -1.2962, -0.5557, -0.5194, -0.3718, -0.4386, -0.2047,
        0.2817,  0.769 ,  0.4789,  1.0072,  0.0929,  0.275 ,  0.2289,
        1.3529,  0.8864,  1.3934,  1.9658,  1.669 ,  1.2464])

### numpy.searchsorted: Finding Elements in a Sorted Array

In [70]:
arr = np.array([0, 1, 7, 12, 15])
arr.searchsorted(9)

3

In [71]:
arr.searchsorted([0, 8, 11, 16])

array([0, 3, 3, 5])

In [72]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])
arr.searchsorted([0, 1])
arr.searchsorted([0, 1], side='right')

array([3, 7])

In [73]:
data = np.floor(np.random.uniform(0, 10000, size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data

array([9940., 6768., 7908., 1709.,  268., 8003., 9037.,  246., 4917.,
       5262., 5963.,  519., 8950., 7282., 8183., 5002., 8101.,  959.,
       2189., 2587., 4681., 4593., 7095., 1780., 5314., 1677., 7688.,
       9281., 6094., 1501., 4896., 3773., 8486., 9110., 3838., 3154.,
       5683., 1878., 1258., 6875., 7996., 5735., 9732., 6340., 8884.,
       4954., 3516., 7142., 5039., 2256.])

In [74]:
labels = bins.searchsorted(data)
labels

array([4, 4, 4, 3, 2, 4, 4, 2, 3, 4, 4, 2, 4, 4, 4, 4, 4, 2, 3, 3, 3, 3,
       4, 3, 4, 3, 4, 4, 4, 3, 3, 3, 4, 4, 3, 3, 4, 3, 3, 4, 4, 4, 4, 4,
       4, 3, 3, 4, 4, 3])

In [75]:
pd.Series(data).groupby(labels).mean()

2     498.000000
3    3064.277778
4    7389.035714
dtype: float64

## Writing Fast NumPy Functions with Numba

In [76]:
import numpy as np

def mean_distance(x, y):
    nx = len(x)
    result = 0.0
    count = 0
    for i in range(nx):
        result += x[i] - y[i]
        count += 1
    return result / count

```python
In [209]: x = np.random.randn(10000000)

In [210]: y = np.random.randn(10000000)

In [211]: %timeit mean_distance(x, y)
1 loop, best of 3: 2 s per loop

In [212]: %timeit (x - y).mean()
100 loops, best of 3: 14.7 ms per loop
```

```python
In [213]: import numba as nb

In [214]: numba_mean_distance = nb.jit(mean_distance)
```

```python
@nb.jit
def mean_distance(x, y):
    nx = len(x)
    result = 0.0
    count = 0
    for i in range(nx):
        result += x[i] - y[i]
        count += 1
    return result / count
```

```python
In [215]: %timeit numba_mean_distance(x, y)
100 loops, best of 3: 10.3 ms per loop
```

```python
from numba import float64, njit

@njit(float64(float64[:], float64[:]))
def mean_distance(x, y):
    return (x - y).mean()
```

### Creating Custom numpy.ufunc Objects with Numba

```python
from numba import vectorize

@vectorize
def nb_add(x, y):
    return x + y
```

```python
In [13]: x = np.arange(10)

In [14]: nb_add(x, x)
Out[14]: array([  0.,   2.,   4.,   6.,   8.,  10.,  12.,  14.,  16.,  18.])

In [15]: nb_add.accumulate(x, 0)
Out[15]: array([  0.,   1.,   3.,   6.,  10.,  15.,  21.,  28.,  36.,  45.])
```

## Advanced Array Input and Output

### Memory-Mapped Files

In [77]:
mmap = np.memmap('mymmap', dtype='float64', mode='w+',
                 shape=(10000, 10000))
mmap

memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [78]:
section = mmap[:5]

In [79]:
section[:] = np.random.randn(5, 10000)
mmap.flush()
mmap
del mmap

In [80]:
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
mmap

memmap([[ 0.7584, -0.6605,  0.8626, ...,  0.6046, -0.6212,  2.0542],
        [-1.2113, -1.0375,  0.7093, ..., -1.4117, -0.1719, -0.8957],
        [-0.1419, -0.3375,  0.4329, ...,  1.2914, -0.752 , -0.44  ],
        ...,
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ]])

In [81]:
%xdel mmap
!rm mymmap

### HDF5 and Other Array Storage Options

## Performance Tips

### The Importance of Contiguous Memory

In [82]:
arr_c = np.ones((1000, 1000), order='C')
arr_f = np.ones((1000, 1000), order='F')
arr_c.flags
arr_f.flags
arr_f.flags.f_contiguous

True

In [83]:
%timeit arr_c.sum(1)
%timeit arr_f.sum(1)

346 µs ± 5.58 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
491 µs ± 4.75 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [84]:
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [85]:
arr_c[:50].flags.contiguous
arr_c[:, :50].flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [86]:
%xdel arr_c
%xdel arr_f

In [87]:
pd.options.display.max_rows = PREVIOUS_MAX_ROWS