In [1]:
import numpy as np

#### The NumPy ndarray: A Multidimensional Array Object
• ndarray, an efficient multidimensional array providing fast array-oriented arithmetic operations and flexible broadcasting capabilities.

• Mathematical functions for fast operations on entire arrays, without writing explicit loops.

• NumPy-based algorithms are generally 10 to 100 times faster than their pure Python counterparts and use significantly less memory.

In [2]:
# Vectorized version: one NumPy expression doubles all 100,000 elements.
my_arr = np.arange(100000)
%timeit my_arr2 = my_arr * 2
# Timing recorded on an earlier run (machine-dependent, kept for reference):
# 39.2 µs ± 2.36 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

38.7 µs ± 839 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [3]:
# Pure-Python counterpart: a list comprehension doubles the same 100,000 values one by one.
my_list = list(range(100000))
%timeit my_list2 = [x * 2 for x in my_list]
# Timing recorded on an earlier run (machine-dependent, kept for reference):
# 5.12 ms ± 133 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

4.97 ms ± 166 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


#### sklearn.datasets.load_iris

The iris dataset is a classic and very easy multi-class classification dataset.
feature_names = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

In [4]:
from sklearn.datasets import load_iris
# return_X_y=True makes load_iris return the (data, target) pair directly:
# X holds the four measurements per sample, y holds the class labels.
X, y = load_iris(return_X_y=True)

#### ndarray.shape
Tuple of array dimensions.

#### ndarray.ndim
Number of array dimensions.

#### ndarray.size
Number of elements in the array.

In [5]:
# Report the same attributes for the feature matrix X and the label vector y.
# X's last line carries an extra '\n' argument so a blank line separates the two reports.
for label, arr, trailer in (('X', X, ('\n',)), ('y', y, ())):
    print(f'Type of {label}:', type(arr))
    print(f'{label}.shape =', arr.shape)
    print(f'{label}.ndim =', arr.ndim)
    print(f'{label}.size =', arr.size)
    print(f'dtype of {label}: ', arr.dtype, *trailer)

Type of X: <class 'numpy.ndarray'>
X.shape = (150, 4)
X.ndim = 2
X.size = 600
dtype of X:  float64 

Type of y: <class 'numpy.ndarray'>
y.shape = (150,)
y.ndim = 1
y.size = 150
dtype of y:  int64


#### Basic Indexing and Slicing

arr2D[0][2] is the same as arr2D[0, 2]

If you want a copy of a slice of an ndarray instead of a view, you will need to explicitly copy the array—for example, arr[5:8].copy()

In [6]:
# Chained indexing X[r][c] and tuple indexing X[r, c] address the same element.
row, col = 37, 2
print(X[row][col])
print(X[row, col])

1.4
1.4


In [7]:
# Work on a copy of the first ten petal lengths so the experiment cannot modify X.
arr1 = X[0:10, 2].copy()
print(f'arr1 = {arr1}')

# Basic slicing returns a view: arr2 shares its data buffer with arr1.
arr2 = arr1[0:5]
print(f'arr2 (a view) = {arr2}')

# Writing through the view is therefore visible through both names.
arr2[0] = 0
print(f'arr2 (data at index-0 is changed) = {arr2}')
print(f'arr1 (data at index-0 is also changed)= {arr1} \n')

arr1 = [1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5]
arr2 (a view) = [1.4 1.4 1.3 1.5 1.4]
arr2 (data at index-0 is changed) = [0.  1.4 1.3 1.5 1.4]
arr1 (data at index-0 is also changed)= [0.  1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5] 



In [8]:
# Start again from a fresh copy of the first ten petal lengths.
arr1 = X[0:10, 2].copy()
print(f'arr1 = {arr1}')

# An explicit .copy() gives arr2 its own buffer, independent of arr1.
arr2 = arr1[0:5].copy()
print(f'arr2 (a copy) = {arr2}')

# The write lands only in arr2's private buffer; arr1 keeps its original value.
arr2[0] = 0
print(f'arr2 (data at index-0 is changed) = {arr2}')
print(f'arr1 (data at index-0 remains the same) = {arr1}')

arr1 = [1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5]
arr2 (a copy) = [1.4 1.4 1.3 1.5 1.4]
arr2 (data at index-0 is changed) = [0.  1.4 1.3 1.5 1.4]
arr1 (data at index-0 remains the same) = [1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5]


#### ndarray.astype(dtype, order='K', casting='unsafe', subok=True, copy=True)
Copy of the array, cast to a specified type.

In [9]:
# Basic slicing: the first five samples with all feature columns.
X1 = X[0:5]
print(f'X1 =  {X1} \n')

# astype returns a new array; float -> int64 drops the fractional part (5.1 -> 5, 0.2 -> 0).
X2 = X1.astype(dtype=np.int64)
print(f'X2 =  {X2}')

X1 =  [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]] 

X2 =  [[5 3 1 0]
 [4 3 1 0]
 [4 3 1 0]
 [4 3 1 0]
 [5 3 1 0]]


#### numpy.arange([start, ]stop, [step, ]dtype=None)
Return evenly spaced values within a given interval.

#### numpy.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)
Return evenly spaced numbers over a specified interval.

In [10]:
# Constant-filled arrays: zeros uses the default float dtype, ones is asked for integers.
X0 = np.zeros(shape=(3, 2))
print(f'X0 = {X0} \n')

X1 = np.ones(shape=(3, 2), dtype=int)
print(f'X1 = {X1} \n')

# arange steps by 2 from 1 and excludes the stop value 9.
X2 = np.arange(1, 9, 2)
print(f'X2 = {X2} \n')

# linspace produces 8 evenly spaced points and includes both endpoints.
X3 = np.linspace(1, 10, num=8)
print(f'X3 = {X3}')

X0 = [[0. 0.]
 [0. 0.]
 [0. 0.]] 

X1 = [[1 1]
 [1 1]
 [1 1]] 

X2 = [1 3 5 7] 

X3 = [ 1.          2.28571429  3.57142857  4.85714286  6.14285714  7.42857143
  8.71428571 10.        ]


#### Boolean Indexing

In [11]:
# First ten sepal widths (column 1) and the element-wise comparison mask.
X1 = X[0:10, 1]
print(f'X1 = {X1}')
print(f'(X1 > 3.2) =  {X1 > 3.2}')

# True counts as 1, so summing a boolean mask counts the matching rows.
sepal_width = X[:, 1]
count = (sepal_width > 3.0).sum()
print(f'Count of Iris with sepal width wider than 3.0cm = {count}')

X1 = [3.5 3.  3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1]
(X1 > 3.2) =  [ True False False False  True  True  True  True False False]
Count of Iris with sepal width wider than 3.0cm = 67


In [12]:
# Only consider the irises with sepal width > 3.0 cm.
# The row mask is computed once and reused for every feature column
# (the original rebuilt the identical mask in each of four copy-pasted lines).
wide_sepal = X[:, 1] > 3.0
for col, feature in enumerate(('sepal length', 'sepal width', 'petal length', 'petal width')):
    # Boolean mask selects the rows, `col` selects the feature column.
    print(f'Average {feature} = {X[wide_sepal, col].mean():.1f} (cm)')

Average sepal length = 5.7 (cm)
Average sepal width = 3.4 (cm)
Average petal length = 2.9 (cm)
Average petal width = 0.9 (cm)


In [13]:
# Build each condition once; & and | combine boolean masks element by element.
long_sepal = X[:, 0] > 6.0
wide_sepal = X[:, 1] > 3.0

count = (long_sepal & wide_sepal).sum()
print('Count of Iris with sepal length longer than 6.0cm AND sepal width wider than 3.0cm =', count)

count = (long_sepal | wide_sepal).sum()
print('Count of Iris with sepal length longer than 6.0cm OR sepal width wider than 3.0cm =', count)

Count of Iris with sepal length longer than 6.0cm AND sepal width wider than 3.0cm = 23
Count of Iris with sepal length longer than 6.0cm OR sepal width wider than 3.0cm = 105


#### numpy.reshape(a, newshape, order='C')
Gives a new shape to an array without changing its data.

In [14]:
# Six consecutive integers, then the same data arranged as 2 rows x 3 columns.
arr1 = np.arange(6)
arr2 = arr1.reshape((2, 3))

# One print call; the empty string yields the blank line between the two reports.
print(
    f'arr1 =\n{arr1}',
    f'arr1.shape = {arr1.shape}',
    '',
    f'arr2 =\n{arr2}',
    f'arr2.shape = {arr2.shape}',
    sep='\n',
)

arr1 =
[0 1 2 3 4 5]
arr1.shape = (6,)

arr2 =
[[0 1 2]
 [3 4 5]]
arr2.shape = (2, 3)


In [15]:
# Two equivalent ways to turn a 1-D array into a column vector of shape (6, 1):
# reshape with -1 (that dimension is inferred) and indexing with np.newaxis.
arr1 = np.arange(6)
arr2 = arr1.reshape((-1, 1))
arr3 = arr1[:, np.newaxis]

for position, (name, arr) in enumerate((('arr1', arr1), ('arr2', arr2), ('arr3', arr3))):
    if position:
        # Blank line between consecutive reports, none before the first.
        print()
    print(f'{name} = \n{arr}')
    print(f'{name}.shape = {arr.shape}')

arr1 = 
[0 1 2 3 4 5]
arr1.shape = (6,)

arr2 = 
[[0]
 [1]
 [2]
 [3]
 [4]
 [5]]
arr2.shape = (6, 1)

arr3 = 
[[0]
 [1]
 [2]
 [3]
 [4]
 [5]]
arr3.shape = (6, 1)


#### Matrix arithmetic in NumPy
• Standard arithmetic operators (applied element-wise): +, -, *, /, **, //, %

• Scalar product

In [16]:
# Two 2x3 integer matrices holding 1..6 and 7..12.
mat1 = np.arange(1, 7).reshape((2, 3))
mat2 = np.arange(7, 13).reshape((2, 3))

for index, matrix in enumerate((mat1, mat2), start=1):
    print(f'Matrix-{index} =\n{matrix}')

Matrix-1 =
[[1 2 3]
 [4 5 6]]
Matrix-2 =
[[ 7  8  9]
 [10 11 12]]


In [17]:
# Element-wise addition of two arrays of the same shape.
mat1 + mat2

array([[ 8, 10, 12],
       [14, 16, 18]])

In [18]:
# Element-wise subtraction.
mat1 - mat2

array([[-6, -6, -6],
       [-6, -6, -6]])

In [19]:
# `*` multiplies element by element; it is NOT the matrix product.
mat1 * mat2

array([[ 7, 16, 27],
       [40, 55, 72]])

In [20]:
# Scalar product: the scalar is applied to every element.
mat1 * 10

array([[10, 20, 30],
       [40, 50, 60]])

#### Universal functions (ufunc)

In [21]:
# Integers 1 through 8 (stop value 9 is excluded); used by the cells that follow.
arr1 = np.arange(start=1, stop=9)
print('arr1 =', arr1)

arr1 = [1 2 3 4 5 6 7 8]


In [22]:
# Sum of all elements.
arr1.sum()

36

In [23]:
# Smallest element.
arr1.min()

1

In [24]:
# Largest element.
arr1.max()

8

In [25]:
# Arithmetic mean; the result is a float even though arr1 holds integers.
arr1.mean()

4.5

In [26]:
# Standard deviation; NumPy's default (ddof=0) is the population formula, dividing by N.
arr1.std()

2.29128784747792

In [27]:
# Element-wise e**x; returns a float array.
np.exp(arr1)

array([2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01,
       1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03])

In [28]:
# Element-wise x*x; integer input stays integer.
np.square(arr1)

array([ 1,  4,  9, 16, 25, 36, 49, 64])

In [29]:
# Element-wise square root; returns a float array.
np.sqrt(arr1)

array([1.        , 1.41421356, 1.73205081, 2.        , 2.23606798,
       2.44948974, 2.64575131, 2.82842712])

In [30]:
# x2 is half of x1, element by element.
x1 = np.arange(10)
x2 = x1 / 2

# Element-wise sqrt(x1^2 + x2^2), built from ufuncs with no explicit loop.
# NOTE: this rebinds `y`, which previously held the iris labels from load_iris.
sum_of_squares = x1 ** 2 + x2 ** 2
y = np.sqrt(sum_of_squares)
y

array([ 0.        ,  1.11803399,  2.23606798,  3.35410197,  4.47213595,
        5.59016994,  6.70820393,  7.82623792,  8.94427191, 10.0623059 ])

In [31]:
# Vectorized if/else: 0 where y > 5, the original y value elsewhere. Returns a new array; y is unchanged.
np.where(y > 5, 0, y)

array([0.        , 1.11803399, 2.23606798, 3.35410197, 4.47213595,
       0.        , 0.        , 0.        , 0.        , 0.        ])

#### Sorting
#### numpy.sort(a, axis=-1, kind='quicksort', order=None)
Return a sorted copy of an array.

|kind|speed|worst case|work space|stable|
|---|---|---|---|---|
|quicksort|1|$O(n^2)$|0|no|
|mergesort|2|$O(n \log n)$|~n/2|yes|
|heapsort|3|$O(n \log n)$|0|no|

#### arr.sort() method: inplace sorting
#### np.sort(arr) function: return a sorted copy of an array

In [32]:
# Seed the global generator so Restart & Run All always produces the same matrix.
# Without it this cell, and the later transpose cell that reuses mat1, change on every run.
# Saved outputs from earlier unseeded runs will differ until the notebook is re-executed.
np.random.seed(42)

# 5x3 matrix of random integers in [1, 20); the upper bound is exclusive.
mat0 = np.random.randint(1, 20, (5, 3))
print(f'mat0 =\n{mat0}')

# ndarray.sort() sorts IN PLACE, so sort a copy and keep mat0 intact.
# axis=1 (the default last axis for a 2-D array) sorts the values within each row.
mat1 = mat0.copy()
mat1.sort(axis=1)
print(f'mat1 (sort along axis=1) =\n{mat1}')

# np.sort() returns a sorted copy; axis=0 sorts the values within each column.
mat2 = np.sort(mat0, axis=0)
print(f'mat2 (sort along axis=0) =\n{mat2}')

mat0 =
[[ 4 19  6]
 [ 1 16  9]
 [19  8 15]
 [16  6  4]
 [ 3  1 11]]
mat1 (sort along axis=1) =
[[ 4  6 19]
 [ 1  9 16]
 [ 8 15 19]
 [ 4  6 16]
 [ 1  3 11]]
mat2 (sort along axis=0) =
[[ 1  1  4]
 [ 3  6  6]
 [ 4  8  9]
 [16 16 11]
 [19 19 15]]


#### numpy.argmax(a, axis=None, out=None)
Returns the indices of the maximum values along an axis.

#### numpy.argsort(a, axis=-1, kind='quicksort', order=None)
Returns the indices that would sort an array.

In [33]:
# Feature importances taken from a decision-tree classifier example (30 features).
feature_importances = np.array([
    0.0, 0.0, 0.0, 0.0, 0.0,
    0.0, 0.0, 0.0, 0.0, 0.0,
    0.01019737, 0.04839825, 0.0, 0.0, 0.0024156,
    0.0, 0.0, 0.0, 0.0, 0.0,
    0.72682851, 0.0458159, 0.0, 0.0, 0.0141577,
    0.0, 0.018188, 0.1221132, 0.01188548, 0.0,
])

print(f'Maximum feature importance = {feature_importances.max()}')
print(f'feature_importances.argmax() @ index = {feature_importances.argmax()}')
# argsort is ascending; the slice [:-4:-1] walks back from the end, giving the
# indices of the three largest values in descending order of importance.
print(f'Top-3 important features: {feature_importances.argsort()[:-4:-1]}')

Maximum feature importance = 0.72682851
feature_importances.argmax() @ index = 20
Top-3 important features: [20 27 11]


#### Linear Algebra
#### numpy.transpose(a, axes=None)
Permute the dimensions of an array.

#### numpy.dot(a, b, out=None)
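Dot product of two arrays. Specifically, if both a and b are 1-D arrays it is the inner product of the vectors, and if both are 2-D arrays it is matrix multiplication.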

#### numpy.linalg.inv(a)
Compute the (multiplicative) inverse of a matrix.

In [34]:
# np.transpose(a) and the a.T attribute give the same result.
# mat1 is whatever the previous cell that assigned it left behind
# (in run order, the row-sorted 5x3 matrix from the sorting example).
for transposed in (np.transpose(mat1), mat1.T):
    print(f'Transpose of Matrix-1 =\n{transposed}')

Transpose of Matrix-1 =
[[ 4  1  8  4  1]
 [ 6  9 15  6  3]
 [19 16 19 16 11]]
Transpose of Matrix-1 =
[[ 4  1  8  4  1]
 [ 6  9 15  6  3]
 [19 16 19 16 11]]


In [35]:
# (2x3) . (3x2) -> (2x2): the inner dimensions must match.
mat1 = np.arange(1, 7).reshape((2, 3))
mat2 = np.arange(7, 13).reshape((3, 2))

# The function form and the method form compute the same matrix product.
mat3 = np.dot(mat1, mat2)
mat4 = mat1.dot(mat2)

for index, matrix in enumerate((mat1, mat2, mat3, mat4), start=1):
    print(f'Matrix-{index} =\n{matrix}')

Matrix-1 =
[[1 2 3]
 [4 5 6]]
Matrix-2 =
[[ 7  8]
 [ 9 10]
 [11 12]]
Matrix-3 =
[[ 58  64]
 [139 154]]
Matrix-4 =
[[ 58  64]
 [139 154]]


In [36]:
# A 2x2 matrix [[1, 2], [3, 4]] and its multiplicative inverse.
mat1 = np.arange(1, 5).reshape((2, 2))
mat2 = np.linalg.inv(mat1)

for index, matrix in enumerate((mat1, mat2), start=1):
    print(f'Matrix-{index} =\n{matrix}')

Matrix-1 =
[[1 2]
 [3 4]]
Matrix-2 =
[[-2.   1. ]
 [ 1.5 -0.5]]
