# NumPy Examples

In [1]:
import numpy as np

## DataCamp Top 20

https://www.datacamp.com/blog/numpy-interview-questions

### (1) What is NumPy?

* Provides support for large, multi-dimentional arrays

* Provides mathematical functions

* Allows for vectorized operations (loops not needed)

* Provides foundation for more advanced packages: pandas, scikit learn, pytorch

* Data storage types are more efficient than Python native objects

### (2) Create a 1D array

In [4]:
arr = np.array([1, 2, 3])
arr

array([1, 2, 3])

### (3) Differences between NumPy array and Python list

NumPy lists:

* must be all of same type

* are more memory efficient

* support vectorized operations (loops not needed)

* operate natively with many mathematical operations

### (4) Size and shape of arrays

In [12]:
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr.shape) # dimensions
print(arr.size) # total number of elements

(2, 3)
6


### (5) Reshape arrays

In [17]:
arr = np.array([1, 2, 3, 4, 5, 6])
print(arr)
print(arr.reshape((3,2))) # method
print(np.reshape(arr, (2,3))) # function

[1 2 3 4 5 6]
[[1 2]
 [3 4]
 [5 6]]
[[1 2 3]
 [4 5 6]]


### (6) Arrays of zeroes or ones

In [20]:
print(np.zeros((1,6)))
np.ones((3,2))

[[0. 0. 0. 0. 0. 0.]]


array([[1., 1.],
       [1., 1.],
       [1., 1.]])

### (7) Broadcasting

Ensures that dimensions match before operations

In [23]:
a = np.array([1, 2, 3]) # horizontal
b = np.array([[1], [1], [1]]) # vertical
print(a + b)

[[2 3 4]
 [2 3 4]
 [2 3 4]]


### (8) Basic statistics

In [44]:
uni_arr = np.random.uniform(0, 10, 100)
print(np.mean(uni_arr))
print(np.median(uni_arr))
print(np.std(uni_arr))

4.924511945827471
5.03524060064375
2.763200589928771


### (9) Vectorized if-else with `np.where`

In [46]:
nor_arr = np.random.normal(0, 10, 100)
np.where(nor_arr > 0, True, False)

array([False,  True, False, False,  True,  True,  True,  True,  True,
       False,  True, False,  True, False,  True,  True,  True,  True,
        True,  True, False,  True,  True, False,  True, False,  True,
        True,  True, False,  True,  True,  True, False, False,  True,
        True, False, False, False,  True, False,  True,  True,  True,
       False, False,  True, False, False, False,  True,  True, False,
       False, False, False,  True,  True,  True,  True,  True,  True,
        True, False, False, False, False,  True, False, False,  True,
       False,  True,  True,  True,  True,  True, False, False, False,
        True,  True, False, False,  True,  True,  True, False,  True,
       False,  True,  True, False, False,  True,  True,  True, False,
        True])

### (10) Vectorized mathematical operations (MSE)

In [48]:
arr1 = np.random.uniform(0, 10, 100)
arr2 = np.random.uniform(0, 10, 100)
n = len(arr1)

print( 1/n * np.sum( np.square(arr2 - arr1)) )
print( np.mean( np.square(arr2-arr1) ) )

17.195712832042254
17.19571283204225


### (11) Sliding window

In [54]:
from numpy.lib.stride_tricks import sliding_window_view

arr = np.arange(6)
print(arr)
print(sliding_window_view(arr, 3))
print(np.mean(sliding_window_view(arr, 3), axis=1)) # mean over horizontal rows

[0 1 2 3 4 5]
[[0 1 2]
 [1 2 3]
 [2 3 4]
 [3 4 5]]
[1. 2. 3. 4.]


### (12) Indexing

In [71]:
arr = np.array([[10, 15, 20, 25],
                  [30, 35, 40, 45],
                  [50, 55, 60, 65]])

condition = arr > 30
print(arr[ condition ])

rows = np.array([0, 1, 2])
cols = np.array([1, 2, 3])
print(arr[ rows, cols ])

[35 40 45 50 55 60 65]
[15 40 65]


### (13) Matrix decomposition

In [105]:
M = np.random.randn(9,9) # 9x9

U, s, Vt = np.linalg.svd(M, full_matrices=False) # M = U * S * V.T

V = Vt.T

print(U.shape, s.shape, V.shape) # 9x9, 9x1, 9x9

S = np.diag(s)

Mhat = np.dot(U, np.dot(S, V.T))

np.allclose(M,Mhat)

(9, 9) (9,) (9, 9)


True

### (14) Memory optimization

In [106]:
# large_array = np.memmap(filename mode='w+' shape=large_array_shape)

### (15) Missing values

In [128]:
arr = np.array([0, 1, 2, 3, 4])
arr = arr/arr
print(np.isnan(arr))
print(np.isinf(arr))

[ True False False False False]
[False False False False False]


  arr = arr/arr


### (16) Applying functions along rows/cols

In [134]:
arr = np.array([[10, 15, 20, 25],
                  [30, 35, 40, 45],
                  [50, 55, 60, 65]])
print(arr)

np.apply_along_axis(np.mean, axis=0, arr=arr)

[[10 15 20 25]
 [30 35 40 45]
 [50 55 60 65]]


array([30., 35., 40., 45.])