In [1]:
import numpy as np

# Remove rows with NaNs from a NumPy array

In [2]:
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0
a = np.array([[1, 8], [2, 9], [3, 10], [4, np.nan], [5, 12], [6, np.nan]])
print('Original array:\n', a)
# Build a per-row mask that is True when any column holds NaN, then keep the
# remaining rows (checks every column, not only the second one)
a = a[~np.isnan(a).any(axis=1)]
print('With rows containing NaN removed:\n', a)

Original array:
 [[ 1.  8.]
 [ 2.  9.]
 [ 3. 10.]
 [ 4. nan]
 [ 5. 12.]
 [ 6. nan]]
With rows containing NaN removed:
 [[ 1.  8.]
 [ 2.  9.]
 [ 3. 10.]
 [ 5. 12.]]


# Find index of nearest value in an array

See Nov. 20, 2014 answer to [Finding the nearest value and return the index of array in Python](http://stackoverflow.com/questions/8914491/finding-the-nearest-value-and-return-the-index-of-array-in-python)

In [3]:
def get_index(array, value):
    """Return the index of the element of ``array`` nearest to ``value``.

    Parameters
    ----------
    array : array_like
        Values to search. Anything ``np.asarray`` accepts (ndarray, list,
        tuple, ...) is allowed.
    value : scalar
        Target value to compare against.

    Returns
    -------
    idx : integer
        Result of ``argmin`` on the absolute differences. With no ``axis``
        given, ``argmin`` indexes into the flattened array and reports the
        first occurrence when several elements are equally close.
    """
    # Coerce first so plain Python sequences support the subtraction below
    distances = np.abs(np.asarray(array) - value)
    return distances.argmin()

# Grid of eleven evenly spaced values from 0 to 10 to search in
a = np.linspace(0., 10., num=11)
print(a)
values = [1.2, 5.1, 5.6]
# Look up the nearest grid index for every target, then report target, index, grid value
nearest = [get_index(a, value) for value in values]
for value, idx in zip(values, nearest):
    print(value, idx, a[idx])

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]
1.2 1 1.0
5.1 5 5.0
5.6 6 6.0


# Boolean indexing

In [4]:
# Ten random integers from the half-open range [0, 100).
# NOTE(review): no seed is set, so the values differ on every run.
a = np.random.randint(low=0, high=100, size=10)
a

array([20, 47, 64, 49, 44, 60, 16, 58, 31, 45])

In [5]:
# Select elements greater than 60: the comparison yields a boolean mask,
# and indexing with that mask keeps only the positions where it is True
a[a>60]

array([64])

In [6]:
# Select elements greater than 60 and odd: & combines the two boolean masks
# element-wise; each comparison is parenthesised because & binds more tightly
# than > and ==
a[(a>60) & (a%2 == 1)]

array([], dtype=int64)

In [7]:
# Arithmetic mean over all elements of a
np.mean(a)

43.4

In [8]:
# Standard deviation over all elements of a
np.std(a)

15.531902652283136

In [9]:
# Select all of the numbers greater than the mean (no even/odd filter is applied here)
a[a>a.mean()]

array([47, 64, 49, 44, 60, 58, 45])

In [10]:
# Keep the values lying strictly between (mean - std) and (mean + std)
a[(a > a.mean() - a.std()) & (a < a.mean() + a.std())]

array([47, 49, 44, 58, 31, 45])

# Clip values in 1D and 2D arrays

In [11]:
a = np.array([1.0, 0.1, 1.e-3, 0.0, -1.e-3, -0.1, -1.0])
print(a)
# Raise every negative entry to zero; None leaves the upper side unbounded
a = np.clip(a, 0, None)
print(a)

[ 1.     0.1    0.001  0.    -0.001 -0.1   -1.   ]
[1.    0.1   0.001 0.    0.    0.    0.   ]


In [12]:
b = np.array([[0, 1.0], [1, 0.1], [1, 1.e-3], [3, 0.0], [4, -1.e-3], [5, -0.1], [6, -1.0]])
print(b)
print()
# Clip only the second column; assigning through the column view updates b itself
second_column = b[:, 1]
second_column[:] = second_column.clip(min=0)
print(b)

[[ 0.e+00  1.e+00]
 [ 1.e+00  1.e-01]
 [ 1.e+00  1.e-03]
 [ 3.e+00  0.e+00]
 [ 4.e+00 -1.e-03]
 [ 5.e+00 -1.e-01]
 [ 6.e+00 -1.e+00]]

[[0.e+00 1.e+00]
 [1.e+00 1.e-01]
 [1.e+00 1.e-03]
 [3.e+00 0.e+00]
 [4.e+00 0.e+00]
 [5.e+00 0.e+00]
 [6.e+00 0.e+00]]


# Slice objects

[How can I create a slice object for Numpy array?](https://stackoverflow.com/questions/38917173/how-can-i-create-a-slice-object-for-numpy-array)  
[numpy.s_](https://docs.scipy.org/doc/numpy/reference/generated/numpy.s_.html)

## Use to give slices meaningful names

In [13]:
a = np.arange(20).reshape(4, 5)

# np.s_ turns slicing syntax into index tuples that can be stored under a name
middle = np.s_[1:3, 1:4]          # rows 1-2, columns 1-3
lowerright = np.s_[2:, 3:]        # rows 2 onward, columns 3 onward
row2everyother = np.s_[2, 0::2]   # row 2, every second column
lastrow = np.s_[-1, :]            # final row, all columns

print(a)
# Print each label followed by the part of a that the slice selects
for label, region in (
    ("middle", middle),
    ("lowerright", lowerright),
    ("row2everyother", row2everyother),
    ("lastrow", lastrow),
):
    print(label + ":")
    print(a[region])

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]
middle:
[[ 6  7  8]
 [11 12 13]]
lowerright:
[[13 14]
 [18 19]]
row2everyother:
[10 12 14]
lastrow:
[15 16 17 18 19]


In [14]:
# Show what the named slices actually contain: tuples of ints and slice objects
for named_slice in (middle, row2everyother, lastrow):
    print(named_slice)

(slice(1, 3, None), slice(1, 4, None))
(2, slice(0, None, 2))
(-1, slice(None, None, None))


## Accessing indices in slice objects

In [15]:
print(middle)
# middle is a 2-tuple: its first entry becomes yslice, its second entry xslice
yslice, xslice = middle
print(xslice, yslice)
# Every slice object exposes its bounds as .start, .stop and .step
for axis_slice in (xslice, yslice):
    print(axis_slice.start, axis_slice.stop, axis_slice.step)

(slice(1, 3, None), slice(1, 4, None))
slice(1, 4, None) slice(1, 3, None)
1 4 None
1 3 None


# Get name of numpy variable

See https://stackoverflow.com/questions/34980833/python-name-of-np-array-variable-as-string

In [16]:
def namestr(obj, namespace):
    """Return the first key in ``namespace`` whose value is ``obj``.

    Parameters
    ----------
    obj : object
        Object to look up. Matching is by identity (``is``), not equality.
    namespace : mapping
        Name-to-object mapping to search, e.g. ``globals()``.

    Returns
    -------
    The first matching key, in the mapping's iteration order.

    Raises
    ------
    IndexError
        If no value in ``namespace`` is ``obj``.
    """
    # Stop at the first hit instead of collecting every alias of obj
    for name, candidate in namespace.items():
        if candidate is obj:
            return name
    raise IndexError("no name in namespace is bound to the given object")

In [17]:
temp_1D_array = np.linspace(0, 1, num=101)

# Search the global namespace for the name bound to the array
namestr(obj=temp_1D_array, namespace=globals())

'temp_1D_array'

# From article

See [Top 4 Numpy Functions You Don’t Know About (Probably)](https://towardsdatascience.com/top-4-numpy-functions-you-dont-know-about-probably-28fcd5d7174f)



## Where

The `where()` function returns the indices of the elements of an array that satisfy a given condition.

In [18]:
grades = np.array([1, 3, 4, 2, 5, 5])
# Called with only a condition, np.where returns a tuple of index arrays
# (one per dimension) marking the positions where the condition is True
np.where(grades > 3)

(array([2, 4, 5]),)

Replace each value with one of two labels, depending on whether it satisfies the given condition

In [22]:
# Entries that are not greater than 3 (3 itself included) get 'lt3'; the rest get 'gt3'
np.where(grades <= 3, 'lt3', 'gt3')

array(['lt3', 'lt3', 'gt3', 'lt3', 'gt3', 'gt3'], dtype='<U3')

## argmin(), argmax(), argsort()

## allclose()

*`allclose()` returns True if the items in two arrays are element-wise equal within a tolerance, which makes it a convenient way to check whether two arrays are similar. The tolerance passed in the examples below is the relative tolerance `rtol`.*

In [24]:
arr1 = np.array([0.15, 0.20, 0.25, 0.17])
arr2 = np.array([0.14, 0.21, 0.27, 0.15])

# Compare with a relative tolerance of 0.1 (atol keeps its default)
np.allclose(arr1, arr2, rtol=0.1)

False

In [25]:
# Same comparison with the relative tolerance loosened to 0.2
np.allclose(arr1, arr2, rtol=0.2)

True