In [1]:
# Filtering

# 1. Filtering data
# 2. Filtering in Numpy
# 3. Axis-wise filtering

import numpy as np

In [2]:
# Key idea: every filter is backed by a boolean mask. Comparing an array with
# a scalar produces a same-shaped array of True/False values.

arr = np.array([
    [0, 2, 3],
    [1, 3, -6],
    [-3, -2, 1],
])

equals_three = arr == 3  # True exactly where the element is 3
is_positive = arr > 0    # True where the element is strictly positive
not_one = arr != 1       # True everywhere except where the element is 1

print(repr(equals_three))
print(repr(is_positive))
print(repr(not_one))

array([[False, False,  True],
       [False,  True, False],
       [False, False, False]])
array([[False,  True,  True],
       [ True,  True, False],
       [False, False,  True]])
array([[ True,  True,  True],
       [False,  True,  True],
       [ True,  True, False]])


In [3]:
# np.nan needs special care: NaN never compares equal to anything (not even
# to itself), so `arr == np.nan` is False everywhere and cannot be used as a
# filter. np.isnan(arr) returns the boolean mask that marks the NaN entries.

arr = np.array([
    [0, 2, np.nan],
    [1, np.nan, -6],
    [np.nan, -2, 1],
])

nan_mask = np.isnan(arr)
print(repr(nan_mask))

array([[False, False,  True],
       [False,  True, False],
       [ True, False, False]])


In [6]:
# 2. Filtering in NumPy: np.where takes the boolean mask as its argument.
# Called with a single argument it returns a tuple holding one index array
# per dimension, listing the positions where the mask is True.

true_positions = np.where([True, False, True])  # indices of the True entries
print(repr(true_positions))

arr = np.array([0, 3, 5, 3, 1])
three_positions = np.where(arr == 3)  # positions at which arr holds a 3
print(repr(three_positions))


(array([0, 2]),)
(array([1, 3]),)


In [7]:
# On a 2-D array np.where returns two index arrays: the first holds the row
# (axis 0) index and the second the column (axis 1) index of every True entry.
arr = np.array([
    [0, 2, 3],
    [1, 0, 0],
    [-3, 0, 0],
])

nonzero_mask = arr != 0
x_ind, y_ind = np.where(nonzero_mask)

print(repr(x_ind))  # row indices of the non-zero elements
print(repr(y_ind))  # column indices of the non-zero elements

# Indexing with both arrays together pulls out the non-zero values themselves.
print(repr(arr[x_ind, y_ind]))

array([0, 0, 1, 2])
array([1, 2, 0, 0])
array([ 2,  3,  1, -3])


In [8]:
# np.where works in two modes:
#   - with 1 argument it returns the indices where the condition is True;
#   - with 3 arguments it builds a new array element by element, taking the
#     value from the second argument where the condition is True and from the
#     third argument where it is False.
positives = np.array([[1, 2], [3, 4]])
negatives = np.array([[-2, -5], [-1, -8]])

# Three different masks applied to the same pair of source arrays.
for np_filter in (
    np.array([[True, False], [False, True]]),  # hand-written mask
    positives > 2,                             # True on the bottom row only
    negatives > 0,                             # all False -> everything comes from negatives
):
    print(repr(np.where(np_filter, positives, negatives)))

array([[ 1, -5],
       [-1,  4]])
array([[-2, -5],
       [ 3,  4]])
array([[-2, -5],
       [-1, -8]])


In [10]:
# 3. Axis-wise filtering: start from the element-wise "is positive" mask.
arr = np.array([
    [-2, -1, -3],
    [4, 5, -6],
    [3, 9, 1],
])

positive_mask = arr > 0
print(repr(positive_mask))

array([[False, False, False],
       [ True,  True, False],
       [ True,  True,  True]])


In [11]:
# np.any reduces the mask with logical OR: True if at least one element is positive.
any_positive = np.any(arr > 0)
print(any_positive)

True


In [12]:
# np.all reduces the mask with logical AND: True only if every element is positive.
all_positive = np.all(arr > 0)
print(all_positive)

False


In [13]:
arr = np.array([
    [-2, -1, -3],
    [4, 5, -6],
    [3, 9, 1],
])

# Build the element-wise mask once and reuse it for every reduction below.
positive = arr > 0
print(repr(positive))

# axis=0 reduces down the rows -> one result per column.
print(repr(np.any(positive, axis=0)))

# axis=1 reduces across the columns -> one result per row.
print(repr(np.any(positive, axis=1)))

# Same per-row reduction, but requiring every element in the row to be positive.
print(repr(np.all(positive, axis=1)))

array([[False, False, False],
       [ True,  True, False],
       [ True,  True,  True]])
array([ True,  True,  True])
array([False,  True,  True])
array([False, False,  True])


In [20]:
arr = np.array([
    [-2, -1, -3],
    [4, 5, -6],
    [3, 9, 1],
])

# Per-row flag: does the row contain at least one positive value?
# Things to try: swap np.any for np.all, or change the axis
# (axis=0 works column-wise, axis=1 works row-wise).
has_positive = np.any(arr > 0, axis=1)
print(has_positive)

# np.where turns the flags into row indices, which then select whole rows
# (indexing with the boolean array directly, arr[has_positive], gives the same rows).
selected_rows = np.where(has_positive)
print(repr(arr[selected_rows]))

[False  True  True]
array([[ 4,  5, -6],
       [ 3,  9,  1]])
