Dealing with missing data: any element-wise operation involving a masked (missing) value results in a masked value.

In [1]:
import numpy as np

# Create a masked array

```python
np.ma.array(
    data,
    dtype=None,
    copy=False,
    order=None,
    mask=False,
    fill_value=None,
    keep_mask=True,
    hard_mask=False,
    shrink=True,
    subok=True,
    ndmin=0,
)
```

In [7]:
# float32 masked array: positions 0 and 2 are flagged as missing.
x = np.ma.array(
    [1, 2, 3, 4],
    mask=[True, False, True, False],
    dtype=np.float32,
)
x

masked_array(data=[--, 2.0, --, 4.0],
             mask=[ True, False,  True, False],
       fill_value=1e+20,
            dtype=float32)

In [8]:
# float64 masked array: only position 2 holds a valid value.
y = np.ma.array(
    [5, 6, 7, 8],
    mask=[True, True, False, True],
    dtype=np.float64,
)
y

masked_array(data=[--, --, 7.0, --],
             mask=[ True,  True, False,  True],
       fill_value=1e+20)

In [9]:
# An element of the sum is masked wherever either operand is masked; here every
# position is masked in x or in y, so the whole result is masked.
x + y

masked_array(data=[--, --, --, --],
             mask=[ True,  True,  True,  True],
       fill_value=1e+20,
            dtype=float64)

In [5]:
# Integer masked array built from a plain list; 0/1 flags are accepted as the mask.
data = [1, 2, 3, 4]
v = np.ma.array(data, mask=[0, 1, 1, 0])
v

masked_array(data=[1, --, --, 4],
             mask=[False,  True,  True, False],
       fill_value=999999)

In [6]:
# Reductions on a masked array ignore masked entries: v.mean() = (1 + 4) / 2 = 2.5
v.mean()

2.5

# Modify the mask

Assign **`np.ma.masked`** to an element to mask it.

In [11]:
# The np.float alias was removed from NumPy (it was just the builtin float);
# np.float64 yields the same dtype and works on every NumPy version.
a = np.ma.array([1, 2, 3, 4], mask=[1, 1, 1, 0], dtype=np.float64)
a

masked_array(data=[--, --, --, 4.0],
             mask=[ True,  True,  True, False],
       fill_value=1e+20)

In [12]:
# Mask the first value by assigning np.ma.masked to it.
# NOTE: a[0] was already masked by the mask given at construction, so the
# displayed array is unchanged by this assignment.
a[0] = np.ma.masked
a

masked_array(data=[--, --, --, 4.0],
             mask=[ True,  True,  True, False],
       fill_value=1e+20)

In [13]:
# Assigning an ordinary value to a masked element stores the value and clears
# that element's mask.
a[0] = 9
a

masked_array(data=[9.0, --, --, 4.0],
             mask=[False,  True,  True, False],
       fill_value=1e+20)

The masked entries can be filled with a given value to get a regular array back:

In [17]:
# filled(-1) returns a plain ndarray in which each masked entry is replaced by -1.
a.filled(-1)

array([ 9., -1., -1.,  4.])

The mask can also be cleared:

In [18]:
# Current state before clearing the mask: positions 1 and 2 are still masked.
a

masked_array(data=[9.0, --, --, 4.0],
             mask=[False,  True,  True, False],
       fill_value=1e+20)

In [19]:
# Clear the mask: the underlying values that were hidden behind it become
# visible again.
a.mask = np.ma.nomask
a

masked_array(data=[9.0, 2.0, 3.0, 4.0],
             mask=[False, False, False, False],
       fill_value=1e+20)

# Mask Functions

These behave like the corresponding ndarray functions but operate on masked arrays, masking any invalid results.

In [11]:
# Negative inputs have no real square root, so those positions come back masked.
np.ma.sqrt([1,-1,4,-4])

masked_array(data=[1.0, --, 2.0, --],
             mask=[False,  True, False,  True],
       fill_value=1e+20)

In [12]:
# List the names available in the np.ma namespace (the output is long).
dir(np.ma)

['MAError',
 'MaskError',
 'MaskType',
 'MaskedArray',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'abs',
 'absolute',
 'absolute_import',
 'add',
 'all',
 'allclose',
 'allequal',
 'alltrue',
 'amax',
 'amin',
 'angle',
 'anom',
 'anomalies',
 'any',
 'append',
 'apply_along_axis',
 'apply_over_axes',
 'arange',
 'arccos',
 'arccosh',
 'arcsin',
 'arcsinh',
 'arctan',
 'arctan2',
 'arctanh',
 'argmax',
 'argmin',
 'argsort',
 'around',
 'array',
 'asanyarray',
 'asarray',
 'atleast_1d',
 'atleast_2d',
 'atleast_3d',
 'average',
 'bitwise_and',
 'bitwise_or',
 'bitwise_xor',
 'bool_',
 'ceil',
 'choose',
 'clip',
 'clump_masked',
 'clump_unmasked',
 'column_stack',
 'common_fill_value',
 'compress',
 'compress_cols',
 'compress_nd',
 'compress_rowcols',
 'compress_rows',
 'compressed',
 'concatenate',
 'conjugate',
 'convolve',
 'copy',
 'core',
 'corrcoef',
 'correlate',
 'cos',
 'cosh',
 '

In [20]:
# The real logarithm of -1 is undefined. Plain np.log does not raise for it: it
# returns nan and emits a RuntimeWarning, so wrapping it in try/except guards
# nothing. The warning is silenced explicitly here to keep the output clean.
with np.errstate(invalid="ignore"):
    plain_log = np.log([-1, 1])  # first entry is nan, second is 0.0

# np.ma.log cannot take the log of -1 either, but instead of nan it returns a
# masked entry at that position.
print(np.ma.log([-1, 1]))

[-- 0.0]


  
