In [2]:
## Based on material at:
#  - https://jakevdp.github.io/PythonDataScienceHandbook/03.04-missing-values.html


In [3]:
# Imports
import numpy as np
import pandas as pd

In [4]:
# Baseline case: an integer array in which every entry is present.
vals_valid = np.array((1, 3, 4))
vals_valid

array([1, 3, 4])

In [5]:
# With no missing entries the reduction is an ordinary integer sum.
vals_valid.sum()

8

In [6]:
# Mix real numbers with three common "missing" stand-ins. NumPy cannot find a
# numeric dtype that holds all of them, so the array ends up as dtype=object.
vals = np.array(
    [
        1,
        None,    # Python's null object
        3,
        4,
        "",      # empty string
        np.nan,  # floating-point not-a-number
    ]
)
vals

array([1, None, 3, 4, '', nan], dtype=object)

In [7]:
# Summing the object array applies Python's `+` element by element, and that
# is undefined for int + None. The failure is the point of this cell, so it is
# caught and displayed instead of aborting a Restart & Run All.
try:
    vals.sum()
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

## Using NumPy's NaN

In [8]:
# Use NaN as the only missing-value marker: the array stays numeric (float).
vals = np.array((1, 3, 4, np.nan))
vals

array([ 1.,  3.,  4., nan])

In [9]:
# A single NaN propagates through the reduction, so the total is NaN as well.
vals.sum()

nan

In [10]:
# nansum skips the NaN entries and adds up only the remaining values.
np.nansum(vals)

8.0

## Checking for missing values in pandas

In [11]:
# A Series mixing real values with NaN, None and an empty string, used below
# to see which of them pandas treats as null.
data = pd.Series(
    [
        1,
        np.nan,
        "hello",
        None,
        "",
    ]
)

In [12]:
# Boolean mask of the missing entries: NaN and None are flagged as null,
# while the empty string is not.
data.isnull()

0    False
1     True
2    False
3     True
4    False
dtype: bool

In [13]:
# Keep only the non-null entries by indexing with the notnull() mask.
# The empty string at index 4 is kept, because it is not considered null.
data[data.notnull()]

0        1
2    hello
4         
dtype: object

In [14]:
# Small test frame with one null in a numeric column (NaN, column 1) and one
# in a text column (None, column 0); column 2 is complete.
df = pd.DataFrame(
    [
        ["Alpha", np.nan, 2],
        ["Beta", 3, 5],
        [None, 4, 6],
    ]
)

In [15]:
# Drop every row that contains at least one null; only complete rows remain.
# The result is a new frame; df itself is not reassigned here.
df.dropna()

Unnamed: 0,0,1,2
1,Beta,3.0,5


In [16]:
# Drop every column that contains at least one null; only complete columns remain.
df.dropna(axis='columns')

Unnamed: 0,2
0,2
1,5
2,6


In [17]:
# Replace every null with 0. Note that the same fill value is used in all
# columns, so the missing name in text column 0 becomes the integer 0 too.
df.fillna(0)

Unnamed: 0,0,1,2
0,Alpha,0.0,2
1,Beta,3.0,5
2,0,4.0,6
