In [8]:
import pandas as pd
import numpy as np
from numpy import nan as NA

In [2]:
# Sample Series with one missing entry.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
string_data = pd.Series(['this', 'that', np.nan, 'the other'])
string_data

0         this
1         that
2          NaN
3    the other
dtype: object

In [3]:
# Boolean mask of missing entries (isna is the same method as isnull).
# Only a cell's final expression is rendered, so this mask is not displayed.
string_data.isna()

# Python's None also counts as missing. This assignment modifies
# string_data in place, and the filtering cells below rely on it.
string_data[0] = None
string_data

0         None
1         that
2          NaN
3    the other
dtype: object

In [4]:
# Filter with a boolean mask: ~ inverts isnull(), keeping only non-missing entries
string_data[~string_data.isnull()]

1         that
3    the other
dtype: object

In [5]:
# Shortcut: notna() (same method as notnull()) builds the keep-mask directly
string_data[string_data.notna()]

1         that
3    the other
dtype: object

In [6]:
# Or just drop the missing entries; dropna() returns a new Series
string_data.dropna()

1         that
3    the other
dtype: object

In [11]:
# DataFrames offer the same missing-data tools as Series.
# Each row is written at full width so the NaN padding is explicit
# rather than implied by rows of unequal length.
df1 = pd.DataFrame(
    [
        [1, 2, 3, 4],
        [1, NA, NA, NA],
        [NA, NA, NA, NA],
        [NA, 8, 2, NA],
    ]
)

df1

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,4.0
1,1.0,,,
2,,,,
3,,8.0,2.0,


In [13]:
# Drop every row that contains at least one NaN ('any' is the default mode)
df1_clean = df1.dropna(how='any')
df1_clean

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,4.0


In [14]:
# how='all' drops a row only when every value in it is NaN
df1.dropna(axis='index', how='all')

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,4.0
1,1.0,,,
3,,8.0,2.0,


In [28]:
# The same rules apply to columns via the axis argument.
# First blank out the only value in column 3 so the whole column is NaN.
# NOTE: this modifies df1 in place; the cells below show the modified frame.
df1.iloc[0, 3] = NA
df1.dropna(axis='columns', how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,1.0,,
2,,,
3,,8.0,2.0


In [29]:
# Fill missing data: replace every NaN with one constant
df1.fillna(value=10)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,10.0
1,1.0,10.0,10.0,10.0
2,10.0,10.0,10.0,10.0
3,10.0,8.0,2.0,10.0


In [30]:
# A dict maps column label -> fill value; columns not listed stay unfilled
df1.fillna(value={0: 45, 1: 10})

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,1.0,10.0,,
2,45.0,10.0,,
3,45.0,8.0,2.0,


In [33]:
# Forward fill: carry the last valid value down each column.
# limit=1 fills at most one consecutive NaN per gap.
# DataFrame.ffill replaces fillna(method='ffill'), which is deprecated
# since pandas 2.1.
df1.ffill(limit=1)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,1.0,2.0,3.0,
2,1.0,,,
3,,8.0,2.0,


In [36]:
# Fill each column with a per-column statistic such as its mean (or median).
# A column that is entirely NaN has a NaN mean, so it stays unfilled.
df1.fillna(value=df1.mean())

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,1.0,5.0,2.5,
2,1.0,5.0,2.5,
3,1.0,8.0,2.0,
