In [1]:
import pandas as pd
import numpy as np

In [7]:
# Create a table

people = {
    'first':['Corey','Jane','John','Chris',np.nan,None,'NA'],
    'last':['Schafer','Doe','Schafer',np.nan,np.nan,'Missing',None],
    'email':['CoreyMSchafer@gmail.com','JaneDoe@email.com','JohnDoe@email.com',None,np.nan,'Anonymous@email.com','None'],
    'age':['33','55','63','36',None,None,'Missing']
}

In [9]:
# Print the table

df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Schafer,JohnDoe@email.com,63
3,Chris,,,36
4,,,,
5,,Missing,Anonymous@email.com,
6,,,,Missing


In [11]:
# remove the rows that don't have values

df.dropna()

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Schafer,JohnDoe@email.com,63


In [12]:
# default argument for df.dropna() is:
# when set to index it will drop the row with missing values
# when set to column, will drop the column with missing values
# any will identify and drop row with "any" missing value

df.dropna(axis='index', how='any')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Schafer,JohnDoe@email.com,63


In [14]:
# any can be also set to all, and will drop rows when all of the values are missing

df.dropna(axis='index', how='all')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Schafer,JohnDoe@email.com,63
3,Chris,,,36
5,,Missing,Anonymous@email.com,
6,,,,Missing


In [15]:
# setting index to columns will return all since all have values

df.dropna(axis='columns', how='all')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Schafer,JohnDoe@email.com,63
3,Chris,,,36
4,,,,
5,,Missing,Anonymous@email.com,
6,,,,Missing


In [16]:
# setting all to any will return none since every row has missing value

df.dropna(axis='columns', how='any')

0
1
2
3
4
5
6


In [18]:
# what if we need to retain atleast only one column
# then pass it to a subset
# the 'any' on how or even 'all' will only look for missing values on the email column
# as long as the email is there it shouldn't drop those rows

df.dropna(axis='index', how='any', subset=['email'])

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Schafer,JohnDoe@email.com,63
5,,Missing,Anonymous@email.com,
6,,,,Missing


In [19]:
# what if you need two columns
# as long as the last name and email is there, do not drop the column

df.dropna(axis='index', how='any', subset=['last','email'])

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Schafer,JohnDoe@email.com,63.0
5,,Missing,Anonymous@email.com,


In [22]:
# if we want to permanently change our dataframe set inplace = True

df = pd.DataFrame(people)
df.replace('NA',np.nan, inplace=True)
df.replace('Missing',np.nan, inplace=True)

In [23]:
df

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33.0
1,Jane,Doe,JaneDoe@email.com,55.0
2,John,Schafer,JohnDoe@email.com,63.0
3,Chris,,,36.0
4,,,,
5,,,Anonymous@email.com,
6,,,,


In [24]:
# if we run the table again, all rows with missing values are gone

df.dropna()

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Schafer,JohnDoe@email.com,63


In [26]:
#check if values are 'na'

df.isna()

Unnamed: 0,first,last,email,age
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,True,True,False
4,True,True,True,True
5,True,True,False,True
6,True,True,False,True


In [28]:
# use the fillna method to fill values
# this is useful for numerical data

df.fillna('MISSING')

Unnamed: 0,first,last,email,age
0,Corey,Schafer,CoreyMSchafer@gmail.com,33
1,Jane,Doe,JaneDoe@email.com,55
2,John,Schafer,JohnDoe@email.com,63
3,Chris,MISSING,MISSING,36
4,MISSING,MISSING,MISSING,MISSING
5,MISSING,MISSING,Anonymous@email.com,MISSING
6,MISSING,MISSING,,MISSING
