In [1]:
import numpy as np
from pandas import Series,DataFrame
import pandas as pd

In [2]:
# Build a Series named `data` from a plain Python list.
# The third entry is np.nan, which marks a missing value.
raw_values = ['one', 'two', np.nan, 'four']
data = Series(raw_values)

In [3]:
# Display the series; the np.nan entry is shown as NaN
data

0     one
1     two
2     NaN
3    four
dtype: object

In [4]:
# Call the isnull method to get a boolean mask that is True where a value is null
data.isnull()

0    False
1    False
2     True
3    False
dtype: bool

In [5]:
# Null values can be dropped (removed) with the dropna method
# The surviving entries keep their original index labels (0, 1, 3)
data.dropna()

0     one
1     two
3    four
dtype: object

In [7]:
# Now apply the same ideas to a DataFrame.
# Build a 4-row by 3-column frame in which several entries are null;
# the last row is entirely null.
dframe_rows = [
    [1, 2, 3],
    [np.nan, 5, 6],
    [7, np.nan, 9],
    [np.nan, np.nan, np.nan],
]
dframe = DataFrame(dframe_rows)
# Display the frame
dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0
3,,,


In [10]:
# Build a second frame with the incomplete rows removed.
# axis=0 and how='any' are dropna's defaults, written out here for clarity:
# a row is dropped as soon as it contains at least one null.
clean_dframe = dframe.dropna(axis=0, how='any')
# Display the result
clean_dframe

Unnamed: 0,0,1,2
0,1,2,3


In [11]:
# dropna(how='all') drops a row only when every value in it is null
# Show the result: only the all-null last row is removed
dframe.dropna(how='all')

Unnamed: 0,0,1,2
0,1.0,2.0,3
1,,5.0,6
2,7.0,,9


In [12]:
# Nulls can also be dropped by column instead of by row, by specifying an axis
# First, show the original dataframe for reference
dframe

Unnamed: 0,0,1,2
0,1.0,2.0,3.0
1,,5.0,6.0
2,7.0,,9.0
3,,,


In [14]:
# axis=1 drops columns (not rows) that contain a null
# Every column here has at least one null, so all columns are dropped
# and only the row index is left
dframe.dropna(axis=1)

0
1
2
3


In [15]:
# Thresholding: drop rows that do not have at least N non-null data points.
# `npn` is a short alias for np.nan, quicker to type in the literal below.
npn = np.nan
# New 4 by 4 frame with nulls scattered across its rows
dframe2_rows = [
    [1, 2, 3, npn],
    [2, npn, 5, 6],
    [npn, 7, npn, 9],
    [1, npn, npn, npn],
]
dframe2 = DataFrame(dframe2_rows)

In [16]:
# Show dframe2; the last row (index 3) has only one non-null value
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [17]:
# Threshold by the number of data points that are not null
# Use dropna and pass the thresh argument
# thresh=2 drops the last row because it has only 1 data point
dframe2.dropna(thresh=2)

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0


In [18]:
# thresh=3 drops the last 2 rows as they don't have at least 3 data points
dframe2.dropna(thresh=3)

Unnamed: 0,0,1,2,3
0,1,2.0,3,
1,2,,5,6.0


In [19]:
# Show dframe2 again: the dropna calls above were not assigned back,
# so it still has all four rows
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [20]:
# Can fill null values with a value instead of dropping them
# The result is not assigned back with '=' (and inplace is not used),
# so this only displays a filled copy and dframe2 itself is unchanged
dframe2.fillna(1)

Unnamed: 0,0,1,2,3
0,1,2,3,1
1,2,1,5,6
2,1,7,1,9
3,1,1,1,1


In [22]:
# Different columns can be filled with different values by passing a dictionary
# Remember: the fillna(1) call above made no permanent change to dframe2
# Show dframe2 to confirm it still contains its nulls
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [23]:
# Fill each column's nulls with that column's own label (0, 1, 2, 3).
# The dict maps column label -> fill value.
fill_by_column = {0: 0, 1: 1, 2: 2, 3: 3}
dframe2.fillna(fill_by_column)

Unnamed: 0,0,1,2,3
0,1,2,3,3
1,2,1,5,6
2,0,7,2,9
3,1,1,2,3


In [24]:
# Next, dframe2 will be modified permanently
# First show dframe2 as it is now, still containing nulls
dframe2

Unnamed: 0,0,1,2,3
0,1.0,2.0,3.0,
1,2.0,,5.0,6.0
2,,7.0,,9.0
3,1.0,,,


In [25]:
# inplace=True applies the fill to dframe2 itself, so the change persists
dframe2.fillna(0,inplace=True)

In [26]:
# Show dframe2
# Nulls are now permanently replaced by 0
dframe2

Unnamed: 0,0,1,2,3
0,1,2,3,0
1,2,0,5,6
2,0,7,0,9
3,1,0,0,0
