In [1]:
import pandas as pd

In [31]:
# Per-store inventory: one dict per store, keyed by item name.
# Items a store does not carry are simply absent and become NaN in the frame.
stock = [
    {'bikes': 10, 'pants': 28, 'watches': 10, 'suites': 10, 'shoes': 5},
    {'bikes': 12, 'watches': 8, 'glasses': 50},
    {'bikes': 3, 'pants': 2, 'watches': 9, 'shoes': 5, 'glasses': 10, 'suites': 10},
]
store_labels = ['Store 1', 'Store 2', 'Store 3']

# Rows are stores, columns are the union of all item names.
df = pd.DataFrame(data=stock, index=store_labels)
df

Unnamed: 0,bikes,pants,watches,suites,shoes,glasses
Store 1,10,28.0,10,10.0,5.0,
Store 2,12,,8,,,50.0
Store 3,3,2.0,9,10.0,5.0,10.0


In [32]:
# Let's find out which values are null / NaN.
# The result is a boolean mask of the same shape: True where a value is
# missing, False everywhere else.
df.isna()

Unnamed: 0,bikes,pants,watches,suites,shoes,glasses
Store 1,False,False,False,False,False,True
Store 2,False,True,False,True,True,False
Store 3,False,False,False,False,False,False


In [33]:
# Number of null values in each column (True counts as 1 when summed).
df.isna().sum()

bikes      0
pants      1
watches    0
suites     1
shoes      1
glasses    1
dtype: int64

In [34]:
# Total count of null values: sum per column first, then sum those column totals.
df.isna().sum().sum()

4

In [35]:
# count() is the complement of the null count: for each column it returns
# the number of rows holding a non-null value.
df.count(axis="index")

bikes      3
pants      2
watches    3
suites     2
shoes      2
glasses    2
dtype: int64

In [36]:
# Drop every row that contains at least one null / NaN value.
# A new frame is returned; df itself is not modified.
df.dropna(axis="index", how="any")

Unnamed: 0,bikes,pants,watches,suites,shoes,glasses
Store 3,3,2.0,9,10.0,5.0,10.0


In [37]:
# Drop every column that contains at least one null / NaN value.
# A new frame is returned; df itself is not modified.
df.dropna(axis="columns", how="any")

Unnamed: 0,bikes,watches
Store 1,10,10
Store 2,12,8
Store 3,3,9


In [38]:
# Instead of dropping data, null values can be replaced with a constant,
# for example 0:
df.fillna(value=0)

Unnamed: 0,bikes,pants,watches,suites,shoes,glasses
Store 1,10,28.0,10,10.0,5.0,0.0
Store 2,12,0.0,8,0.0,0.0,50.0
Store 3,3,2.0,9,10.0,5.0,10.0


In [39]:
# Display df again: it still contains its NaN values, because the
# dropna / fillna calls above returned new frames instead of changing df.
df

Unnamed: 0,bikes,pants,watches,suites,shoes,glasses
Store 1,10,28.0,10,10.0,5.0,
Store 2,12,,8,,,50.0
Store 3,3,2.0,9,10.0,5.0,10.0


In [41]:
# Forward fill down each column: a NaN is replaced with the value from the
# previous row. Note how the first row still has NaN, since there is no
# earlier row to copy from.
# DataFrame.ffill is used instead of fillna(method='ffill'); the `method`
# argument of fillna is deprecated in recent pandas releases.
df.ffill(axis=0)

Unnamed: 0,bikes,pants,watches,suites,shoes,glasses
Store 1,10,28.0,10,10.0,5.0,
Store 2,12,28.0,8,10.0,5.0,50.0
Store 3,3,2.0,9,10.0,5.0,10.0


In [42]:
# Forward fill along each row: a NaN is replaced with the value from the
# column to its left.
# DataFrame.ffill is used instead of fillna(method='ffill'); the `method`
# argument of fillna is deprecated in recent pandas releases.
df.ffill(axis=1)

Unnamed: 0,bikes,pants,watches,suites,shoes,glasses
Store 1,10.0,28.0,10.0,10.0,5.0,5.0
Store 2,12.0,12.0,8.0,8.0,8.0,50.0
Store 3,3.0,2.0,9.0,10.0,5.0,10.0


In [44]:
# Backward filling works the other way round: a NaN is replaced with the
# value from the next row in the same column.
# DataFrame.bfill is used instead of fillna(method='backfill'); the `method`
# argument of fillna is deprecated in recent pandas releases.
df.bfill(axis=0)

Unnamed: 0,bikes,pants,watches,suites,shoes,glasses
Store 1,10,28.0,10,10.0,5.0,50.0
Store 2,12,2.0,8,10.0,5.0,50.0
Store 3,3,2.0,9,10.0,5.0,10.0


In [46]:
# Fill missing values by linear interpolation, working top-down within each
# column: a gap is estimated from the known values around it.
# A NaN in the first row is left as is, as the output below shows.
df.interpolate(method="linear", axis="index")

Unnamed: 0,bikes,pants,watches,suites,shoes,glasses
Store 1,10,28.0,10,10.0,5.0,
Store 2,12,15.0,8,10.0,5.0,50.0
Store 3,3,2.0,9,10.0,5.0,10.0
