Often, you have data and you need to determine which questions were answered and which weren't.
The computer represents a value that has no data as a null or, sometimes, as not-a-number (NaN).
When you perform calculations with these missing values, you must be careful about how you handle them.


# Masking

# Finding Nulls

# Any and All

In [3]:
# Small inline CSV used as the example data set: one row per household.
# Fields left empty (e.g. phone_energy for household A) are the missing
# values this notebook works with.
data = '''
household,dorm,phone_energy,laptop_energy
A,tuscany,,50
B,sauv,30,
C,tuscany,12,45
D,sauv,,
'''

%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from io import StringIO
from tabulate import tabulate

# StringIO wraps the CSV text in a file-like object so read_csv can parse it
df = pd.read_csv(StringIO(data))
# display the data frame; the empty CSV fields show up as missing values
df

Unnamed: 0,household,dorm,phone_energy,laptop_energy
0,A,tuscany,,50.0
1,B,sauv,30.0,
2,C,tuscany,12.0,45.0
3,D,sauv,,


In [5]:
# note that you get back a pandas Series of boolean values (one per row),
# True where phone_energy is missing
# these boolean values can be used as an index to get rows of the data frame
df['phone_energy'].isnull()

0     True
1    False
2    False
3     True
Name: phone_energy, dtype: bool

In [7]:
# use the boolean Series from isnull() as a mask to keep only the rows
# where the laptop energy value is blank
df[df['laptop_energy'].isnull()]

Unnamed: 0,household,dorm,phone_energy,laptop_energy
1,B,sauv,30.0,
3,D,sauv,,


In [9]:
# notnull() is the opposite mask: keep only the rows where the laptop
# energy value is not blank
df[df['laptop_energy'].notnull()]

Unnamed: 0,household,dorm,phone_energy,laptop_energy
0,A,tuscany,,50.0
2,C,tuscany,12.0,45.0


In [11]:
# count how often each value occurs; by default the missing values are
# left out of the tally
df['phone_energy'].value_counts()

30.0    1
12.0    1
Name: phone_energy, dtype: int64

In [12]:
# dropna=False adds the missing values to the tally, so you can see how many
# nulls there are -- but not which rows they are in
df['phone_energy'].value_counts(dropna=False)

NaN      2
 30.0    1
 12.0    1
Name: phone_energy, dtype: int64

In [15]:
# let's say you want rows 1 and 3 from the data frame
# iloc selects rows by their integer position
df.iloc[[1,3]]

Unnamed: 0,household,dorm,phone_energy,laptop_energy
1,B,sauv,30.0,
3,D,sauv,,


In [17]:
# you can get the same rows by passing in a list of False and True values
# (a mask) with one entry per row,
# where only the entries corresponding to rows 1 and 3 are True
df[[False, True, False, True]]

Unnamed: 0,household,dorm,phone_energy,laptop_energy
1,B,sauv,30.0,
3,D,sauv,,


In [19]:
# you don't have to write the mask by hand: pandas methods such as isnull()
# generate the True and False values for you (as a boolean Series)
df['laptop_energy'].isnull()

0    False
1     True
2    False
3     True
Name: laptop_energy, dtype: bool

In [22]:
# a comparison also produces a boolean Series that can be used as a mask
# note that a missing value compares as False, so the rows with a blank
# laptop_energy are not selected
df['laptop_energy'] > 48

0     True
1    False
2    False
3    False
Name: laptop_energy, dtype: bool