NaN ("not a number") and null are used interchangeably here: both denote the absence of a value.

### Reading in Data with read_csv()

In [2]:
import pandas as pd

In [11]:
# read_csv also accepts a URL in place of a local file path.
# usecols restricts the load to the two named columns.

pd.read_csv('drinks.csv', usecols=['country', 'wine_servings']) 

Unnamed: 0,country,wine_servings
0,Afghanistan,
1,Albania,54.0
2,Algeria,14.0
3,Andorra,312.0
4,Angola,45.0
...,...,...
188,Venezuela,3.0
189,Vietnam,1.0
190,Yemen,
191,Zambia,4.0


In [13]:
# Load the two columns and use the country names as the row index.
alcohol = pd.read_csv(
    'drinks.csv',
    usecols=['country', 'wine_servings'],
    index_col='country',
)

In [14]:
# Display the frame: wine_servings indexed by country.
alcohol

Unnamed: 0_level_0,wine_servings
country,Unnamed: 1_level_1
Afghanistan,
Albania,54.0
Algeria,14.0
Andorra,312.0
Angola,45.0
...,...
Venezuela,3.0
Vietnam,1.0
Yemen,
Zambia,4.0


In [15]:
# Still a DataFrame, even though only one data column is left.
type(alcohol)

pandas.core.frame.DataFrame

In [17]:
# A one-column frame can be collapsed into a Series with squeeze('columns').
alcohol = (
    pd.read_csv(
        'drinks.csv',
        usecols=['country', 'wine_servings'],
        index_col='country',
    )
    .squeeze('columns')
)

In [18]:
# Now renders as a Series: values followed by a Name / Length / dtype footer.
alcohol

country
Afghanistan      NaN
Albania         54.0
Algeria         14.0
Andorra        312.0
Angola          45.0
               ...  
Venezuela        3.0
Vietnam          1.0
Yemen            NaN
Zambia           4.0
Zimbabwe         4.0
Name: wine_servings, Length: 193, dtype: float64

In [20]:
# Confirms that the squeeze produced a Series.
type(alcohol)

pandas.core.series.Series

### Series sizing with .size, .shape and len()

In [21]:
# .size: number of elements in the Series (NaN entries included).
alcohol.size

193

In [22]:
# The underlying values array reports the same size.
alcohol.values.size

193

In [23]:
# The index has one label per element, so its size matches too.
alcohol.index.size

193

In [24]:
# .shape is a tuple; a Series is one-dimensional, so it has a single entry.
alcohol.shape

(193,)

In [30]:
# For a Series, .size equals the first (and only) entry of .shape.
alcohol.size == alcohol.shape[0]

True

### Unique values and series monotonicity

In [31]:
# False here: some wine_servings values occur more than once.
alcohol.is_unique

False

In [32]:
# Number of distinct values; NaN is excluded by default.
alcohol.nunique()

71

In [33]:
# dropna=False counts NaN as one additional distinct value.
alcohol.nunique(dropna=False)

72

In [40]:
# Monotonic decreasing is non-strict: a run of equal values still qualifies.
# (reversed() has no visible effect here because all elements are equal.)
pd.Series(reversed([1,1,1])).is_monotonic_decreasing

True

### The count() method

In [45]:
# count() tallies only the non-null entries, so it can be smaller than .size.
alcohol.count()

162

In [43]:
# True when at least one value in the Series is NaN.
alcohol.hasnans

True

In [48]:
# isnull() yields one boolean per element; list() forces the full, untruncated dump.
# NOTE(review): alcohol.isnull() on its own displays a condensed preview instead.
list(alcohol.isnull())

[True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 True,
 False,
 False,
 True,
 True,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 ...]

In [52]:
# Boolean-mask the Series with isnull(), then keep only the labels of the null entries.
alcohol[alcohol.isnull()].index

Index(['Afghanistan', 'Bangladesh', 'Bhutan', 'Burundi', 'North Korea',
       'Eritrea', 'Ethiopia', 'India', 'Indonesia', 'Iran', 'Iraq', 'Kuwait',
       'Lesotho', 'Libya', 'Malaysia', 'Maldives', 'Marshall Islands',
       'Mauritania', 'Monaco', 'Myanmar', 'Nepal', 'Pakistan', 'Rwanda',
       'San Marino', 'Saudi Arabia', 'Somalia', 'Sri Lanka', 'Sudan',
       'Tajikistan', 'Uganda', 'Yemen'],
      dtype='object', name='country')

In [60]:
# Boolean mask via .loc: index labels of the entries whose value is null.
alcohol.loc[alcohol.isnull()].index

Index(['Afghanistan', 'Bangladesh', 'Bhutan', 'Burundi', 'North Korea',
       'Eritrea', 'Ethiopia', 'India', 'Indonesia', 'Iran', 'Iraq', 'Kuwait',
       'Lesotho', 'Libya', 'Malaysia', 'Maldives', 'Marshall Islands',
       'Mauritania', 'Monaco', 'Myanmar', 'Nepal', 'Pakistan', 'Rwanda',
       'San Marino', 'Saudi Arabia', 'Somalia', 'Sri Lanka', 'Sudan',
       'Tajikistan', 'Uganda', 'Yemen'],
      dtype='object', name='country')

In [62]:
# NOTE(review): despite its name, null_count holds the Series of null entries,
# not a number; the count itself is obtained from it with len().
null_count = alcohol.loc[alcohol.isnull()]

In [63]:
# Length of the null-only selection = number of missing values.
len(null_count)

31

In [64]:
# Summing the boolean mask counts its True entries, i.e. the nulls.
alcohol.isnull().sum()

31

In [68]:
# Why summing a mask works: True counts as 1 and False as 0.
sum([True, False, True]) 

2

In [69]:
# The mask itself: a bool Series aligned to the country index.
alcohol.isnull()

country
Afghanistan     True
Albania        False
Algeria        False
Andorra        False
Angola         False
               ...  
Venezuela      False
Vietnam        False
Yemen           True
Zambia         False
Zimbabwe       False
Name: wine_servings, Length: 193, dtype: bool

In [None]:
# Index labels of the entries whose value is null.
# NOTE(review): repeats an earlier cell verbatim and carries no execution count.
alcohol.loc[alcohol.isnull()].index

Index(['Afghanistan', 'Bangladesh', 'Bhutan', 'Burundi', 'North Korea',
       'Eritrea', 'Ethiopia', 'India', 'Indonesia', 'Iran', 'Iraq', 'Kuwait',
       'Lesotho', 'Libya', 'Malaysia', 'Maldives', 'Marshall Islands',
       'Mauritania', 'Monaco', 'Myanmar', 'Nepal', 'Pakistan', 'Rwanda',
       'San Marino', 'Saudi Arabia', 'Somalia', 'Sri Lanka', 'Sudan',
       'Tajikistan', 'Uganda', 'Yemen'],
      dtype='object', name='country')

In [70]:
all = alcohol.size

In [71]:
nonnulls = alcohol.count()

In [72]:
nulls = alcohol.isnull().sum()

In [73]:
all == nonnulls + nulls

True

### Another Approach to Nulls

In [74]:
import numpy as np

In [75]:
# ufunc -> universal function: a NumPy function that operates element-wise on arrays

In [76]:
# Referencing np.isnan without calling it shows that it is a ufunc object.
np.isnan

<ufunc 'isnan'>

In [79]:
# Mixed bool / None / int input is coerced to float; the None slot becomes NaN.
ser = pd.Series([True, False, None, 2], dtype=float)

In [80]:
# Element-wise NaN test; only the slot that was None (stored as NaN) is flagged.
np.isnan(ser)

0    False
1    False
2     True
3    False
dtype: bool

In [81]:
# Indexing with a callable: pandas applies the function to the Series and uses
# the boolean result as the mask, so this keeps only the NaN entries.
alcohol[np.isnan].size

31

### The Other Side: notnull() and notna()

In [84]:
# notnull() is the inverse mask; its sum is the number of non-null entries.
alcohol.notnull().sum()

162

In [83]:
# Keep only the entries that actually have a value.
alcohol[alcohol.notnull()]

country
Albania               54.0
Algeria               14.0
Andorra              312.0
Angola                45.0
Antigua & Barbuda     45.0
                     ...  
Vanuatu               11.0
Venezuela              3.0
Vietnam                1.0
Zambia                 4.0
Zimbabwe               4.0
Name: wine_servings, Length: 162, dtype: float64