# Sentinel and Missing Values in Python and Pandas
Python has several ways of representing missing values, which are illustrated in the cells below.

In [2]:
import pandas as pd
import numpy as np

### Negative zero

In [3]:
# Scaling positive zeros by -1.0 flips their sign, so the Series holds
# negative zeros, which pandas displays as -0.0.
sminus0 = pd.Series([0.0, 0.0]).mul(-1.0)
sminus0

0   -0.0
1   -0.0
dtype: float64

### Positive and negative infinity: `inf` and `-inf` in pandas


In [4]:
# Element-wise float division by zero in pandas does not raise; each result
# is an infinity carrying the sign of its numerator.
s1 = pd.Series([1.0, -1.0])
s0 = pd.Series([0.0, 0.0])
sRatio = s1.div(s0)
print(sRatio)

# Label 0 holds the 1.0/0.0 quotient.
print('\n datatype of 1.0/0.0 is ', type(sRatio.loc[0]))
print('\n numpy isinf() on 1.0/0.0 returns ', np.isinf(sRatio.loc[0]))

0    inf
1   -inf
dtype: float64

 datatype of 1.0/0.0 is  <class 'numpy.float64'>

 numpy isinf() on 1.0/0.0 returns  True


### Base Python  infinite value computations

In [5]:
# Plain Python floats raise on division by zero instead of returning inf.
# The exception is the point of this cell, so catch it and show it; an
# uncaught error would stop "Restart & Run All" at this cell.
try:
    1.0 / 0.0
except ZeroDivisionError as err:
    zero_division_report = type(err).__name__ + ': ' + str(err)
    print(zero_division_report)

ZeroDivisionError: float division by zero

### numpy inf

In [6]:
# Dividing numpy float64 scalars by zero produces inf (still a numpy.float64)
# rather than raising an exception.
f64One, f64zero = np.float64(1.0), np.float64(0.0)
rat64 = f64One / f64zero
print(
    'the result is ', rat64,
    ' having type ', type(rat64),
)

the result is  inf  having type  <class 'numpy.float64'>


  This is separate from the ipykernel package so we can avoid doing imports until


# Missing Values


### Numeric data

In [26]:
import math

# None, math.nan and np.nan are all reported as missing by pd.isnull.
print('python None is of type ', type(None),
      'pd.isnull?  ', pd.isnull(None))
print('math nan is of type ', type(math.nan),
      'pd.isnull?  ', pd.isnull(math.nan))
print('numpy nan is of type ', type(np.nan),
      'pd.isnull?  ', pd.isnull(np.nan))

# Infinity is a sentinel value, not a missing one: pd.isnull reports False.
print('\nnumpy inf is of type ', type(np.inf),
      'pd.isnull?  ', pd.isnull(np.inf))


python None is of type  <class 'NoneType'> pd.isnull?   True
math nan is of type  <class 'float'> pd.isnull?   True
numpy nan is of type  <class 'float'> pd.isnull?   True

numpy inf is of type  <class 'float'> pd.isnull?   False


### Beware of direct comparison
NaN values do not compare as you might expect them to. They are basically incomparable. In this example, comparing np.nan to itself returns False.
Instead, use the isna or isnull function. The isnull method is an alias of the isna method.

see
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.isnull.html 

or 

https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.isna.html

In [7]:
# NaN does not compare equal even to itself, so `==` cannot detect it;
# pd.isnull is the reliable check.
nan_equals_itself = np.nan == np.nan
print('np.nan == np.nan returns ', nan_equals_itself)
print('\npd.isnull(np.nan)returns ', pd.isnull(np.nan))

# Comparing the two function objects shows whether isnull and isna are the same.
same_function = pd.isnull == pd.isna
print(' \nAre pd.isnull() and pd.isna() equivalent?', same_function)



np.nan == np.nan returns  False

pd.isnull(np.nan)returns  True
 
Are pd.isnull() and pd.isna() equivalent? True


### Strings

In [40]:
letters = ['A', 'B', 'C', np.nan, 'd']

# With no dtype given, the missing entry is stored as an ordinary float NaN.
sObj = pd.Series(letters)
print('The default datatype for a Series made from a list is: ', sObj.dtype)
print('The datatype of np.nan as an object is ', type(sObj.loc[3]))

# Requesting the "string" dtype makes the same entry display as <NA>.
s = pd.Series(letters, dtype="string")
print('\nwhen forced to a string:\n')
print(s)
print('The datatype of np.nan as a string is ', type(s.loc[3]),
      '\n   is it na? ', pd.isnull(s.loc[3]))

The default datatype for a Series made from a list is:  object
The datatype of np.nan as an object is  <class 'float'>

when forced to a string:

0       A
1       B
2       C
3    <NA>
4       d
dtype: string
The datatype of np.nan as a string is  <class 'pandas._libs.missing.NAType'> 
   is it na?  True


### Date Times

In [41]:
# NaT is the missing-value marker pandas uses for datetimes; pd.isnull
# recognises it as missing.
nat_type = type(pd.NaT)
nat_is_null = pd.isnull(pd.NaT)
print('Pandas NaT is of type ', nat_type,
      '\n    pd.isnull?  ', nat_is_null)


Pandas NaT is of type  <class 'pandas._libs.tslibs.nattype.NaTType'> 
    pd.isnull?   True


### Categorical data
The categorical datatype in pandas is a structure with an underlying integer array of codes. Each code represents a category value. Think of this as the categorical structure having a dictionary of integer keys with which to look up the values. This can save memory when the values are large (like long strings). A code of -1 represents a missing value.

In [43]:
# Only 'b' and 'a' are declared as categories, so both np.nan and the
# undeclared value 'c' end up with the missing-value code -1.
cat = pd.Categorical(
    ['a', 'b', np.nan, 'a', 'b', 'c'],
    categories=['b', 'a'],
    ordered=True,
)
print(cat)
print('Codes for cat are ', cat.codes)

[a, b, NaN, a, b, NaN]
Categories (2, object): [b < a]
Codes for cat are  [ 1  0 -1  1  0 -1]


In [46]:
# A categorical Series can hold values of mixed Python types; each element
# read back keeps its own type, and the missing entry comes back as a float NaN.
c = pd.Series(["a", 22, "c", np.nan, "a"], dtype="category")
print(c)

print('\nThe datatype of the first value is ', type(c[0]))
# '\n' here replaces the original '\T', an invalid escape sequence that
# printed a literal backslash instead of starting a new line.
print('\nThe datatype of the second value is ', type(c[1]))

print('\nThe datatype of np.nan as a categorical is ', type(c[3]))


0      a
1     22
2      c
3    NaN
4      a
dtype: category
Categories (3, object): [22, a, c]

The datatype of the first value is  <class 'str'>
\The datatype of the second value is  <class 'int'>

The datatype of np.nan as a categorical is  <class 'float'>


In [52]:
# Taking the log of a negative number yields NaN for that element
# instead of raising an error.
s1 = pd.Series([1.0, -1.0])
rottenLog = np.log(s1)

# Label 1 is the negative input and its NaN result.
negative_input, negative_log = s1.loc[1], rottenLog.loc[1]
print(' log of ', negative_input, ' returns ', negative_log,
      '\n which is of type ', type(negative_log))

 log of  -1.0  returns  nan 
 which is of type  <class 'numpy.float64'>
