In [1]:
import numpy as np
import pandas as pd

### in Pandas invalid or missing values are assigned NaN

In [2]:
# Five-element Series in which positions 1 and 4 are missing (NaN).
obj = pd.Series(
    data=[1, np.nan, 3, 7, np.nan],
)
obj

0    1.0
1    NaN
2    3.0
3    7.0
4    NaN
dtype: float64

### using dropna to discard NaN values

In [3]:
# dropna() returns only the entries that are not NaN; the surviving entries
# keep their original index labels (0, 2 and 3 here).
obj.dropna()

0    1.0
2    3.0
3    7.0
dtype: float64

In [4]:
# Boolean-mask equivalent of dropna(): keep only the entries that are not NaN.
obj.loc[obj.notna()]

0    1.0
2    3.0
3    7.0
dtype: float64

In [7]:
# 3x3 frame with one NaN per row: column 1 is missing in the first two rows,
# column 2 in the last one.
data = pd.DataFrame(
    data=[
        [1, np.nan, 3],
        [4, np.nan, 6],
        [7, 8, np.nan],
    ],
)
data

Unnamed: 0,0,1,2
0,1,,3.0
1,4,,6.0
2,7,8.0,


### to drop every row that contains at least one NaN (dropna works on rows by default; columns are not dropped)

In [9]:
# Defaults spelled out: work along rows and drop a row as soon as it holds any NaN.
# Every row of data has one NaN, so nothing is left.
cleaned = data.dropna(axis=0, how='any')
cleaned

Unnamed: 0,0,1,2


In [12]:
# how='all' drops a row only when every value in it is NaN; no row of data
# qualifies, so all three rows come back.
data.dropna(how='all')

Unnamed: 0,0,1,2
0,1,,3.0
1,4,,6.0
2,7,8.0,


In [13]:
# how='any' drops a row as soon as it contains a single NaN; every row of data
# has one, so the result is empty.
data.dropna(how='any')

Unnamed: 0,0,1,2


In [14]:
# The dropna() calls above returned new frames; data itself still has all its rows.
data

Unnamed: 0,0,1,2
0,1,,3.0
1,4,,6.0
2,7,8.0,


### filling the missing data values with a given value

In [16]:
# Replace every NaN with 0 in the returned frame.
data.fillna(value=0)

Unnamed: 0,0,1,2
0,1,0.0,3.0
1,4,0.0,6.0
2,7,8.0,0.0


### dropping duplicates

In [19]:
# Two integer key columns; rows 0/1 and rows 2/3 are exact duplicates of each other.
data = pd.DataFrame(
    data={
        'k1': [1, 1, 2, 2, 3, 3, 4, 5, 6],
        'k2': [1, 1, 2, 2, 2, 3, 4, 5, 9],
    },
)
data

Unnamed: 0,k1,k2
0,1,1
1,1,1
2,2,2
3,2,2
4,3,2
5,3,3
6,4,4
7,5,5
8,6,9


In [20]:
# Keep the first occurrence of each distinct (k1, k2) row and drop later repeats.
data.drop_duplicates(keep='first')

Unnamed: 0,k1,k2
0,1,1
2,2,2
4,3,2
5,3,3
6,4,4
7,5,5
8,6,9


In [21]:
# Boolean Series, one flag per row: True marks a row that repeats an earlier row
# (rows 1 and 3 here).
data.duplicated()

0    False
1     True
2    False
3     True
4    False
5    False
6    False
7    False
8    False
dtype: bool

### adding an extra column using `map`

In [22]:
# Food table; note that 'Bacon' and 'Pork' appear capitalized in two of the rows.
data = pd.DataFrame(
    data={
        'food': [
            'bacon', 'pork', 'bacon', 'postrami', 'beef',
            'Bacon', 'Pork', 'ham', 'lox',
        ],
        'ounces': [4, 3, 12, 6, 8, 9, 12, 10, 3],
    },
)

In [23]:
# Show the food table as built: only the 'food' and 'ounces' columns so far.
data

Unnamed: 0,food,ounces
0,bacon,4
1,pork,3
2,bacon,12
3,postrami,6
4,beef,8
5,Bacon,9
6,Pork,12
7,ham,10
8,lox,3


In [24]:
# Lookup table from a (lowercase) food name to the animal it comes from.
meat_to_animal = dict([
    ('bacon', 'pig'),
    ('pork', 'pig'),
    ('postrami', 'cow'),
    ('beef', 'cow'),
    ('ham', 'pig'),
    ('lox', 'salmon'),
])

### some foods are capitalized, so let's lowercase them first

In [25]:
# Lowercase every food name so that 'Bacon' and 'Pork' match the lowercase
# keys of meat_to_animal.
lowercased = data.loc[:, 'food'].str.lower()
lowercased

0       bacon
1        pork
2       bacon
3    postrami
4        beef
5       bacon
6        pork
7         ham
8         lox
Name: food, dtype: object

In [26]:
# map() with a dict replaces each food name by the value stored under that key;
# the resulting Series is stored in data as a new 'animal' column.
data['animal'] = lowercased.map(meat_to_animal)

In [27]:
# data now carries the derived 'animal' column next to 'food' and 'ounces'.
data

Unnamed: 0,food,ounces,animal
0,bacon,4,pig
1,pork,3,pig
2,bacon,12,pig
3,postrami,6,cow
4,beef,8,cow
5,Bacon,9,pig
6,Pork,12,pig
7,ham,10,pig
8,lox,3,salmon


In [28]:
# Same mapping done in a single step: the lambda lowercases each food name and
# looks it up in meat_to_animal (a name missing from the dict raises KeyError).
# Series.map returns a new Series and does not modify data, so the result is
# assigned to the 'animal' column; without the assignment it would be discarded.
data['animal'] = data['food'].map(lambda x: meat_to_animal[x.lower()])
data

Unnamed: 0,food,ounces,animal
0,bacon,4,pig
1,pork,3,pig
2,bacon,12,pig
3,postrami,6,cow
4,beef,8,cow
5,Bacon,9,pig
6,Pork,12,pig
7,ham,10,pig
8,lox,3,salmon


In [44]:
# Rebuild the 3x3 NaN example frame (data was rebound to the food table earlier)
# for the replace() examples.
data = pd.DataFrame(
    data=[
        [1, np.nan, 3],
        [4, np.nan, 6],
        [7, 8, np.nan],
    ],
)
data

Unnamed: 0,0,1,2
0,1,,3.0
1,4,,6.0
2,7,8.0,


In [45]:
# Swap every NaN for the sentinel value -999 in a new frame.
data1 = data.replace(to_replace=np.nan, value=-999)
data1

Unnamed: 0,0,1,2
0,1,-999.0,3.0
1,4,-999.0,6.0
2,7,8.0,-999.0


In [42]:
# Small mixed-type frame: two integer columns (0-4 and 5-9) and one letter column.
df = pd.DataFrame(
    {
        'A': list(range(5)),
        'B': list(range(5, 10)),
        'C': list('abcde'),
    }
)
df

Unnamed: 0,A,B,C
0,0,5,a
1,1,6,b
2,2,7,c
3,3,8,d
4,4,9,e


In [46]:
# Replace the value 5 with NaN; it occurs only in column B, which is shown as
# float in the result because it now holds a NaN.
df.replace(to_replace=5, value=np.nan)

Unnamed: 0,A,B,C
0,0,,a
1,1,6.0,b
2,2,7.0,c
3,3,8.0,d
4,4,9.0,e
