# Missing Values

In [None]:
# Basic syntax: Series.fillna(value) returns a copy in which every NaN is
# replaced by `value`. A fill value is required -- calling fillna() with no
# argument raises an error -- so this cell uses a tiny self-contained frame
# that runs on a fresh kernel.
import pandas as pd

fillna_demo = pd.DataFrame({'var': [1.0, None, 3.0]})
fillna_demo['var'].fillna(0)

> ## Library

In [2]:
import pandas as pd
import numpy as np

In [5]:
# Toy frame for the demos: x1 and x2 are numeric with one NaN each, x3 is a
# complete numeric column, and x4-x6 are categorical with NaN gaps.
sample_columns = {
    'x1': [4, 6, 7, np.nan, 2, 1],
    'x2': [1, 4, 6, 8, np.nan, 1],
    'x3': [11, 12, 13, 16, 18, 20],
    'x4': ['A', 'A', np.nan, 'C', 'C', 'D'],
    'x5': ['X', 'X', 'Y', np.nan, 'X', 'Y'],
    'x6': ['M', 'M', np.nan, np.nan, 'N', 'N'],
}
df = pd.DataFrame(data=sample_columns)

df

Unnamed: 0,x1,x2,x3,x4,x5,x6
0,4.0,1.0,11,A,X,M
1,6.0,4.0,12,A,X,M
2,7.0,6.0,13,,Y,
3,,8.0,16,C,,
4,2.0,,18,C,X,N
5,1.0,1.0,20,D,Y,N


> ## Simple Imputer: Mean or Median 

For numeric variables.

In [6]:
from sklearn.impute import SimpleImputer

In [13]:
# Fill the NaN entries of the numeric columns with each column's mean
# (use strategy='median' to fill with the median instead).
numeric_cols = ['x1', 'x2']
simple_impute = SimpleImputer(strategy='mean')
mean_filled = simple_impute.fit_transform(df[numeric_cols])
df[numeric_cols] = mean_filled
df

Unnamed: 0,x1,x2,x3,x4,x5,x6
0,4.0,1.0,11,A,X,M
1,6.0,4.0,12,A,X,M
2,7.0,6.0,13,,Y,
3,4.0,8.0,16,C,,
4,2.0,4.0,18,C,X,N
5,1.0,1.0,20,D,Y,N


> ## Simple Imputer: Mode

For categorical variables.

In [15]:
# strategy='most_frequent' fills each column with its mode. x4 has a tie
# between 'A' and 'C'; the output below shows 'A' being used.
categorical_cols = ['x4', 'x5']
simple_imputer = SimpleImputer(strategy='most_frequent')
mode_filled = simple_imputer.fit_transform(df[categorical_cols])
df[categorical_cols] = mode_filled
df

Unnamed: 0,x1,x2,x3,x4,x5,x6
0,4.0,1.0,11,A,X,M
1,6.0,4.0,12,A,X,M
2,7.0,6.0,13,A,Y,
3,4.0,8.0,16,C,X,
4,2.0,4.0,18,C,X,N
5,1.0,1.0,20,D,Y,N


> ## Simple Imputer: Constant

For categorical and numerical variables.

In [16]:
# Rebuild the toy frame so this section starts again from the original
# missing values rather than from an already-imputed frame.
sample_columns = {
    'x1': [4, 6, 7, np.nan, 2, 1],
    'x2': [1, 4, 6, 8, np.nan, 1],
    'x3': [11, 12, 13, 16, 18, 20],
    'x4': ['A', 'A', np.nan, 'C', 'C', 'D'],
    'x5': ['X', 'X', 'Y', np.nan, 'X', 'Y'],
    'x6': ['M', 'M', np.nan, np.nan, 'N', 'N'],
}
df = pd.DataFrame(data=sample_columns)

In [18]:
# strategy='constant' replaces every NaN in the selected columns with the
# given fill_value (here the number 5).
numeric_cols = ['x1', 'x2']
simple_impute = SimpleImputer(strategy='constant', fill_value=5)
constant_filled = simple_impute.fit_transform(df[numeric_cols])
df[numeric_cols] = constant_filled
df

Unnamed: 0,x1,x2,x3,x4,x5,x6
0,4.0,1.0,11,A,X,M
1,6.0,4.0,12,A,X,M
2,7.0,6.0,13,,Y,
3,5.0,8.0,16,C,,
4,2.0,5.0,18,C,X,N
5,1.0,1.0,20,D,Y,N


In [20]:
# Same constant strategy for the categorical columns: every NaN becomes the
# placeholder label 'P'.
categorical_cols = ['x4', 'x5', 'x6']
simple_impute = SimpleImputer(strategy='constant', fill_value='P')
label_filled = simple_impute.fit_transform(df[categorical_cols])
df[categorical_cols] = label_filled
df

Unnamed: 0,x1,x2,x3,x4,x5,x6
0,4.0,1.0,11,A,X,M
1,6.0,4.0,12,A,X,M
2,7.0,6.0,13,P,Y,P
3,5.0,8.0,16,C,P,P
4,2.0,5.0,18,C,X,N
5,1.0,1.0,20,D,Y,N


> ## Iterative Imputer

1. For numerical variables
2. Prediction model: linear (scikit-learn's default estimator is `BayesianRidge`)

In [22]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [23]:
# Rebuild the toy frame so the iterative imputer sees the original missing
# values rather than an already-imputed frame.
sample_columns = {
    'x1': [4, 6, 7, np.nan, 2, 1],
    'x2': [1, 4, 6, 8, np.nan, 1],
    'x3': [11, 12, 13, 16, 18, 20],
    'x4': ['A', 'A', np.nan, 'C', 'C', 'D'],
    'x5': ['X', 'X', 'Y', np.nan, 'X', 'Y'],
    'x6': ['M', 'M', np.nan, np.nan, 'N', 'N'],
}
df = pd.DataFrame(data=sample_columns)

In [28]:
# Fit the iterative imputer on all three numeric columns. x3 has no missing
# values but is passed in alongside x1 and x2; note that the assignment
# below turns x3 into floats (11 -> 11.0 in the output).
numeric_cols = ['x1', 'x2', 'x3']
iter_imputer = IterativeImputer()

iterative_filled = iter_imputer.fit_transform(df[numeric_cols])
df[numeric_cols] = iterative_filled
df

Unnamed: 0,x1,x2,x3,x4,x5,x6
0,4.0,1.0,11.0,A,X,M
1,6.0,4.0,12.0,A,X,M
2,7.0,6.0,13.0,,Y,
3,7.512603,8.0,16.0,C,,
4,2.0,1.44481,18.0,C,X,N
5,1.0,1.0,20.0,D,Y,N


> ## KNN Imputer

1. For numerical variables
2. Predicts missing values using a k-nearest-neighbours (KNN) model

In [29]:
from sklearn.impute import KNNImputer

In [30]:
# Rebuild the toy frame so the KNN imputer sees the original missing values
# rather than an already-imputed frame.
sample_columns = {
    'x1': [4, 6, 7, np.nan, 2, 1],
    'x2': [1, 4, 6, 8, np.nan, 1],
    'x3': [11, 12, 13, 16, 18, 20],
    'x4': ['A', 'A', np.nan, 'C', 'C', 'D'],
    'x5': ['X', 'X', 'Y', np.nan, 'X', 'Y'],
    'x6': ['M', 'M', np.nan, np.nan, 'N', 'N'],
}
df = pd.DataFrame(data=sample_columns)

In [34]:
# Impute the numeric columns from their nearest neighbours.
# NOTE(review): KNNImputer's default is n_neighbors=5 and each column here
# has only five observed values, so the imputed values in the output (4.0)
# coincide with the column means; pass a smaller n_neighbors to see a
# genuinely neighbour-based estimate -- confirm against the sklearn docs.
numeric_cols = ['x1', 'x2', 'x3']
knn_impute = KNNImputer()
knn_filled = knn_impute.fit_transform(df[numeric_cols])
df[numeric_cols] = knn_filled
df

Unnamed: 0,x1,x2,x3,x4,x5,x6
0,4.0,1.0,11.0,A,X,M
1,6.0,4.0,12.0,A,X,M
2,7.0,6.0,13.0,,Y,
3,4.0,8.0,16.0,C,,
4,2.0,4.0,18.0,C,X,N
5,1.0,1.0,20.0,D,Y,N
