# 2.3.1	Handling missing or corrupted data example
## Check for the Missing Data
The first task is to check for missing data. Data can go missing for all sorts of reasons, but the result is usually the same: a corrupted or unusable model. Detecting missing values (and, in this example, infinite values, which are just as unusable) is therefore an essential first check.

In [1]:
import pandas as pd
import numpy as np

# Sample data containing every kind of "bad" value this example handles:
# NaN, None (which pandas stores as NaN in a float Series) and +/- infinity.
s = pd.Series([1, 2, 3, np.nan, 5, 6, None, np.inf, -np.inf])

# s.isnull() on its own flags only NaN/None, so the infinities are tested
# separately and the two masks are combined.
is_bad = s.isnull() | np.isinf(s)
print(is_bad)

print()
# Boolean indexing keeps just the flagged entries.
print(s[is_bad])

0    False
1    False
2    False
3     True
4    False
5    False
6     True
7     True
8     True
dtype: bool

3    NaN
6    NaN
7    inf
8   -inf
dtype: float64


## Replace the missing values.

In [2]:
# Convert +/- infinity to NaN so that every unusable value shares one marker.
replace = s.replace([np.inf, -np.inf], np.nan)

# The raw Series still holds inf and -inf, which cancel to nan in the mean ...
print(s.mean())
# ... whereas mean() skips NaN, so the cleaned Series yields a usable value.
print(replace.mean())

nan
3.4


In [3]:
# Compute the mean of the valid entries first (NaN is ignored by mean()),
# then use it as the substitute for every NaN in the Series.
mean_value = replace.mean()
replace = replace.fillna(mean_value)
print(replace)

0    1.0
1    2.0
2    3.0
3    3.4
4    5.0
5    6.0
6    3.4
7    3.4
8    3.4
dtype: float64


## Drop the missing values.

In [4]:
# Step 1: turn +/- infinity into NaN so dropna() can remove them as well.
without_inf = s.replace([np.inf, -np.inf], np.nan)

# Step 2: discard every NaN entry; the surviving rows keep their original index.
dropped = without_inf.dropna()
print(dropped)

0    1.0
1    2.0
2    3.0
4    5.0
5    6.0
dtype: float64


## Use an Imputer

In [5]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

# Rebuild the sample Series so this cell can run on its own, then turn the
# infinities into NaN so that the imputer sees a single missing-value marker.
s = pd.Series([1, 2, 3, np.nan, 5, 6, None, np.inf, -np.inf])
s = s.replace([np.inf, -np.inf], np.nan)

imp = SimpleImputer(missing_values=np.nan, strategy='mean')

# The imputer is fitted on ONE row of nine columns, so the "mean" it learns for
# each position is simply the reference value at that position (1..9). This is
# why the gaps below are filled with 4, 7, 8 and 9 rather than the Series mean.
imp.fit([[1, 2, 3, 4, 5, 6, 7, 8, 9]])

# Wrap the Series in a list to present it as one row with the same nine
# columns; each NaN is replaced by the fitted value for its position, and the
# single transformed row is converted back to a Series.
s = pd.Series(imp.transform([s])[0])

print(s)

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
6    7.0
7    8.0
8    9.0
dtype: float64
