## Types:
1) Drop
2) Fill with mean
3) Simple Interpolation
4) Linear Regression
5) Multiple Interpolation
6) Multiple Imputation

In [28]:
# Basic Imports 
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
data = load_iris()


In [29]:
df = pd.DataFrame(data.data, columns=data.feature_names)
df1 = df

#### Drop

In [3]:
df1.iloc[0:123:4,] = np.nan
df1.isnull().sum()

sepal length (cm)    31
sepal width (cm)     31
petal length (cm)    31
petal width (cm)     31
dtype: int64

In [5]:
df1.dropna(inplace=True)
df1.isnull().sum()

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
dtype: int64

#### Fill with mean

In [6]:
df2 = df
df2.iloc[0:132,2]=np.nan
df2.isnull().sum()

sepal length (cm)      0
sepal width (cm)       0
petal length (cm)    119
petal width (cm)       0
dtype: int64

In [8]:
df2.fillna(df.mean,inplace=True)
df2.isnull().sum()

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
dtype: int64

#### Simple Interpolation

In [9]:
df3 = df
df3.iloc[0:123:3,0] = np.nan
df3.isnull().sum()

sepal length (cm)    40
sepal width (cm)      0
petal length (cm)     0
petal width (cm)      0
dtype: int64

In [12]:
df3 =df3.interpolate(method='linear')
df3.isnull().sum()

sepal length (cm)    1
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
dtype: int64

#### Linear Regression

In [39]:
df4 = df
df4.iloc[1:123:2,0] = np.nan
df4.isnull().sum()

sepal length (cm)    61
sepal width (cm)      0
petal length (cm)     0
petal width (cm)      0
dtype: int64

In [40]:
df_miss = df4[df4.isna().any(axis=1)]
df_nomis = df4.dropna()

In [41]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
x_train = df_nomis.drop(columns = ['sepal length (cm)'])
y_train = df_nomis['sepal length (cm)']
x_test = df_miss.drop(columns = ['sepal length (cm)'])
regressor.fit(x_train,y_train)

In [43]:
df_miss['sepal length (cm)'] = regressor.predict(x_test)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_miss['sepal length (cm)'] = regressor.predict(x_test)


In [44]:
df_final = pd.concat([df_nomis, df_miss], axis = 0)
df_final

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.100000,3.5,1.4,0.2
2,4.700000,3.2,1.3,0.2
4,5.000000,3.6,1.4,0.2
6,4.600000,3.4,1.4,0.3
8,4.400000,2.9,1.4,0.2
...,...,...,...,...
113,5.924947,2.5,5.0,2.0
115,6.422180,3.2,5.3,2.3
117,7.920133,3.8,6.7,2.2
119,6.035216,2.2,5.0,1.5


#### Multiple Interpolation

In [49]:
df5 = df
df5.iloc[1:100:3,0] = np.nan
df5.iloc[0:50:2,1]  = np.nan
df5.isnull().sum()

sepal length (cm)    33
sepal width (cm)     25
petal length (cm)     0
petal width (cm)      0
dtype: int64

In [50]:
df5['sepal length (cm)']=df5['sepal length (cm)'].interpolate(method='linear')
df5['sepal width (cm)']=df['sepal width (cm)'].interpolate(method = 'quadratic')

In [51]:
df5.isnull().sum()

sepal length (cm)    0
sepal width (cm)     1
petal length (cm)    0
petal width (cm)     0
dtype: int64

#### Multiple Imputation

In [52]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
imputer = IterativeImputer()
df6 = df
df6.iloc[1:100:2,1] = np.nan
df6.isnull().sum()

sepal length (cm)     0
sepal width (cm)     51
petal length (cm)     0
petal width (cm)      0
dtype: int64

In [53]:
df6 = imputer.fit_transform(df6)

In [56]:
df6 = pd.DataFrame(df6, columns=data.feature_names)
df6

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.100,3.270360,1.4,0.2
1,4.900,3.230658,1.4,0.2
2,4.700,2.914542,1.3,0.2
3,4.675,3.164106,1.5,0.2
4,4.650,3.556373,1.4,0.2
...,...,...,...,...
145,6.700,3.000000,5.2,2.3
146,6.300,2.500000,5.0,1.9
147,6.500,3.000000,5.2,2.0
148,6.200,3.400000,5.4,2.3


In [57]:
df6.isnull().sum()

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
dtype: int64