In [None]:
import numpy as np
import pandas as pd

# Seed NumPy's global RNG. Nothing in this cell draws random numbers, so the
# seed only matters for np.random calls made further down the notebook.
np.random.seed(42)

# Toy dataset: seven records, three numeric columns, and exactly two missing
# entries (np.nan) in each column so every imputation strategy has work to do.
data = dict(
    idade=[25, 30, np.nan, 40, 35, np.nan, 50],
    salario=[3000, 4000, 5000, np.nan, 6000, 7000, np.nan],
    experiencia=[1, 5, 3, np.nan, 8, 10, np.nan],
)

# Columns come from the dict keys; the NaNs force every column to float64.
df = pd.DataFrame.from_dict(data)
df.head()

Unnamed: 0,idade,salario,experiencia
0,25.0,3000.0,1.0
1,30.0,4000.0,5.0
2,,5000.0,3.0
3,40.0,,
4,35.0,6000.0,8.0


## Utilizando Regressão
- para preencher valores nulos na coluna idade

In [2]:
from sklearn.linear_model import LinearRegression

# Regression-based imputation of "idade": fit a linear model on the fully
# observed rows and use it to predict the missing ages.
# Work on a copy so the original frame stays untouched for the other methods.
df_reg = df.copy()

target = "idade"
features = df_reg.columns.drop(target)

# Training set: only rows where every column is observed (computed once).
complete_rows = df_reg.dropna()
X_train = complete_rows[features]
y_train = complete_rows[target]

# Rows we can actually fill: the target is missing AND every predictor is
# present. LinearRegression.predict rejects NaN inputs, so a row that is also
# missing a predictor has to be left as NaN instead of crashing the cell.
fillable = df_reg[target].isna() & df_reg[features].notna().all(axis=1)
X_test = df_reg.loc[fillable, features]

regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Skip the prediction when there is nothing to fill; scikit-learn raises on
# an input with zero samples.
if not X_test.empty:
    df_reg.loc[fillable, target] = regressor.predict(X_test)

# Only "idade" is imputed here; NaNs in the other columns are left as they are.
df_reg.head()

Unnamed: 0,idade,salario,experiencia
0,25.0,3000.0,1.0
1,30.0,4000.0,5.0
2,29.0,5000.0,3.0
3,40.0,,
4,35.0,6000.0,8.0


## Utilizando KNN

In [3]:
from sklearn.impute import KNNImputer

# KNN imputation: each missing cell is replaced by the mean of that column
# over the 3 nearest rows that have the value observed.
# NOTE(review): distances are computed on the raw columns, so "salario"
# (thousands) outweighs "idade" and "experiencia" whenever it is present;
# consider scaling the features first if that is not intended.
knn_imputer = KNNImputer(n_neighbors=3)

# fit_transform returns a bare NumPy array, so restore both the column labels
# and the original row index; otherwise a non-default index on df would be
# silently replaced by 0..n-1 and the rows would no longer align with df.
df_knn = pd.DataFrame(
    knn_imputer.fit_transform(df),
    columns=df.columns,
    index=df.index,
)

df_knn.head()

Unnamed: 0,idade,salario,experiencia
0,25.0,3000.0,1.0
1,30.0,4000.0,5.0
2,30.0,5000.0,3.0
3,40.0,4333.333333,4.666667
4,35.0,6000.0,8.0


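## Imputação Iterativa (inspirada no MICE - Multiple Imputation by Chained Equations)
- com os parâmetros padrão, o `IterativeImputer` devolve uma única imputação por valor ausente (não múltiplas)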

In [4]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# Iterative (chained-equations) imputation: each column with missing values is
# modelled from the other columns, cycling for at most 10 rounds.
# With the default sample_posterior=False this yields ONE imputed value per
# missing cell; for true multiple imputation, rerun with sample_posterior=True
# and different random_state values and pool the results.
iter_imputer = IterativeImputer(max_iter=10, random_state=42)

# fit_transform returns a bare NumPy array, so restore both the column labels
# and the original row index to keep the result aligned with df.
df_iterative = pd.DataFrame(
    iter_imputer.fit_transform(df),
    columns=df.columns,
    index=df.index,
)

df_iterative.head()



Unnamed: 0,idade,salario,experiencia
0,25.0,3000.0,1.0
1,30.0,4000.0,5.0
2,32.456618,5000.0,3.0
3,40.0,7165.445611,9.790104
4,35.0,6000.0,8.0
