# Imputacion Multiple (MICE)

## Preparacion del entorno

### Librerias

In [1]:
import janitor  # limpiar datos
import matplotlib.pyplot as plt # visualizacion de datos
import missingno    # exploracion de valores faltantes
import nhanes.load  # libreria con los datos a analizar
import numpy as np  # calculos numericos
import pandas as pd # trabajar con dataframe
import scipy.stats  # trabjar con estadisticos
import seaborn as sns   #   visualizacion estadistica
# import session_info
# sklearn contiene los modelos a utilizar
import sklearn.compose
import sklearn.impute
import sklearn.preprocessing
import statsmodels.api as sm
import statsmodels.datasets
import statsmodels.formula.api as smf

from sklearn.ensemble import RandomForestRegressor
from sklearn.experimental import enable_iterative_imputer
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import BayesianRidge, Ridge
from sklearn.neighbors import KNeighborsRegressor
from statsmodels.graphics.mosaicplot import mosaic

### Configuracion de las Graficas

In [2]:
%matplotlib inline

# tamaño de los graficos
sns.set(
    rc={
        'figure.figsize':(8, 6)
    }
)

# estilo de los graficos
sns.set_style('whitegrid')

### Importar funciones personalizadas

In [3]:
%run '../utils2.ipynb'

## Cargar datos preparados

In [4]:
%run tratamiento_v_categoricas.ipynb

12.25 %


In [5]:
nhanes_transformed_df

Unnamed: 0_level_0,cigarettes,sugar_gm,minutes_sedentary,vigorous_work,moderate_work,vigorous_recreational,moderate_recreational,weight,diabetes,age
SEQN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
93705.0,1.0,43.04,300.0,0.0,0.0,0.0,1.0,165.0,0.0,66.0
93706.0,,,240.0,0.0,0.0,0.0,1.0,145.0,0.0,18.0
93709.0,0.0,,600.0,0.0,1.0,0.0,0.0,200.0,0.0,75.0
93711.0,,163.76,420.0,0.0,0.0,1.0,1.0,142.0,0.0,56.0
93712.0,2.0,207.67,120.0,1.0,1.0,1.0,1.0,135.0,0.0,18.0
...,...,...,...,...,...,...,...,...,...,...
102948.0,1.0,,120.0,0.0,0.0,1.0,0.0,118.0,0.0,31.0
102949.0,0.0,,60.0,1.0,1.0,0.0,0.0,180.0,0.0,33.0
102953.0,1.0,,360.0,1.0,1.0,0.0,0.0,218.0,0.0,42.0
102954.0,,87.58,600.0,0.0,0.0,0.0,1.0,150.0,0.0,41.0


## Imputacion MICE

### Copia de los datos

In [6]:
nhanes_mice_df = nhanes_transformed_df.copy(deep=True)

### Imputador con MICE

In [7]:
# crea copias del dataframe, las analiza y devuelve un solo conjunto de datos
# MICE en python es conocido como Iterative Imputer

mice_imputer = sklearn.impute.IterativeImputer(
    estimator = BayesianRidge(), # metodo bayesiano para estimar los missing
    initial_strategy='mean',
    imputation_order='ascending'
)

mice_imputer

### Ajuste, transformacion y sustitucion de los datos imputados

In [9]:

nhanes_mice_df.iloc[:, :] = mice_imputer.fit_transform(nhanes_transformed_df).round()
nhanes_mice_df

Unnamed: 0_level_0,cigarettes,sugar_gm,minutes_sedentary,vigorous_work,moderate_work,vigorous_recreational,moderate_recreational,weight,diabetes,age
SEQN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
93705.0,1.0,43.0,300.0,0.0,0.0,0.0,1.0,165.0,0.0,66.0
93706.0,1.0,86.0,240.0,0.0,0.0,0.0,1.0,145.0,0.0,18.0
93709.0,0.0,119.0,600.0,0.0,1.0,0.0,0.0,200.0,0.0,75.0
93711.0,1.0,164.0,420.0,0.0,0.0,1.0,1.0,142.0,0.0,56.0
93712.0,2.0,208.0,120.0,1.0,1.0,1.0,1.0,135.0,0.0,18.0
...,...,...,...,...,...,...,...,...,...,...
102948.0,1.0,84.0,120.0,0.0,0.0,1.0,0.0,118.0,0.0,31.0
102949.0,0.0,128.0,60.0,1.0,1.0,0.0,0.0,180.0,0.0,33.0
102953.0,1.0,109.0,360.0,1.0,1.0,0.0,0.0,218.0,0.0,42.0
102954.0,1.0,88.0,600.0,0.0,0.0,0.0,1.0,150.0,0.0,41.0
