In [20]:
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from autoimpute.imputations import MiceImputer

# 1. Carregar dados
file_path = 'data/glass.data'
columns = ['Id','RI','Na','Mg','Al','Si','K','Ca','Ba','Fe','Type_of_glass']
df = pd.read_csv(file_path, header=None, names=columns)
df.drop(columns=['Id'], inplace=True)

# 2. Backup original
df_original = df.copy()

# 3. Criar missing na coluna alvo (20%)
random.seed(42)
missing_indices = df.sample(frac=0.2, random_state=42).index
df.loc[missing_indices, 'Type_of_glass'] = np.nan

# 4. Encoding com NaN preservado
le = LabelEncoder()
le.fit(df_original['Type_of_glass'])

def encode_with_nan(series, encoder):
    encoded = pd.Series(data=np.nan, index=series.index)
    not_null = series.notnull()
    encoded.loc[not_null] = encoder.transform(series.loc[not_null])
    return encoded

df['Type_encoded'] = encode_with_nan(df['Type_of_glass'], le)
df['Type_encoded'] = df['Type_encoded'].astype("object")  # Para o autoimpute

# 5. Configurar imputador MICE do autoimpute
mice = MiceImputer(
    n=1,
    strategy={"Type_encoded": "categorical"},
    predictors="all"
)

# 6. Rodar imputação e capturar output
df_imp_gen = mice.fit_transform(df.drop(columns=['Type_of_glass']))
print("Tipo do resultado de fit_transform:", type(df_imp_gen))

# 7. Converter generator em lista e pegar primeiro DataFrame
df_imp_list = list(df_imp_gen)
df_imp = df_imp_list[0]

# 8. Atualizar coluna imputada
df['Type_encoded'] = df_imp['Type_encoded']

# 9. Reverter encoding para valores originais
df['Type_of_glass_imputed'] = le.inverse_transform(df['Type_encoded'].astype(int))

# 10. Avaliar acurácia nas linhas que tinham missing
valores_reais = df_original.loc[missing_indices, 'Type_of_glass']
valores_previstos = df.loc[missing_indices, 'Type_of_glass_imputed']

acc = accuracy_score(valores_reais, valores_previstos)
print(f"Acurácia com MICE autoimpute: {acc:.4f}")


Tipo do resultado de fit_transform: <class 'generator'>


TypeError: tuple indices must be integers or slices, not str