# Comparativa de Métodos de Imputación
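Este notebook compara el método de imputación basado en PSO (Particle Swarm Optimization) con métodos clásicos de imputación en Python (media, KNN e imputación iterativa tipo MICE), midiendo el RMSE y el MAE sobre los valores originalmente ausentes.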

In [None]:

import pandas as pd

# Load the three datasets used in the comparison.
# NOTE: the two 'ruta/a/...' entries are placeholders; replace them with the real file paths.
df_original = pd.read_csv('ruta/a/dataset_original.csv')
df_missing = pd.read_csv('ruta/a/dataset_con_missing.csv')
# The Windows path must be a raw string (r'...'): in a regular literal the "\U" of
# "C:\Users" opens a \UXXXXXXXX unicode escape, which is a SyntaxError, and "\t" in
# "\test_0_..." would silently become a TAB character.
df_pso = pd.read_csv(r'C:\Users\esthe\OneDrive\UPC\Subjects\TFG\TFG-2025-Imputacio-Dades-Absents\RESULTS\out2\out\dataset_out\test_0_rows_1000_columns_500_comp_0.5_input__p20_m30_c115_c215_w5_r5_i2_v20_imputed.csv')

# Show the first rows of the reference dataset as the cell output.
df_original.head()


In [None]:

from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

def _imputar(imputer):
    """Fit ``imputer`` on ``df_missing`` and return the completed table as a DataFrame.

    ``fit_transform`` returns a bare NumPy array, so the row index and column
    labels of ``df_missing`` are re-attached here. Keeping the index matters
    because the result is later compared against other frames using a boolean
    mask derived from ``df_missing``.

    Args:
        imputer: an unfitted scikit-learn imputer exposing ``fit_transform``.

    Returns:
        pandas.DataFrame with the same index and columns as ``df_missing``.
    """
    return pd.DataFrame(
        imputer.fit_transform(df_missing),
        index=df_missing.index,
        columns=df_missing.columns,
    )


# Mean imputation
mean_imputer = SimpleImputer(strategy='mean')
df_mean = _imputar(mean_imputer)

# KNN imputation (5 neighbours)
knn_imputer = KNNImputer(n_neighbors=5)
df_knn = _imputar(knn_imputer)

# MICE-style imputation (IterativeImputer); random_state is fixed for reproducibility
iterative_imputer = IterativeImputer(random_state=0)
df_iter = _imputar(iterative_imputer)


In [None]:

from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

def calcular_metricas(imputado, original, mascara_nan):
    """Return ``(rmse, mae)`` of the imputed values against the reference values.

    Only the cells flagged ``True`` in ``mascara_nan`` take part in the
    comparison. The three inputs are matched by position (row/column order),
    not by label.

    Indexing a DataFrame with a boolean DataFrame keeps the full shape and
    fills the unselected cells with NaN, which the scikit-learn metric
    functions reject. The masked cells are therefore extracted as flat NumPy
    vectors before the errors are computed.

    Args:
        imputado: 2-D table (DataFrame or array-like) with the imputed data.
        original: 2-D table of the same shape with the reference values.
        mascara_nan: boolean table of the same shape; ``True`` marks the
            cells to evaluate.

    Returns:
        Tuple ``(rmse, mae)`` of floats computed over the selected cells.

    Raises:
        ValueError: if the shapes differ, the mask selects no cell, or any
            selected cell is NaN in ``imputado`` or ``original``.
    """
    valores_imputados = np.asarray(imputado, dtype=float)
    valores_reales = np.asarray(original, dtype=float)
    seleccion = np.asarray(mascara_nan, dtype=bool)

    if not (valores_imputados.shape == valores_reales.shape == seleccion.shape):
        raise ValueError(
            "imputado, original and mascara_nan must have the same shape: "
            f"{valores_imputados.shape}, {valores_reales.shape}, {seleccion.shape}"
        )
    if not seleccion.any():
        raise ValueError("mascara_nan does not select any cell to evaluate")

    # Boolean indexing on the arrays yields 1-D vectors with only the masked cells.
    errores = valores_imputados[seleccion] - valores_reales[seleccion]
    if np.isnan(errores).any():
        # Fail loudly instead of returning NaN metrics.
        raise ValueError("NaN found among the compared values")

    rmse = float(np.sqrt(np.mean(errores ** 2)))
    mae = float(np.mean(np.abs(errores)))
    return rmse, mae

# Boolean frame that is True wherever df_missing holds a NaN, i.e. the cells
# that each method had to impute.
mask = df_missing.isna()

# Score every imputed dataset against the reference dataset on those cells.
rmse_pso, mae_pso = calcular_metricas(df_pso, df_original, mask)
rmse_mean, mae_mean = calcular_metricas(df_mean, df_original, mask)
rmse_knn, mae_knn = calcular_metricas(df_knn, df_original, mask)
rmse_iter, mae_iter = calcular_metricas(df_iter, df_original, mask)

# Report one line per method, with both metrics rounded to four decimals.
resumen = (
    ("PSO", rmse_pso, mae_pso),
    ("Mean", rmse_mean, mae_mean),
    ("KNN", rmse_knn, mae_knn),
    ("Iterative (MICE)", rmse_iter, mae_iter),
)
for etiqueta, rmse_metodo, mae_metodo in resumen:
    print(f"{etiqueta} -> RMSE: {rmse_metodo:.4f}, MAE: {mae_metodo:.4f}")


In [None]:

import matplotlib.pyplot as plt

# Bar chart with one bar per imputation method; the bar height is that method's RMSE.
metodos = ['PSO', 'Mean', 'KNN', 'Iterative']
rmse_vals = [rmse_pso, rmse_mean, rmse_knn, rmse_iter]

# Explicit figure/axes objects instead of the implicit pyplot state machine.
fig, ax = plt.subplots()
ax.bar(metodos, rmse_vals)
ax.set_title('Comparativa RMSE por método de imputación')
ax.set_ylabel('RMSE')
plt.show()
