In [13]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.impute import KNNImputer
from fancyimpute import SoftImpute, IterativeSVD
from statsmodels.tsa.seasonal import seasonal_decompose
import warnings

warnings.filterwarnings("ignore")

# Funções auxiliares simplificadas
def ensure_datetime_index(df):
    if not isinstance(df.index, pd.DatetimeIndex):
        try:
            df.index = pd.to_datetime(df.index)
        except Exception:
            first_col = df.columns[0]
            df[first_col] = pd.to_datetime(df[first_col], errors="coerce")
            df = df.set_index(first_col)
    return df.sort_index()

def introduce_missing_data(df, missing_rate, seed=42):
    rng = np.random.default_rng(seed)
    df_missing = df.copy()
    mask = rng.random(len(df_missing)) < missing_rate
    df_missing.loc[mask, "throughput"] = np.nan
    return df_missing

def robust_seasonal_decomposition(series, period=24):
    try:
        filled = series.interpolate(method="time", limit_direction="both")
        if not isinstance(series.index, pd.DatetimeIndex) or len(filled) < 3 * period:
            rolling_mean = filled.rolling(period, min_periods=1).mean()
            if isinstance(series.index, pd.DatetimeIndex):
                seasonal_pattern = filled.groupby(filled.index.hour).transform("median")
            else:
                seasonal_pattern = pd.Series(0, index=filled.index)
            return rolling_mean, seasonal_pattern - seasonal_pattern.mean(), filled - rolling_mean - seasonal_pattern
        
        decomp = seasonal_decompose(filled, model="additive", period=period, extrapolate_trend="freq")
        return decomp.trend, decomp.seasonal, decomp.resid
    except Exception:
        return None, None, None

def create_overlapping_matrix(series, window_size=72, overlap=0.8):
    """Cria matriz temporal com sobreposição otimizada para dados de rede"""
    vals = series.values.astype(np.float64)
    n = len(vals)
    step = max(1, int(window_size * (1 - overlap)))
    windows = [vals[i:i+window_size] for i in range(0, n - window_size + 1, step)]
    return np.array(windows).T

def reconstruct_series(matrix, orig_index):
    if matrix.shape[1] == 1:
        return pd.Series(matrix[:, 0], index=orig_index[:len(matrix)])
    
    reconstructed = np.zeros(len(orig_index))
    weights = np.zeros(len(orig_index))
    
    for i in range(matrix.shape[1]):
        start = i
        for j in range(matrix.shape[0]):
            idx = start + j
            if idx < len(orig_index) and not np.isnan(matrix[j, i]):
                reconstructed[idx] += matrix[j, i]
                weights[idx] += 1
    
    return pd.Series(np.where(weights > 0, reconstructed/weights, np.nan), 
                    index=orig_index)

def evaluate_imputation(df_missing, imputed, df_true):
    missing_mask = df_missing["throughput"].isna()
    valid_mask = missing_mask & imputed.notna()
    if valid_mask.sum() == 0:
        return np.nan, np.nan
    
    y_true = df_true.loc[valid_mask, "throughput"]
    y_pred = imputed.loc[valid_mask]
    return (np.sqrt(mean_squared_error(y_true, y_pred)), 
            np.mean(np.abs(y_true - y_pred)))

def normalize_series(series):
    """Normalização robusta para dados de rede com distribuição assimétrica"""
    log_vals = np.log1p(np.abs(series)) * np.sign(series)
    median = np.nanmedian(log_vals)
    mad = np.nanmedian(np.abs(log_vals - median))
    return (log_vals - median) / (mad + 1e-8), median, mad

def denormalize_series(norm_vals, median, mad):
    """Desnormalização para dados de rede"""
    log_vals = norm_vals * (mad + 1e-8) + median
    return np.sign(log_vals) * (np.expm1(np.abs(log_vals)))

def optimize_svd_parameters(matrix):
    """Seleção automática de parâmetros SVD baseada na estrutura dos dados"""
    n, m = matrix.shape
    rank = min(15, max(5, int(min(n, m) * 0.3)))
    shrinkage = max(0.1, min(1.0, np.count_nonzero(np.isnan(matrix)) / (n * m)))
    return {'rank': rank, 'shrinkage': shrinkage}

# Funções principais otimizadas
def knn_imputer(df, n_neighbors=5):
    features = []
    
    # Série original
    features.append(df["throughput"])
    
    # Features temporais
    if isinstance(df.index, pd.DatetimeIndex):
        features.append(pd.Series(df.index.hour, index=df.index))
        features.append(pd.Series(np.sin(2 * np.pi * df.index.hour / 24), index=df.index))
        features.append(pd.Series(np.cos(2 * np.pi * df.index.hour / 24), index=df.index))
    
    # Lags
    for i in range(1, 6):
        features.append(df["throughput"].shift(i))
    
    # Médias móveis
    features.append(df["throughput"].rolling(24, min_periods=1).mean())
    
    feature_matrix = pd.concat(features, axis=1).fillna(0)
    imputer = KNNImputer(n_neighbors=n_neighbors)
    imputed_values = imputer.fit_transform(feature_matrix)
    return pd.Series(imputed_values[:, 0], index=df.index)

def hybrid_imputation(df):
    trend, seasonal, resid = robust_seasonal_decomposition(df["throughput"])
    if trend is None:
        return df["throughput"].interpolate()
    
    window_size = min(72, len(df))
    resid_matrix = create_overlapping_matrix(resid.fillna(0), window_size)
    resid_imputed = SoftImpute().fit_transform(resid_matrix)
    resid_series = reconstruct_series(resid_imputed, df.index).fillna(0)
    return (trend.fillna(0) + seasonal.fillna(0) + resid_series)

def iterative_svd_imputation(df):
    window_size = min(72, len(df))
    matrix = create_overlapping_matrix(df["throughput"], window_size)
    rank = min(15, matrix.shape[1]//2, matrix.shape[0]//2)
    imputed = IterativeSVD(rank=rank).fit_transform(matrix)
    return reconstruct_series(imputed, df.index)


def weighted_reconstruction(matrix, series_index):
    """Reconstrução ponderada com pesos temporais"""
    n, m = matrix.shape
    step = max(1, int(n * 0.2))
    reconstructed = np.zeros(len(series_index))
    weights = np.zeros(len(series_index))
    
    # Pesos triangulares (mais peso no centro da janela)
    window_weights = 1.0 - np.abs(np.linspace(-1, 1, n))
    
    for i in range(m):
        start_idx = i * step
        end_idx = start_idx + n
        
        for j in range(n):
            idx = start_idx + j
            if idx < len(series_index) and not np.isnan(matrix[j, i]):
                weight = window_weights[j]
                reconstructed[idx] += matrix[j, i] * weight
                weights[idx] += weight
    
    # Normalização e tratamento de bordas
    reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)
    return pd.Series(reconstructed, index=series_index)

def enhanced_hybrid_imputation(df):
    """Imputação híbrida aprimorada para dados de rede"""
    # Passo 1: Normalização robusta
    norm_vals, median, mad = normalize_series(df['throughput'])
    
    # Passo 2: Decomposição sazonal
    trend, seasonal, resid = robust_seasonal_decomposition(
        pd.Series(norm_vals, index=df.index)
    )
    
    # Passo 3: Criação da matriz de resíduos
    resid_matrix = create_overlapping_matrix(resid.fillna(0))
    
    # Passo 4: Otimização e aplicação do SVD
    params = optimize_svd_parameters(resid_matrix)
    if params['shrinkage'] > 0:
        svd_imputer = SoftImpute(shrinkage_value=params['shrinkage'])
    else:
        svd_imputer = IterativeSVD(rank=params['rank'])
    
    imputed_resid = svd_imputer.fit_transform(resid_matrix)
    
    # Passo 5: Reconstrução ponderada
    resid_series = weighted_reconstruction(imputed_resid, df.index)
    
    # Passo 6: Combinação de componentes
    combined = trend.fillna(0) + seasonal.fillna(0) + resid_series.fillna(0)
    
    # Passo 7: Desnormalização
    return denormalize_series(combined, median, mad)

# Atualização do pipeline principal
def main_pipeline(data_folder):
    datasets = {}
    for file in os.listdir(data_folder):
        if file.endswith(".csv"):
            try:
                df = pd.read_csv(os.path.join(data_folder, file))
                time_col, thr_col = df.columns[0], df.columns[1]
                df = df.set_index(pd.to_datetime(df[time_col])).rename(columns={thr_col: "throughput"})[["throughput"]]
                if not df["throughput"].dropna().empty:
                    datasets[file] = df
            except Exception:
                continue
    
    if not datasets:
        return None

    results = []
    missing_rates = [0.1, 0.2, 0.3, 0.4]
    
    for name, df_true in datasets.items():
        for rate in missing_rates:
            df_missing = introduce_missing_data(df_true.copy(), rate)
            key = f"{name}_missing_{int(rate*100)}%"
            
            # Métodos básicos
            median_imp = df_missing["throughput"].fillna(df_missing["throughput"].median())
            interp_imp = df_missing["throughput"].interpolate(method='time').fillna(median_imp)
            
            # Métodos avançados
            try:
                hybrid_imp = enhanced_hybrid_imputation(df_missing)
            except Exception as e:
                print(f"Erro no método híbrido: {e}")
                hybrid_imp = interp_imp.copy()
            
            # Avaliação
            metrics = {
                "dataset": key,
                "missing_rate": rate,
                "median_rmse": evaluate_imputation(df_missing, median_imp, df_true)[0],
                "interpolation_rmse": evaluate_imputation(df_missing, interp_imp, df_true)[0],
                "hybrid_rmse": evaluate_imputation(df_missing, hybrid_imp, df_true)[0]
            }
            results.append(metrics)
    
    return pd.DataFrame(results)
# Execução
if __name__ == "__main__":
    results_df = main_pipeline("data/")
    if results_df is not None:
        print(results_df)
        results_df.to_csv("results.csv", index=False)
    else:
        print("No valid datasets found")



[SoftImpute] Max Singular Value of X_init = 26.796666
[SoftImpute] Iter 1: observed MAE=0.003365 rank=72
[SoftImpute] Iter 2: observed MAE=0.003365 rank=72
[SoftImpute] Iter 3: observed MAE=0.003365 rank=72
[SoftImpute] Iter 4: observed MAE=0.003365 rank=72
[SoftImpute] Iter 5: observed MAE=0.003365 rank=72
[SoftImpute] Iter 6: observed MAE=0.003365 rank=72
[SoftImpute] Iter 7: observed MAE=0.003365 rank=72
[SoftImpute] Iter 8: observed MAE=0.003365 rank=72
[SoftImpute] Iter 9: observed MAE=0.003365 rank=72
[SoftImpute] Iter 10: observed MAE=0.003365 rank=72
[SoftImpute] Iter 11: observed MAE=0.003365 rank=72
[SoftImpute] Iter 12: observed MAE=0.003365 rank=72
[SoftImpute] Iter 13: observed MAE=0.003365 rank=72
[SoftImpute] Iter 14: observed MAE=0.003365 rank=72
[SoftImpute] Iter 15: observed MAE=0.003365 rank=72
[SoftImpute] Iter 16: observed MAE=0.003365 rank=72
[SoftImpute] Iter 17: observed MAE=0.003365 rank=72
[SoftImpute] Iter 18: observed MAE=0.003365 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 1: observed MAE=0.003398 rank=72
[SoftImpute] Iter 2: observed MAE=0.003398 rank=72
[SoftImpute] Iter 3: observed MAE=0.003398 rank=72
[SoftImpute] Iter 4: observed MAE=0.003398 rank=72
[SoftImpute] Iter 5: observed MAE=0.003398 rank=72
[SoftImpute] Iter 6: observed MAE=0.003398 rank=72
[SoftImpute] Iter 7: observed MAE=0.003398 rank=72
[SoftImpute] Iter 8: observed MAE=0.003398 rank=72
[SoftImpute] Iter 9: observed MAE=0.003398 rank=72
[SoftImpute] Iter 10: observed MAE=0.003398 rank=72
[SoftImpute] Iter 11: observed MAE=0.003398 rank=72
[SoftImpute] Iter 12: observed MAE=0.003398 rank=72
[SoftImpute] Iter 13: observed MAE=0.003398 rank=72
[SoftImpute] Iter 14: observed MAE=0.003398 rank=72
[SoftImpute] Iter 15: observed MAE=0.003398 rank=72
[SoftImpute] Iter 16: observed MAE=0.003398 rank=72
[SoftImpute] Iter 17: observed MAE=0.003398 rank=72
[SoftImpute] Iter 18: observed MAE=0.003398 rank=72
[SoftImpute] Iter 19: observed MAE=0.003398 rank=72
[SoftImpute] Iter 20:

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 1: observed MAE=0.003382 rank=72
[SoftImpute] Iter 2: observed MAE=0.003382 rank=72
[SoftImpute] Iter 3: observed MAE=0.003382 rank=72
[SoftImpute] Iter 4: observed MAE=0.003382 rank=72
[SoftImpute] Iter 5: observed MAE=0.003382 rank=72
[SoftImpute] Iter 6: observed MAE=0.003382 rank=72
[SoftImpute] Iter 7: observed MAE=0.003382 rank=72
[SoftImpute] Iter 8: observed MAE=0.003382 rank=72
[SoftImpute] Iter 9: observed MAE=0.003382 rank=72
[SoftImpute] Iter 10: observed MAE=0.003382 rank=72
[SoftImpute] Iter 11: observed MAE=0.003382 rank=72
[SoftImpute] Iter 12: observed MAE=0.003382 rank=72
[SoftImpute] Iter 13: observed MAE=0.003382 rank=72
[SoftImpute] Iter 14: observed MAE=0.003382 rank=72
[SoftImpute] Iter 15: observed MAE=0.003382 rank=72
[SoftImpute] Iter 16: observed MAE=0.003382 rank=72
[SoftImpute] Iter 17: observed MAE=0.003382 rank=72
[SoftImpute] Iter 18: observed MAE=0.003382 rank=72
[SoftImpute] Iter 19: observed MAE=0.003382 rank=72
[SoftImpute] Iter 20:

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 21.411393
[SoftImpute] Iter 1: observed MAE=0.003342 rank=72
[SoftImpute] Iter 2: observed MAE=0.003342 rank=72
[SoftImpute] Iter 3: observed MAE=0.003342 rank=72
[SoftImpute] Iter 4: observed MAE=0.003342 rank=72
[SoftImpute] Iter 5: observed MAE=0.003342 rank=72
[SoftImpute] Iter 6: observed MAE=0.003342 rank=72
[SoftImpute] Iter 7: observed MAE=0.003342 rank=72
[SoftImpute] Iter 8: observed MAE=0.003342 rank=72
[SoftImpute] Iter 9: observed MAE=0.003342 rank=72
[SoftImpute] Iter 10: observed MAE=0.003342 rank=72
[SoftImpute] Iter 11: observed MAE=0.003342 rank=72
[SoftImpute] Iter 12: observed MAE=0.003342 rank=72
[SoftImpute] Iter 13: observed MAE=0.003342 rank=72
[SoftImpute] Iter 14: observed MAE=0.003342 rank=72
[SoftImpute] Iter 15: observed MAE=0.003342 rank=72
[SoftImpute] Iter 16: observed MAE=0.003342 rank=72
[SoftImpute] Iter 17: observed MAE=0.003342 rank=72
[SoftImpute] Iter 18: observed MAE=0.003342 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 45.274273
[SoftImpute] Iter 1: observed MAE=0.003167 rank=72
[SoftImpute] Iter 2: observed MAE=0.003167 rank=72
[SoftImpute] Iter 3: observed MAE=0.003167 rank=72
[SoftImpute] Iter 4: observed MAE=0.003167 rank=72
[SoftImpute] Iter 5: observed MAE=0.003167 rank=72
[SoftImpute] Iter 6: observed MAE=0.003167 rank=72
[SoftImpute] Iter 7: observed MAE=0.003167 rank=72
[SoftImpute] Iter 8: observed MAE=0.003167 rank=72
[SoftImpute] Iter 9: observed MAE=0.003167 rank=72
[SoftImpute] Iter 10: observed MAE=0.003167 rank=72
[SoftImpute] Iter 11: observed MAE=0.003167 rank=72
[SoftImpute] Iter 12: observed MAE=0.003167 rank=72
[SoftImpute] Iter 13: observed MAE=0.003167 rank=72
[SoftImpute] Iter 14: observed MAE=0.003167 rank=72
[SoftImpute] Iter 15: observed MAE=0.003167 rank=72
[SoftImpute] Iter 16: observed MAE=0.003167 rank=72
[SoftImpute] Iter 17: observed MAE=0.003167 rank=72
[SoftImpute] Iter 18: observed MAE=0.003167 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 1: observed MAE=0.003139 rank=72
[SoftImpute] Iter 2: observed MAE=0.003139 rank=72
[SoftImpute] Iter 3: observed MAE=0.003139 rank=72
[SoftImpute] Iter 4: observed MAE=0.003139 rank=72
[SoftImpute] Iter 5: observed MAE=0.003139 rank=72
[SoftImpute] Iter 6: observed MAE=0.003139 rank=72
[SoftImpute] Iter 7: observed MAE=0.003139 rank=72
[SoftImpute] Iter 8: observed MAE=0.003139 rank=72
[SoftImpute] Iter 9: observed MAE=0.003139 rank=72
[SoftImpute] Iter 10: observed MAE=0.003139 rank=72
[SoftImpute] Iter 11: observed MAE=0.003139 rank=72
[SoftImpute] Iter 12: observed MAE=0.003139 rank=72
[SoftImpute] Iter 13: observed MAE=0.003139 rank=72
[SoftImpute] Iter 14: observed MAE=0.003139 rank=72
[SoftImpute] Iter 15: observed MAE=0.003139 rank=72
[SoftImpute] Iter 16: observed MAE=0.003139 rank=72
[SoftImpute] Iter 17: observed MAE=0.003139 rank=72
[SoftImpute] Iter 18: observed MAE=0.003139 rank=72
[SoftImpute] Iter 19: observed MAE=0.003139 rank=72
[SoftImpute] Iter 20:

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 44.553555
[SoftImpute] Iter 1: observed MAE=0.003079 rank=72
[SoftImpute] Iter 2: observed MAE=0.003079 rank=72
[SoftImpute] Iter 3: observed MAE=0.003079 rank=72
[SoftImpute] Iter 4: observed MAE=0.003079 rank=72
[SoftImpute] Iter 5: observed MAE=0.003079 rank=72
[SoftImpute] Iter 6: observed MAE=0.003079 rank=72
[SoftImpute] Iter 7: observed MAE=0.003079 rank=72
[SoftImpute] Iter 8: observed MAE=0.003079 rank=72
[SoftImpute] Iter 9: observed MAE=0.003079 rank=72
[SoftImpute] Iter 10: observed MAE=0.003079 rank=72
[SoftImpute] Iter 11: observed MAE=0.003079 rank=72
[SoftImpute] Iter 12: observed MAE=0.003079 rank=72
[SoftImpute] Iter 13: observed MAE=0.003079 rank=72
[SoftImpute] Iter 14: observed MAE=0.003079 rank=72
[SoftImpute] Iter 15: observed MAE=0.003079 rank=72
[SoftImpute] Iter 16: observed MAE=0.003079 rank=72
[SoftImpute] Iter 17: observed MAE=0.003079 rank=72
[SoftImpute] Iter 18: observed MAE=0.003079 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 44.644579
[SoftImpute] Iter 1: observed MAE=0.003020 rank=72
[SoftImpute] Iter 2: observed MAE=0.003020 rank=72
[SoftImpute] Iter 3: observed MAE=0.003020 rank=72
[SoftImpute] Iter 4: observed MAE=0.003020 rank=72
[SoftImpute] Iter 5: observed MAE=0.003020 rank=72
[SoftImpute] Iter 6: observed MAE=0.003020 rank=72
[SoftImpute] Iter 7: observed MAE=0.003020 rank=72
[SoftImpute] Iter 8: observed MAE=0.003020 rank=72
[SoftImpute] Iter 9: observed MAE=0.003020 rank=72
[SoftImpute] Iter 10: observed MAE=0.003020 rank=72
[SoftImpute] Iter 11: observed MAE=0.003020 rank=72
[SoftImpute] Iter 12: observed MAE=0.003020 rank=72
[SoftImpute] Iter 13: observed MAE=0.003020 rank=72
[SoftImpute] Iter 14: observed MAE=0.003020 rank=72
[SoftImpute] Iter 15: observed MAE=0.003020 rank=72
[SoftImpute] Iter 16: observed MAE=0.003020 rank=72
[SoftImpute] Iter 17: observed MAE=0.003020 rank=72
[SoftImpute] Iter 18: observed MAE=0.003020 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 59.296458
[SoftImpute] Iter 1: observed MAE=0.003310 rank=72
[SoftImpute] Iter 2: observed MAE=0.003310 rank=72
[SoftImpute] Iter 3: observed MAE=0.003310 rank=72
[SoftImpute] Iter 4: observed MAE=0.003310 rank=72
[SoftImpute] Iter 5: observed MAE=0.003310 rank=72
[SoftImpute] Iter 6: observed MAE=0.003310 rank=72
[SoftImpute] Iter 7: observed MAE=0.003310 rank=72
[SoftImpute] Iter 8: observed MAE=0.003310 rank=72
[SoftImpute] Iter 9: observed MAE=0.003310 rank=72
[SoftImpute] Iter 10: observed MAE=0.003310 rank=72
[SoftImpute] Iter 11: observed MAE=0.003310 rank=72
[SoftImpute] Iter 12: observed MAE=0.003310 rank=72
[SoftImpute] Iter 13: observed MAE=0.003310 rank=72
[SoftImpute] Iter 14: observed MAE=0.003310 rank=72
[SoftImpute] Iter 15: observed MAE=0.003310 rank=72
[SoftImpute] Iter 16: observed MAE=0.003310 rank=72
[SoftImpute] Iter 17: observed MAE=0.003310 rank=72
[SoftImpute] Iter 18: observed MAE=0.003310 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 1: observed MAE=0.003273 rank=72
[SoftImpute] Iter 2: observed MAE=0.003273 rank=72
[SoftImpute] Iter 3: observed MAE=0.003273 rank=72
[SoftImpute] Iter 4: observed MAE=0.003273 rank=72
[SoftImpute] Iter 5: observed MAE=0.003273 rank=72
[SoftImpute] Iter 6: observed MAE=0.003273 rank=72
[SoftImpute] Iter 7: observed MAE=0.003273 rank=72
[SoftImpute] Iter 8: observed MAE=0.003273 rank=72
[SoftImpute] Iter 9: observed MAE=0.003273 rank=72
[SoftImpute] Iter 10: observed MAE=0.003273 rank=72
[SoftImpute] Iter 11: observed MAE=0.003273 rank=72
[SoftImpute] Iter 12: observed MAE=0.003273 rank=72
[SoftImpute] Iter 13: observed MAE=0.003273 rank=72
[SoftImpute] Iter 14: observed MAE=0.003273 rank=72
[SoftImpute] Iter 15: observed MAE=0.003273 rank=72
[SoftImpute] Iter 16: observed MAE=0.003273 rank=72
[SoftImpute] Iter 17: observed MAE=0.003273 rank=72
[SoftImpute] Iter 18: observed MAE=0.003273 rank=72
[SoftImpute] Iter 19: observed MAE=0.003273 rank=72
[SoftImpute] Iter 20:

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 1: observed MAE=0.003219 rank=72
[SoftImpute] Iter 2: observed MAE=0.003219 rank=72
[SoftImpute] Iter 3: observed MAE=0.003219 rank=72
[SoftImpute] Iter 4: observed MAE=0.003219 rank=72
[SoftImpute] Iter 5: observed MAE=0.003219 rank=72
[SoftImpute] Iter 6: observed MAE=0.003219 rank=72
[SoftImpute] Iter 7: observed MAE=0.003219 rank=72
[SoftImpute] Iter 8: observed MAE=0.003219 rank=72
[SoftImpute] Iter 9: observed MAE=0.003219 rank=72
[SoftImpute] Iter 10: observed MAE=0.003219 rank=72
[SoftImpute] Iter 11: observed MAE=0.003219 rank=72
[SoftImpute] Iter 12: observed MAE=0.003219 rank=72
[SoftImpute] Iter 13: observed MAE=0.003219 rank=72
[SoftImpute] Iter 14: observed MAE=0.003219 rank=72
[SoftImpute] Iter 15: observed MAE=0.003219 rank=72
[SoftImpute] Iter 16: observed MAE=0.003219 rank=72
[SoftImpute] Iter 17: observed MAE=0.003219 rank=72
[SoftImpute] Iter 18: observed MAE=0.003219 rank=72
[SoftImpute] Iter 19: observed MAE=0.003219 rank=72
[SoftImpute] Iter 20:

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 57.909588
[SoftImpute] Iter 1: observed MAE=0.003176 rank=72
[SoftImpute] Iter 2: observed MAE=0.003176 rank=72
[SoftImpute] Iter 3: observed MAE=0.003176 rank=72
[SoftImpute] Iter 4: observed MAE=0.003176 rank=72
[SoftImpute] Iter 5: observed MAE=0.003176 rank=72
[SoftImpute] Iter 6: observed MAE=0.003176 rank=72
[SoftImpute] Iter 7: observed MAE=0.003176 rank=72
[SoftImpute] Iter 8: observed MAE=0.003176 rank=72
[SoftImpute] Iter 9: observed MAE=0.003176 rank=72
[SoftImpute] Iter 10: observed MAE=0.003176 rank=72
[SoftImpute] Iter 11: observed MAE=0.003176 rank=72
[SoftImpute] Iter 12: observed MAE=0.003176 rank=72
[SoftImpute] Iter 13: observed MAE=0.003176 rank=72
[SoftImpute] Iter 14: observed MAE=0.003176 rank=72
[SoftImpute] Iter 15: observed MAE=0.003176 rank=72
[SoftImpute] Iter 16: observed MAE=0.003176 rank=72
[SoftImpute] Iter 17: observed MAE=0.003176 rank=72
[SoftImpute] Iter 18: observed MAE=0.003176 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 1: observed MAE=0.003215 rank=72
[SoftImpute] Iter 2: observed MAE=0.003215 rank=72
[SoftImpute] Iter 3: observed MAE=0.003215 rank=72
[SoftImpute] Iter 4: observed MAE=0.003215 rank=72
[SoftImpute] Iter 5: observed MAE=0.003215 rank=72
[SoftImpute] Iter 6: observed MAE=0.003215 rank=72
[SoftImpute] Iter 7: observed MAE=0.003215 rank=72
[SoftImpute] Iter 8: observed MAE=0.003215 rank=72
[SoftImpute] Iter 9: observed MAE=0.003215 rank=72
[SoftImpute] Iter 10: observed MAE=0.003215 rank=72
[SoftImpute] Iter 11: observed MAE=0.003215 rank=72
[SoftImpute] Iter 12: observed MAE=0.003215 rank=72
[SoftImpute] Iter 13: observed MAE=0.003215 rank=72
[SoftImpute] Iter 14: observed MAE=0.003215 rank=72
[SoftImpute] Iter 15: observed MAE=0.003215 rank=72
[SoftImpute] Iter 16: observed MAE=0.003215 rank=72
[SoftImpute] Iter 17: observed MAE=0.003215 rank=72
[SoftImpute] Iter 18: observed MAE=0.003215 rank=72
[SoftImpute] Iter 19: observed MAE=0.003215 rank=72
[SoftImpute] Iter 20:

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 2: observed MAE=0.003178 rank=72
[SoftImpute] Iter 3: observed MAE=0.003178 rank=72
[SoftImpute] Iter 4: observed MAE=0.003178 rank=72
[SoftImpute] Iter 5: observed MAE=0.003178 rank=72
[SoftImpute] Iter 6: observed MAE=0.003178 rank=72
[SoftImpute] Iter 7: observed MAE=0.003178 rank=72
[SoftImpute] Iter 8: observed MAE=0.003178 rank=72
[SoftImpute] Iter 9: observed MAE=0.003178 rank=72
[SoftImpute] Iter 10: observed MAE=0.003178 rank=72
[SoftImpute] Iter 11: observed MAE=0.003178 rank=72
[SoftImpute] Iter 12: observed MAE=0.003178 rank=72
[SoftImpute] Iter 13: observed MAE=0.003178 rank=72
[SoftImpute] Iter 14: observed MAE=0.003178 rank=72
[SoftImpute] Iter 15: observed MAE=0.003178 rank=72
[SoftImpute] Iter 16: observed MAE=0.003178 rank=72
[SoftImpute] Iter 17: observed MAE=0.003178 rank=72
[SoftImpute] Iter 18: observed MAE=0.003178 rank=72
[SoftImpute] Iter 19: observed MAE=0.003178 rank=72
[SoftImpute] Iter 20: observed MAE=0.003178 rank=72
[SoftImpute] Iter 21

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 20.236459
[SoftImpute] Iter 1: observed MAE=0.003127 rank=72
[SoftImpute] Iter 2: observed MAE=0.003127 rank=72
[SoftImpute] Iter 3: observed MAE=0.003127 rank=72
[SoftImpute] Iter 4: observed MAE=0.003127 rank=72
[SoftImpute] Iter 5: observed MAE=0.003127 rank=72
[SoftImpute] Iter 6: observed MAE=0.003127 rank=72
[SoftImpute] Iter 7: observed MAE=0.003127 rank=72
[SoftImpute] Iter 8: observed MAE=0.003127 rank=72
[SoftImpute] Iter 9: observed MAE=0.003127 rank=72
[SoftImpute] Iter 10: observed MAE=0.003127 rank=72
[SoftImpute] Iter 11: observed MAE=0.003127 rank=72
[SoftImpute] Iter 12: observed MAE=0.003127 rank=72
[SoftImpute] Iter 13: observed MAE=0.003127 rank=72
[SoftImpute] Iter 14: observed MAE=0.003127 rank=72
[SoftImpute] Iter 15: observed MAE=0.003127 rank=72
[SoftImpute] Iter 16: observed MAE=0.003127 rank=72
[SoftImpute] Iter 17: observed MAE=0.003127 rank=72
[SoftImpute] Iter 18: observed MAE=0.003127 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 1: observed MAE=0.003054 rank=72
[SoftImpute] Iter 2: observed MAE=0.003054 rank=72
[SoftImpute] Iter 3: observed MAE=0.003054 rank=72
[SoftImpute] Iter 4: observed MAE=0.003054 rank=72
[SoftImpute] Iter 5: observed MAE=0.003054 rank=72
[SoftImpute] Iter 6: observed MAE=0.003054 rank=72
[SoftImpute] Iter 7: observed MAE=0.003054 rank=72
[SoftImpute] Iter 8: observed MAE=0.003054 rank=72
[SoftImpute] Iter 9: observed MAE=0.003054 rank=72
[SoftImpute] Iter 10: observed MAE=0.003054 rank=72
[SoftImpute] Iter 11: observed MAE=0.003054 rank=72
[SoftImpute] Iter 12: observed MAE=0.003054 rank=72
[SoftImpute] Iter 13: observed MAE=0.003054 rank=72
[SoftImpute] Iter 14: observed MAE=0.003054 rank=72
[SoftImpute] Iter 15: observed MAE=0.003054 rank=72
[SoftImpute] Iter 16: observed MAE=0.003054 rank=72
[SoftImpute] Iter 17: observed MAE=0.003054 rank=72
[SoftImpute] Iter 18: observed MAE=0.003054 rank=72
[SoftImpute] Iter 19: observed MAE=0.003054 rank=72
[SoftImpute] Iter 20:

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 14: observed MAE=0.003145 rank=72
[SoftImpute] Iter 15: observed MAE=0.003145 rank=72
[SoftImpute] Iter 16: observed MAE=0.003145 rank=72
[SoftImpute] Iter 17: observed MAE=0.003145 rank=72
[SoftImpute] Iter 18: observed MAE=0.003145 rank=72
[SoftImpute] Iter 19: observed MAE=0.003145 rank=72
[SoftImpute] Iter 20: observed MAE=0.003145 rank=72
[SoftImpute] Iter 21: observed MAE=0.003145 rank=72
[SoftImpute] Iter 22: observed MAE=0.003145 rank=72
[SoftImpute] Iter 23: observed MAE=0.003145 rank=72
[SoftImpute] Iter 24: observed MAE=0.003145 rank=72
[SoftImpute] Iter 25: observed MAE=0.003145 rank=72
[SoftImpute] Iter 26: observed MAE=0.003145 rank=72
[SoftImpute] Iter 27: observed MAE=0.003145 rank=72
[SoftImpute] Iter 28: observed MAE=0.003145 rank=72
[SoftImpute] Iter 29: observed MAE=0.003145 rank=72
[SoftImpute] Iter 30: observed MAE=0.003145 rank=72
[SoftImpute] Iter 31: observed MAE=0.003145 rank=72
[SoftImpute] Iter 32: observed MAE=0.003145 rank=72
[SoftImpute]

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 34.089588
[SoftImpute] Iter 1: observed MAE=0.003085 rank=72
[SoftImpute] Iter 2: observed MAE=0.003085 rank=72
[SoftImpute] Iter 3: observed MAE=0.003085 rank=72
[SoftImpute] Iter 4: observed MAE=0.003085 rank=72
[SoftImpute] Iter 5: observed MAE=0.003085 rank=72
[SoftImpute] Iter 6: observed MAE=0.003085 rank=72
[SoftImpute] Iter 7: observed MAE=0.003085 rank=72
[SoftImpute] Iter 8: observed MAE=0.003085 rank=72
[SoftImpute] Iter 9: observed MAE=0.003085 rank=72
[SoftImpute] Iter 10: observed MAE=0.003085 rank=72
[SoftImpute] Iter 11: observed MAE=0.003085 rank=72
[SoftImpute] Iter 12: observed MAE=0.003085 rank=72
[SoftImpute] Iter 13: observed MAE=0.003085 rank=72
[SoftImpute] Iter 14: observed MAE=0.003085 rank=72
[SoftImpute] Iter 15: observed MAE=0.003085 rank=72
[SoftImpute] Iter 16: observed MAE=0.003085 rank=72
[SoftImpute] Iter 17: observed MAE=0.003085 rank=72
[SoftImpute] Iter 18: observed MAE=0.003085 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 1: observed MAE=0.003029 rank=72
[SoftImpute] Iter 2: observed MAE=0.003029 rank=72
[SoftImpute] Iter 3: observed MAE=0.003029 rank=72
[SoftImpute] Iter 4: observed MAE=0.003029 rank=72
[SoftImpute] Iter 5: observed MAE=0.003029 rank=72
[SoftImpute] Iter 6: observed MAE=0.003029 rank=72
[SoftImpute] Iter 7: observed MAE=0.003029 rank=72
[SoftImpute] Iter 8: observed MAE=0.003029 rank=72
[SoftImpute] Iter 9: observed MAE=0.003029 rank=72
[SoftImpute] Iter 10: observed MAE=0.003029 rank=72
[SoftImpute] Iter 11: observed MAE=0.003029 rank=72
[SoftImpute] Iter 12: observed MAE=0.003029 rank=72
[SoftImpute] Iter 13: observed MAE=0.003029 rank=72
[SoftImpute] Iter 14: observed MAE=0.003029 rank=72
[SoftImpute] Iter 15: observed MAE=0.003029 rank=72
[SoftImpute] Iter 16: observed MAE=0.003029 rank=72
[SoftImpute] Iter 17: observed MAE=0.003029 rank=72
[SoftImpute] Iter 18: observed MAE=0.003029 rank=72
[SoftImpute] Iter 19: observed MAE=0.003029 rank=72
[SoftImpute] Iter 20:

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 33.853436
[SoftImpute] Iter 1: observed MAE=0.002990 rank=72
[SoftImpute] Iter 2: observed MAE=0.002990 rank=72
[SoftImpute] Iter 3: observed MAE=0.002990 rank=72
[SoftImpute] Iter 4: observed MAE=0.002990 rank=72
[SoftImpute] Iter 5: observed MAE=0.002990 rank=72
[SoftImpute] Iter 6: observed MAE=0.002990 rank=72
[SoftImpute] Iter 7: observed MAE=0.002990 rank=72
[SoftImpute] Iter 8: observed MAE=0.002990 rank=72
[SoftImpute] Iter 9: observed MAE=0.002990 rank=72
[SoftImpute] Iter 10: observed MAE=0.002990 rank=72
[SoftImpute] Iter 11: observed MAE=0.002990 rank=72
[SoftImpute] Iter 12: observed MAE=0.002990 rank=72
[SoftImpute] Iter 13: observed MAE=0.002990 rank=72
[SoftImpute] Iter 14: observed MAE=0.002990 rank=72
[SoftImpute] Iter 15: observed MAE=0.002990 rank=72
[SoftImpute] Iter 16: observed MAE=0.002990 rank=72
[SoftImpute] Iter 17: observed MAE=0.002990 rank=72
[SoftImpute] Iter 18: observed MAE=0.002990 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 54.708726
[SoftImpute] Iter 1: observed MAE=0.003420 rank=72
[SoftImpute] Iter 2: observed MAE=0.003420 rank=72
[SoftImpute] Iter 3: observed MAE=0.003420 rank=72
[SoftImpute] Iter 4: observed MAE=0.003420 rank=72
[SoftImpute] Iter 5: observed MAE=0.003420 rank=72
[SoftImpute] Iter 6: observed MAE=0.003420 rank=72
[SoftImpute] Iter 7: observed MAE=0.003420 rank=72
[SoftImpute] Iter 8: observed MAE=0.003420 rank=72
[SoftImpute] Iter 9: observed MAE=0.003420 rank=72
[SoftImpute] Iter 10: observed MAE=0.003420 rank=72
[SoftImpute] Iter 11: observed MAE=0.003420 rank=72
[SoftImpute] Iter 12: observed MAE=0.003420 rank=72
[SoftImpute] Iter 13: observed MAE=0.003420 rank=72
[SoftImpute] Iter 14: observed MAE=0.003420 rank=72
[SoftImpute] Iter 15: observed MAE=0.003420 rank=72
[SoftImpute] Iter 16: observed MAE=0.003420 rank=72
[SoftImpute] Iter 17: observed MAE=0.003420 rank=72
[SoftImpute] Iter 18: observed MAE=0.003420 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 1: observed MAE=0.003467 rank=72
[SoftImpute] Iter 2: observed MAE=0.003467 rank=72
[SoftImpute] Iter 3: observed MAE=0.003467 rank=72
[SoftImpute] Iter 4: observed MAE=0.003467 rank=72
[SoftImpute] Iter 5: observed MAE=0.003467 rank=72
[SoftImpute] Iter 6: observed MAE=0.003467 rank=72
[SoftImpute] Iter 7: observed MAE=0.003467 rank=72
[SoftImpute] Iter 8: observed MAE=0.003467 rank=72
[SoftImpute] Iter 9: observed MAE=0.003467 rank=72
[SoftImpute] Iter 10: observed MAE=0.003467 rank=72
[SoftImpute] Iter 11: observed MAE=0.003467 rank=72
[SoftImpute] Iter 12: observed MAE=0.003467 rank=72
[SoftImpute] Iter 13: observed MAE=0.003467 rank=72
[SoftImpute] Iter 14: observed MAE=0.003467 rank=72
[SoftImpute] Iter 15: observed MAE=0.003467 rank=72
[SoftImpute] Iter 16: observed MAE=0.003467 rank=72
[SoftImpute] Iter 17: observed MAE=0.003467 rank=72
[SoftImpute] Iter 18: observed MAE=0.003467 rank=72
[SoftImpute] Iter 19: observed MAE=0.003467 rank=72
[SoftImpute] Iter 20:

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Iter 1: observed MAE=0.003455 rank=72
[SoftImpute] Iter 2: observed MAE=0.003455 rank=72
[SoftImpute] Iter 3: observed MAE=0.003455 rank=72
[SoftImpute] Iter 4: observed MAE=0.003455 rank=72
[SoftImpute] Iter 5: observed MAE=0.003455 rank=72
[SoftImpute] Iter 6: observed MAE=0.003455 rank=72
[SoftImpute] Iter 7: observed MAE=0.003455 rank=72
[SoftImpute] Iter 8: observed MAE=0.003455 rank=72
[SoftImpute] Iter 9: observed MAE=0.003455 rank=72
[SoftImpute] Iter 10: observed MAE=0.003455 rank=72
[SoftImpute] Iter 11: observed MAE=0.003455 rank=72
[SoftImpute] Iter 12: observed MAE=0.003455 rank=72
[SoftImpute] Iter 13: observed MAE=0.003455 rank=72
[SoftImpute] Iter 14: observed MAE=0.003455 rank=72
[SoftImpute] Iter 15: observed MAE=0.003455 rank=72
[SoftImpute] Iter 16: observed MAE=0.003455 rank=72
[SoftImpute] Iter 17: observed MAE=0.003455 rank=72
[SoftImpute] Iter 18: observed MAE=0.003455 rank=72
[SoftImpute] Iter 19: observed MAE=0.003455 rank=72
[SoftImpute] Iter 20:

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


[SoftImpute] Max Singular Value of X_init = 54.336918
[SoftImpute] Iter 1: observed MAE=0.003399 rank=72
[SoftImpute] Iter 2: observed MAE=0.003399 rank=72
[SoftImpute] Iter 3: observed MAE=0.003399 rank=72
[SoftImpute] Iter 4: observed MAE=0.003399 rank=72
[SoftImpute] Iter 5: observed MAE=0.003399 rank=72
[SoftImpute] Iter 6: observed MAE=0.003399 rank=72
[SoftImpute] Iter 7: observed MAE=0.003399 rank=72
[SoftImpute] Iter 8: observed MAE=0.003399 rank=72
[SoftImpute] Iter 9: observed MAE=0.003399 rank=72
[SoftImpute] Iter 10: observed MAE=0.003399 rank=72
[SoftImpute] Iter 11: observed MAE=0.003399 rank=72
[SoftImpute] Iter 12: observed MAE=0.003399 rank=72
[SoftImpute] Iter 13: observed MAE=0.003399 rank=72
[SoftImpute] Iter 14: observed MAE=0.003399 rank=72
[SoftImpute] Iter 15: observed MAE=0.003399 rank=72
[SoftImpute] Iter 16: observed MAE=0.003399 rank=72
[SoftImpute] Iter 17: observed MAE=0.003399 rank=72
[SoftImpute] Iter 18: observed MAE=0.003399 rank=72
[SoftImpute] Iter 1

  reconstructed = np.where(weights > 0, reconstructed / weights, np.nan)


In [14]:
results_df

Unnamed: 0,dataset,missing_rate,median_rmse,interpolation_rmse,hybrid_rmse
0,40.csv_missing_10%,0.1,1579.62985,473.801879,462.828036
1,40.csv_missing_20%,0.2,1582.552052,499.42707,489.693608
2,40.csv_missing_30%,0.3,1587.511841,518.674859,512.17903
3,40.csv_missing_40%,0.4,1585.000055,533.137537,526.231597
4,29.csv_missing_10%,0.1,279602.820101,80902.406448,80764.857935
5,29.csv_missing_20%,0.2,289996.620072,77701.872856,78403.387182
6,29.csv_missing_30%,0.3,284000.176662,83429.861759,83769.534903
7,29.csv_missing_40%,0.4,282110.519094,87431.68823,88411.63691
8,15.csv_missing_10%,0.1,169.025019,67.343055,59.424397
9,15.csv_missing_20%,0.2,173.224147,72.085998,62.912532
