In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os
from scipy import stats as scipy_stats
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import warnings
from typing import Dict, List, Any 
from sklearn.model_selection import GridSearchCV
from pathlib import Path
import datetime
warnings.filterwarnings('ignore')

# Load the raw CPU metric CSVs: five experiments x five metrics, one frame each.
# Paths are relative, so they resolve against the current working directory.
# Each CSV must provide a "Time" column (it is parsed and shifted further down).

# Load BASELINE datasets (its IOWait frame later anchors the shared timeline)
df_baseline_iowait = pd.read_csv("baseline/cpu_iowait.csv")
df_baseline_irq = pd.read_csv("baseline/cpu_irq.csv")
df_baseline_system_msec = pd.read_csv("baseline/cpu_system_msec.csv")
df_baseline_user_msec = pd.read_csv("baseline/cpu_user_msec.csv")
df_baseline_util_per = pd.read_csv("baseline/cpu_util_per.csv")

# Load CACHE FILLING datasets (note: the directory name contains a space)
df_cachefilling_iowait = pd.read_csv("cache filling/cpu_iowait.csv")
df_cachefilling_irq = pd.read_csv("cache filling/cpu_irq.csv")
df_cachefilling_system_msec = pd.read_csv("cache filling/cpu_system_msec.csv")
df_cachefilling_user_msec = pd.read_csv("cache filling/cpu_user_msec.csv")
df_cachefilling_util_per = pd.read_csv("cache filling/cpu_util_per.csv")

# Load GC STRESS datasets (note: the directory name contains a space)
df_gcstress_iowait = pd.read_csv("gc stress/cpu_iowait.csv")
df_gcstress_irq = pd.read_csv("gc stress/cpu_irq.csv")
df_gcstress_system_msec = pd.read_csv("gc stress/cpu_system_msec.csv")
df_gcstress_user_msec = pd.read_csv("gc stress/cpu_user_msec.csv")
df_gcstress_util_per = pd.read_csv("gc stress/cpu_util_per.csv")

# Load OOM datasets
df_oom_iowait = pd.read_csv("oom/cpu_iowait.csv")
df_oom_irq = pd.read_csv("oom/cpu_irq.csv")
df_oom_system_msec = pd.read_csv("oom/cpu_system_msec.csv")
df_oom_user_msec = pd.read_csv("oom/cpu_user_msec.csv")
df_oom_util_per = pd.read_csv("oom/cpu_util_per.csv")

# Load THREADFULL datasets
df_threadfull_iowait = pd.read_csv("threadfull/cpu_iowait.csv")
df_threadfull_irq = pd.read_csv("threadfull/cpu_irq.csv")
df_threadfull_system_msec = pd.read_csv("threadfull/cpu_system_msec.csv")
df_threadfull_user_msec = pd.read_csv("threadfull/cpu_user_msec.csv")
df_threadfull_util_per = pd.read_csv("threadfull/cpu_util_per.csv")


# Tag every frame with the experiment it belongs to by writing a constant
# "source" column.  Frames are grouped per experiment label.
for _source_label, _frames in (
    ("BASELINE", (
        df_baseline_iowait, df_baseline_irq, df_baseline_system_msec,
        df_baseline_user_msec, df_baseline_util_per,
    )),
    ("CACHE_FILLING", (
        df_cachefilling_iowait, df_cachefilling_irq, df_cachefilling_system_msec,
        df_cachefilling_user_msec, df_cachefilling_util_per,
    )),
    ("GC_STRESS", (
        df_gcstress_iowait, df_gcstress_irq, df_gcstress_system_msec,
        df_gcstress_user_msec, df_gcstress_util_per,
    )),
    ("OOM", (
        df_oom_iowait, df_oom_irq, df_oom_system_msec,
        df_oom_user_msec, df_oom_util_per,
    )),
    ("THREADFULL", (
        df_threadfull_iowait, df_threadfull_irq, df_threadfull_system_msec,
        df_threadfull_user_msec, df_threadfull_util_per,
    )),
):
    for _frame in _frames:
        _frame["source"] = _source_label


# Parse the "Time" column of every loaded frame into pandas datetimes,
# replacing the column in place.
for _frame in (
    # baseline
    df_baseline_iowait, df_baseline_irq, df_baseline_system_msec,
    df_baseline_user_msec, df_baseline_util_per,
    # cache filling
    df_cachefilling_iowait, df_cachefilling_irq, df_cachefilling_system_msec,
    df_cachefilling_user_msec, df_cachefilling_util_per,
    # gc stress
    df_gcstress_iowait, df_gcstress_irq, df_gcstress_system_msec,
    df_gcstress_user_msec, df_gcstress_util_per,
    # oom
    df_oom_iowait, df_oom_irq, df_oom_system_msec,
    df_oom_user_msec, df_oom_util_per,
    # threadfull
    df_threadfull_iowait, df_threadfull_irq, df_threadfull_system_msec,
    df_threadfull_user_msec, df_threadfull_util_per,
):
    _frame["Time"] = pd.to_datetime(_frame["Time"])


delay = 30
duration = 50

# The earliest baseline IOWait timestamp is the common origin for all runs.
time_offset = df_baseline_iowait["Time"].min()

# One shift per experiment, derived from that experiment's IOWait frame and
# applied to all five of its metric frames so they stay aligned with each other.
cachefilling_offset = time_offset - df_cachefilling_iowait["Time"].min()
gcstress_offset = time_offset - df_gcstress_iowait["Time"].min()
oom_offset = time_offset - df_oom_iowait["Time"].min()
threadfull_offset = time_offset - df_threadfull_iowait["Time"].min()

for _shift, _frames in (
    (cachefilling_offset, (
        df_cachefilling_iowait, df_cachefilling_irq, df_cachefilling_system_msec,
        df_cachefilling_user_msec, df_cachefilling_util_per,
    )),
    (gcstress_offset, (
        df_gcstress_iowait, df_gcstress_irq, df_gcstress_system_msec,
        df_gcstress_user_msec, df_gcstress_util_per,
    )),
    (oom_offset, (
        df_oom_iowait, df_oom_irq, df_oom_system_msec,
        df_oom_user_msec, df_oom_util_per,
    )),
    (threadfull_offset, (
        df_threadfull_iowait, df_threadfull_irq, df_threadfull_system_msec,
        df_threadfull_user_msec, df_threadfull_util_per,
    )),
):
    for _frame in _frames:
        _frame["Time"] += _shift


# Flat list of every frame (experiment-major order) for uniform post-processing.
all_dfs = [
    # baseline
    df_baseline_iowait,
    df_baseline_irq,
    df_baseline_system_msec,
    df_baseline_user_msec,
    df_baseline_util_per,
    # cache filling
    df_cachefilling_iowait,
    df_cachefilling_irq,
    df_cachefilling_system_msec,
    df_cachefilling_user_msec,
    df_cachefilling_util_per,
    # gc stress
    df_gcstress_iowait,
    df_gcstress_irq,
    df_gcstress_system_msec,
    df_gcstress_user_msec,
    df_gcstress_util_per,
    # oom
    df_oom_iowait,
    df_oom_irq,
    df_oom_system_msec,
    df_oom_user_msec,
    df_oom_util_per,
    # threadfull
    df_threadfull_iowait,
    df_threadfull_irq,
    df_threadfull_system_msec,
    df_threadfull_user_msec,
    df_threadfull_util_per,
]

# "Minutes" = time elapsed since the frame's own earliest timestamp.
for df in all_dfs:
    _earliest = df["Time"].min()
    _elapsed = df["Time"] - _earliest
    df["Minutes"] = _elapsed.dt.total_seconds() / 60

# Two-level lookup: metric label -> experiment key -> DataFrame.
# Each tuple of frames below follows the order of _EXPERIMENT_KEYS.
_EXPERIMENT_KEYS = ('baseline', 'cache_filling', 'gc_stress', 'oom', 'threadfull')

all_datasets = {
    _metric_label: dict(zip(_EXPERIMENT_KEYS, _metric_frames))
    for _metric_label, _metric_frames in (
        ('IOWait', (
            df_baseline_iowait, df_cachefilling_iowait, df_gcstress_iowait,
            df_oom_iowait, df_threadfull_iowait,
        )),
        ('IRQ', (
            df_baseline_irq, df_cachefilling_irq, df_gcstress_irq,
            df_oom_irq, df_threadfull_irq,
        )),
        ('System', (
            df_baseline_system_msec, df_cachefilling_system_msec, df_gcstress_system_msec,
            df_oom_system_msec, df_threadfull_system_msec,
        )),
        ('User', (
            df_baseline_user_msec, df_cachefilling_user_msec, df_gcstress_user_msec,
            df_oom_user_msec, df_threadfull_user_msec,
        )),
        ('Utilization', (
            df_baseline_util_per, df_cachefilling_util_per, df_gcstress_util_per,
            df_oom_util_per, df_threadfull_util_per,
        )),
    )
}

# Report what was loaded: number of metrics, number of experiments (taken from
# the IOWait entry), and the experiment keys available for each metric.
_metric_count = len(all_datasets)
_experiment_count = len(all_datasets['IOWait'])
print("✅ All datasets loaded successfully!")
print(f"📊 Loaded {_metric_count} metrics across {_experiment_count} experiment types")
print("\nDataset structure:")
for metric, experiments in all_datasets.items():
    print(f"  {metric}: {list(experiments)}")

✅ All datasets loaded successfully!
📊 Loaded 5 metrics across 5 experiment types

Dataset structure:
  IOWait: ['baseline', 'cache_filling', 'gc_stress', 'oom', 'threadfull']
  IRQ: ['baseline', 'cache_filling', 'gc_stress', 'oom', 'threadfull']
  System: ['baseline', 'cache_filling', 'gc_stress', 'oom', 'threadfull']
  User: ['baseline', 'cache_filling', 'gc_stress', 'oom', 'threadfull']
  Utilization: ['baseline', 'cache_filling', 'gc_stress', 'oom', 'threadfull']


In [None]:
def get_tuna_parameters(metric_name, experiment_type):
    """Return the TUNA tuning parameters for a CPU metric.

    The metric is recognised by a case-insensitive substring match on
    ``metric_name``.  Keywords are tried in the order iowait, irq, system,
    user, util and the first hit wins.  ``"util"`` (rather than the longer
    ``"utilization"``) is used so that names such as ``"cpu_util_per"`` are
    accepted, matching the keyword used when labelling plots.

    Args:
        metric_name: Metric label, e.g. ``"IOWait"`` or ``"Utilization"``.
        experiment_type: Experiment label.  Not used by the lookup; accepted
            so all TUNA helpers share one call shape.

    Returns:
        dict with the keys ``window_size``, ``threshold``,
        ``min_absolute_range_factor``, ``penalty_factor`` and
        ``lookback_window``.

    Raises:
        ValueError: If ``metric_name`` contains none of the known keywords.
            (Previously this surfaced as an ``UnboundLocalError``.)
    """
    # (keyword, window_size, threshold) -- order matters, first match wins.
    profiles = (
        ("iowait", 3, 3.0),
        ("irq", 4, 0.8),
        ("system", 20, 1.1),
        ("user", 20, 0.8),
        ("util", 30, 0.5),
    )

    lowered = metric_name.lower()
    for keyword, window_size, threshold in profiles:
        if keyword in lowered:
            break
    else:
        known = ", ".join(keyword for keyword, _, _ in profiles)
        raise ValueError(
            f"Unknown metric {metric_name!r}: expected a name containing one of {known}"
        )

    min_absolute_range_factor = 0.4
    penalty_factor = 0.5
    lookback_window = 10

    return {
        'window_size': window_size,
        'threshold': threshold,
        'min_absolute_range_factor': min_absolute_range_factor,
        'penalty_factor': penalty_factor,
        # The look-back is capped at 15 samples.
        'lookback_window': min(lookback_window, 15)
    }

def detect_outliers_tuna_memory(timeseries, metric_name="", experiment_type=""):
    """Flag outliers using TUNA's relative-range test over sliding windows.

    A window (step 1) is considered unstable when its mean is positive, its
    range divided by its mean exceeds the metric's threshold, and its range
    exceeds ``min_absolute_range_factor * std(timeseries)``.  Within an
    unstable window only the samples equal to the window minimum or maximum
    are flagged.

    Args:
        timeseries: 1-D sequence of samples.
        metric_name: Metric label used to look up window size and threshold.
        experiment_type: Experiment label forwarded to the parameter lookup.

    Returns:
        Boolean numpy array of ``len(timeseries)``; True marks an outlier.
    """
    sample_count = len(timeseries)
    flagged = np.zeros(sample_count, dtype=bool)

    settings = get_tuna_parameters(metric_name, experiment_type)
    width = settings['window_size']
    relative_limit = settings['threshold']
    absolute_floor = settings['min_absolute_range_factor'] * np.std(timeseries)

    for start in range(sample_count - width + 1):
        stop = start + width
        segment = np.asarray(timeseries[start:stop])

        segment_mean = np.mean(segment)
        # Non-positive (or NaN) means make the relative range meaningless.
        if not segment_mean > 0:
            continue

        highest = np.max(segment)
        lowest = np.min(segment)
        spread = highest - lowest

        if spread / segment_mean > relative_limit and spread > absolute_floor:
            # Only the extreme samples of an unstable window are marked.
            flagged[start:stop] |= (segment == highest) | (segment == lowest)

    return flagged

def get_local_stable_baseline(timeseries, stable_mask, current_idx, lookback_window):
    """Estimate a baseline value for ``current_idx`` from nearby stable samples.

    Search order:
      1. stable samples in the ``lookback_window`` positions before
         ``current_idx``;
      2. stable samples after ``current_idx``, up to (but excluding) position
         ``current_idx + lookback_window``;
      3. the median of all stable samples;
      4. the median of the whole series when nothing is stable.

    For steps 1 and 2 the median is used when at least three stable samples
    are found, otherwise the mean.

    Args:
        timeseries: 1-D numpy array of samples.
        stable_mask: Boolean numpy array, True where a sample is stable.
        current_idx: Position of the sample needing a baseline.
        lookback_window: Number of positions to search on each side.

    Returns:
        The baseline as a numpy scalar.
    """
    def _summarise(values):
        # Median needs a few points to be meaningful; fall back to the mean.
        return np.median(values) if len(values) >= 3 else np.mean(values)

    # 1. Preceding neighbourhood.
    lower = max(0, current_idx - lookback_window)
    behind = timeseries[lower:current_idx][stable_mask[lower:current_idx]]
    if len(behind) >= 1:
        return _summarise(behind)

    # 2. Following neighbourhood (the current sample itself is skipped).
    upper = min(len(timeseries), current_idx + lookback_window)
    ahead = timeseries[current_idx + 1:upper][stable_mask[current_idx + 1:upper]]
    if len(ahead) >= 1:
        return _summarise(ahead)

    # 3./4. Global fallbacks.
    trusted = timeseries[stable_mask]
    if len(trusted) > 0:
        return np.median(trusted)
    return np.median(timeseries)

def apply_penalty_local_trend(timeseries, outlier_mask, metric_name="", experiment_type=""):
    """Pull every outlier towards its local stable baseline.

    Each flagged sample is replaced by
    ``baseline + (value - baseline) * penalty_factor``, where ``baseline``
    comes from ``get_local_stable_baseline`` and ``penalty_factor`` from
    ``get_tuna_parameters``.  Unflagged samples are returned unchanged.

    Args:
        timeseries: 1-D numpy array of samples (not modified).
        outlier_mask: Boolean numpy array, True where a sample is an outlier.
        metric_name: Metric label used for the parameter lookup.
        experiment_type: Experiment label forwarded to the parameter lookup.

    Returns:
        A new float numpy array with the penalised series.
    """
    # Work on a float copy: with an integer input (e.g. millisecond counters
    # read from CSV) assigning the penalised value into an integer array
    # would silently truncate it.
    cleaned_series = np.array(timeseries, dtype=float)
    stable_mask = ~outlier_mask
    params = get_tuna_parameters(metric_name, experiment_type)

    effective_penalty = params['penalty_factor']
    lookback_window = params['lookback_window']

    for position in np.flatnonzero(outlier_mask):
        idx = int(position)
        # Baselines are always derived from the original, untouched series.
        local_baseline = get_local_stable_baseline(timeseries, stable_mask, idx, lookback_window)
        deviation = timeseries[idx] - local_baseline
        cleaned_series[idx] = local_baseline + deviation * effective_penalty

    return cleaned_series

def create_features_for_ml(timeseries, stable_mask, experiment_type):
    """Build feature vectors for stable samples from their preceding window.

    For each stable position ``i >= 3`` the three previous samples are
    inspected; a feature row is produced only when at least 80% of them are
    stable.  A row holds, for the stable part of that window: mean, standard
    deviation, median, range, the previous sample (or the window mean when
    that sample is unstable), the relative position ``i / len(timeseries)``,
    followed by a 5-slot one-hot experiment encoding (baseline,
    cache_filling, gc_stress, oom, threadfull; all zeros for anything else).
    NaN/inf statistics are replaced by 0.0.

    Args:
        timeseries: 1-D numpy array of samples.
        stable_mask: Boolean numpy array, True where a sample is stable.
        experiment_type: Experiment key used for the one-hot encoding.

    Returns:
        ``(features, indices)`` as numpy arrays; ``indices[k]`` is the series
        position that ``features[k]`` describes.  Both are empty when no
        position qualifies.
    """
    lag = 3  # fixed feature window size
    required_stable_share = 0.8
    one_hot_slot = {
        "baseline": 0,
        "cache_filling": 1,
        "gc_stress": 2,
        "oom": 3,
        "threadfull": 4,
    }

    # The encoding is identical for every row, so build it once.
    experiment_flags = [0, 0, 0, 0, 0]
    slot = one_hot_slot.get(experiment_type)
    if slot is not None:
        experiment_flags[slot] = 1

    series_length = len(timeseries)
    rows = []
    row_positions = []

    for pos in range(lag, series_length):
        if not stable_mask[pos]:
            continue

        history = timeseries[pos - lag:pos]
        history_stable = stable_mask[pos - lag:pos]

        if np.sum(history_stable) / len(history_stable) < required_stable_share:
            continue

        usable = history[history_stable]
        if len(usable) == 0:
            continue

        previous_value = timeseries[pos - 1] if stable_mask[pos - 1] else np.mean(usable)
        raw_stats = [
            np.mean(usable),
            np.std(usable),
            np.median(usable),
            np.max(usable) - np.min(usable),
            previous_value,
            pos / series_length,
        ]

        # Replace any NaN/inf statistic with a neutral 0.0.
        row = [
            0.0 if np.isnan(value) or np.isinf(value) else float(value)
            for value in raw_stats
        ]
        row.extend(experiment_flags)

        rows.append(row)
        row_positions.append(pos)

    return np.array(rows), np.array(row_positions)

def apply_tuna_to_single_series(cumulative, exp_name, model, scaler, metric_name=""):
    """Apply the three TUNA cleaning phases to one series.

    Phase 1 flags outliers, phase 2 pulls them towards their local stable
    baseline, and phase 3 (only when both ``model`` and ``scaler`` are given)
    replaces stable samples that have a feature row with the model's
    prediction.

    Args:
        cumulative: 1-D numpy array of samples (not modified).
        exp_name: Experiment key, forwarded to the helpers.
        model: Fitted regressor exposing ``predict``, or None to skip phase 3.
        scaler: Fitted scaler exposing ``transform``, or None to skip phase 3.
        metric_name: Metric label used for the parameter lookup.

    Returns:
        ``(cleaned_series, outlier_mask, stats)`` where ``cleaned_series`` is
        a float numpy array and ``stats`` has the keys ``outliers``,
        ``noise_reduction`` (percent reduction of the standard deviation, 0.0
        for a constant input) and ``correlation`` (Pearson correlation of
        input and output; NaN when either of them is constant).
    """
    # Phase 1: Identify outliers
    outlier_mask = detect_outliers_tuna_memory(cumulative, metric_name=metric_name, experiment_type=exp_name)
    stable_mask = ~outlier_mask
    outliers_count = np.sum(outlier_mask)

    # Phase 2: Apply penalty.
    # The working copy is always float: integer inputs would otherwise
    # truncate the ML predictions written in phase 3.
    if outliers_count > 0:
        penalised = apply_penalty_local_trend(cumulative, outlier_mask, metric_name, exp_name)
        cleaned_series = np.array(penalised, dtype=float)
    else:
        cleaned_series = np.array(cumulative, dtype=float)

    # Phase 3: ML enhancement
    if model is not None and scaler is not None:
        features, feature_indices = create_features_for_ml(cumulative, stable_mask, exp_name)

        if len(features) > 0:
            ml_predictions = model.predict(scaler.transform(features))

            for actual_idx, prediction in zip(feature_indices, ml_predictions):
                if actual_idx >= len(cleaned_series) or not stable_mask[actual_idx]:
                    continue
                if np.isfinite(prediction):
                    # The prediction replaces the stable sample outright
                    # (no blending with the existing value).
                    cleaned_series[actual_idx] = prediction

    # Calculate statistics
    original_std = np.std(cumulative)
    cleaned_std = np.std(cleaned_series)
    # Guard against constant columns, as the per-experiment summary does.
    if original_std > 0:
        noise_reduction = (original_std - cleaned_std) / original_std * 100
    else:
        noise_reduction = 0.0
    correlation = np.corrcoef(cumulative, cleaned_series)[0, 1]

    return cleaned_series, outlier_mask, {
        'outliers': outliers_count,
        'noise_reduction': noise_reduction,
        'correlation': correlation
    }

def apply_tuna_to_dataframe(df, exp_name, models, scalers, metric_name=""):
    """Clean every value column of ``df`` independently with TUNA.

    Columns named ``Time``, ``Minutes`` or ``source`` are treated as metadata
    and left untouched.

    Args:
        df: Input DataFrame (not modified).
        exp_name: Experiment key forwarded to the per-series cleaner.
        models: Mapping column name -> fitted model (missing or None allowed).
        scalers: Mapping column name -> fitted scaler (missing or None allowed).
        metric_name: Metric label used for the parameter lookup.

    Returns:
        ``(cleaned_df, column_results)``: a copy of ``df`` with cleaned value
        columns, and a dict per column holding ``original``, ``cleaned``,
        ``outliers`` and ``stats``.
    """
    metadata_columns = ('Time', 'Minutes', 'source')

    cleaned_df = df.copy()
    column_results = {}

    for col in df.columns:
        if col in metadata_columns:
            continue

        raw_values = df[col].values
        cleaned_values, flagged, col_stats = apply_tuna_to_single_series(
            raw_values,
            exp_name,
            models.get(col),
            scalers.get(col),
            metric_name,
        )

        column_results[col] = dict(
            original=raw_values,
            cleaned=cleaned_values,
            outliers=flagged,
            stats=col_stats,
        )
        cleaned_df[col] = cleaned_values

    return cleaned_df, column_results

def train_models_for_all_columns(metric_name, all_experiments):
    """Fit one grid-searched RandomForest (plus scaler) per value column.

    Training rows come from the stable samples of every experiment; targets
    are the stable values smoothed with a 5-point local median taken over the
    sequence of stable targets.  A column gets ``None`` for both model and
    scaler when fewer than 10 usable rows are available.

    Args:
        metric_name: Metric label used for the outlier-detection parameters.
        all_experiments: Mapping experiment key -> DataFrame.  The value
            columns are taken from the first DataFrame.

    Returns:
        ``(models, scalers)``: two dicts keyed by column name.
    """
    metadata_columns = ('Time', 'Minutes', 'source')
    minimum_rows = 10

    reference_df = list(all_experiments.values())[0]
    value_cols = [name for name in reference_df.columns if name not in metadata_columns]

    models = {}
    scalers = {}

    for col in value_cols:
        # Default outcome; overwritten only when a model is actually fitted.
        models[col] = None
        scalers[col] = None

        training_rows = []
        training_targets = []

        for exp_name, df in all_experiments.items():
            series = df[col].values
            stable = ~detect_outliers_tuna_memory(
                series, metric_name=metric_name, experiment_type=exp_name
            )

            features, positions = create_features_for_ml(series, stable, exp_name)
            if len(features) == 0:
                continue

            targets = series[positions]
            usable = np.isfinite(targets)
            features = features[usable]
            targets = targets[usable]
            if len(features) == 0:
                continue

            # Smooth targets with a local median (two neighbours either side).
            target_count = len(targets)
            smoothed = [
                np.median(targets[max(0, k - 2):min(target_count, k + 3)])
                for k in range(target_count)
            ]

            training_rows.extend(features)
            training_targets.extend(smoothed)

        if len(training_rows) < minimum_rows:
            continue

        X = np.array(training_rows)
        y = np.array(training_targets)

        # Drop rows with a non-finite target, then rows with a non-finite feature.
        finite_target = np.isfinite(y)
        X, y = X[finite_target], y[finite_target]

        finite_features = np.isfinite(X).all(axis=1)
        X, y = X[finite_features], y[finite_features]

        if len(X) < minimum_rows:
            continue

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        search = GridSearchCV(
            estimator=RandomForestRegressor(
                criterion='squared_error',
                bootstrap=True,
                n_jobs=-1,
                random_state=42
            ),
            param_grid={
                'n_estimators': [100],
                'max_depth': [5, 10, 15],
                'min_samples_split': [2, 5],
                'min_samples_leaf': [1, 2, 4],
                'max_features': ['sqrt', 'log2']
            },
            scoring='neg_mean_squared_error',
            cv=3,
            n_jobs=-1,
            verbose=0
        )
        search.fit(X_scaled, y)

        models[col] = search.best_estimator_
        scalers[col] = scaler

    return models, scalers

def run_tuna_analysis(all_datasets):
    """Run the TUNA pipeline over every metric and experiment.

    For each metric, experiments whose row-wise sum of value columns has no
    variance are passed through unchanged; the remaining ones are used to
    train per-column models and are then cleaned.  A metric with no valid
    experiment at all is omitted from the result.

    Args:
        all_datasets: Mapping metric label -> experiment key -> DataFrame.

    Returns:
        Mapping metric label -> experiment key -> result dict with the keys
        ``original``, ``cleaned``, ``outliers``, ``stats``,
        ``column_results`` and ``cleaned_df``.
    """
    metadata_columns = ('Time', 'Minutes', 'source')

    def _row_totals(frame):
        # Row-wise sum over every non-metadata column.
        value_cols = [name for name in frame.columns if name not in metadata_columns]
        return frame[value_cols].sum(axis=1).values

    tuna_results = {}

    for metric_name, experiments in all_datasets.items():

        # Keep only experiments whose summed signal actually varies.
        valid_experiments = {}
        for exp_name, df in experiments.items():
            totals = _row_totals(df)
            if np.std(totals) > 0 and len(np.unique(totals)) > 1:
                valid_experiments[exp_name] = df

        if not valid_experiments:
            continue

        models, scalers = train_models_for_all_columns(metric_name, valid_experiments)

        metric_results = {}
        for exp_name, df in experiments.items():
            if exp_name not in valid_experiments:
                # Flat signal: report it untouched.
                totals = _row_totals(df)
                metric_results[exp_name] = {
                    'original': totals,
                    'cleaned': totals,
                    'outliers': np.zeros(len(df), dtype=bool),
                    'stats': {
                        'outliers': 0,
                        'noise_reduction': 0.0,
                        'correlation': 1.0
                    },
                    'column_results': {},
                    'cleaned_df': df.copy()
                }
                continue

            cleaned_df, column_results = apply_tuna_to_dataframe(
                df, exp_name, models, scalers, metric_name=metric_name
            )

            total_outliers = sum(
                per_column['stats']['outliers'] for per_column in column_results.values()
            )

            original_cumulative = _row_totals(df)
            cleaned_cumulative = _row_totals(cleaned_df)

            original_std = np.std(original_cumulative)
            cleaned_std = np.std(cleaned_cumulative)
            if original_std > 0:
                noise_reduction = (original_std - cleaned_std) / original_std * 100
            else:
                noise_reduction = 0
            correlation = np.corrcoef(original_cumulative, cleaned_cumulative)[0, 1]

            # A row counts as an outlier if any of its columns flagged it.
            overall_outlier_mask = np.zeros(len(df), dtype=bool)
            for per_column in column_results.values():
                overall_outlier_mask |= per_column['outliers']

            metric_results[exp_name] = {
                'original': original_cumulative,
                'cleaned': cleaned_cumulative,
                'outliers': overall_outlier_mask,
                'stats': {
                    'outliers': total_outliers,
                    'noise_reduction': noise_reduction,
                    'correlation': correlation
                },
                'column_results': column_results,
                'cleaned_df': cleaned_df
            }

        if metric_results:
            tuna_results[metric_name] = metric_results

    return tuna_results

def plot_tuna_results(tuna_results, all_datasets):
    """Draw one figure per metric with one subplot per experiment.

    Each subplot shows the original and the cleaned cumulative series against
    the ``Minutes`` column of the matching DataFrame, marks the flagged
    outliers, and fixes the x axis to 0-120 minutes.

    Args:
        tuna_results: Output of ``run_tuna_analysis``.
        all_datasets: Mapping metric label -> experiment key -> DataFrame,
            used only to read the ``Minutes`` column.
    """
    # (keyword, y-axis label); first keyword found in the metric name wins.
    axis_labels = (
        ('iowait', 'CPU IOWait (msec)'),
        ('irq', 'CPU IRQ (msec)'),
        ('system', 'CPU System (msec)'),
        ('user', 'CPU User (msec)'),
        ('util', 'CPU Utilization (%)'),
    )

    for metric_name, metric_results in tuna_results.items():
        panel_count = len(metric_results)
        fig, axes = plt.subplots(panel_count, 1, figsize=(15, 4*panel_count))

        # A single subplot is returned bare, not inside an array.
        if panel_count == 1:
            axes = [axes]

        lowered_metric = metric_name.lower()
        y_label = next(
            (text for keyword, text in axis_labels if keyword in lowered_metric),
            'CPU Values',
        )

        for ax, (exp_name, results) in zip(axes, metric_results.items()):
            original = results['original']
            cleaned = results['cleaned']
            outliers = results['outliers']
            stats = results['stats']

            minutes = all_datasets[metric_name][exp_name]['Minutes'].values

            ax.plot(minutes, original, 'b-', alpha=0.7, label='Original', linewidth=1)
            ax.plot(minutes, cleaned, 'r-', alpha=0.8, label='TUNA Cleaned', linewidth=1.5)

            if np.any(outliers):
                ax.scatter(
                    minutes[outliers],
                    original[outliers],
                    color='red', s=8, alpha=0.8, marker='o',
                    label=f'Outliers ({np.sum(outliers)})', zorder=5,
                )

            ax.set_title(f'{metric_name} - {exp_name} (Noise Reduction: {stats["noise_reduction"]:.1f}%, Outliers: {stats["outliers"]})')
            ax.set_xlabel('Minutes')
            ax.set_ylabel(y_label)
            ax.set_xlim(0, 120)
            ax.legend()
            ax.grid(True, alpha=0.3)

        plt.suptitle(f'TUNA Results: {metric_name}', fontsize=16, y=0.98)
        plt.tight_layout(rect=[0, 0, 1, 0.96])
        plt.show()

def print_tuna_summary(tuna_results):
    """Print a fixed-width table with one row per metric/experiment pair.

    Columns: metric, experiment, outlier count, noise reduction in percent
    (one decimal) and correlation (three decimals).

    Args:
        tuna_results: Mapping metric label -> experiment key -> result dict
            containing a ``stats`` dict with ``outliers``,
            ``noise_reduction`` and ``correlation``.
    """
    rule_width = 70

    print("\nTUNA Results Summary:")
    print("=" * rule_width)
    print(f"{'Metric':<15} {'Experiment':<15} {'Outliers':<10} {'Noise Red%':<12} {'Correlation':<12}")
    print("-" * rule_width)

    table_rows = (
        (metric_name, exp_name, results['stats'])
        for metric_name, metric_results in tuna_results.items()
        for exp_name, results in metric_results.items()
    )
    for metric_name, exp_name, stats in table_rows:
        left = f"{metric_name:<15} {exp_name:<15} {stats['outliers']:<10} "
        right = f"{stats['noise_reduction']:>10.1f}% {stats['correlation']:>11.3f}"
        print(left + right)

def export_tuna_data_to_csv(tuna_results, export_dir="../../noise_reduction_data"):
    """Write the per-experiment TUNA statistics to a summary CSV.

    One row is written for every experiment entry that has a ``stats`` dict;
    missing statistics default to 0.0 (noise reduction), 1.0 (correlation)
    and 0 (outliers).  ``export_dir`` is created when it does not exist.

    Args:
        tuna_results: Mapping metric label -> experiment key -> result dict.
        export_dir: Directory that receives ``jvm_gateway_tuna_cpu.csv``.

    Returns:
        Path of the written CSV file, as a string.
    """
    os.makedirs(export_dir, exist_ok=True)

    records = [
        {
            'experiment_name': experiment_name,
            'metric': metric_name,
            'noise_reduction_pct': experiment_data['stats'].get('noise_reduction', 0.0),
            'correlation': experiment_data['stats'].get('correlation', 1.0),
            'outliers_removed': experiment_data['stats'].get('outliers', 0),
        }
        for metric_name, metric_data in tuna_results.items()
        for experiment_name, experiment_data in metric_data.items()
        if 'stats' in experiment_data
    ]

    csv_path = os.path.join(export_dir, "jvm_gateway_tuna_cpu.csv")
    pd.DataFrame(records).to_csv(csv_path, index=False)
    return csv_path

def create_cleaned_csv_from_tuna_columns(original_df, column_results, metric_name, experiment_name, include_metadata=False):
    """Return a copy of ``original_df`` with TUNA-cleaned value columns.

    Only columns that are not ``Time``/``Minutes``/``source`` and that have an
    entry in ``column_results`` are replaced; cleaned values are cut to the
    frame length.

    Args:
        original_df: Source DataFrame (not modified).
        column_results: Mapping column name -> dict with ``cleaned``,
            ``outliers`` and ``stats``.
        metric_name: Stored in ``tuna_metric_name`` when metadata is requested.
        experiment_name: Stored in ``tuna_experiment`` when metadata is requested.
        include_metadata: When True, append ``tuna_*`` bookkeeping columns
            (processed flag, outlier totals, names, and per-column outlier
            mask, noise reduction and correlation).

    Returns:
        The cleaned DataFrame.
    """
    metadata_columns = ('Time', 'Minutes', 'source')

    cleaned_df = original_df.copy()
    row_count = len(cleaned_df)

    # Fixed up front so the tuna_* columns added below are never revisited.
    treated_cols = [
        name for name in cleaned_df.columns
        if name not in metadata_columns and name in column_results
    ]

    total_outliers = 0
    for col in treated_cols:
        entry = column_results[col]
        cleaned_df[col] = entry['cleaned'][:row_count]
        total_outliers += entry['stats']['outliers']

    if not include_metadata:
        return cleaned_df

    cleaned_df['tuna_processed'] = True
    cleaned_df['tuna_total_outliers'] = total_outliers
    cleaned_df['tuna_metric_name'] = metric_name
    cleaned_df['tuna_experiment'] = experiment_name

    for col in treated_cols:
        entry = column_results[col]
        col_stats = entry['stats']
        cleaned_df[f'tuna_outlier_{col}'] = entry['outliers'][:row_count]
        cleaned_df[f'tuna_noise_reduction_{col}'] = col_stats['noise_reduction']
        cleaned_df[f'tuna_correlation_{col}'] = col_stats['correlation']

    return cleaned_df

def export_all_cleaned_csvs(tuna_results, all_datasets, output_base_path="cleaned_cpu_data", include_metadata=False):
    """Write every cleaned dataset to ``<output_base_path>/<experiment>/cpu_<metric>.csv``.

    Args:
        tuna_results: Output of ``run_tuna_analysis``.
        all_datasets: Mapping metric label -> experiment key -> original
            DataFrame.
        output_base_path: Root directory for the export; created if missing.
        include_metadata: Forwarded to
            ``create_cleaned_csv_from_tuna_columns``.

    Returns:
        Mapping experiment key -> list of written file paths (as strings).
    """
    output_base = Path(output_base_path)
    output_base.mkdir(parents=True, exist_ok=True)

    # Every experiment that appears under any metric gets its own directory.
    experiment_types = {
        exp_name
        for per_metric in tuna_results.values()
        for exp_name in per_metric
    }

    created_files = {}
    for exp_name in experiment_types:
        (output_base / exp_name).mkdir(exist_ok=True)
        created_files[exp_name] = []

    for metric_name, metric_results in tuna_results.items():
        file_name = f"cpu_{metric_name.lower()}.csv"

        for exp_name, results in metric_results.items():
            cleaned_df = create_cleaned_csv_from_tuna_columns(
                original_df=all_datasets[metric_name][exp_name],
                column_results=results.get('column_results', {}),
                metric_name=metric_name,
                experiment_name=exp_name,
                include_metadata=include_metadata
            )

            destination = output_base / exp_name / file_name
            cleaned_df.to_csv(destination, index=False)
            created_files[exp_name].append(str(destination))

    return created_files

# Run the whole pipeline on the loaded datasets:
#   1. clean every metric/experiment with TUNA,
#   2. plot original vs. cleaned series (one figure per metric),
#   3. print the summary table,
#   4. write the summary CSV (default export directory),
#   5. write the cleaned per-experiment CSVs (default output directory).
tuna_results = run_tuna_analysis(all_datasets)
plot_tuna_results(tuna_results, all_datasets)
print_tuna_summary(tuna_results)
csv_path = export_tuna_data_to_csv(tuna_results)
cleaned_files = export_all_cleaned_csvs(tuna_results, all_datasets)
