# In [None]:  (Jupyter notebook cell prompt left over from export)
import numpy as np
import pandas as pd

def monte_carlo_simulation(
    df_original: pd.DataFrame,
    improvements,
    n_simulations: int = 1000,
    *,
    noise_fraction: float = 0.1,
    random_state=None,
) -> pd.DataFrame:
    """Estimate the spread of relative KPI improvements under input noise.

    Each run copies ``df_original``, adds zero-mean Gaussian noise to a fixed
    set of input columns (only those present in the frame), clips them at
    zero, and computes the mean KPIs before and after ``apply_improvements``.
    The relative change of every KPI in ``KPI_COLUMNS`` is recorded per run,
    and a summary (mean, std, 5%/50%/95% percentiles) is printed at the end.

    Args:
        df_original: Input data. It is copied for every run.
        improvements: Passed through unchanged to ``apply_improvements``.
        n_simulations: Number of simulation runs; must be at least 1.
        noise_fraction: Standard deviation of the noise, expressed as a
            fraction of each column's sample standard deviation. The default
            of 0.1 matches the previously hard-coded 10%.
        random_state: ``None`` draws from NumPy's global random state (so
            ``np.random.seed`` keeps working as before); any other value is
            used as the seed of a dedicated ``np.random.default_rng``.

    Returns:
        A DataFrame with one row per run and one column per KPI holding the
        relative improvement (0 when the baseline is NaN or ~0).

    Raises:
        ValueError: If ``n_simulations`` is below 1 or ``noise_fraction`` is
            negative.
    """
    if n_simulations < 1:
        raise ValueError("n_simulations must be at least 1")
    if noise_fraction < 0:
        raise ValueError("noise_fraction must be non-negative")

    noisy_columns = (
        'clienti serviti nel mese',
        'giorni lavorati nel mese',
        'Delta (orario di fine montaggio EFFETTIVO - orario di fine montaggio CONCORDATO) [min]',
    )
    # KPIs for which a decrease counts as a positive improvement.
    lower_is_better = {'RitardoMedioConsegna', 'PercentualeConsegneRiprogrammate'}

    rng = np.random if random_state is None else np.random.default_rng(random_state)
    n_rows = df_original.shape[0]

    # Every run starts from the same copy, so the noise scale per column is
    # loop-invariant and computed once. A NaN std (fewer than two non-null
    # values) would turn the whole column into NaN, so it is treated as
    # "no noise" instead.
    noise_scales = {}
    for col in noisy_columns:
        if col not in df_original.columns:
            continue
        std = df_original[col].std()
        noise_scales[col] = 0.0 if pd.isna(std) else std * noise_fraction

    results = []
    for _ in range(n_simulations):
        df_sim = df_original.copy()

        # Step 1: perturb inputs with noise / uncertainty.
        # NOTE(review): the delta column (actual minus agreed finish time)
        # looks like it could legitimately be negative; clipping at 0 would
        # discard early finishes - confirm this is intended.
        for col, scale in noise_scales.items():
            noise = rng.normal(0, scale, size=n_rows)
            df_sim[col] = (df_sim[col] + noise).clip(lower=0)

        # Step 2: recalculate KPIs on noisy data (before applying the
        # improvements, in case apply_improvements works in place).
        kpis_before = calculate_kpis(df_sim).mean()

        # Step 3: apply improvements and recompute the KPIs.
        df_after = apply_improvements(df_sim, improvements)
        kpis_after = calculate_kpis(df_after).mean()

        # Step 4: relative improvement per KPI.
        # NOTE(review): a negative baseline flips the sign of the ratio.
        run_improvements = {}
        for kpi in KPI_COLUMNS:
            base_val = kpis_before.get(kpi, 0)
            new_val = kpis_after.get(kpi, 0)
            if pd.isna(base_val) or abs(base_val) < 1e-10:
                # No usable baseline: report "no change" rather than inf/NaN.
                run_improvements[kpi] = 0
                continue
            if kpi in lower_is_better:
                delta = base_val - new_val
            else:
                delta = new_val - base_val
            run_improvements[kpi] = delta / (base_val + 1e-10)

        results.append(run_improvements)

    # Step 5: aggregate results
    df_results = pd.DataFrame(results)

    summary = df_results.describe(percentiles=[0.05, 0.5, 0.95])
    print("Monte Carlo simulation summary of KPI improvements:")
    print(summary.loc[['mean', 'std', '5%', '50%', '95%']])

    return df_results

def usage_example():
    """Run the Monte Carlo workflow end to end on ``dataset.csv``.

    Loads the CSV, cleans it with ``clean_column_names`` and ``clean_data``,
    runs 1000 simulations using ``KPI_IMPROVEMENTS``, prints the mean
    improvement per KPI and, when matplotlib and seaborn can be imported,
    shows one histogram per KPI.
    """
    # 1. Load and clean data
    df = pd.read_csv('dataset.csv')
    df = clean_column_names(df)
    df_clean = clean_data(df)

    # 2. Define improvements (you can use your KPI_IMPROVEMENTS dictionary)
    improvements = KPI_IMPROVEMENTS

    # 3. Run Monte Carlo simulation with 1000 iterations
    monte_results = monte_carlo_simulation(df_clean, improvements, n_simulations=1000)

    # 4. Example: show mean improvements from the simulation
    mean_improvements = monte_results.mean()
    print("\nMean KPI improvements from Monte Carlo simulation:")
    print(mean_improvements)

    # 5. Optional: visualize distributions (requires matplotlib and seaborn)
    try:
        import matplotlib.pyplot as plt
        import seaborn as sns

        # Size the grid from the number of KPIs: a fixed 3x3 grid makes
        # plt.subplot raise once there are more than nine of them. Up to
        # nine KPIs the layout is the same 3x3 grid at 12x8 inches.
        kpi_names = list(KPI_COLUMNS)
        n_cols = 3
        n_rows = max(3, (len(kpi_names) + n_cols - 1) // n_cols)

        plt.figure(figsize=(12, 8 * n_rows / 3))
        for i, col in enumerate(kpi_names):
            plt.subplot(n_rows, n_cols, i + 1)
            sns.histplot(monte_results[col], kde=True, bins=30)
            plt.title(col)
            plt.xlabel('Improvement')
        plt.tight_layout()
        plt.show()
    except ImportError:
        # Plotting is best-effort; this also covers backend import failures.
        print("matplotlib or seaborn not installed, skipping plots.")

# Run the demo workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    usage_example()

