# Statistics generation and creation

Notebook to read simulation data, calculate statistics and aggregated statistics and save to separate csv files

In [1]:
from src.metrics import *
from src.data_loader import *
import matplotlib.pyplot as plt
import numpy as numpy
import seaborn as sns
import pandas as pd

In [2]:
results_pid_nominal = load_data('pid', 'nominal')
results_pid_with_noise = load_data('pid', 'noise')
results_pid_with_disturbances = load_data('pid', 'disturbances')

results_onoff_nominal = load_data('onoff', 'nominal')
results_onoff_with_noise = load_data('onoff', 'noise')
results_onoff_with_disturbances = load_data('onoff', 'disturbances')

results_fuzzy_nominal = load_data('fuzzy', 'nominal')
results_fuzzy_with_noise = load_data('fuzzy', 'noise')
results_fuzzy_with_disturbances = load_data('fuzzy', 'disturbances')

metrics = [steady_state_error, mean_square_error, overshoot, rise_time, settling_time, comfort_time, 
            energy_consumed,variance_after_settling, 
            recovery_time, number_of_oscillations
            ]

results = {
    "results_pid_nominal": results_pid_nominal,
    "results_pid_with_noise": results_pid_with_noise,
    "results_pid_with_disturbances": results_pid_with_disturbances,
     "results_onoff_nominal": results_onoff_nominal,
     "results_onoff_with_noise": results_onoff_with_noise,
     "results_onoff_with_disturbances": results_onoff_with_disturbances,
     "results_fuzzy_nominal": results_fuzzy_nominal,
     "results_fuzzy_with_noise": results_fuzzy_with_noise,
     "results_fuzzy_with_disturbances": results_fuzzy_with_disturbances,
}

evaluation_vars = ['measuredTemp', 'outsideTemp', 'heatSourcePower', 'temperatureSensor_T', 'normalNoise_y', 'windowState', 'time']

for result_name, result_data in results.items():
    clean_and_merge_data(result_data, result_name, evaluation_vars)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_frame['simulation_run'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_frame['simulation_run'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cleaned_frame['simulation_run'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

In [None]:
def bootstrap_mean_confidence_interval(data, num_samples=1000, confidence=0.95):
    """Bootstrap method to compute confidence intervals"""
    data = np.array(data)
    data = data[~np.isnan(data)] 
    if len(data) <= 1:
        return np.nan, np.nan, np.nan
    
    n = len(data)
    means = []
    
    for _ in range(num_samples):
        sample = np.random.choice(data, size=n, replace=True)
        means.append(np.mean(sample))
    
    mean = np.mean(means)
    std_error = np.std(means)
    
    z = stats.norm.ppf(1 - (1 - confidence) / 2)
    ci_lower = mean - z * std_error
    ci_upper = mean + z * std_error
    
    return mean, ci_lower, ci_upper

def bootstrap_mean_variance_ci(data, num_samples=1000, confidence=0.95):
    """Bootstrap estimate of the variance of the sample mean and its CI"""
    data = np.array(data)
    data = data[~np.isnan(data)]
    if len(data) <= 1:
        return np.nan, np.nan, np.nan
    
    n = len(data)
    boot_means = []
    
    for _ in range(num_samples):
        sample = np.random.choice(data, size=n, replace=True)
        boot_means.append(np.mean(sample))
    
    # Variance of the estimator (mean) across bootstraps
    est_var = np.var(boot_means, ddof=1)
    std_error = np.std(boot_means, ddof=1)
    
    z = stats.norm.ppf(1 - (1 - confidence) / 2)
    ci_lower = np.mean(boot_means) - z * std_error
    ci_upper = np.mean(boot_means) + z * std_error
    
    return est_var, ci_lower, ci_upper

In [None]:
def calculate_and_export_stats(scenario_results, metrics, scenario_name):
    aggregated_stats = pd.DataFrame(columns=['metric', 'mean', 'mean_ci_lower','mean_ci_upper', 'var', 'var_ci_lower', 'var_ci_upper'])
    stats_per_run = []
    long_format_records = []
    for metric in metrics:
        metric_results = ([metric(result, 'temperatureSensor_T', 20.0, 'windowState' ) for result in scenario_results])
        
        stats_per_run.append({
            'metric': metric.__name__,
            'values': metric_results
        })

        for i, value in enumerate(metric_results):
            long_format_records.append({
                'controller': scenario_name,
                'run': i,
                'metric': metric.__name__,
                'value': value
            })


        # Calculate confidence intervals for mean and variance
        mean, mean_ci_lower, mean_ci_upper = bootstrap_mean_confidence_interval(metric_results)
        var, var_ci_lower, var_ci_upper = bootstrap_mean_variance_ci(metric_results)

        
        metric_entry ={
            'metric': metric.__name__,
            'mean': mean,
            'mean_ci_lower': mean_ci_lower,
            'mean_ci_upper': mean_ci_upper,
            'var': var,
            'var_ci_lower': var_ci_lower,
            'var_ci_upper': var_ci_upper 
        }
        aggregated_stats = pd.concat([aggregated_stats, pd.DataFrame([metric_entry])], ignore_index=True)

    aggregated_stats.to_csv(f"simulation_results/statistics/{scenario_name}_aggregated.csv", index=False)
    
    long_format_df = pd.DataFrame(long_format_records)
    long_format_df.to_csv(f"simulation_results/statistics/{scenario_name}_all.csv", index=False)

In [4]:
for name, scenario in results.items():
    calculate_and_export_stats(scenario, metrics, scenario_name=name)
    print(f"Stats for {name} calculated and exported.")



  aggregated_stats = pd.concat([aggregated_stats, pd.DataFrame([metric_entry])], ignore_index=True)


Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance ti

  aggregated_stats = pd.concat([aggregated_stats, pd.DataFrame([metric_entry])], ignore_index=True)


Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance ti

  aggregated_stats = pd.concat([aggregated_stats, pd.DataFrame([metric_entry])], ignore_index=True)


Disturbance times: 3972    3972.0
Name: time, dtype: float64
Disturbance times: 6774    6774.0
Name: time, dtype: float64
Disturbance times: 8796    8796.0
Name: time, dtype: float64
Disturbance times: 10093    10093.0
Name: time, dtype: float64
Disturbance times: 2055    2055.0
Name: time, dtype: float64
Disturbance times: 688    688.0
Name: time, dtype: float64
Disturbance times: 2363    2363.0
Name: time, dtype: float64
Disturbance times: 1300    1300.0
8330    8330.0
Name: time, dtype: float64
Disturbance times: 801        801.0
3229      3229.0
12918    12918.0
Name: time, dtype: float64
Disturbance times: 1160    1160.0
Name: time, dtype: float64
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: 1178    1178.0
9589    9589.0
Name: time, dtype: float64
Disturbance times: 628    628.0
Name: time, dtype: float64
Disturbance times: 1901    1901.0
Name: time, dtype: float64
Disturbance times: 4818    4818.0
Name: time, dtype: float64
Disturbance times: 2859 

  aggregated_stats = pd.concat([aggregated_stats, pd.DataFrame([metric_entry])], ignore_index=True)


Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance ti

  aggregated_stats = pd.concat([aggregated_stats, pd.DataFrame([metric_entry])], ignore_index=True)


Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance ti

  aggregated_stats = pd.concat([aggregated_stats, pd.DataFrame([metric_entry])], ignore_index=True)


Disturbance times: 3972    3972.0
Name: time, dtype: float64
Disturbance times: 6774    6774.0
Name: time, dtype: float64
Disturbance times: 8796    8796.0
Name: time, dtype: float64
Disturbance times: 10093    10093.0
Name: time, dtype: float64
Disturbance times: 2055    2055.0
Name: time, dtype: float64
Disturbance times: 688    688.0
Name: time, dtype: float64
Disturbance times: 2363    2363.0
Name: time, dtype: float64
Disturbance times: 1300    1300.0
8330    8330.0
Name: time, dtype: float64
Disturbance times: 801        801.0
3229      3229.0
12918    12918.0
Name: time, dtype: float64
Disturbance times: 1160    1160.0
Name: time, dtype: float64
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: 1178    1178.0
9589    9589.0
Name: time, dtype: float64
Disturbance times: 628    628.0
Name: time, dtype: float64
Disturbance times: 1901    1901.0
Name: time, dtype: float64
Disturbance times: 4818    4818.0
Name: time, dtype: float64
Disturbance times: 2859 

  aggregated_stats = pd.concat([aggregated_stats, pd.DataFrame([metric_entry])], ignore_index=True)


Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance ti

  aggregated_stats = pd.concat([aggregated_stats, pd.DataFrame([metric_entry])], ignore_index=True)


Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance ti

  aggregated_stats = pd.concat([aggregated_stats, pd.DataFrame([metric_entry])], ignore_index=True)


Disturbance times: 3972    3972.0
Name: time, dtype: float64
Disturbance times: 6774    6774.0
Name: time, dtype: float64
Disturbance times: 8796    8796.0
Name: time, dtype: float64
Disturbance times: 10093    10093.0
Name: time, dtype: float64
Disturbance times: 2055    2055.0
Name: time, dtype: float64
Disturbance times: 688    688.0
Name: time, dtype: float64
Disturbance times: 2363    2363.0
Name: time, dtype: float64
Disturbance times: 1300    1300.0
8330    8330.0
Name: time, dtype: float64
Disturbance times: 801        801.0
3229      3229.0
12918    12918.0
Name: time, dtype: float64
Disturbance times: 1160    1160.0
Name: time, dtype: float64
Disturbance times: Series([], Name: time, dtype: float64)
Disturbance times: 1178    1178.0
9589    9589.0
Name: time, dtype: float64
Disturbance times: 628    628.0
Name: time, dtype: float64
Disturbance times: 1901    1901.0
Name: time, dtype: float64
Disturbance times: 4818    4818.0
Name: time, dtype: float64
Disturbance times: 2859 