In [1]:
import pandas as pd
import numpy as np

In [13]:
dataset_name = 'Sonar'
result_path = f'{dataset_name}_results'
p_value = 0.25

In [2]:
df_onestep = pd.read_csv('Sonar_results/raw_metric_data_onestep_0.25.csv')
df_twostep = pd.read_csv('Sonar_results/raw_metric_data_twostep_0.25.csv')

In [3]:
df_onestep

Unnamed: 0,times_onestep,sizes_onestep,rsum_onestep,coverage_onestep
0,88.935451,52,33.337683,1
1,70.985979,51,32.551809,1
2,67.381834,49,23.205780,1
3,14.136300,48,23.987324,1
4,22.452725,47,26.187650,1
...,...,...,...,...
151,106.850426,49,27.962885,1
152,157.359452,44,28.984295,1
153,5.790139,53,18.846061,1
154,37.255163,52,29.959078,1


In [7]:
metrics_dataframes = []
times_onestep = df_onestep['times_onestep'].values
sizes_onestep = df_onestep['sizes_onestep'].values
rsum_onestep = df_onestep['rsum_onestep'].values
coverage_onestep = df_onestep['coverage_onestep'].values

In [4]:
df_twostep

Unnamed: 0,times_twostep,sizes_twostep,rsum_twostep,coverage_twostep
0,148.709568,52,32.626706,1
1,115.102216,51,31.821206,1
2,223.312868,49,24.543724,1
3,68.542638,48,25.819401,1
4,97.203997,47,33.185838,1
...,...,...,...,...
151,215.638015,49,28.459948,1
152,296.190268,44,31.863497,1
153,46.106256,53,27.916647,1
154,68.261259,52,30.147560,1


In [8]:
times_twostep = df_twostep['times_twostep'].values
sizes_twostep = df_twostep['sizes_twostep'].values
rsum_twostep = df_twostep['rsum_twostep'].values
coverage_twostep = df_twostep['coverage_twostep'].values

In [11]:
def compute_mean_std(arr):
    return np.mean(arr), np.std(arr)

def relative_percentage_diff(new, old):
    if np.any(old == 0):
        print(f'Warning: found possible division by zero')
        return np.where(old != 0, ((new - old) / old) * 100, np.nan)
    return ((new - old) / old) * 100

# Compute means and standard deviations
(time_mean_onestep, time_std_onestep) = compute_mean_std(times_onestep)
(time_mean_twostep, time_std_twostep) = compute_mean_std(times_twostep)

(sizes_mean_onestep, sizes_std_onestep) = compute_mean_std(sizes_onestep)
(sizes_mean_twostep, sizes_std_twostep) = compute_mean_std(sizes_twostep)

(rsum_mean_onestep, rsum_std_onestep) = compute_mean_std(rsum_onestep)
(rsum_mean_twostep, rsum_std_twostep) = compute_mean_std(rsum_twostep)

(coverage_mean_onestep, coverage_std_onestep) = compute_mean_std(coverage_onestep)
(coverage_mean_twostep, coverage_std_twostep) = compute_mean_std(coverage_twostep)

# Compute relative percentage differences (Mean & Std)
time_mean_diff = relative_percentage_diff(time_mean_twostep, time_mean_onestep)
sizes_mean_diff = relative_percentage_diff(sizes_mean_twostep, sizes_mean_onestep)
rsum_mean_diff = relative_percentage_diff(rsum_mean_twostep, rsum_mean_onestep)
coverage_mean_diff = relative_percentage_diff(coverage_mean_twostep, coverage_mean_onestep)

time_std_diff = relative_percentage_diff(time_std_twostep, time_std_onestep)
sizes_std_diff = relative_percentage_diff(sizes_std_twostep, sizes_std_onestep)
rsum_std_diff = relative_percentage_diff(rsum_std_twostep, rsum_std_onestep)
coverage_std_diff = relative_percentage_diff(coverage_std_twostep, coverage_std_onestep)

# Compute pointwise relative differences
time_relative_pointwise = relative_percentage_diff(times_twostep, times_onestep)
sizes_relative_pointwise = relative_percentage_diff(sizes_twostep, sizes_onestep)
rsum_relative_pointwise = relative_percentage_diff(rsum_twostep, rsum_onestep)
coverage_relative_pointwise = relative_percentage_diff(coverage_twostep, coverage_onestep)

# Compute pointwise means
time_relative_mean = np.mean(time_relative_pointwise) 
sizes_relative_mean = np.mean(sizes_relative_pointwise)
rsum_relative_mean = np.mean(rsum_relative_pointwise)
coverage_relative_mean = np.mean(coverage_relative_pointwise)

# Compute pointwise standard deviations
time_relative_std = np.std(time_relative_pointwise) 
sizes_relative_std = np.std(sizes_relative_pointwise)
rsum_relative_std = np.std(rsum_relative_pointwise)
coverage_relative_std = np.std(coverage_relative_pointwise)

# Organize Data
all_metrics_data = {
    'Metric': ['Time', 'Size', 'Ranges_Sum', 'Coverage'],
    'ONESTEP_MEAN': [time_mean_onestep, sizes_mean_onestep, rsum_mean_onestep, coverage_mean_onestep],
    'ONESTEP_STD': [time_std_onestep, sizes_std_onestep, rsum_std_onestep, coverage_std_onestep],
    'TWOSTEP_MEAN': [time_mean_twostep, sizes_mean_twostep, rsum_mean_twostep, coverage_mean_twostep],
    'TWOSTEP_STD': [time_std_twostep, sizes_std_twostep, rsum_std_twostep, coverage_std_twostep],
    'MEAN_DIFF_%': [time_mean_diff, sizes_mean_diff, rsum_mean_diff, coverage_mean_diff],
    'STD_DIFF_%': [time_std_diff, sizes_std_diff, rsum_std_diff, coverage_std_diff],
    'POINTWISE_MEAN_%': [time_relative_mean, sizes_relative_mean, rsum_relative_mean, coverage_relative_mean],
    'POINTWISE_STD_%': [time_relative_std, sizes_relative_std, rsum_relative_std, coverage_relative_std]
}
# Display and save
all_metrics_df = pd.DataFrame(all_metrics_data)
display(all_metrics_df)
all_metrics_df.to_csv(f'{result_path}/results_{p_value}.csv', index=False)

#Save Raw Metric Data
raw_df = pd.DataFrame({
    "times_onestep": times_onestep, 
    "times_twostep": times_twostep,
    "sizes_onestep": sizes_onestep, 
    "sizes_twostep": sizes_twostep,
    "rsum_onestep": rsum_onestep, 
    "rsum_twostep": rsum_twostep,
    "coverage_onestep": coverage_onestep, 
    "coverage_twostep": coverage_twostep,
    "time_relative_%": time_relative_pointwise,
    "sizes_relative_%": sizes_relative_pointwise,
    "rsum_relative_%": rsum_relative_pointwise,
    "coverage_relative_%": coverage_relative_pointwise
})

# Save to CSV
raw_df.to_csv(f"{result_path}/raw_metric_data_{p_value}.csv", index=False)



  return np.where(old != 0, ((new - old) / old) * 100, np.nan)


Unnamed: 0,Metric,ONESTEP_MEAN,ONESTEP_STD,TWOSTEP_MEAN,TWOSTEP_STD,MEAN_DIFF_%,STD_DIFF_%,POINTWISE_MEAN_%,POINTWISE_STD_%
0,Time,55.805916,58.495814,125.712468,99.375159,125.267277,69.884222,397.986322,831.05369
1,Size,51.282051,3.864359,51.282051,3.864359,0.0,0.0,0.0,0.0
2,Ranges_Sum,25.94071,7.283984,28.553896,5.548288,10.073685,-23.828936,16.331843,32.581119
3,Coverage,1.0,0.0,1.0,0.0,0.0,,0.0,0.0


In [14]:
all_metrics_df.to_csv(f'{result_path}/results_{p_value}.csv', index=False)
raw_df.to_csv(f"{result_path}/raw_metric_data_{p_value}.csv", index=False)