In [6]:
import pandas as pd
import os

# Directory holding the per-attack performance-metric CSVs.
# A raw string cannot end in a single backslash, and os.path.join() inserts
# the separator itself, so the directory is written without a trailing one
# (the previous r'...\\' ended in two literal backslashes).
base_path = r'C:\Users\willi\CAN_experiments'

# List of CSV files to process; the attack name is whatever sits between the
# 'performance_metrics_' prefix and the '.csv' extension.
csv_files = [
    'performance_metrics_correlated_signal.csv',
    'performance_metrics_max_speedometer.csv',
    'performance_metrics_reverse_light_off.csv',
    'performance_metrics_reverse_light_on.csv',
    'performance_metrics_max_engine_coolant.csv'
]

# Initialize a dictionary to store the results (attack type -> version -> stats)
all_stats = {}

# Function to calculate stats for a given DataFrame
def calculate_stats(df):
    min_val = round(df['roc_auc'].min(), 2)
    max_val = round(df['roc_auc'].max(), 2)
    min_window_offset = df.loc[df['roc_auc'].idxmin(), ['window_size', 'offset']].to_list()
    max_window_offset = df.loc[df['roc_auc'].idxmax(), ['window_size', 'offset']].to_list()
    stats = {
        'μ': round(df['roc_auc'].mean(), 2),
        'σ': round(df['roc_auc'].std(), 2),
        'η': round(df['roc_auc'].median(), 2),
        'min': f"{min_val} ({min_window_offset[0]}, {min_window_offset[1]})",
        'max': f"{max_val} ({max_window_offset[0]}, {max_window_offset[1]})"
    }
    return stats

# Iterate over each file and calculate stats
for file in csv_files:
    file_path = os.path.join(base_path, file)
    df = pd.read_csv(file_path)
    attack_type = file.replace('performance_metrics_', '').replace('.csv', '')

    # Filter the DataFrame for 'embeddings_only' and 'normalized'
    embeddings_only_df = df[df['version'] == 'embeddings_only']
    normalized_df = df[df['version'] == 'normalized']
    version_frames = {
        'embeddings_only': embeddings_only_df,
        'normalized': normalized_df,
    }

    # Store stats only for versions that have at least one usable roc_auc value.
    # calculate_stats() relies on idxmin()/idxmax(), which cannot handle an
    # empty column, and the reporting code already tests `'<version>' in stats`
    # before reading an entry, so a missing version is simply left out.
    all_stats[attack_type] = {
        version: calculate_stats(version_df)
        for version, version_df in version_frames.items()
        if version_df['roc_auc'].notna().any()
    }

# Function to print stats for a given attack type
def print_stats(attack_type, stats):
    """Print the summary statistics of one attack type, version by version.

    Parameters
    ----------
    attack_type
        Name interpolated into each section header.
    stats : dict
        Maps a version name ('embeddings_only', 'normalized') to a dict with
        the keys 'μ', 'σ', 'η', 'min' and 'max'. A version may be absent, in
        which case a "No data available" line is printed under its header.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    # (label shown to the reader, key in the per-version stats dict), in print order
    rows = (
        ('μ (mean)', 'μ'),
        ('σ (standard deviation)', 'σ'),
        ('η (median)', 'η'),
        ('min', 'min'),
        ('max', 'max'),
    )

    # Both versions share one layout, so a single loop replaces the two
    # copy-pasted branches.
    for version in ('embeddings_only', 'normalized'):
        print(f"\nStatistics for '{attack_type}' attack type ({version}):")
        if version not in stats:
            print(f"No data available for '{version}' version.")
            continue
        version_stats = stats[version]
        for label, key in rows:
            print(f"{label}: {version_stats[key]}")

# Print the per-version summary of every attack type gathered so far
for attack_type in all_stats:
    print_stats(attack_type, all_stats[attack_type])

# Per attack type: mean and standard deviation over the best score of each
# version ('embeddings_only' first, then 'normalized')
combined_max_stats = {}

for attack_type, stats in all_stats.items():
    # Each 'max' entry starts with the score, followed by a parenthesised
    # suffix; only that leading token is converted. Versions without an
    # entry contribute nothing.
    best_scores = pd.Series([
        float(stats[version]['max'].split()[0])
        for version in ('embeddings_only', 'normalized')
        if version in stats
    ])

    combined_max_stats[attack_type] = {
        'mean_max_combined': round(best_scores.mean(), 2),
        'std_max_combined': round(best_scores.std(), 2)
    }

# Report the combined figures, one section per attack type
for attack_type, stats in combined_max_stats.items():
    print(f"\nStatistics for '{attack_type}' attack type (combined embeddings_only and normalized):")
    print(f"Average of max (max̅): {stats['mean_max_combined']}")
    print(f"Standard deviation of max (σmax): {stats['std_max_combined']}")




Statistics for 'correlated_signal' attack type (embeddings_only):
μ (mean): 0.94
σ (standard deviation): 0.02
η (median): 0.93
min: 0.9 (15, 8)
max: 0.98 (9, 6)

Statistics for 'correlated_signal' attack type (normalized):
μ (mean): 0.96
σ (standard deviation): 0.02
η (median): 0.96
min: 0.92 (6, 3)
max: 0.99 (13, 7)

Statistics for 'max_speedometer' attack type (embeddings_only):
μ (mean): 0.91
σ (standard deviation): 0.03
η (median): 0.91
min: 0.85 (4, 1)
max: 0.98 (14, 2)

Statistics for 'max_speedometer' attack type (normalized):
μ (mean): 0.94
σ (standard deviation): 0.03
η (median): 0.93
min: 0.87 (13, 10)
max: 0.99 (9, 2)

Statistics for 'reverse_light_off' attack type (embeddings_only):
μ (mean): 0.93
σ (standard deviation): 0.02
η (median): 0.93
min: 0.89 (6, 3)
max: 0.98 (14, 11)

Statistics for 'reverse_light_off' attack type (normalized):
μ (mean): 0.96
σ (standard deviation): 0.02
η (median): 0.96
min: 0.91 (6, 3)
max: 0.99 (5, 5)

Statistics for 'reverse_light_on' attack

In [7]:
# Best ROC AUC of every attack type, grouped by version
overall_max_stats = {
    'embeddings_only': [],
    'normalized': []
}

# Collect the max values for each version across all attack types.
# Each 'max' entry starts with the score, followed by a parenthesised suffix;
# only that leading token is converted. Missing versions are skipped.
for attack_type, stats in all_stats.items():
    for version, max_values in overall_max_stats.items():
        if version in stats:
            max_values.append(float(stats[version]['max'].split()[0]))

# Print collected max values for verification
print("Collected max values for 'embeddings_only':", overall_max_stats['embeddings_only'])
print("Collected max values for 'normalized':", overall_max_stats['normalized'])

# Calculate the mean and standard deviation for each version
for version, max_values in overall_max_stats.items():
    # Build the Series once; the explicit dtype keeps an empty list numeric.
    max_series = pd.Series(max_values, dtype=float)
    mean_max = round(max_series.mean(), 2)
    std_max = round(max_series.std(), 2)
    print(f"\nStatistics for '{version}' version (all attack types):")
    # U+0305 is a combining overline: it decorates the character *before* it,
    # so it has to follow "max" (placed first, it attached to the "(").
    print(f"Average of max (max\u0305): {mean_max}")  # max with overline
    print(f"Standard deviation of max (σmax): {std_max}")  # sigma max


Collected max values for 'embeddings_only': [0.98, 0.98, 0.98, 0.97, 0.98]
Collected max values for 'normalized': [0.99, 0.99, 0.99, 0.99, 0.99]

Statistics for 'embeddings_only' version (all attack types):
Average of max (̅max): 0.98
Standard deviation of max (σmax): 0.0

Statistics for 'normalized' version (all attack types):
Average of max (̅max): 0.99
Standard deviation of max (σmax): 0.0
