In [14]:
import pandas as pd
from scipy.stats import mannwhitneyu, ks_2samp

# Load the performance metrics CSV file
input_csv_path = r"C:\Users\willi\CAN_experiments\New_Experiments\attack\final_consolidated_performance_metrics.csv"
performance_df = pd.read_csv(input_csv_path)

# Get unique attack types
attack_types = performance_df['attack_type'].unique()

# Initialize dictionaries to store the test results
mannwhitneyu_results = {}
ks_results = {}

# Perform the tests for each attack type
for attack_type in attack_types:
    # Filter the data for the current attack type
    attack_data = performance_df[performance_df['attack_type'] == attack_type]
    
    # Separate the data into the two versions
    embeddings_only = attack_data[attack_data['version'] == 'embeddings_only_normalized']['roc_auc']
    all_normalized = attack_data[attack_data['version'] == 'all_normalized']['roc_auc']
    
    # Mann-Whitney U test with alternative='greater'
    u_statistic, p_value_u = mannwhitneyu(all_normalized, embeddings_only, alternative='greater')
    mannwhitneyu_results[attack_type] = {'U Statistic': u_statistic, 'P Value': p_value_u}
    
    # Two-sided Kolmogorov-Smirnov test
    ks_statistic, p_value_ks_two_sided = ks_2samp(all_normalized, embeddings_only)
    
    # Convert to one-sided p-value
    p_value_ks_one_sided = p_value_ks_two_sided / 2 if all_normalized.mean() > embeddings_only.mean() else 1 - p_value_ks_two_sided / 2
    ks_results[attack_type] = {'KS Statistic': ks_statistic, 'P Value': p_value_ks_one_sided}

# Print the results
for attack_type in mannwhitneyu_results:
    print(f"\nResults for '{attack_type}' attack type:")
    print(f"Mann-Whitney U Test - U Statistic: {mannwhitneyu_results[attack_type]['U Statistic']}, P Value: {mannwhitneyu_results[attack_type]['P Value']}")
    print(f"One-sided Kolmogorov-Smirnov Test - KS Statistic: {ks_results[attack_type]['KS Statistic']}, P Value: {ks_results[attack_type]['P Value']}")




Results for 'correlated_signal_attack' attack type:
Mann-Whitney U Test - U Statistic: 1069.5, P Value: 9.838866122260563e-07
One-sided Kolmogorov-Smirnov Test - KS Statistic: 0.5833333333333334, P Value: 2.6111215754171615e-06

Results for 'max_engine_coolant' attack type:
Mann-Whitney U Test - U Statistic: 811.5, P Value: 0.033126121206945455
One-sided Kolmogorov-Smirnov Test - KS Statistic: 0.3333333333333333, P Value: 0.01796614914874737

Results for 'max_speedometer_attack' attack type:
Mann-Whitney U Test - U Statistic: 1085.0, P Value: 4.197807086982331e-07
One-sided Kolmogorov-Smirnov Test - KS Statistic: 0.5555555555555556, P Value: 9.302120612423639e-06

Results for 'reverse_light_off_attack' attack type:
Mann-Whitney U Test - U Statistic: 1040.5, P Value: 4.837230070325178e-06
One-sided Kolmogorov-Smirnov Test - KS Statistic: 0.5, P Value: 9.362918786361047e-05

Results for 'reverse_light_on_attack' attack type:
Mann-Whitney U Test - U Statistic: 1112.0, P Value: 8.35038834

In [15]:
import pandas as pd

# Load the provided CSV file
input_csv_path = r"C:\Users\willi\CAN_experiments\New_Experiments\attack\final_consolidated_performance_metrics.csv"
performance_df = pd.read_csv(input_csv_path)

# Get unique attack types
attack_types = performance_df['attack_type'].unique()

# Initialize a dictionary to store sample sizes
sample_sizes = {}

# Iterate over each attack type
for attack_type in attack_types:
    # Filter the data for the current attack type
    attack_data = performance_df[performance_df['attack_type'] == attack_type]
    
    # Calculate sample sizes for each version
    embeddings_only_size = attack_data[attack_data['version'] == 'embeddings_only_normalized'].shape[0]
    all_normalized_size = attack_data[attack_data['version'] == 'all_normalized'].shape[0]
    
    # Store the sample sizes in the dictionary
    sample_sizes[attack_type] = {
        'embeddings_only_normalized': embeddings_only_size,
        'all_normalized': all_normalized_size
    }

# Print the sample sizes
for attack_type, sizes in sample_sizes.items():
    print(f"Sample sizes for '{attack_type}' attack type:")
    print(f"  Embeddings Only: {sizes['embeddings_only_normalized']}")
    print(f"  Normalized: {sizes['all_normalized']}")


Sample sizes for 'correlated_signal_attack' attack type:
  Embeddings Only: 36
  Normalized: 36
Sample sizes for 'max_engine_coolant' attack type:
  Embeddings Only: 36
  Normalized: 36
Sample sizes for 'max_speedometer_attack' attack type:
  Embeddings Only: 36
  Normalized: 36
Sample sizes for 'reverse_light_off_attack' attack type:
  Embeddings Only: 36
  Normalized: 36
Sample sizes for 'reverse_light_on_attack' attack type:
  Embeddings Only: 36
  Normalized: 36
