In [1]:
import os
import pandas as pd

def construct_entropy_folder_path(entropy):
    """Build the relative path of the results folder for one entropy level.

    Args:
        entropy: Value interpolated into the folder name; it is formatted
            as-is, so both ``1`` and ``"1"`` give ``.../entropy_1``.

    Returns:
        str: The path ``./results/stationary_distribution/robustness_analysis/entropy_<entropy>``.
        The path is only formatted here; its existence is not checked.
    """
    entropy_folder_path = f"./results/stationary_distribution/robustness_analysis/entropy_{entropy}"
    return entropy_folder_path

def _summarize_attack_file(attack_file_path):
    """Count total and successful perturbations in one attack CSV.

    Args:
        attack_file_path: Path to an ``attack_*.csv`` file.

    Returns:
        tuple | None: ``(total_perturbations, successful_perturbations)``, or
        ``None`` (after printing the reason) when the file cannot be parsed or
        has no ``quality_preserved`` column.
    """
    try:
        df = pd.read_csv(attack_file_path)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError, OSError) as exc:
        # One broken file should not abort the summary of all the others.
        print(f"Skipping unreadable attack file {attack_file_path}: {exc}")
        return None

    if 'quality_preserved' not in df.columns:
        print(f"Skipping {attack_file_path}: no 'quality_preserved' column")
        return None

    # One data row is excluded from the count -- presumably the first row is
    # the unperturbed starting text; TODO confirm against the attack writer.
    # Clamp at zero so a header-only file does not report -1 perturbations.
    total_perturbations = max(len(df) - 1, 0)
    successful_perturbations = df['quality_preserved'].sum()
    return total_perturbations, successful_perturbations


def summarize_attack_data(entropy):
    """Print a per-file perturbation summary for one entropy level.

    Scans every ``output*`` directory inside the entropy folder, reads each
    ``attack_*.csv`` found in its ``corpuses`` subdirectory, and prints a table
    sorted by output folder and attack file name.

    Args:
        entropy: Entropy identifier passed to ``construct_entropy_folder_path``.

    Returns:
        None. All results and diagnostics are printed to stdout.
    """
    entropy_folder_path = construct_entropy_folder_path(entropy)

    # isdir (not exists): a regular file at this path would make listdir raise.
    if not os.path.isdir(entropy_folder_path):
        print(f"Entropy folder not found: {entropy_folder_path}")
        return

    summary_data = []

    # sorted() keeps the order of diagnostic messages stable across platforms.
    for output_folder in sorted(os.listdir(entropy_folder_path)):
        output_folder_path = os.path.join(entropy_folder_path, output_folder)
        if not (os.path.isdir(output_folder_path) and output_folder.startswith("output")):
            continue

        corpuses_path = os.path.join(output_folder_path, "corpuses")
        if not os.path.isdir(corpuses_path):
            print(f"No 'corpuses' subdirectory found in {output_folder_path}")
            continue

        for attack_file in sorted(os.listdir(corpuses_path)):
            if not (attack_file.startswith("attack_") and attack_file.endswith(".csv")):
                continue

            counts = _summarize_attack_file(os.path.join(corpuses_path, attack_file))
            if counts is None:
                continue

            total_perturbations, successful_perturbations = counts
            summary_data.append({
                'Output Folder': output_folder,
                'Corpuses Subdirectory': "corpuses",
                'Attack File': attack_file,
                'Total Perturbations': total_perturbations,
                'Successful Perturbations': successful_perturbations
            })

    if not summary_data:
        # Say so explicitly instead of finishing silently.
        print(f"No usable attack files found for entropy {entropy}")
        return

    summary_df = pd.DataFrame(summary_data).sort_values(['Output Folder', 'Attack File'])
    print(f"Summary for Entropy {entropy}:")
    # to_string renders the full table as plain text, without the index column.
    print(summary_df.to_string(index=False))

if __name__ == '__main__':
    # Strip surrounding whitespace so an entry such as "1 " still resolves to
    # the entropy_1 folder rather than a non-existent "entropy_1 " path.
    entropy = input("Enter entropy: ").strip()
    summarize_attack_data(entropy)


Summary for Entropy 1:
Output Folder Corpuses Subdirectory      Attack File  Total Perturbations  Successful Perturbations
     output_1              corpuses     attack_1.csv                  120                         8
     output_1              corpuses     attack_2.csv                  258                        31
     output_1              corpuses   attack_2_1.csv                 1258                        67
     output_1              corpuses attack_2_1_1.csv                 1509                        67
     output_1              corpuses attack_2_1_2.csv                 1509                        67
     output_1              corpuses     attack_3.csv                  600                        33
     output_1              corpuses   attack_3_1.csv                 1377                       103
     output_1              corpuses attack_3_1_1.csv                 1426                       120
     output_1              corpuses     attack_4.csv                  600    