# Aggregate csv files from multiple experiments

## Get report folders from Github
Upload report files, get data from github repository, just specify experiment number to get files from, if you want to upload files manually skip these two cells or set the filed to 0

In [63]:
# set the experiment number to take files from
# set 0 to skip and upload files manually
experiment_number = 4

In [64]:
# Import the data
import requests
import pandas as pd
import os

if experiment_number > 0:
  # get the reports from GitHub
  folder_path = f"exp_{experiment_number}"
  repo_owner = "asmeta"
  repo_name = "asmeta"
  branch_name = "isaac"
  folder_path = f"code/experimental/asmeta.evotest/asmeta.evotest.experiments/data/exp_{experiment_number}"

  # GitHub URL API
  api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/{folder_path}?ref={branch_name}"

  # Github API
  response = requests.get(api_url)
  subfolders = response.json()

  # Loop through each subfolder
  for subfolder in subfolders:
      if subfolder["type"] == "dir":
          subfolder_url = subfolder["url"]
          response = requests.get(subfolder_url)
          files = response.json()

          # Download CSV files from the subfolder
          for file in files:
              if file["name"].endswith(".csv"):
                  file_url = file["download_url"]

                  # Create the directory if it doesn't exist
                  file_path = os.path.join("/content/", subfolder["name"], file["name"])  # Preserve subfolder structure
                  os.makedirs(os.path.dirname(file_path), exist_ok=True)

                  with open(file_path, "wb") as f:
                      f.write(requests.get(file_url).content)

## Build the benchmark file

In [65]:
import os
import pandas as pd
import re

# Compares benchmarks.csv files in subfolders and saves a consolidated file.

first_benchmark = None  # To store the first benchmark file for comparison
all_benchmarks_same = True  # Flag to track if all benchmarks are the same

for subdir, dirs, files in os.walk("/content/"):
    # Skip subfolders that don't match the numeric pattern
    if not re.fullmatch(r'\d+', os.path.basename(subdir)):
        continue
    benchmark_path = os.path.join(subdir, 'benchmark.csv')
    if os.path.exists(benchmark_path):
        # Read the benchmark file
        current_benchmark = pd.read_csv(benchmark_path)

        # Compare with the first benchmark file
        if first_benchmark is None:
            first_benchmark = current_benchmark
        elif not current_benchmark.equals(first_benchmark):
            all_benchmarks_same = False
            print(f"Warning: Benchmark file in '{subdir}' is different.")

# Save the benchmark file to the root directory if all are the same
if all_benchmarks_same and first_benchmark is not None:
    first_benchmark.to_csv(os.path.join("/content/", 'benchmark.csv'), index=False)
    print("Benchmarks are the same. Saved to '/content/benchmark.csv'.")
else:
    print("Benchmarks are different. No consolidated file saved.")

Benchmarks are the same. Saved to '/content/benchmark.csv'.


## Aggregates execution time files

In [66]:
import os
import pandas as pd
import re

# Aggregates execution_time.csv files from subfolders and saves a consolidated file.

all_data = []  # To store data from all execution_time.csv files

for subdir, dirs, files in os.walk("/content/"):
    # Skip subfolders that don't match the numeric pattern
    if not re.fullmatch(r'\d+', os.path.basename(subdir)):
        continue

    execution_time_path = os.path.join(subdir, 'execution_time.csv')
    if os.path.exists(execution_time_path):
        # Read the execution_time.csv file
        df = pd.read_csv(execution_time_path)
        all_data.append(df)

if all_data:
    # Concatenate all data into a single DataFrame
    combined_df = pd.concat(all_data, ignore_index=True)

    # Calculate the mean of columns (excluding 'AsmSpec')
    aggregated_df = combined_df.groupby('AsmSpec').agg({
        'EvoAvalla': 'mean',
        'Random': 'mean',
        'Atgt': 'mean'
    }).reset_index()

    # Save the aggregated data to a new CSV file
    aggregated_df.to_csv(os.path.join("/content/", 'execution_time.csv'), index=False)
    print("Aggregated execution times saved to '/content/execution_time.csv'.")
else:
    print("No execution_time.csv files found in subfolders.")

Aggregated execution times saved to '/content/execution_time.csv'.


## Aggregates report files

In [67]:
import os
import pandas as pd
import re
import glob

def aggregate_specific_report(report_file_name, root_dir='/content/'):
    """
    Aggregates a specific report CSV file from subfolders and saves a consolidated file.

    Args:
        report_file_name: The name of the report CSV file to process (e.g., 'report_evoavalla.csv').
        root_dir: The root directory to search for subfolders. Defaults to '/content/'.
    """
    aggregated_data = {}  # To store aggregated data
    experiments = 0 # number of experiments

    for subdir, dirs, files in os.walk(root_dir):
        # Skip subfolders that don't match the numeric pattern
        if not re.fullmatch(r'\d+', os.path.basename(subdir)):
            continue

        # Find the specific report CSV file in the subfolder
        report_file_path = os.path.join(subdir, report_file_name)
        if os.path.exists(report_file_path):
            df = pd.read_csv(report_file_path)
            experiments += 1

            for index, row in df.iterrows():
                key = (row['asm_name'], row['rule_signature'])

                if key in aggregated_data:
                    existing_data = aggregated_data[key]
                    # Check if tot_conditional_rules and tot_update_rules are the same
                    if (existing_data['tot_conditional_rules'][0] == row['tot_conditional_rules'] and
                            existing_data['tot_update_rules'][0] == row['tot_update_rules']):
                        existing_data['covered_true_conditional_rules'].append(row['covered_true_conditional_rules'])
                        existing_data['covered_false_conditional_rules'].append(row['covered_false_conditional_rules'])
                        existing_data['covered_update_rules'].append(row['covered_update_rules'])
                    else:
                        print(f"Warning: Inconsistent tot_conditional_rules or tot_update_rules for key: {key}")
                else:
                    aggregated_data[key] = {
                        'execution_id': ["aggregate_values_" + row['asm_name']],
                        'tot_conditional_rules': [row['tot_conditional_rules']],
                        'tot_update_rules': [row['tot_update_rules']],
                        'covered_true_conditional_rules': [row['covered_true_conditional_rules']],
                        'covered_false_conditional_rules': [row['covered_false_conditional_rules']],
                        'covered_update_rules': [row['covered_update_rules']],
                        'failing_scenarios': ["none"]
                    }

    # Create a new DataFrame from the aggregated data
    aggregated_df = pd.DataFrame(columns=['execution_id', 'asm_name', 'rule_signature', 'tot_conditional_rules',
                                         'covered_true_conditional_rules', 'covered_false_conditional_rules',
                                         'tot_update_rules', 'covered_update_rules', 'failing_scenarios'])

    for key, data in aggregated_data.items():
        aggregated_df = pd.concat([aggregated_df, pd.DataFrame({
            'execution_id': data['execution_id'][0],
            'asm_name': key[0],
            'rule_signature': key[1],
            'tot_conditional_rules': data['tot_conditional_rules'][0],
            'covered_true_conditional_rules': [pd.Series(data['covered_true_conditional_rules']).sum() / experiments],
            'covered_false_conditional_rules': [pd.Series(data['covered_false_conditional_rules']).sum() / experiments],
            'tot_update_rules': data['tot_update_rules'][0],
            'covered_update_rules': [pd.Series(data['covered_update_rules']).sum() / experiments],
            'failing_scenarios': data['failing_scenarios'][0]
        })], ignore_index=True)

    aggregated_df.to_csv(os.path.join(root_dir, report_file_name), index=False)
    print(f"Aggregated {report_file_name} saved to '/content/{report_file_name}'.")


### Aggregate Random Report

In [68]:
# Call the function to execute with the specific report file name
aggregate_specific_report('report_random.csv')

Aggregated report_random.csv saved to '/content/report_random.csv'.


  aggregated_df = pd.concat([aggregated_df, pd.DataFrame({


### Aggregate EvoAvalla Report

In [69]:
# Call the function to execute with the specific report file name
aggregate_specific_report('report_evoavalla.csv')

Aggregated report_evoavalla.csv saved to '/content/report_evoavalla.csv'.


  aggregated_df = pd.concat([aggregated_df, pd.DataFrame({


### Aggregate Atgt Report

In [70]:
# Call the function to execute with the specific report file name
aggregate_specific_report('report_atgt.csv')

  aggregated_df = pd.concat([aggregated_df, pd.DataFrame({


Aggregated report_atgt.csv saved to '/content/report_atgt.csv'.
