# In [20]:
import os
import pandas as pd

def extract_and_save_averages_with_replica(
    dir1: str, dir2: str, output_csv: str = "combined_averages.csv"
) -> None:
    """
    Combine the 'Replica' and 'Average' columns of the CSV files in two directories.

    Every ``*.csv`` file located directly inside ``dir1`` and then ``dir2`` is read
    (in sorted file-name order within each directory) and contributes one output
    row per input row. The output file has the columns:

    * ``Index``   - running row number, starting at 1 and shared across all files.
    * ``F1``      - "Kserve" for rows coming from ``dir1``, "Local" for ``dir2``.
    * ``F2``      - looked up from the second ``_``-separated token of the file name
      (``P2``/``P3``/``P12`` -> "1/2"/"1/3"/"1/12"), otherwise "Unknown".
    * ``F3``      - looked up from the third ``_``-separated token of the file name
      (``I1``/``I4``/``I6`` -> 1/4/6), otherwise "Unknown".
    * ``Replica`` / ``Average`` - copied from the input file.

    Files that cannot be read, or that lack a 'Replica' or 'Average' column, are
    reported on stdout and skipped. A file that resolves to ``output_csv`` itself
    is skipped silently, so re-running with the output stored inside one of the
    input directories does not feed the previous result back in.

    Args:
        dir1 (str): Path to the first directory containing CSV files.
        dir2 (str): Path to the second directory containing CSV files.
        output_csv (str): Path to save the combined averages CSV file.

    Raises:
        OSError: If ``dir1`` or ``dir2`` cannot be listed, or ``output_csv``
            cannot be written.
    """
    # File-name tokens -> values written to the F2 and F3 columns.
    period_mapping = {"P2": "1/2", "P3": "1/3", "P12": "1/12"}
    instances_mapping = {"I1": 1, "I4": 4, "I6": 6}

    # Resolved once so an earlier output file sitting in an input directory
    # can be recognised regardless of how its path is spelled.
    output_path = os.path.realpath(output_csv)

    def extract_rows(directory, label):
        """Return [label, f2, f3, replica, average] rows for every usable CSV in *directory*."""
        rows = []
        for file in sorted(f for f in os.listdir(directory) if f.endswith(".csv")):
            file_path = os.path.join(directory, file)

            # Never treat our own (previous) output as an input file.
            if os.path.realpath(file_path) == output_path:
                continue

            # Best-effort read: one bad file is reported but must not abort the run.
            try:
                df = pd.read_csv(file_path)
            except Exception as e:
                print(f"Error reading {file}: {e}")
                continue

            if 'Replica' not in df.columns or 'Average' not in df.columns:
                print(f"'Replica' or 'Average' column not found in {file}")
                continue

            # File names are expected to look like <prefix>_<period>_<instances>.csv;
            # missing or unrecognised tokens fall back to "Unknown".
            tokens = os.path.splitext(file)[0].split("_")
            period = tokens[1] if len(tokens) > 1 else "Unknown"
            instances = tokens[2] if len(tokens) > 2 else "Unknown"
            f2 = period_mapping.get(period, "Unknown")
            f3 = instances_mapping.get(instances, "Unknown")

            rows.extend(
                [label, f2, f3, replica, average]
                for replica, average in zip(df['Replica'].tolist(), df['Average'].tolist())
            )
        return rows

    # dir1 is processed before dir2 so its rows receive the lower Index values.
    rows = extract_rows(dir1, "Kserve") + extract_rows(dir2, "Local")

    # Number the rows 1..N across both directories.
    combined_data = [[index, *row] for index, row in enumerate(rows, start=1)]

    combined_df = pd.DataFrame(
        combined_data, columns=["Index", "F1", "F2", "F3", "Replica", "Average"]
    )
    combined_df.to_csv(output_csv, index=False)

# Example usage: merge the CSV results of both result directories into one file.
directory1 = "kserve/results/csv"
directory2 = "local/results/csv"
output_csv = "all_tests.csv"  # Name of the combined output CSV file
extract_and_save_averages_with_replica(
    dir1=directory1,
    dir2=directory2,
    output_csv=output_csv,
)