In [11]:
import os
import pandas as pd

# Folder containing the Excel files (current directory)
folder_path = './'

# List of per-file result rows (one dict per processed workbook)
accuracy_results = []

# Column names of the summary CSV. The double space in 'Original  Accuracy'
# is kept as-is so existing consumers of the CSV keep working.
summary_columns = ['File', 'Modified Accuracy', 'Original  Accuracy']


def label_from_filename(file_name):
    """Return the summary-row label for a workbook file name.

    The extension is dropped. If the remaining name starts with 'output_',
    that prefix is removed; otherwise the full base name is returned.
    """
    base_name = os.path.splitext(file_name)[0]
    prefix = 'output_'
    if base_name.startswith(prefix):
        return base_name[len(prefix):]
    return base_name


def pick_model_columns(columns):
    """Return ``(dash_column, text_column)`` chosen from *columns*.

    ``dash_column`` is the first label starting with 'Model-';
    ``text_column`` is the first label starting with 'Model' but not
    'Model-'. Either element is None when no such column exists.
    Non-string labels (e.g. numeric Excel headers) are ignored instead of
    raising on ``.startswith``.
    """
    model_columns = [
        col for col in columns
        if isinstance(col, str) and col.startswith('Model')
    ]
    dash_column = next(
        (col for col in model_columns if col.startswith('Model-')), None
    )
    text_column = next(
        (col for col in model_columns if not col.startswith('Model-')), None
    )
    return dash_column, text_column


def match_accuracy(predictions, answers):
    """Return the percentage of rows where *predictions* equals *answers*.

    Values are compared case-insensitively by their string form, so columns
    that were loaded as numbers are handled instead of raising on the
    ``.str`` accessor. A row with a missing value on either side counts as
    a mismatch. The result is NaN when there are no rows.
    """
    # Mask out missing cells first: astype(str) would turn them into the
    # literal text 'nan'/'None', which must never count as a match.
    both_present = predictions.notna() & answers.notna()
    same_text = (
        predictions.astype(str).str.lower() == answers.astype(str).str.lower()
    )
    return (both_present & same_text).mean() * 100


# Iterate over all files in the folder; sorted so the CSV row order is
# reproducible (os.listdir returns entries in arbitrary order).
for file_name in sorted(os.listdir(folder_path)):
    if not file_name.endswith(('.xlsx', '.xls')):
        continue  # Not an Excel file
    if file_name.startswith('~$'):
        continue  # Lock file Excel creates next to an open workbook

    file_path = os.path.join(folder_path, file_name)
    some_variable = label_from_filename(file_name)

    # Load the Excel file into a DataFrame
    df = pd.read_excel(file_path)

    model_dash_column, model_text_column = pick_model_columns(df.columns)
    if (
        model_dash_column is None
        or model_text_column is None
        or 'answer' not in df.columns
    ):
        # Report the omission so a missing row in the summary is explainable.
        print(
            f"Skipping {file_name}: expected a 'Model-*' column, "
            "another 'Model*' column and an 'answer' column"
        )
        continue

    # Accuracy (in percent) of each model column against the reference answers
    model_dash_accuracy = match_accuracy(df[model_dash_column], df['answer'])
    model_text_accuracy = match_accuracy(df[model_text_column], df['answer'])

    accuracy_results.append({
        'File': some_variable,
        'Modified Accuracy': model_dash_accuracy,
        'Original  Accuracy': model_text_accuracy,
    })

# Convert the results to a DataFrame; explicit columns keep the CSV header
# present even when no workbook qualified.
accuracy_df = pd.DataFrame(accuracy_results, columns=summary_columns)

# Save the results to a CSV file
output_csv_path = './accuracy_summary.csv'
accuracy_df.to_csv(output_csv_path, index=False)

print(f"Accuracy summary saved to {output_csv_path}")


Accuracy summary saved to ./accuracy_summary.csv
