In [1]:
import glob
import os

import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score

# Where the validation CSVs live and which files inside that folder to read.
# Point `folder_path` at your own data; narrow `file_pattern` if needed.
folder_path = 'validation_repository/'
file_pattern = '*.csv'

# Column holding the reference labels, and the columns holding the output of
# each prediction method that is scored against it.
ground_truth = 'manual inspection'
predictions = [
    'experience-based',
    'statistics-based',
    'tuning machine',
]

# Accumulator: one dict per (file, predictor) pair is appended to this list.
results = []

# Score every matching CSV in the folder. The matches are sorted because
# glob returns them in arbitrary order; sorting keeps the row order of
# `results` reproducible from run to run.
for file in sorted(glob.glob(os.path.join(folder_path, file_pattern))):
    # Use the base name without its extension as the table column label.
    # os.path handles both '/' and '\\' separators and strips only the final
    # extension, unlike splitting on '/' and replacing '.csv' anywhere.
    file_name = os.path.splitext(os.path.basename(file))[0]

    # Load the data
    data = pd.read_csv(file)

    # Fail with the file name if an expected column is absent, rather than
    # with a bare KeyError from the column lookup below.
    required_columns = predictions + [ground_truth]
    missing_columns = [col for col in required_columns if col not in data.columns]
    if missing_columns:
        raise ValueError(f"{file} is missing required column(s): {missing_columns}")

    # The metrics below are computed on 0/1 labels, so reject anything else
    # (including missing values) up front.
    for column in required_columns:
        if not data[column].isin([0, 1]).all():
            raise ValueError(f"Column {column} in {file} contains non-binary values. Please preprocess the data.")

    # The reference labels are the same for every predictor; look them up once.
    y_true = data[ground_truth]

    # Calculate metrics for each prediction method
    for pred in predictions:
        y_pred = data[pred]
        precision = precision_score(y_true, y_pred)
        recall = recall_score(y_true, y_pred)
        f1 = f1_score(y_true, y_pred)

        # Store the metrics pre-formatted to two decimals, as they will be
        # printed verbatim in the LaTeX table.
        results.append({
            'File': file_name,
            'Predictor': pred,
            'Precision': f"{precision:.2f}",
            'Recall': f"{recall:.2f}",
            'F1-Score': f"{f1:.2f}",
        })

# If no CSV matched, `results` is empty and the reshaping below would fail
# with a KeyError about the missing 'File'/'Predictor' columns. Report the
# real problem (no input files) instead.
if not results:
    raise FileNotFoundError(
        f"No files matching {file_pattern!r} were found in {folder_path!r}."
    )

# One row per (file, predictor) with a column per metric.
results_df = pd.DataFrame(results)

# Go to long form first: one row per (file, predictor, metric) value.
long_df = results_df.melt(
    id_vars=['File', 'Predictor'],
    var_name='Metric',
    value_name='Value',
)

# Then spread the files across columns, leaving (Metric, Predictor) as the
# row index so each metric forms a group of predictor rows in the table.
latex_table = long_df.pivot(
    index=['Metric', 'Predictor'],
    columns='File',
    values='Value',
)

# Render the table as LaTeX; multirow=True merges the repeated Metric labels.
latex_code = latex_table.to_latex(index=True, multirow=True)
print(latex_code)


\begin{tabular}{llllllllllll}
\toprule
 & File & CCC & LBC & LC & LLF & LM & LMC & LML & LSC & LTCE & MNC \\
Metric & Predictor &  &  &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{F1-Score} & experience-based & 0.64 & 0.98 & 0.88 & 0.84 & 0.96 & 0.92 & 0.98 & 0.86 & 0.76 & 0.95 \\
 & statistics-based & 0.85 & 0.86 & 0.85 & 0.90 & 0.86 & 0.97 & 0.88 & 0.97 & 0.85 & 0.86 \\
 & tuning machine & 0.92 & 0.98 & 0.93 & 0.97 & 0.96 & 0.97 & 0.98 & 0.97 & 0.90 & 0.95 \\
\cline{1-12}
\multirow[t]{3}{*}{Precision} & experience-based & 0.47 & 0.97 & 0.78 & 0.72 & 0.93 & 0.97 & 1.00 & 0.75 & 0.62 & 0.94 \\
 & statistics-based & 0.92 & 0.76 & 0.92 & 0.84 & 0.76 & 0.95 & 0.78 & 1.00 & 0.82 & 0.92 \\
 & tuning machine & 0.87 & 0.97 & 0.88 & 0.93 & 0.93 & 0.95 & 1.00 & 1.00 & 0.81 & 0.94 \\
\cline{1-12}
\multirow[t]{3}{*}{Recall} & experience-based & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 0.87 & 0.97 & 1.00 & 1.00 & 0.97 \\
 & statistics-based & 0.79 & 1.00 & 0.79 & 0.96 & 1.00 & 1.00 & 1.00 & 0.9