In [8]:
import glob
from pathlib import Path

import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score

# Compute precision / recall / F1 of each prediction column against the
# manually inspected ground truth, for every CSV in a folder, and print the
# results as a LaTeX table (rows: metric x predictor, columns: file).

# Define the folder and file pattern
folder_path = 'validation_repository/'  # Replace with your folder path
file_pattern = '*.csv'  # Adjust if needed for specific file types

# Prediction columns and ground truth column
predictions = ['experience-based', 'statistics-based', 'tuning machine']
ground_truth = 'manual inspection'

# One dict per (file, predictor) pair; converted to a DataFrame below
results = []

# Join folder and pattern with pathlib so a missing trailing slash in
# folder_path cannot silently change the pattern, and sort the matches so the
# processing order does not depend on the filesystem.
csv_files = sorted(glob.glob(str(Path(folder_path) / file_pattern)))
if not csv_files:
    # Without this guard the melt() below fails with an opaque KeyError.
    raise FileNotFoundError(
        f"No files matching {file_pattern!r} found in {folder_path!r}."
    )

required_columns = predictions + [ground_truth]

# Loop through each file in the folder
for file in csv_files:
    # File name without directory or extension, used as the table column
    # label. Path.stem handles both '/' and '\\' separators.
    file_name = Path(file).stem

    # Load the data
    data = pd.read_csv(file)

    # Fail early, naming the file, if an expected column is absent
    missing_columns = [c for c in required_columns if c not in data.columns]
    if missing_columns:
        raise ValueError(f"File {file} is missing required columns: {missing_columns}")

    # Ensure binary values in the relevant columns (NaN also fails this check)
    for column in required_columns:
        if not data[column].isin([0, 1]).all():
            raise ValueError(f"Column {column} in {file} contains non-binary values. Please preprocess the data.")

    y_true = data[ground_truth]

    # Calculate metrics for each prediction method
    for pred in predictions:
        precision = precision_score(y_true, data[pred])
        recall = recall_score(y_true, data[pred])
        f1 = f1_score(y_true, data[pred])

        # Metrics are stored as 2-decimal strings so the LaTeX table shows
        # them exactly as formatted here.
        results.append({
            'File': file_name,
            'Predictor': pred,
            'Precision': f"{precision:.2f}",
            'Recall': f"{recall:.2f}",
            'F1-Score': f"{f1:.2f}",
        })

# Convert results to a DataFrame
results_df = pd.DataFrame(results)

# Reshape to long form (one row per file/predictor/metric), then pivot so each
# file becomes a column and (Metric, Predictor) forms the row index.
latex_table = results_df.melt(
    id_vars=['File', 'Predictor'],
    var_name='Metric',
    value_name='Value',
).pivot(
    index=['Metric', 'Predictor'],
    columns='File',
    values='Value',
)

# Convert the pivoted table to LaTeX
latex_code = latex_table.to_latex(index=True, multirow=True)
print(latex_code)


\begin{tabular}{llrrrrrrrrrr}
\toprule
 & File & ComplexContainerComprehension & LargeClass & LongBaseClassList & LongLambdaFunction & LongMessageChain & LongMethod & LongParameterList & LongScopeChaining & LongTernaryConditionalExpression & MultiplyNestedContainer \\
Metric & Predictor &  &  &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{F1-Score} & experience-based & 0.636364 & 0.875000 & 0.984127 & 0.835821 & 0.916667 & 0.961538 & 0.984127 & 0.857143 & 0.764706 & 0.950820 \\
 & statistics-based & 0.846154 & 0.846154 & 0.861111 & 0.900000 & 0.974359 & 0.862069 & 0.876712 & 0.971429 & 0.851852 & 0.857143 \\
 & tuning machine & 0.915254 & 0.933333 & 0.984127 & 0.965517 & 0.974359 & 0.961538 & 0.984127 & 0.971429 & 0.896552 & 0.950820 \\
\cline{1-12}
\multirow[t]{3}{*}{Precision} & experience-based & 0.466667 & 0.777778 & 0.968750 & 0.717949 & 0.970588 & 0.925926 & 1.000000 & 0.750000 & 0.619048 & 0.935484 \\
 & statistics-based & 0.916667 & 0.916667 & 0.756098 & 0.843750 & 0.95