In [6]:
import os
import re
import pandas as pd

# Directory containing Weka result files
results_dir = './results'

# List to store parsed results
all_results = []

# Define refined regex patterns to match desired metrics in the file
patterns = {
    'Model': r'Scheme:\s+([\w\W]+)',
    'Accuracy': r'Correctly Classified Instances\s+\d+\s+([\d.]+) %',
    'Kappa': r'Kappa statistic\s+([\d.-]+)',
    'Mean Absolute Error': r'Mean absolute error\s+([\d.-]+)',
    'Root Mean Squared Error': r'Root mean squared error\s+([\d.-]+)',
    'TP Rate Class 0': r'\b0\s+([\d.]+)\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+0',
    'Precision Class 0': r'\b0\s+[\d.]+\s+[\d.]+\s+([\d.]+)\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+0',
    'TP Rate Class 1': r'\b1\s+([\d.]+)\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+1',
    'Precision Class 1': r'\b1\s+[\d.]+\s+[\d.]+\s+([\d.]+)\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+[\d.]+\s+1'
}

# Function to safely parse and convert matched values
def safe_convert(value):
    try:
        # Attempt to convert to float
        return float(value)
    except (TypeError, ValueError):
        # If conversion fails, return original value (string)
        return value

# Loop through each text file in the results directory
for filename in os.listdir(results_dir):
    if filename.endswith('.txt'):
        file_path = os.path.join(results_dir, filename)
        
        # Read the file content
        with open(file_path, 'r') as file:
            content = file.read()
        
        # Initialize dictionary for extracted data
        extracted_data = {'File': filename}
        
        # Extract metrics using regex patterns
        for metric, pattern in patterns.items():
            match = re.search(pattern, content)
            if match:
                extracted_data[metric] = safe_convert(match.group(1))
            else:
                extracted_data[metric] = None  # Capture missing values explicitly
        
        # Append the extracted data to results list
        all_results.append(extracted_data)

# Convert list of dictionaries to DataFrame for table format
results_df = pd.DataFrame(all_results)

# Save results to CSV file and display
results_df.to_csv('weka_summary_table.csv', index=False)
print(results_df)


                      File                                              Model  \
0  knn-1neighbor-kfold.txt  weka.classifiers.lazy.IBk -K 1 -W 0 -A "weka.c...   
1  knn-1neighbor-split.txt  weka.classifiers.lazy.IBk -K 1 -W 0 -A "weka.c...   

   Accuracy   Kappa  Mean Absolute Error  Root Mean Squared Error  \
0   98.0769  0.9596               0.0207                   0.1388   
1   94.3503  0.8786               0.0562                   0.2298   

  TP Rate Class 0 Precision Class 0 TP Rate Class 1 Precision Class 1  
0            None              None            None              None  
1            None              None            None              None  


In [7]:
results_df

Unnamed: 0,File,Model,Accuracy,Kappa,Mean Absolute Error,Root Mean Squared Error,TP Rate Class 0,Precision Class 0,TP Rate Class 1,Precision Class 1
0,knn-1neighbor-kfold.txt,"weka.classifiers.lazy.IBk -K 1 -W 0 -A ""weka.c...",98.0769,0.9596,0.0207,0.1388,,,,
1,knn-1neighbor-split.txt,"weka.classifiers.lazy.IBk -K 1 -W 0 -A ""weka.c...",94.3503,0.8786,0.0562,0.2298,,,,
