In [None]:
import os
import re
import numpy as np
import csv

# Compare the score encoded in each feedback file's name with the mean
# "Overall Band Score" found inside that file, print agreement metrics and
# save a per-file comparison table as CSV.

folder_path = 'testset/model_feedback'
csv_file = 'score_comparison.csv'

# Matches markdown-bold score lines such as "**Overall Band Score**: 6.5";
# group 1 is the numeric score.
score_pattern = re.compile(r'Overall Band Score\*\*: (\d+(\.\d+)?)')

# Matches a trailing "_<major>_<minor>" (optionally followed by ".txt") at the
# end of a file name, e.g. "essay_6_5.txt" -> major "6", minor "5".
name_score_pattern = re.compile(r'_(\d+)_(\d+)(?:\.txt)?$')


def extract_model_score(content):
    """Return the mean of every "Overall Band Score" found in *content*.

    Args:
        content: Full text of one feedback file.

    Returns:
        The arithmetic mean of all matched scores, or None when the text
        contains no match for ``score_pattern``.
    """
    scores = [float(match.group(1)) for match in score_pattern.finditer(content)]
    return np.mean(scores) if scores else None


def parse_name_score(file_name):
    """Decode the score embedded at the end of *file_name*.

    A name ending in "_6_5" or "_6_5.txt" yields 6.5.

    Args:
        file_name: Bare file name (no directory part needed).

    Returns:
        The decoded score as a float, or None when the name does not end
        with the "_<digits>_<digits>" pattern.
    """
    match = name_score_pattern.search(file_name)
    if match is None:
        return None
    major, minor = match.groups()
    return float(f"{major}.{minor}")


def compute_metrics(name_scores, avg_scores):
    """Compute agreement metrics between two equally long score sequences.

    Args:
        name_scores: Scores decoded from the file names.
        avg_scores: Mean scores extracted from the file contents.

    Returns:
        A ``(mad, rmse, pearson)`` tuple: mean absolute difference, root mean
        square error, and the Pearson correlation coefficient. ``pearson`` is
        None with fewer than two pairs. With no pairs at all, ``mad`` and
        ``rmse`` are NaN (returned directly, so NumPy does not emit an
        "empty slice" RuntimeWarning).
    """
    name_arr = np.asarray(name_scores, dtype=float)
    avg_arr = np.asarray(avg_scores, dtype=float)
    if name_arr.size == 0:
        return np.nan, np.nan, None

    diffs = name_arr - avg_arr
    mad = np.mean(np.abs(diffs))
    rmse = np.sqrt(np.mean(diffs ** 2))
    pearson = np.corrcoef(name_arr, avg_arr)[0, 1] if name_arr.size > 1 else None
    return mad, rmse, pearson


# The guard is true both in a notebook cell and when run as a script, so every
# name assigned below is still a module-level global; it only prevents the
# file-system work from running when this code is imported.
if __name__ == "__main__":
    file_names = []
    file_avg_scores = []
    file_name_scores = []

    # os.listdir returns entries in arbitrary order; sorting keeps the CSV row
    # order reproducible from run to run.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        if not os.path.isfile(file_path):
            continue  # skip sub-directories

        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        file_names.append(file_name)
        file_avg_scores.append(extract_model_score(content))
        file_name_scores.append(parse_name_score(file_name))

    # Keep only the files for which both scores could be determined.
    filtered_indices = [
        i for i, (name_score, avg_score) in enumerate(zip(file_name_scores, file_avg_scores))
        if name_score is not None and avg_score is not None
    ]

    fname_scores_array = np.array([file_name_scores[i] for i in filtered_indices])
    avg_scores_array = np.array([file_avg_scores[i] for i in filtered_indices])
    differences_array = fname_scores_array - avg_scores_array

    if not filtered_indices:
        print(f"No files in {folder_path} have both a file name score and an Overall Band Score; metrics are undefined.")

    mad, rmse, pearson_corr = compute_metrics(fname_scores_array, avg_scores_array)

    print("Mean Absolute Difference (MAD):", mad)
    print("Root Mean Square Error (RMSE):", rmse)
    print("Pearson correlation between file name score and average score:", pearson_corr)

    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # The score parsed from the file contents goes in the "Model" column,
        # the score decoded from the file name in the "Human" column.
        writer.writerow(['File Name', 'Model Overall Band Score', 'Human Band Score', 'Difference'])
        for i in filtered_indices:
            writer.writerow([
                file_names[i],
                file_avg_scores[i],
                file_name_scores[i],
                file_name_scores[i] - file_avg_scores[i]
            ])

    print(f"Saved detailed comparison to {csv_file}")

Mean Absolute Difference (MAD): 1.4166666666666665
Root Mean Square Error (RMSE): 1.4766704288891124
Pearson correlation between file name score and average score: 1.0
Saved detailed comparison to score_comparison.csv
