In [2]:
import os
import json
from collections import defaultdict

def _accumulate_metrics(scores, metric_names, totals, counts):
    """Add every metric in ``metric_names`` from ``scores`` to the running totals/counts."""
    for metric_name in metric_names:
        # A metric that is absent from ``scores`` contributes 0 and is still counted,
        # so every section carrying the metric family weighs equally in the average.
        totals[metric_name] += float(scores.get(metric_name, 0))
        counts[metric_name] += 1


def _count_missing(section):
    """Return the number of "Missing" hits in a section.

    One hit for a summary containing "Missing", plus one hit per report item
    that has at least one retrieved entry containing "Missing".
    """
    hits = 0

    summary = section.get("summary")
    if summary is not None and "Missing" in summary:
        hits += 1

    # ``or []`` treats both an absent and a null "report" as "no items".
    for item in section.get("report") or []:
        if any("Missing" in retrieved for retrieved in item.get("retrieved", [])):
            hits += 1

    return hits


def calculate_statistics(result_path,
                         data_dir="../../data/process_data",
                         categories=("Adaptation_and_Mitigation",)):
    """Print average ROUGE / BERTScore metrics and the number of "Missing" markers.

    For each selected category directory under ``data_dir`` the JSON file at
    ``<data_dir>/<category>/<result_path>`` is loaded. It must contain a list of
    report dicts. Every dict-valued entry of a report is treated as a section:
    its "rouge" and "bertscore" dicts (when present) feed the averages, and its
    "summary" / "report" fields are scanned for the substring "Missing".

    Args:
        result_path: Path of the result file relative to each category directory.
        data_dir: Directory that holds one sub-directory per category.
        categories: Name (or iterable of names) of the category directories to
            evaluate. Names not present in ``data_dir`` are silently skipped.

    Returns:
        None. The statistics are written to stdout.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``open`` (for example
            ``FileNotFoundError`` when ``data_dir`` or a result file is missing).
        json.JSONDecodeError: Propagated when a result file is not valid JSON.
    """
    rouge_metric_names = [
        f"{rouge_metric} {metric_type}"
        for rouge_metric in ("ROUGE-1", "ROUGE-2", "ROUGE-L")
        for metric_type in ("Precision", "Recall", "F1")
    ]
    bert_metric_names = ["BERTScore Precision", "BERTScore Recall", "BERTScore F1"]

    # Running sums and sample counts per metric name.
    total_rouge_scores = defaultdict(float)
    count_rouge_scores = defaultdict(int)
    total_bert_scores = defaultdict(float)
    count_bert_scores = defaultdict(int)
    missing_count = 0

    # Accept a single category name as well as an iterable of names.
    if isinstance(categories, str):
        categories = (categories,)
    selected = set(categories)

    # Sorted so that the float summation order does not depend on the OS listing order.
    for result_dir in sorted(os.listdir(data_dir)):
        if result_dir not in selected:
            continue

        with open(os.path.join(data_dir, result_dir, result_path), 'r', encoding='utf-8') as f:
            report_list = json.load(f)

        for report in report_list:
            for section in report.values():
                # Only nested dictionaries are sections; scalar fields are ignored.
                if not isinstance(section, dict):
                    continue

                missing_count += _count_missing(section)

                # A nested dict without metrics is skipped instead of raising KeyError.
                rouge = section.get("rouge")
                if isinstance(rouge, dict):
                    _accumulate_metrics(rouge, rouge_metric_names,
                                        total_rouge_scores, count_rouge_scores)

                bert = section.get("bertscore")
                if isinstance(bert, dict):
                    _accumulate_metrics(bert, bert_metric_names,
                                        total_bert_scores, count_bert_scores)

    # Totals and counts are always updated together, so every count here is >= 1.
    average_rouge_scores = {name: total_rouge_scores[name] / count_rouge_scores[name]
                            for name in total_rouge_scores}
    average_bert_scores = {name: total_bert_scores[name] / count_bert_scores[name]
                           for name in total_bert_scores}

    # Print results

    print("\n=== Average ROUGE Scores ===")
    for name, avg in average_rouge_scores.items():
        print(f"{name}: {avg:.4f}")

    print("\n=== Average BERT Scores ===")
    for name, avg in average_bert_scores.items():
        print(f"{name}: {avg:.4f}")

    print(f"\nTotal 'Missing' count: {missing_count}")


In [5]:
# Print the averaged ROUGE / BERTScore metrics and the "Missing" count for
# DecomposedIR_result/eval_self_reflection.json.
calculate_statistics("DecomposedIR_result/eval_self_reflection.json")


=== Average ROUGE Scores ===
ROUGE-1 Precision: 0.2490
ROUGE-1 Recall: 0.4488
ROUGE-1 F1: 0.2930
ROUGE-2 Precision: 0.0545
ROUGE-2 Recall: 0.1004
ROUGE-2 F1: 0.0641
ROUGE-L Precision: 0.1263
ROUGE-L Recall: 0.2443
ROUGE-L F1: 0.1520

=== Average BERT Scores ===
BERTScore Precision: 0.6164
BERTScore Recall: 0.6873
BERTScore F1: 0.6481

Total 'Missing' count: 0


In [4]:
# Print the same statistics for DecomposedIR_result/eval.json so the two
# result files can be compared side by side.
calculate_statistics("DecomposedIR_result/eval.json")


=== Average ROUGE Scores ===
ROUGE-1 Precision: 0.2918
ROUGE-1 Recall: 0.4402
ROUGE-1 F1: 0.3160
ROUGE-2 Precision: 0.0748
ROUGE-2 Recall: 0.1024
ROUGE-2 F1: 0.0768
ROUGE-L Precision: 0.1567
ROUGE-L Recall: 0.2472
ROUGE-L F1: 0.1714

=== Average BERT Scores ===
BERTScore Precision: 0.1344
BERTScore Recall: 0.2216
BERTScore F1: 0.1767

Total 'Missing' count: 0
