In [3]:
import json
import re

def find_first_number(string):
    """Return the first run of digits found in ``string`` as an int.

    The text is scanned left to right; a sign or decimal point is not part
    of the match, so "-12.5" yields 12. Returns None when ``string``
    contains no digit at all.
    """
    found = re.search(r'\d+', string)
    if found is None:
        return None
    return int(found.group())

def calculate_averages(file_path):
    """Average the per-entry metrics stored in a JSON results file.

    The file must hold a JSON array of entries. From each entry this reads:
      * entry['rouge_scores']['rouge1'] and entry['rouge_scores']['rougeL']:
        sequences whose items at index 1 and index 2 are numeric
        (presumably recall and F-measure -- TODO confirm against the
        code that writes the results file),
      * entry['time_spent']: a number,
      * entry['gemini_score']: a string; its first run of digits is taken
        as the score (via find_first_number).

    Args:
        file_path: Path of the JSON results file.

    Returns:
        A dict with the keys 'avg_rouge1_second', 'avg_rouge1_third',
        'avg_rougeL_second', 'avg_rougeL_third', 'avg_time_spent' and
        'gemini_score', each the mean of that metric over all entries.

    Raises:
        ValueError: if the file contains no entries, or if an entry's
            'gemini_score' contains no digits.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    num_entries = len(data)
    if num_entries == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError below.
        raise ValueError(f"No entries in {file_path!r}; cannot compute averages")

    # Insertion order here is the key order of the returned dict.
    totals = {
        'avg_rouge1_second': 0,
        'avg_rouge1_third': 0,
        'avg_rougeL_second': 0,
        'avg_rougeL_third': 0,
        'avg_time_spent': 0,
        'gemini_score': 0,
    }

    for index, entry in enumerate(data):
        rouge_scores = entry['rouge_scores']
        rouge1 = rouge_scores['rouge1']
        rougeL = rouge_scores['rougeL']

        score = find_first_number(entry['gemini_score'])
        if score is None:
            # Adding None to the running total would raise an opaque TypeError.
            raise ValueError(
                f"Entry {index} in {file_path!r} has no number in its "
                f"gemini_score: {entry['gemini_score']!r}"
            )

        # Plain sequential += keeps the floating-point result independent of
        # the interpreter's sum() implementation.
        totals['avg_rouge1_second'] += rouge1[1]
        totals['avg_rouge1_third'] += rouge1[2]
        totals['avg_rougeL_second'] += rougeL[1]
        totals['avg_rougeL_third'] += rougeL[2]
        totals['avg_time_spent'] += entry['time_spent']
        totals['gemini_score'] += score

    return {name: total / num_entries for name, total in totals.items()}

# Print the averaged metrics for each evaluation results file.
file_paths = [
    'results_rag.json',
    'results_rag_text.json',
    'results_advanced_gemini_2.0.json',
]

for file_path in file_paths:
    averages = calculate_averages(file_path)
    print(f"Averages for {file_path}: {averages}")

Averages for results_rag.json: {'avg_rouge1_second': 0.6553766685333686, 'avg_rouge1_third': 0.2121335173712743, 'avg_rougeL_second': 0.4732676575516205, 'avg_rougeL_third': 0.15335981670239016, 'avg_time_spent': 15.407271986007691, 'gemini_score': 4.48}
Averages for results_rag_text.json: {'avg_rouge1_second': 0.6615814363069472, 'avg_rouge1_third': 0.22911665478069376, 'avg_rougeL_second': 0.46263807665251866, 'avg_rougeL_third': 0.16162015229484034, 'avg_time_spent': 15.621680946350098, 'gemini_score': 4.32}
Averages for results_advanced_gemini_2.0.json: {'avg_rouge1_second': 0.891912930553278, 'avg_rouge1_third': 0.0838475764454364, 'avg_rougeL_second': 0.7449775089653987, 'avg_rougeL_third': 0.06937071776071554, 'avg_time_spent': 8.086044921875, 'gemini_score': 4.96}


In [None]:
0.6553766685333686