# In [None]:

import json
import os
import numpy as np
import pandas as pd

# --- Configuration: adjust these to your setup ---
# Task names to process.
tasks = ['education', 'therapy', 'chatting']
# Number of result files per combination.
n_files_per_combo = 4
# JSON key selected for averaging.
# NOTE(review): this name is not read anywhere in the visible part of the
# script; it is kept in case code outside this view uses it -- confirm.
key_to_average = 'P2_index_consistency_score'

import json
import os
import numpy as np
import pandas as pd

# --- Configuration: adjust these to your setup ---
# Base directory containing one subfolder per task
tasks_root_dir = 'results'

# Every (task, LLM) pair is processed; each pair has this many run files.
tasks = ['education', 'therapy', 'chatting']
llms = ['llama3-70b', 'gemma', 'mistral-7b']
n_files_per_combo = 4

# JSON keys whose values are aggregated.
metric_keys = [
    'consistency_metric',
    'P2_index_consistency_score',
    'fluency_metric',
]

# Display names, keyed by original JSON key.
# NOTE(review): only 'P2_index_consistency_score' appears in both metric_keys
# and this map, so the other two entries have no matching metric and the
# other two metrics have no display name -- confirm which list is intended.
rename_map = {
    'P2_prompt_consistency_score': 'line-to-prompt Consistency',
    'P2_index_consistency_score': 'line-to-line consistency',
    'P2_q&a_consistency_score': 'q&a consistency',
}

# --- Step 1: Read files and compute per-file means for each metric ---
# For every (task, LLM) pair, read run_1.json .. run_<n_files_per_combo>.json
# from <tasks_root_dir>/<task>/<llm>/. Each metric's list is reduced to its
# mean per file, and those per-file means are averaged into one value per
# metric. A file or metric that cannot be used contributes 0.0 (with a
# warning), so it lowers the average rather than being skipped.
records = []
for task in tasks:
    task_dir = os.path.join(tasks_root_dir, task)
    for llm in llms:
        llm_dir = os.path.join(task_dir, llm)
        metric_file_means = {key: [] for key in metric_keys}
        for i in range(1, n_files_per_combo + 1):
            file_path = os.path.join(llm_dir, f'run_{i}.json')
            try:
                # JSON text is UTF-8; do not depend on the platform's
                # locale-specific default encoding.
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except FileNotFoundError:
                problem = f"file not found: {file_path}"
            except (json.JSONDecodeError, UnicodeDecodeError) as exc:
                # An unreadable file is treated like a missing one instead of
                # aborting the whole aggregation.
                problem = f"could not parse {file_path} ({exc})"
            else:
                problem = None

            if problem is not None:
                print(f"Warning: {problem}; using 0 for all metrics")
                for key in metric_keys:
                    metric_file_means[key].append(0.0)
                continue

            for key in metric_keys:
                # A top-level value that is not a JSON object has no metrics.
                values = data.get(key) if isinstance(data, dict) else None
                if isinstance(values, list) and values:
                    metric_file_means[key].append(np.mean(values))
                else:
                    print(f"Warning: key '{key}' missing or invalid in {file_path}; using 0")
                    metric_file_means[key].append(0.0)

        # One row per (task, LLM): the mean of the per-file means.
        record = {'Task': task, 'LLM': llm}
        for key in metric_keys:
            record[key] = np.mean(metric_file_means[key])
        records.append(record)

# --- Step 2: Build a DataFrame and pivot for a multi-index table ---
# Rows are tasks; the metric columns are spread across LLMs, and the metric
# (outer) column level is then relabelled with the display names.
df = pd.DataFrame(records)
pivot_multi = (
    df.pivot(index='Task', columns='LLM')[metric_keys]
    .rename(columns=rename_map, level=0)
)

# --- Step 3: Export to LaTeX ---
# Render the pivot table with three-decimal floats, then write the markup
# to a .tex file in the current working directory.
latex_code = pivot_multi.to_latex(
    caption="LLM Metrics (Consistency, Coherence, Fluency) Across Tasks",
    label="tab:llm_metrics",
    float_format="%.3f",
)
with open('llm_metrics_table.tex', 'w') as tex_file:
    tex_file.write(latex_code)

# --- Optional: display in console ---
print(pivot_multi, "\nLaTeX table exported to llm_metrics_table.tex", sep="\n")

