In [69]:
import pandas as pd
import numpy as np
import os

In [70]:
def generate_and_save_latex_table(df_question, question_file, temp, output_dir):
    """
    Generates a LaTeX table with highlighting for specific values and saves it to a .tex file.

    Values are highlighted if they are not '0/10' or '10/10'.
    """
    # Sanitize model names for LaTeX
    models = df_question.index.get_level_values('Model Display Name').unique()
    
    lines = []
    question_name = question_file.split('.')[0]
    temp_float = f"{int(temp)/10.0}"
    
    question_file_latex = question_name.replace('_', r'\_')
    caption = f"\\caption{{Evaluación del determinismo de las respuestas {question_file_latex} (temperature={temp_float}, top-p=0.1)}}"
    label = f"\\label{{tab:determinism_exercises_temp_{temp}_{question_name}}}"

    # LaTeX table
    lines.append(r"\begin{table}[h!]")
    lines.append(r"\centering")
    lines.append(caption)
    lines.append(label)
    lines.append(r"\begin{tabular}{l *{8}{c}}")
    lines.append(r"\toprule")
    lines.append(r"& \multicolumn{2}{c}{\textbf{R1}} & \multicolumn{2}{c}{\textbf{R2}} & \multicolumn{2}{c}{\textbf{R3}} & \multicolumn{2}{c}{\textbf{R4}} \\")
    lines.append(r"\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7} \cmidrule(lr){8-9}")
    lines.append(r"\textbf{Models} & \textbf{oa} & \textbf{mc} & \textbf{oa} & \textbf{mc} & \textbf{oa} & \textbf{mc} & \textbf{oa} & \textbf{mc} \\")
    lines.append(r"\midrule")

    for model in models:
        safe_model_name = model.replace(':', ' : ').replace('  ', ' ')
        row = f"{safe_model_name}\t&"
        
        for r_tech in ['R1', 'R2', 'R3', 'R4']:
            # --- Open Answer ---
            try:
                oa_score = df_question.loc[(model, r_tech), 'Determinism Score_oa']
                if isinstance(oa_score, pd.Series):
                    oa_score = oa_score.iloc[0] if not oa_score.empty else np.nan
                
                if pd.notna(oa_score):
                    val_int = int(round(oa_score * 10, 0))
                    val_str = f"{val_int}/10"
                    # highlight if not 0 or 10
                    if val_int != 10 and val_int != 0:
                        oa_val = f"\\highlight{{{val_str}}}"
                    else:
                        oa_val = f" {val_str} "
                else:
                    oa_val = "   "
            except KeyError:
                oa_val = "   "
            
            # --- Multiple Choice ---
            try:
                mc_score = df_question.loc[(model, r_tech), 'Determinism Score_mc']
                if isinstance(mc_score, pd.Series):
                    mc_score = mc_score.iloc[0] if not mc_score.empty else np.nan
                    
                if pd.notna(mc_score):
                    val_int = int(round(mc_score * 10, 0))
                    val_str = f"{val_int}/10"
                    # highlight command if not 0 or 10
                    if val_int != 10 and val_int != 0:
                        mc_val = f"\\highlight{{{val_str}}}"
                    else:
                        mc_val = f" {val_str} "
                else:
                    mc_val = "   "
            except KeyError:
                mc_val = "   "

            row += f"\t{oa_val} & {mc_val} &"
        
        lines.append(row[:-1] + r"\\")

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    lines.append(r"\end{table}")

    # Save to file
    output_filename = f"determinism_table_temp_{temp}_{question_name}.tex"
    output_path = os.path.join(output_dir, f'temp_{temp}')
    os.makedirs(output_path, exist_ok=True)
    tex_dir_temp = os.path.join(output_path, output_filename)

    
    with open(tex_dir_temp, 'w') as f:
        f.write("\n".join(lines))
    print(f"Successfully exported highlighted table: {tex_dir_temp}")



def get_color(value):
    """
    Maps a value from 0-100 to a color in a red-to-green scale.
    """
    if not pd.notna(value):
        return 'white'

    value = max(0, min(100, value))
    
    # Simplified red-to-green gradient
    red = int(255 * (1 - value / 100))
    green = int(255 * (value / 100))
    return f'{{rgb,255:red,{red}; green,{green}; blue,0}}'

def summary_determinism(df_merged, temp):
    """
    Generates a summary LaTeX table with the mean determinism rate for each model,
    with cells colored based on the score (0=red, 100=green).

    Args:
        df_merged (pd.DataFrame): The merged DataFrame with all data for a temperature.
        temp (str): The temperature string (e.g., '00').
        output_dir (str): The directory to save the .tex file.
    """
    # Calculate the mean, grouping by model and prompting technique
    df_summary = df_merged.groupby(['Model Display Name', 'Prompting Tech']).mean(numeric_only=True)
    
    models = df_merged.index.get_level_values('Model Display Name').unique()
    
    lines = []
    temp_float = f"{int(temp)/10.0}"
    

    caption = f"\\caption{{Tasa de Determinismo Promedio (\\%) con temperature={temp_float} y top-p=0.1}}"
    label = f"\\label{{tab:summary_determinism_temp_{temp}}}"

    lines.append(r"\begin{table}[H]")
    lines.append(r"\centering")
    lines.append(caption)
    lines.append(label)
    lines.append(r"\begin{tabular}{l *{8}{c}}")
    lines.append(r"\toprule")
    lines.append(r"& \multicolumn{2}{c}{\textbf{R1}} & \multicolumn{2}{c}{\textbf{R2}} & \multicolumn{2}{c}{\textbf{R3}} & \multicolumn{2}{c}{\textbf{R4}} \\")
    lines.append(r"\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7} \cmidrule(lr){8-9}")
    lines.append(r"\textbf{Models} & \textbf{oa} & \textbf{mc} & \textbf{oa} & \textbf{mc} & \textbf{oa} & \textbf{mc} & \textbf{oa} & \textbf{mc} \\")
    lines.append(r"\midrule")

    for model in sorted(models):
        safe_model_name = model.replace(':', ' : ').replace('  ', ' ')
        row = f"{safe_model_name} &"

        for r_tech in ['R1', 'R2', 'R3', 'R4']:
            try:
                # Get mean scores for the current model and tech
                scores = df_summary.loc[(model, r_tech)]
                oa_mean = scores['Determinism Score_oa']
                mc_mean = scores['Determinism Score_mc']
                
                oa_perc = oa_mean * 100
                mc_perc = mc_mean * 100
                
                oa_color = get_color(oa_perc)
                mc_color = get_color(mc_perc)

                oa_val = f"\\cellcolor{oa_color}{int(round(oa_perc, 0))}\\%" if pd.notna(oa_mean) else ""
                mc_val = f"\\cellcolor{mc_color}{int(round(mc_perc, 0))}\\%" if pd.notna(mc_mean) else ""

            except KeyError:
                oa_val, mc_val = "", ""

            row += f" {oa_val} & {mc_val} &"
        
        lines.append(row[:-1] + r"\\")

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    lines.append(r"\end{table}")

    # Save to file
    output_dir_path = 'determinism_tables_summary'
    output_filename = f"summary_determinism_temp_{temp}.tex"
    
    tables_dir = '../../../data/determinism_tables/tex_tables'
    tex_dir = os.path.join(tables_dir, output_dir_path)
    os.makedirs(tex_dir, exist_ok=True)
    tex_dir_temp = os.path.join(tex_dir, output_filename)

    with open(tex_dir_temp, 'w') as f:
        f.write("\n".join(lines))
    print(f"Successfully exported summary table: {tex_dir_temp}")


In [71]:

TABLES_DIR = '../../../data/determinism_tables'
TEX_DIR = os.path.join(TABLES_DIR, 'tex_tables')
INDEX_COLS = ['Model Display Name', 'Prompting Tech', 'Question File']
TEMPERATURES = ['00', '02', '04']


for temp in TEMPERATURES:
    try:
        print(f"\nProcessing files for temperature: {temp}...")

        path_mc = os.path.join(TABLES_DIR, f'determinism_table_temp_{temp}_mc.csv')
        path_oa = os.path.join(TABLES_DIR, f'determinism_table_temp_{temp}_oa.csv')

        df_mc = pd.read_csv(path_mc).set_index(INDEX_COLS)
        df_oa = pd.read_csv(path_oa).set_index(INDEX_COLS)

        # Merge the dataframes
        df_merged = pd.merge(df_oa, df_mc, left_index=True, right_index=True, how='outer', suffixes=('_oa', '_mc'))
        
        question_files = df_merged.index.get_level_values('Question File').unique()
        for question_file in sorted(question_files):
            df_question = df_merged[df_merged.index.get_level_values('Question File') == question_file]
        
        summary_determinism(df_merged, temp)
        print(f"Successfully generated summary table for temperature {temp}.")

    except FileNotFoundError as e:
        print(f"Error: Could not find file {e.filename}. Please check the path.")
    except Exception as e:
        print(f"An unexpected error occurred for temperature {temp}: {e}")


Processing files for temperature: 00...
Successfully exported summary table: ../../../data/determinism_tables/tex_tables/determinism_tables_summary/summary_determinism_temp_00.tex
Successfully generated summary table for temperature 00.

Processing files for temperature: 02...
Successfully exported summary table: ../../../data/determinism_tables/tex_tables/determinism_tables_summary/summary_determinism_temp_02.tex
Successfully generated summary table for temperature 02.

Processing files for temperature: 04...
Successfully exported summary table: ../../../data/determinism_tables/tex_tables/determinism_tables_summary/summary_determinism_temp_04.tex
Successfully generated summary table for temperature 04.
