In [1]:
import pandas as pd
import os

def load_data(folder_path="datos"):
    """Carga los datos desde los archivos CSV"""
    evaluacion_path = os.path.join(folder_path, "datos_tablas_evaluacion.csv")
    entrenamiento_path = os.path.join(folder_path, "datos_tablas_entrenamiento.csv")
    
    df_eval = pd.read_csv(evaluacion_path)
    df_train = pd.read_csv(entrenamiento_path)
    
    print(f"Datos cargados - Train: {len(df_train)} filas, Eval: {len(df_eval)} filas")
    return df_eval, df_train

def clean_encoding_name(encoding):
    """Limpia el nombre del encoding para LaTeX manteniendo nombres completos"""
    # Solo escapar caracteres especiales para LaTeX
    encoding = encoding.replace('_', '\\_')
    encoding = encoding.replace('-', '-')
    return encoding

def format_number(value, decimals=3):
    """Formatea números para LaTeX"""
    if pd.isna(value):
        return "-"
    return f"{value:.{decimals}f}"

def generate_detailed_table(df_train, df_eval, model_name):
    """Genera tabla detallada para un modelo específico"""
    
    temp_train = df_train[df_train['model_name'] == model_name].copy()
    temp_train = temp_train.sort_values(by='encoding').reset_index(drop=True)
    
    temp_eval = df_eval[df_eval['model_name'] == model_name].copy()
    temp_eval = temp_eval.sort_values(by='encoding').reset_index(drop=True)
    
    if len(temp_train) == 0 or len(temp_eval) == 0:
        return f"% No hay datos para el modelo {model_name}\n"
    
    model_clean = model_name.replace(' ', '_').lower()
    
    latex_code = f"""
\\begin{{table*}}[htbp]
\\centering
\\caption{{Resultados Detallados - {model_name}}}
\\label{{tab:{model_clean}_detailed}}
\\footnotesize
\\begin{{tabular}}{{lcccccc}}
\\toprule
& \\multicolumn{{2}}{{c}}{{\\textbf{{Accuracy}}}} & \\multicolumn{{2}}{{c}}{{\\textbf{{F1-Score Weighted}}}} & \\multicolumn{{2}}{{c}}{{\\textbf{{F1-Score Macro}}}} \\\\
\\cmidrule(lr){{2-3}} \\cmidrule(lr){{4-5}} \\cmidrule(lr){{6-7}}
\\textbf{{Encoding}} & Train & \\textbf{{Test}} & Train & \\textbf{{Test}} & Train & \\textbf{{Test}} \\\\
\\midrule
"""
    
    # Combinar y ordenar datos por accuracy de test (descendente)
    combined_data = []
    for i in range(len(temp_train)):
        if i < len(temp_eval):
            combined_data.append({
                'encoding': temp_train['encoding'].iloc[i],
                'train_acc': temp_train['accuracy'].iloc[i],
                'test_acc': temp_eval['accuracy'].iloc[i],
                'train_f1w': temp_train['f1_score_weighted'].iloc[i],
                'test_f1w': temp_eval['f1_score_weighted'].iloc[i],
                'train_f1m': temp_train['f1_score_macro'].iloc[i],
                'test_f1m': temp_eval['f1_score_macro'].iloc[i]
            })
    
    # Ordenar por accuracy de test
    combined_data.sort(key=lambda x: x['test_acc'], reverse=True)
    
    for data in combined_data:
        encoding = clean_encoding_name(data['encoding'])
        
        train_acc = format_number(data['train_acc'])
        test_acc = format_number(data['test_acc'])
        train_f1w = format_number(data['train_f1w'])
        test_f1w = format_number(data['test_f1w'])
        train_f1m = format_number(data['train_f1m'])
        test_f1m = format_number(data['test_f1m'])
        
        # Resaltar datos de test en negrita
        test_acc = f"\\textbf{{{test_acc}}}"
        test_f1w = f"\\textbf{{{test_f1w}}}"
        test_f1m = f"\\textbf{{{test_f1m}}}"
        
        latex_code += f"{encoding} & {train_acc} & {test_acc} & {train_f1w} & {test_f1w} & {train_f1m} & {test_f1m} \\\\\n"
    
    latex_code += """\\bottomrule
\\end{tabular}
\\end{table*}

"""
    
    return latex_code

def save_table_to_file(latex_content, filename="tabla_detallada.tex"):
    """Guarda la tabla LaTeX en un archivo"""
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(latex_content)
    print(f"Tabla guardada en: {filename}")

def main():
    """Función principal"""
    print("Generador de Tablas Detalladas")
    print("=" * 40)
    
    # Cargar datos
    df_eval, df_train = load_data()
    
    # Obtener modelos disponibles
    models = sorted(df_train['model_name'].unique())
    print(f"Modelos encontrados: {models}")
    
    # Generar tablas para todos los modelos
    latex_content = """"""
    
    print("\nGenerando tablas detalladas para todos los modelos...")
    for model in models:
        print(f"  - Generando tabla para: {model}")
        latex_content += generate_detailed_table(df_train, df_eval, model)
    
    # Guardar archivo
    # save_table_to_file(latex_content)
    
    print(f"\nTablas detalladas para todos los modelos generadas exitosamente!")
    print(f"Se generaron {len(models)} tablas en total.")

    print(latex_content)

if __name__ == "__main__":
    main()

Generador de Tablas Detalladas
Datos cargados - Train: 48 filas, Eval: 48 filas
Modelos encontrados: ['Random Forest', 'SVM', 'XGBoost']

Generando tablas detalladas para todos los modelos...
  - Generando tabla para: Random Forest
  - Generando tabla para: SVM
  - Generando tabla para: XGBoost

Tablas detalladas para todos los modelos generadas exitosamente!
Se generaron 3 tablas en total.

\begin{table*}[htbp]
\centering
\caption{Resultados Detallados - Random Forest}
\label{tab:random_forest_detailed}
\footnotesize
\begin{tabular}{lcccccc}
\toprule
& \multicolumn{2}{c}{\textbf{Accuracy}} & \multicolumn{2}{c}{\textbf{F1-Score Weighted}} & \multicolumn{2}{c}{\textbf{F1-Score Macro}} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}
\textbf{Encoding} & Train & \textbf{Test} & Train & \textbf{Test} & Train & \textbf{Test} \\
\midrule
AS\_FFT & 0.993 & \textbf{0.998} & 0.993 & \textbf{0.999} & 0.993 & \textbf{0.956} \\
AS\_Wavelet & 0.993 & \textbf{0.998} & 0.993 & \textbf{0.99

In [1]:
import pandas as pd
import os

# Cargar datos
df_eval = pd.read_csv('datos/datos_tablas_evaluacion.csv')
df_train = pd.read_csv('datos/datos_tablas_entrenamiento.csv')

# Crear directorio si no existe
if not os.path.exists('latex_tables'):
    os.makedirs('latex_tables')

print(f"Datos cargados - Train: {len(df_train)} filas, Eval: {len(df_eval)} filas")

# Obtener modelos disponibles
models = sorted(df_train['model_name'].unique())
print(f"Modelos encontrados: {models}")

# Generar tablas para todos los modelos
all_tables = []

for model in models:
    print(f"Generando tabla para: {model}")
    
    # Filtrar datos por modelo
    temp_train = df_train[df_train['model_name'] == model].copy()
    temp_eval = df_eval[df_eval['model_name'] == model].copy()
    
    if len(temp_train) == 0 or len(temp_eval) == 0:
        continue
    
    # Combinar datos por encoding
    combined_data = []
    train_encodings = set(temp_train['encoding'].unique())
    eval_encodings = set(temp_eval['encoding'].unique())
    common_encodings = train_encodings.intersection(eval_encodings)
    
    # Verificar encodings faltantes
    if len(common_encodings) != len(train_encodings):
        print(f"  Advertencia: Encodings solo en train: {train_encodings - eval_encodings}")
    if len(common_encodings) != len(eval_encodings):
        print(f"  Advertencia: Encodings solo en eval: {eval_encodings - train_encodings}")
    
    for encoding in common_encodings:
        train_row = temp_train[temp_train['encoding'] == encoding]
        eval_row = temp_eval[temp_eval['encoding'] == encoding]
        
        # Manejo robusto de columnas de precision
        train_precw = train_row['precision_weighted'].iloc[0] if 'precision_weighted' in train_row.columns else None
        test_precw = eval_row['precision_weighted'].iloc[0] if 'precision_weighted' in eval_row.columns else None
        train_precm = train_row['precision_macro'].iloc[0] if 'precision_macro' in train_row.columns else None
        test_precm = eval_row['precision_macro'].iloc[0] if 'precision_macro' in eval_row.columns else None
        
        combined_data.append({
            'encoding': encoding,
            'train_acc': train_row['accuracy'].iloc[0],
            'test_acc': eval_row['accuracy'].iloc[0],
            'train_f1w': train_row['f1_score_weighted'].iloc[0],
            'test_f1w': eval_row['f1_score_weighted'].iloc[0],
            'train_f1m': train_row['f1_score_macro'].iloc[0],
            'test_f1m': eval_row['f1_score_macro'].iloc[0],
            'train_precw': train_precw,
            'test_precw': test_precw,
            'train_precm': train_precm,
            'test_precm': test_precm
        })
    
    # Ordenar por accuracy de test (manejo robusto de NaN)
    combined_data.sort(key=lambda x: x['test_acc'] if pd.notna(x['test_acc']) else -1, reverse=True)
    
    # Generar código LaTeX
    model_clean = model.replace(' ', '_').lower()
    
    latex_lines = [
        f"\\begin{{table*}}[htbp]",
        f"\\centering",
        f"\\caption{{Todas las Métricas - {model}}}",
        f"\\label{{tab:{model_clean}_complete}}",
        f"\\scriptsize",
        f"\\begin{{tabular}}{{lcccccccccc}}",
        f"\\toprule",
        f"& \\multicolumn{{2}}{{c}}{{\\textbf{{Accuracy}}}} & \\multicolumn{{2}}{{c}}{{\\textbf{{F1-Score Weighted}}}} & \\multicolumn{{2}}{{c}}{{\\textbf{{F1-Score Macro}}}} & \\multicolumn{{2}}{{c}}{{\\textbf{{Precision Weighted}}}} & \\multicolumn{{2}}{{c}}{{\\textbf{{Precision Macro}}}} \\\\",
        f"\\cmidrule(lr){{2-3}} \\cmidrule(lr){{4-5}} \\cmidrule(lr){{6-7}} \\cmidrule(lr){{8-9}} \\cmidrule(lr){{10-11}}",
        f"\\textbf{{Encoding}} & Train & \\textbf{{Eval}} & Train & \\textbf{{Eval}} & Train & \\textbf{{Eval}} & Train & \\textbf{{Eval}} & Train & \\textbf{{Eval}} \\\\",
        f"\\midrule"
    ]
    
    # Agregar filas de datos
    for data in combined_data:
        encoding = data['encoding'].replace('_', '\\_')
        
        # Formatear números
        train_acc = f"{data['train_acc']:.3f}" if pd.notna(data['train_acc']) else "-"
        test_acc = f"{data['test_acc']:.3f}" if pd.notna(data['test_acc']) else "-"
        train_f1w = f"{data['train_f1w']:.3f}" if pd.notna(data['train_f1w']) else "-"
        test_f1w = f"{data['test_f1w']:.3f}" if pd.notna(data['test_f1w']) else "-"
        train_f1m = f"{data['train_f1m']:.3f}" if pd.notna(data['train_f1m']) else "-"
        test_f1m = f"{data['test_f1m']:.3f}" if pd.notna(data['test_f1m']) else "-"
        train_precw = f"{data['train_precw']:.3f}" if pd.notna(data['train_precw']) else "-"
        test_precw = f"{data['test_precw']:.3f}" if pd.notna(data['test_precw']) else "-"
        train_precm = f"{data['train_precm']:.3f}" if pd.notna(data['train_precm']) else "-"
        test_precm = f"{data['test_precm']:.3f}" if pd.notna(data['test_precm']) else "-"
        
        # Resaltar datos de test en negrita
        test_acc = f"\\textbf{{{test_acc}}}"
        test_f1w = f"\\textbf{{{test_f1w}}}"
        test_f1m = f"\\textbf{{{test_f1m}}}"
        test_precw = f"\\textbf{{{test_precw}}}"
        test_precm = f"\\textbf{{{test_precm}}}"
        
        latex_lines.append(f"{encoding} & {train_acc} & {test_acc} & {train_f1w} & {test_f1w} & {train_f1m} & {test_f1m} & {train_precw} & {test_precw} & {train_precm} & {test_precm} \\\\")
    
    # Cerrar tabla
    latex_lines.extend([
        "\\bottomrule",
        "\\end{tabular}",
        "\\end{table*}",
        ""
    ])
    
    all_tables.extend(latex_lines)

# Unir todas las tablas
latex_content = '\n'.join(all_tables)

# Guardar archivo
filepath = os.path.join('latex_tables', 'tablas_completas.tex')
with open(filepath, 'w', encoding='utf-8') as f:
    f.write(latex_content)

print(f"\nTabla guardada en: {filepath}")
print(f"Archivo generado: tablas_completas.tex")

# Imprimir código LaTeX
print("\n" + "="*50)
print(latex_content)

Datos cargados - Train: 48 filas, Eval: 48 filas
Modelos encontrados: ['Random Forest', 'SVM', 'XGBoost']
Generando tabla para: Random Forest
Generando tabla para: SVM
Generando tabla para: XGBoost

Tabla guardada en: latex_tables\tablas_completas.tex
Archivo generado: tablas_completas.tex

\begin{table*}[htbp]
\centering
\caption{Todas las Métricas - Random Forest}
\label{tab:random_forest_complete}
\scriptsize
\begin{tabular}{lcccccccccc}
\toprule
& \multicolumn{2}{c}{\textbf{Accuracy}} & \multicolumn{2}{c}{\textbf{F1-Score Weighted}} & \multicolumn{2}{c}{\textbf{F1-Score Macro}} & \multicolumn{2}{c}{\textbf{Precision Weighted}} & \multicolumn{2}{c}{\textbf{Precision Macro}} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7} \cmidrule(lr){8-9} \cmidrule(lr){10-11}
\textbf{Encoding} & Train & \textbf{Eval} & Train & \textbf{Eval} & Train & \textbf{Eval} & Train & \textbf{Eval} & Train & \textbf{Eval} \\
\midrule
AS\_Wavelet & 0.997 & \textbf{0.995} & 0.997 & \textbf{0.995} & 

In [4]:
df_eval.sort_values(by='accuracy', ascending=False)

Unnamed: 0,accuracy,encoding,f1_score_macro,f1_score_weighted,model_name,precision_macro,precision_weighted,recall_macro,recall_weighted
7,0.995324,AS_One Hot,0.995346,0.995327,SVM,0.995465,0.995388,0.995285,0.995324
4,0.995207,AS_One Hot + Wavelet,0.99523,0.99521,SVM,0.995349,0.99527,0.99517,0.995207
8,0.995207,AS_K-mers,0.995208,0.99521,SVM,0.99531,0.995275,0.99517,0.995207
9,0.995207,AS_One Hot + FFT,0.995231,0.995211,SVM,0.995362,0.995284,0.99517,0.995207
13,0.994857,AS_FFT,0.99485,0.994861,SVM,0.994976,0.994936,0.994796,0.994857
2,0.994857,AS_K-mers + Wavelet,0.994883,0.994857,SVM,0.995008,0.994923,0.994825,0.994857
24,0.994506,AS_K-mers,0.994543,0.994511,Random Forest,0.994681,0.99459,0.99448,0.994506
31,0.994506,AS_Wavelet,0.994538,0.994505,Random Forest,0.994667,0.994576,0.99448,0.994506
18,0.994506,AS_K-mers + Wavelet,0.994538,0.994506,Random Forest,0.994666,0.994576,0.99448,0.994506
39,0.994389,AS_One Hot,0.994377,0.994387,XGBoost,0.994456,0.994452,0.994365,0.994389
