In [None]:
# ============================================================
# LATEX TABLE GENERATOR FOR THESIS
# ============================================================

import pandas as pd
import numpy as np
from pathlib import Path

# Notebook banner: title followed by a 60-character rule.
print("LaTeX Table Generator", "=" * 60, sep="\n")

In [None]:
# ============================================================
# 1. LOAD COMBINED RESULTS
# ============================================================

# Combined results of every experiment; the table generators below read the
# columns method, model, n_features, f1_score and auc from this frame.
all_results = pd.read_csv("results/comparisons/tables/all_methods_combined.csv")
# Rows of the run without feature selection, used as the reference in every table.
baseline = all_results[all_results['method'] == 'baseline']

# Surface schema problems here instead of as a KeyError/IndexError deep inside
# a table generator. These are warnings only, so tables that do not need the
# missing pieces can still be produced.
_missing_columns = sorted(
    {'method', 'model', 'n_features', 'f1_score', 'auc'} - set(all_results.columns)
)
if _missing_columns:
    print(f"WARNING: missing expected columns: {_missing_columns}")
if baseline.empty:
    print("WARNING: no rows with method == 'baseline'; baseline comparisons will fail")

print(f"✓ Loaded {len(all_results)} experiments")
print(f"✓ Methods: {all_results['method'].nunique()}")

In [None]:
# ============================================================
# 2. TABLE 1: BEST RESULTS PER METHOD
# ============================================================

def generate_best_results_table(results=None, baseline_results=None):
    """Generate the LaTeX table of the best (highest-F1) run of each method.

    Baseline runs are excluded from the per-method rows and reported once,
    in a separate final row, so the baseline is not listed twice.

    Args:
        results: DataFrame with the columns ``method``, ``model``,
            ``n_features``, ``f1_score`` and ``auc``. Defaults to the
            notebook-level ``all_results`` frame.
        baseline_results: DataFrame of baseline runs with the same columns.
            Defaults to the notebook-level ``baseline`` frame when
            ``results`` is omitted, otherwise to the rows of ``results``
            whose ``method`` is ``'baseline'``.

    Returns:
        The complete ``table`` environment as a string.

    Raises:
        ValueError: if there are no baseline rows (the feature reduction
            cannot be computed without the baseline feature count).
    """
    if results is None:
        # Notebook usage: fall back to the frames loaded in the earlier cells.
        results = all_results
        if baseline_results is None:
            baseline_results = baseline
    elif baseline_results is None:
        baseline_results = results[results['method'] == 'baseline']

    if baseline_results.empty:
        raise ValueError(
            "No baseline rows (method == 'baseline') found; "
            "cannot compute feature reduction."
        )

    # Best-F1 baseline run. mergesort is stable, so ties keep file order and
    # a single-row baseline behaves exactly like ``iloc[0]``.
    baseline_row = baseline_results.sort_values(
        'f1_score', ascending=False, kind='mergesort'
    ).iloc[0]
    baseline_features = baseline_row['n_features']

    # Best run per method: sort by F1 (stable) and keep the first row of each
    # method. Rows without an F1 score cannot be ranked and are dropped.
    columns = ['method', 'model', 'n_features', 'f1_score', 'auc']
    best_results = (
        results.loc[results['method'] != 'baseline', columns]
        .dropna(subset=['f1_score'])
        .sort_values('f1_score', ascending=False, kind='mergesort')
        .drop_duplicates(subset='method', keep='first')
        .copy()
    )

    # Percentage of baseline features removed by each method.
    best_results['reduction_pct'] = (
        (baseline_features - best_results['n_features']) / baseline_features * 100
    ).round(1)

    # Six columns: the column spec must match the six header cells.
    latex = r"""\begin{table}[htbp]
\centering
\caption{Best Performance of Each Feature Selection Method}
\label{tab:best_results}
\begin{tabular}{lccccc}
\hline
\textbf{Method} & \textbf{Model} & \textbf{Features} & \textbf{Reduction} & \textbf{F1 Score} & \textbf{AUC} \\
\hline
"""

    # One row per method, best F1 first.
    for _, row in best_results.iterrows():
        # Replacing underscores also keeps the names LaTeX-safe.
        method_name = row['method'].replace('_', ' ').title()
        model_name = row['model'].replace('_', ' ').title()

        latex += f"{method_name} & {model_name} & {int(row['n_features'])} & {row['reduction_pct']:.1f}\\% & {row['f1_score']:.4f} & {row['auc']:.4f} \\\\\n"

    # Baseline reference row, separated by a rule.
    latex += r"\hline" + "\n"
    latex += f"Baseline (No FS) & {baseline_row['model'].replace('_', ' ').title()} & {int(baseline_row['n_features'])} & 0.0\\% & {baseline_row['f1_score']:.4f} & {baseline_row['auc']:.4f} \\\\\n"

    # Close table
    latex += r"""\hline
\end{tabular}
\end{table}
"""

    return latex

# Build Table 1 and echo it under a titled 60-character rule.
table1 = generate_best_results_table()
print("TABLE 1: Best Results Per Method", "=" * 60, table1, sep="\n")

In [None]:
# ============================================================
# 3. TABLE 2: PERFORMANCE BY NUMBER OF FEATURES
# ============================================================

def generate_performance_by_features_table(feature_counts=(10, 20, 30), results=None,
                                           baseline_results=None):
    """Generate the LaTeX table of mean F1 per method at given feature counts.

    Each cell is the mean ``f1_score`` of all runs of one method with exactly
    that ``n_features``; ``---`` marks combinations without any run. The last
    row repeats the mean baseline F1 in every column for reference.

    Args:
        feature_counts: Iterable of feature counts, one table column each.
        results: DataFrame with the columns ``method``, ``n_features`` and
            ``f1_score``. Defaults to the notebook-level ``all_results``.
        baseline_results: DataFrame of baseline runs. Defaults to the
            notebook-level ``baseline`` frame when ``results`` is omitted,
            otherwise to the rows of ``results`` whose ``method`` is
            ``'baseline'``.

    Returns:
        The complete ``table`` environment as a string.

    Raises:
        ValueError: if there are no baseline rows.
    """
    if results is None:
        # Notebook usage: fall back to the frames loaded in the earlier cells.
        results = all_results
        if baseline_results is None:
            baseline_results = baseline
    elif baseline_results is None:
        baseline_results = results[results['method'] == 'baseline']

    if baseline_results.empty:
        raise ValueError(
            "No baseline rows (method == 'baseline') found; "
            "cannot build the baseline reference row."
        )

    # Materialise once: the counts are iterated several times and measured.
    feature_counts = list(feature_counts)

    header_cells = "".join(f"& \\textbf{{{fc} features}} " for fc in feature_counts)
    latex = r"""\begin{table}[htbp]
\centering
\caption{Average F1 Score by Feature Selection Method and Number of Features}
\label{tab:performance_by_features}
\begin{tabular}{l""" + "c" * len(feature_counts) + r"""}
\hline
\textbf{Method} """ + header_cells + r"\\" + "\n\\hline\n"

    # One row per non-baseline method, in alphabetical order.
    methods = sorted(m for m in results['method'].unique() if m != 'baseline')

    for method in methods:
        method_data = results[results['method'] == method]
        cells = []
        for fc in feature_counts:
            scores = method_data.loc[method_data['n_features'] == fc, 'f1_score']
            cells.append("& --- " if scores.empty else f"& {scores.mean():.4f} ")

        method_name = method.replace('_', ' ').title()
        latex += f"{method_name} " + "".join(cells) + r"\\" + "\n"

    # Baseline reference row: the same mean F1 is shown under every column
    # because the baseline always uses its full feature set.
    latex += r"\hline" + "\n"
    baseline_f1 = baseline_results['f1_score'].mean()
    baseline_features = int(baseline_results['n_features'].iloc[0])
    latex += f"Baseline ({baseline_features} features) "
    latex += f"& {baseline_f1:.4f} " * len(feature_counts)
    latex += r"\\" + "\n"

    # Close table
    latex += r"""\hline
\end{tabular}
\end{table}
"""

    return latex

# Build Table 2 and echo it under a titled 60-character rule.
table2 = generate_performance_by_features_table()
print("\nTABLE 2: Performance by Number of Features", "=" * 60, table2, sep="\n")

In [None]:
# ============================================================
# 4. TABLE 3: METHOD COMPARISON BY CATEGORY
# ============================================================

def generate_category_comparison_table(categories=None, results=None, baseline_results=None):
    """Generate the LaTeX table with the best run of each method category.

    For every category the run with the highest ``f1_score`` among the
    category's methods is reported. Categories without any scored run are
    omitted. The last row shows the best-F1 baseline run.

    Args:
        categories: Mapping of category label to a list of method names.
            Defaults to the Traditional / Embedded / XAI-Based / Optimization
            grouping defined below.
        results: DataFrame with the columns ``method``, ``n_features``,
            ``f1_score`` and ``auc``. Defaults to the notebook-level
            ``all_results`` frame.
        baseline_results: DataFrame of baseline runs. Defaults to the
            notebook-level ``baseline`` frame when ``results`` is omitted,
            otherwise to the rows of ``results`` whose ``method`` is
            ``'baseline'``.

    Returns:
        The complete ``table`` environment as a string.

    Raises:
        ValueError: if there are no baseline rows.
    """
    if categories is None:
        categories = {
            'Traditional': ['correlation', 'variance', 'chi_square'],
            'Embedded': ['l1_lasso', 'l2_ridge'],
            'XAI-Based': ['shap', 'lime'],
            'Optimization': ['pso', 'differential_evolution']
        }

    if results is None:
        # Notebook usage: fall back to the frames loaded in the earlier cells.
        results = all_results
        if baseline_results is None:
            baseline_results = baseline
    elif baseline_results is None:
        baseline_results = results[results['method'] == 'baseline']

    if baseline_results.empty:
        raise ValueError(
            "No baseline rows (method == 'baseline') found; "
            "cannot compute feature reduction."
        )

    # Best-F1 baseline run; the stable sort keeps file order on ties, so a
    # single-row baseline behaves exactly like ``iloc[0]``.
    baseline_row = baseline_results.sort_values(
        'f1_score', ascending=False, kind='mergesort'
    ).iloc[0]
    baseline_features = baseline_row['n_features']

    latex = r"""\begin{table}[htbp]
\centering
\caption{Feature Selection Method Comparison by Category}
\label{tab:category_comparison}
\begin{tabular}{lccccc}
\hline
\textbf{Category} & \textbf{Best Method} & \textbf{Features} & \textbf{F1 Score} & \textbf{AUC} & \textbf{Reduction} \\
\hline
"""

    for category, methods in categories.items():
        # Runs without an F1 score cannot be ranked.
        scored = results[results['method'].isin(methods)].dropna(subset=['f1_score'])
        if scored.empty:
            continue

        best = scored.sort_values('f1_score', ascending=False, kind='mergesort').iloc[0]
        reduction = (1 - best['n_features'] / baseline_features) * 100
        best_method = best['method'].replace('_', ' ').title()

        latex += f"{category} & {best_method} & {int(best['n_features'])} & {best['f1_score']:.4f} & {best['auc']:.4f} & {reduction:.1f}\\% \\\\\n"

    # Baseline reference row, separated by a rule.
    latex += r"\hline" + "\n"
    latex += f"Baseline & All Features & {int(baseline_row['n_features'])} & {baseline_row['f1_score']:.4f} & {baseline_row['auc']:.4f} & 0.0\\% \\\\\n"

    # Close table
    latex += r"""\hline
\end{tabular}
\end{table}
"""

    return latex

# Build Table 3 and echo it under a titled 60-character rule.
table3 = generate_category_comparison_table()
print("\nTABLE 3: Category Comparison", "=" * 60, table3, sep="\n")

In [None]:
# ============================================================
# 5. TABLE 4: MODEL PERFORMANCE COMPARISON
# ============================================================

def generate_model_comparison_table(max_methods=6, results=None, baseline_results=None):
    """Generate the LaTeX table of maximum F1 per (method, model) pair.

    Only the ``max_methods`` methods with the highest best-run F1 are shown,
    ordered best first. Previously the first six method names in alphabetical
    order were used, which could drop the strongest methods. ``---`` marks
    combinations without a scored run. The last row holds the maximum
    baseline F1 of each model.

    Args:
        max_methods: Number of methods to include; ``None`` includes all.
        results: DataFrame with the columns ``method``, ``model`` and
            ``f1_score``. Defaults to the notebook-level ``all_results``.
        baseline_results: DataFrame of baseline runs. Defaults to the
            notebook-level ``baseline`` frame when ``results`` is omitted,
            otherwise to the rows of ``results`` whose ``method`` is
            ``'baseline'``.

    Returns:
        The complete ``table`` environment as a string.
    """
    if results is None:
        # Notebook usage: fall back to the frames loaded in the earlier cells.
        results = all_results
        if baseline_results is None:
            baseline_results = baseline
    elif baseline_results is None:
        baseline_results = results[results['method'] == 'baseline']

    models = sorted(results['model'].unique())

    # Rank methods by their best F1. groupby yields alphabetical keys and the
    # sort is stable, so equally good methods stay in alphabetical order.
    best_by_method = (
        results[results['method'] != 'baseline']
        .groupby('method')['f1_score']
        .max()
        .sort_values(ascending=False, kind='mergesort')
    )
    methods = list(best_by_method.index[:max_methods])

    def _max_f1_cells(frame):
        """Return one '& value ' cell per model holding the frame's max F1."""
        cells = []
        for model in models:
            best = frame.loc[frame['model'] == model, 'f1_score'].max()
            # max() of an empty or all-NaN selection is NaN: no score to show.
            cells.append("& --- " if pd.isna(best) else f"& {best:.4f} ")
        return "".join(cells)

    header_cells = "".join(
        f"& \\textbf{{{model.replace('_', ' ').title()}}} " for model in models
    )
    latex = r"""\begin{table}[htbp]
\centering
\caption{Maximum F1 Score by Model and Feature Selection Method}
\label{tab:model_comparison}
\begin{tabular}{l""" + "c" * len(models) + r"""}
\hline
\textbf{Method} """ + header_cells + r"\\" + "\n\\hline\n"

    # One row per selected method.
    for method in methods:
        method_name = method.replace('_', ' ').title()
        latex += f"{method_name} " + _max_f1_cells(results[results['method'] == method])
        latex += r"\\" + "\n"

    # Baseline reference row (maximum per model, matching the caption).
    latex += r"\hline" + "\n"
    latex += "Baseline " + _max_f1_cells(baseline_results)
    latex += r"\\" + "\n"

    # Close table
    latex += r"""\hline
\end{tabular}
\end{table}
"""

    return latex

# Build Table 4 and echo it under a titled 60-character rule.
table4 = generate_model_comparison_table()
print("\nTABLE 4: Model Comparison", "=" * 60, table4, sep="\n")

In [None]:
# ============================================================
# 6. TABLE 5: TOP SELECTED FEATURES
# ============================================================

# Characters with special meaning in LaTeX text mode and their escaped forms.
_LATEX_TEXT_ESCAPES = str.maketrans({
    '\\': r'\textbackslash{}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\textasciicircum{}',
})


def _escape_latex_text(text):
    """Escape LaTeX special characters in ``text`` (single pass, no double escaping)."""
    return str(text).translate(_LATEX_TEXT_ESCAPES)


def generate_top_features_table(top_n=15, freq_df=None):
    """Generate the LaTeX table of the most frequently selected features.

    Rows are ranked by ``frequency`` in descending order before the top
    ``top_n`` are taken, so the result does not depend on the CSV already
    being sorted. Ties keep their input order.

    Args:
        top_n: Maximum number of features to list.
        freq_df: DataFrame with the columns ``feature`` and ``frequency``.
            Defaults to reading
            ``results/comparisons/tables/feature_frequency.csv``.

    Returns:
        The complete ``table`` environment as a string. The caption states
        the number of rows actually listed, which is below ``top_n`` when
        fewer features are available.
    """
    if freq_df is None:
        freq_df = pd.read_csv("results/comparisons/tables/feature_frequency.csv")

    # mergesort is stable: equally frequent features keep their input order.
    top_features = freq_df.sort_values(
        'frequency', ascending=False, kind='mergesort'
    ).head(top_n)

    latex = r"""\begin{table}[htbp]
\centering
\caption{Top """ + str(len(top_features)) + r""" Most Frequently Selected Features Across All Methods}
\label{tab:top_features}
\begin{tabular}{clc}
\hline
\textbf{Rank} & \textbf{Feature Name} & \textbf{Selection Frequency} \\
\hline
"""

    ranked = zip(top_features['feature'], top_features['frequency'])
    for rank, (feature, frequency) in enumerate(ranked, 1):
        # Feature names come from data and may contain _, &, %, # ... which
        # would otherwise break compilation of the table.
        latex += f"{rank} & {_escape_latex_text(feature)} & {int(frequency)} \\\\\n"

    # Close table
    latex += r"""\hline
\end{tabular}
\end{table}
"""

    return latex

# Build Table 5 and echo it under a titled 60-character rule.
table5 = generate_top_features_table()
print("\nTABLE 5: Top Selected Features", "=" * 60, table5, sep="\n")

In [None]:
# ============================================================
# 7. SAVE ALL TABLES
# ============================================================

output_dir = Path("results/comparisons/reports")
output_dir.mkdir(parents=True, exist_ok=True)

# Save individual tables
tables = {
    "table1_best_results.tex": table1,
    "table2_performance_by_features.tex": table2,
    "table3_category_comparison.tex": table3,
    "table4_model_comparison.tex": table4,
    "table5_top_features.tex": table5
}

for filename, content in tables.items():
    filepath = output_dir / filename
    # Explicit UTF-8: the platform default encoding can fail on (or mangle)
    # non-ASCII feature or method names.
    filepath.write_text(content, encoding="utf-8")
    print(f"✓ Saved: {filename}")

# Create combined file with all tables: a standalone article with one table
# per page, each preceded by a commented section header.
_section_rule = "% =================================================="
_sections = [
    ("TABLE 1: Best Results Per Method", table1),
    ("TABLE 2: Performance by Number of Features", table2),
    ("TABLE 3: Category Comparison", table3),
    ("TABLE 4: Model Comparison", table4),
    ("TABLE 5: Top Selected Features", table5),
]

combined = (
    r"""\documentclass{article}
\usepackage{booktabs}
\usepackage{multirow}
\begin{document}
"""
    + "\n\\newpage\n".join(
        f"\n{_section_rule}\n% {title}\n{_section_rule}\n\n{content}\n"
        for title, content in _sections
    )
    + "\n\\end{document}\n"
)

(output_dir / "all_tables_combined.tex").write_text(combined, encoding="utf-8")

print("\n✓ Saved: all_tables_combined.tex")

In [None]:
# ============================================================
# 8. GENERATE USAGE INSTRUCTIONS
# ============================================================

# Raw string: the text is full of LaTeX commands, and in a normal string a
# sequence such as "\b" (in "\begin") is a backspace escape that silently
# corrupts the written file.
instructions = r"""
# HOW TO USE LATEX TABLES IN YOUR THESIS

## Individual Tables
Each table is saved as a separate .tex file in:
`results/comparisons/reports/`

### Copy-Paste Method:
1. Open the .tex file you need (e.g., table1_best_results.tex)
2. Copy the entire content
3. Paste into your thesis .tex file where you want the table

### Include Method:
In your thesis .tex file:
```latex
\input{results/comparisons/reports/table1_best_results.tex}
```

## All Tables Combined
File: `all_tables_combined.tex`
This is a complete LaTeX document with all tables.

To compile:
```bash
pdflatex all_tables_combined.tex
```

## Customization Tips

### Adjust Column Widths:
Replace the column spec, e.g. `\begin{tabular}{lcccc}`, with specific widths:
```latex
\begin{tabular}{p{3cm}p{2cm}p{2cm}p{2cm}p{2cm}}
```

### Add Colors (requires xcolor package):
```latex
\usepackage[table]{xcolor}
\rowcolor{gray!20} % Add before row
```

### Use booktabs for professional look:
The generated tables use plain `\hline` rules. With booktabs loaded, replace
the first `\hline` of a table with `\toprule`, the inner ones with `\midrule`
and the last one with `\bottomrule`.

### Adjust Caption Position:
Move `\caption{}` (and `\label{}`) after `\end{tabular}` to place it at bottom

## Required Packages
Add to your thesis preamble:
```latex
\usepackage{booktabs}
\usepackage{multirow}
\usepackage{graphicx}
```

## Tables Generated:
1. table1_best_results.tex - Best performance per method
2. table2_performance_by_features.tex - F1 scores at different feature counts
3. table3_category_comparison.tex - Comparison by method category
4. table4_model_comparison.tex - Model performance comparison
5. table5_top_features.tex - Most frequently selected features

Choose the tables most relevant to your thesis discussion!
"""

# output_dir is the reports directory created by the save cell above.
(output_dir / "USAGE_INSTRUCTIONS.md").write_text(instructions, encoding="utf-8")

print("\n" + "="*60)
print("LATEX TABLE GENERATION COMPLETE!")
print("="*60)
print("\nGenerated Files:")
print(f"  📄 Individual tables (5): {output_dir}/")
print("  📄 Combined document: all_tables_combined.tex")
print("  📄 Instructions: USAGE_INSTRUCTIONS.md")
print("\nNext Steps:")
print("  1. Review the .tex files")
print("  2. Copy tables into your thesis")
print("  3. Compile and verify formatting")
print("="*60)