In [1]:
import pandas as pd
import numpy as np
from globals import BASE_DIR
import os
import json

available_datasets = ["foursquaretky", "yelp"]




In [2]:
# this one is for the condensed table that only includes comparison to BPR 

for dataset in available_datasets:
    ttest_path = f'{BASE_DIR}/{dataset}_dataset/evaluation_results_ttest.json'
    with open(ttest_path, "r") as f:
        ttest_results = json.load(f)

        df = pd.read_csv(f"{BASE_DIR}/{dataset}_dataset/evaluation_results_ttest_baselines.csv")

        df["user_group"] = df["user_group"].replace({"high": "HighPop", "medium": "MedPop", "low": "LowPop", "all": "All"})

        # Sort user_group by custom order HighPop, MedPop, LowPop, All
        user_group_order = ["HighPop", "MedPop", "LowPop", "All"]
        df["user_group"] = pd.Categorical(df["user_group"], categories=user_group_order, ordered=True)

        
        

        df_pivot = df.pivot(
            index=["user_group"],
            columns="method",
            values=["ndcg", "arp", "poplift"]
        )


        # Sort the pivot table by the custom order of user_group
        df_pivot = df_pivot.sort_values(by=["user_group"], axis=0, ascending=True)
        df_reset = df_pivot.reset_index()

     

        # Generate LaTeX table with full structure, but exclude "gini" columns
        latex_table = df_reset.to_latex(
            escape=False,
            index=False,
            multicolumn=True,
            multirow=True,
            column_format="lccccccccccccccc",
            header=True
        )

        # Modify LaTeX table formatting and wrap in \begin{table} environment
        latex_table = latex_table.replace(
            "\\toprule",
            "\\toprule\n\\multicolumn{1}{c}{} & \\multicolumn{5}{c}{\\textbf{ndcg}} & \\multicolumn{5}{c}{\\textbf{arp}} & \\multicolumn{5}{c}{\\textbf{poplift}} \\\\ \\cmidrule(lr){3-7} \\cmidrule(lr){8-12} \\cmidrule(lr){13-17}"
        )
        latex_table = latex_table.replace("\\midrule", "\\hline")
        latex_table = latex_table.replace("\\bottomrule", "\\hline")
        latex_table = latex_table.replace("user_group", "group")
        latex_table = latex_table.replace("cp_min_js", "$ BPR + CP_\Im$")
        latex_table = latex_table.replace("cp", "BPR + $CP_H$")
        latex_table = latex_table.replace("%", "\%")
        latex_table = latex_table.replace("**", "*")

        # Wrap the LaTeX table in the full structure
        latex_output = f"""
    \\begin{{table*}}[]
    \\centering
    \\resizebox{{\\textwidth}}{{!}}{{%
    {latex_table}
    }}
    \\caption{{{dataset.capitalize()} evaluation results}}
    \\label{{tab:{dataset}_eval}}
    \\end{{table*}}
    """

        # Save LaTeX file
        with open(f"{BASE_DIR}/{dataset}_dataset/{dataset}_eval_table_bpr_comparison.tex", "w") as f:
            f.write(latex_output)


In [3]:
for dataset in available_datasets:
    ttest_path = f'{BASE_DIR}/{dataset}_dataset/evaluation_results_ttest.json'
    with open(ttest_path, "r") as f:
        ttest_results = json.load(f)

        df = pd.read_csv(f"{BASE_DIR}/{dataset}_dataset/evaluation_results_updated.csv")

        df = df.loc[df["model"].isin(["BPR", "LORE", "USG"])]
        df = df.loc[df["method"] != "upd"]
        df.drop(columns=["js"], inplace=True)

        df["user_group"] = df["user_group"].replace({"high": "HighPop", "medium": "MedPop", "low": "LowPop", "all": "All"})

        # Sort user_group by custom order HighPop, MedPop, LowPop, All
        user_group_order = ["HighPop", "MedPop", "LowPop", "All"]
        df["user_group"] = pd.Categorical(df["user_group"], categories=user_group_order, ordered=True)

        float_columns = df.select_dtypes(include="float").columns
        df[float_columns] = df[float_columns].round(4)

        # Remove "gini" from the values being pivoted
        df_pivot = df.pivot(
            index=["model", "user_group"],
            columns="method",
            values=["ndcg", "arp", "poplift"]  # Excluded "gini"
        )

        # Sort the pivot table by the custom order of user_group
        df_pivot = df_pivot.sort_values(by=["user_group"], axis=0, ascending=True)

        df_reset = df_pivot.reset_index()

        # Generate LaTeX table with full structure, but exclude "gini" columns
        latex_table = df_reset.to_latex(
            escape=False,
            index=False,
            multicolumn=True,
            multirow=True,
            column_format="llcccccccc",  # Adjusted for 3 metrics instead of 4
            header=True
        )

        # Modify LaTeX table formatting and wrap in \begin{table} environment
        latex_table = latex_table.replace(
            "\\toprule",
            "\\toprule\n\\multicolumn{2}{c}{} & \\multicolumn{3}{c}{\\textbf{ndcg}} & \\multicolumn{3}{c}{\\textbf{arp}} & \\multicolumn{3}{c}{\\textbf{poplift}} \\\\ \\cmidrule(lr){3-5} \\cmidrule(lr){6-8} \\cmidrule(lr){9-11}"
        )
        latex_table = latex_table.replace("\\midrule", "\\hline")
        latex_table = latex_table.replace("\\bottomrule", "\\hline")
        latex_table = latex_table.replace("user_group", "group")
        latex_table = latex_table.replace("cp_min_js", "CP_\Im")
        latex_table = latex_table.replace("cp", "CP_H")
        latex_table = latex_table.replace("baseline", "Base")
        latex_table = latex_table.replace("%", "\%")
        latex_table = latex_table.replace("Δ=", "")

        # Wrap the LaTeX table in the full structure
        latex_output = f"""
    \\begin{{table*}}[]
    \\centering
    \\resizebox{{\\textwidth}}{{!}}{{%
    {latex_table}
    }}
    \\caption{{{dataset.capitalize()} evaluation results}}
    \\label{{tab:{dataset}_eval}}
    \\end{{table*}}
    """

        # Save LaTeX file
        with open(f"{BASE_DIR}/{dataset}_dataset/{dataset}_eval_table_baseline_comparison.tex", "w") as f:
            f.write(latex_output)


In [16]:
def format_best(row, col_group, best_criteria, model, method, ttest_results):
    """Format best values in bold and mark significant differences in blue/green on 'g3' rows."""
    values = row[col_group]
    
    # Determine sorting order based on criteria
    if best_criteria == "highest":
        sorted_indices = np.argsort(values)[::-1]  # Descending
    elif best_criteria == "lowest":
        sorted_indices = np.argsort(values)  # Ascending
    elif best_criteria == "closest_to_zero":
        sorted_indices = np.argsort(np.abs(values))  # Closest to zero

    formatted = values.astype(str)  # Convert values to string for LaTeX

    # Apply bold formatting for the best value(s)
    if len(sorted_indices) > 0:
        best_value = values[sorted_indices[0]]
        best_indices = np.where(values == best_value)[0]
        for idx in best_indices:
            formatted[idx] = f"\\textbf{{{formatted[idx]}}}"

    # Apply significance markers for 'g3' (low group)
    if row.name[1] == "LowPop":  # 'user_group' is 'g3' (formerly 'low')
        for metric in col_group:
            metric_name = metric[0]  # Extract metric name (ndcg, arp, etc.)
            method_name = metric[1]  # Extract method (baseline, cp, etc.)

            try:
                p_values = ttest_results[model][method_name][metric_name]
                low_high_p = p_values["low_high"]
                low_medium_p = p_values["low_medium"]
                
                significance_marker = ""
                if low_high_p < 0.05:
                    significance_marker += "\\textcolor{blue}{*}"
                if low_medium_p < 0.05:
                    significance_marker += "\\textcolor{red}{*}"

                # Append significance markers to formatted value
                formatted[col_group.index(metric)] += significance_marker

            except KeyError:
                pass  # Skip if no p-values exist for this combination

    return formatted


for dataset in available_datasets:
    ttest_path = f'{BASE_DIR}/{dataset}_dataset/evaluation_results_ttest.json'
    with open(ttest_path, "r") as f:
        ttest_results = json.load(f)

        df = pd.read_csv(f"{BASE_DIR}/{dataset}_dataset/evaluation_results.csv")

        df = df.loc[df["model"].isin(["BPR", "LORE", "USG"])]
        df = df.loc[df["method"] != "upd"]
        df.drop(columns=["js"], inplace=True)

        df["user_group"] = df["user_group"].replace({"high": "HighPop", "medium": "MedPop", "low": "LowPop", "all": "All"})

        float_columns = df.select_dtypes(include="float").columns
        df[float_columns] = df[float_columns].round(4)

        df_pivot = df.pivot(
            index=["model", "user_group"],
            columns="method",
            values=["ndcg", "arp", "poplift", "gini"]
        )

        metric_groups = ["ndcg", "arp", "poplift", "gini"]
        criteria = {"ndcg": "highest", "arp": "lowest", "poplift": "closest_to_zero", "gini": "lowest"}

        for metric in metric_groups:
            method_cols = [col for col in df_pivot.columns if col[0] == metric]
            df_pivot[method_cols] = df_pivot.apply(
                lambda row: format_best(row, method_cols, criteria[metric], row.name[0], row.name[1], ttest_results),
                axis=1
            )

        df_reset = df_pivot.reset_index()

        # Generate LaTeX table with full structure
        latex_table = df_reset.to_latex(
            escape=False,
            index=False,
            multicolumn=True,
            multirow=True,
            column_format="llcccccccccccc",
            header=True
        )

        # Modify LaTeX table formatting and wrap in \begin{table} environment
        latex_table = latex_table.replace(
            "\\toprule",
            "\\toprule\n\\multicolumn{2}{c}{} & \\multicolumn{3}{c}{\\textbf{ndcg}} & \\multicolumn{3}{c}{\\textbf{arp}} & \\multicolumn{3}{c}{\\textbf{poplift}} & \\multicolumn{3}{c}{\\textbf{gini}} \\\\ \\cmidrule(lr){3-5} \\cmidrule(lr){6-8} \\cmidrule(lr){9-11} \\cmidrule(lr){12-14}"
        )
        latex_table = latex_table.replace("\\midrule", "\\hline")
        latex_table = latex_table.replace("\\bottomrule", "\\hline")
        latex_table = latex_table.replace("user_group", "group")
        latex_table = latex_table.replace("cp_min_js", "CP_\Im")
        latex_table = latex_table.replace("cp", "CP_H")
        latex_table = latex_table.replace("baseline", "Base")

        # Wrap the LaTeX table in the full structure
        latex_output = f"""
    \\begin{{table}}[]
    \\centering
    \\resizebox{{\\textwidth}}{{!}}{{%
    {latex_table}
    }}
    \\caption{{{dataset.capitalize()} evaluation results}}
    \\label{{tab:{dataset}_eval}}
    \\end{{table}}
    """

        # Save LaTeX file
        with open(f"{BASE_DIR}/{dataset}_dataset/{dataset}_eval_table.tex", "w") as f:
            f.write(latex_output)


  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[col_group.index(metric)] += significance_marker
  formatted[col_group.index(metric)] += significance_marker
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[

In [7]:
def format_best(row, col_group, best_criteria, p_values=None):
    """Format the best values as bold and add significance markers for low/high and low/medium comparisons."""
    values = row[col_group]
    
    if best_criteria == "highest":  # For ndcg
        sorted_indices = np.argsort(values)[::-1]  # Descending
    elif best_criteria == "lowest":  # For arp
        sorted_indices = np.argsort(values)  # Ascending
    elif best_criteria == "closest_to_zero":  # For poplift
        sorted_indices = np.argsort(np.abs(values))  # Closest to zero

    formatted = values.astype(str)  # Convert to strings for LaTeX formatting

    if len(sorted_indices) > 0:
        # Identify the highest value and all its ties
        best_value = values[sorted_indices[0]]
        best_indices = np.where(values == best_value)[0]

        # Bold all highest values
        for idx in best_indices:
            formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
    
    # Add significance markers for low rows
    if p_values and row['user_group'] == 'low':
        method = row['method']
        metric = col_group
        
        if method in p_values and metric in p_values[method]:
            low_high_p = p_values[method][metric].get('low_high', 1)
            low_medium_p = p_values[method][metric].get('low_medium', 1)
            
            markers = ""
            if low_high_p < 0.05:
                markers += "\\textcolor{blue}{*}"
            if low_medium_p < 0.05:
                markers += "\\textcolor{green}{*}"
            
            if markers:
                formatted[row.name] = f"{formatted[row.name]} {markers}"
    
    return formatted


def format_best_js(row, method_cols, p_values=None):
    for method_col in method_cols:
        values = row[method_col]
        min_value_idx = values.idxmin()  # Get the index of the minimum value

        # Apply bold formatting only to the best (minimum) value
        row[method_col] = row[method_col].apply(
            lambda x: f"\\textbf{{{x:.4f}}}" if x == min_value_idx else f"{x:.4f}"
        )

        # Add significance markers for low rows
        if p_values and row['user_group'] == 'low':
            method = row['method']
            metric = method_col
            
            if method in p_values and metric in p_values[method]:
                low_high_p = p_values[method][metric].get('low_high', 1)
                low_medium_p = p_values[method][metric].get('low_medium', 1)
                
                markers = ""
                if low_high_p < 0.05:
                    markers += "\\textcolor{blue}{*}"
                if low_medium_p < 0.05:
                    markers += "\\textcolor{green}{*}"
                
                if markers:
                    row[method_col] = row[method_col].apply(
                        lambda x: f"{x} {markers}" if x == min_value_idx else x
                    )
    
    return row


In [8]:
df = pd.read_csv(f"{BASE_DIR}/{dataset}_dataset/evaluation_results.csv")
    


In [9]:
df = df.loc[df["model"].isin (["BPR", "LORE", "USG"])]
df = df.loc[df["method"] != "upd"]
df.drop(columns=["js"], inplace=True)



In [10]:
df

Unnamed: 0,dataset,model,method,user_group,ndcg,arp,poplift,gini
0,yelp,BPR,baseline,high,0.064965,0.009303,1.054221,0.65147
1,yelp,BPR,baseline,medium,0.030369,0.007904,2.398288,0.748125
2,yelp,BPR,baseline,low,0.019173,0.003965,1.770248,0.637369
3,yelp,BPR,baseline,all,0.035049,0.007396,2.003867,0.769241
4,yelp,BPR,cp,high,0.064965,0.009302,1.053849,0.65147
5,yelp,BPR,cp,medium,0.030719,0.007894,2.393022,0.753261
6,yelp,BPR,cp,low,0.020503,0.00244,0.616124,0.58902
7,yelp,BPR,cp,all,0.035525,0.007085,1.769808,0.798218
8,yelp,BPR,cp_min_js,high,0.053499,0.007575,0.655935,0.713202
9,yelp,BPR,cp_min_js,medium,0.021922,0.005138,1.183131,0.703714


In [11]:

all_results = []
dataset_latex_parts = []

for dataset in available_datasets:
    df = pd.read_csv(f"{BASE_DIR}/{dataset}_dataset/evaluation_results.csv")
    
    df = df.loc[df["user_group"] == "all"]
    
    # Round float columns to 4 decimals
    float_columns = df.select_dtypes(include="float").columns
    df[float_columns] = df[float_columns].round(4)

    # Pivot table for Gini
    df_pivot_gini = df.pivot(
        index=["model", "user_group"],
        columns="method",
        values=["gini"]
    )

    # Apply formatting to highlight best/second-best values
    method_cols = [col for col in df_pivot_gini.columns if col[0] == "gini"]
    df_pivot_gini[method_cols] = df_pivot_gini.apply(
        lambda row: format_best(row, method_cols, "lowest"), axis=1
    )

    # Reset index for proper formatting
    df_reset_gini = df_pivot_gini.reset_index()

    # Convert DataFrame to LaTeX (without table headers, since we merge all)
    gini_latex_part = df_reset_gini.to_latex(
        escape=False,
        index=False,
        header=False,  # No headers inside merged sections
        multicolumn=True,
        multirow=True,
        column_format="llcccc"
    )

    # Add dataset separator before this section
    dataset_separator = f"\\midrule\n\\multicolumn{{6}}{{c}}{{\\textbf{{{dataset.capitalize()}}}}} \\\\"
    dataset_latex_parts.append(dataset_separator + "\n" + gini_latex_part)

# Combine all LaTeX table parts
gini_latex = "\n".join(dataset_latex_parts)

# Add table header and formatting
gini_latex = (
    "\\begin{table}[h]\n"
    "\\centering\n"
    "\\caption{Gini Coefficient Evaluation}\n"
    "\\begin{tabular}{llcccc}\n"
    "\\toprule\n"
    "\\multicolumn{2}{c}{} & \\multicolumn{4}{c}{\\textbf{Gini Coefficient}} \\\\ \\cmidrule(lr){3-6}\n"
    "Model & User Group &  & Method2 & Method3 & Method4 \\\\\n"
    "\\midrule\n"
    + gini_latex +
    "\\bottomrule\n"
    "\\end{tabular}\n"
    "\\end{table}"
)

# Save the LaTeX table
with open(f"{BASE_DIR}/gini_evaluation_combined.tex", "w") as f:
    f.write(gini_latex)

print("Gini table saved as a single LaTeX table with dataset separators.")


Gini table saved as a single LaTeX table with dataset separators.


  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"

In [12]:
for dataset in available_datasets:
    df = pd.read_csv(f"{BASE_DIR}/{dataset}_dataset/evaluation_results.csv")
    
    df = df.loc[df["user_group"]== "all"]
    
    # Replace user group names with shorter labels
    #df["user_group"] = df["user_group"].apply(lambda x: x.replace("high", "g1").replace("medium", "g2").replace("low", "g3"))

    # Round float columns to 4 decimals
    float_columns = df.select_dtypes(include="float").columns
    df[float_columns] = df[float_columns].round(4)

    # Create Gini pivot table
    df_pivot_gini = df.pivot(
        index=["model", "user_group"],
        columns="method",
        values=["gini"]
    )

    # Apply formatting function
    method_cols = [col for col in df_pivot_gini.columns if col[0] == "gini"]
    df_pivot_gini[method_cols] = df_pivot_gini.apply(
        lambda row: format_best(row, method_cols, "lowest"), axis=1
    )

    # Reset index to include model and user group
    df_reset_gini = df_pivot_gini.reset_index()

    # Convert DataFrame to LaTeX
    gini_latex = df_reset_gini.to_latex(
        escape=False,  # Allow LaTeX formatting
        index=False,   # Don't include DataFrame index
        multicolumn=True,
        multirow=True,
        column_format="llcccc",  # Adjust columns
        header=True
    )

    # Add custom LaTeX formatting
    gini_latex = gini_latex.replace(
        "\\toprule", 
        "\\toprule\n\\multicolumn{2}{c}{} & \\multicolumn{4}{c}{\\textbf{Gini Coefficient}} \\ \\cmidrule(lr){3-6}"
    )
    gini_latex = gini_latex.replace("\\midrule", "\\hline")
    gini_latex = gini_latex.replace("\\bottomrule", "\\hline")

    # Save the LaTeX table
    with open(f"{BASE_DIR}/{dataset}_dataset/{dataset}_gini_eval.tex", "w") as f:
        f.write(gini_latex)

    print(f"Gini table saved for {dataset}")


Gini table saved for brightkite
Gini table saved for foursquaretky
Gini table saved for gowalla
Gini table saved for yelp


  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"

In [13]:
for dataset in available_datasets:
    df = pd.read_csv(f"{BASE_DIR}/{dataset}_dataset/evaluation_results.csv")

    df = df.loc[df["model"].isin (["BPR", "LORE", "USG"])]
    df = df.loc[df["method"] != "upd"]
    df.drop(columns=["js"], inplace=True)
    
    df["user_group"] = df["user_group"].apply(lambda x: x.replace("high", "g1").replace("medium", "g2").replace("low", "g3"))

    float_columns = df.select_dtypes(include="float").columns
    df[float_columns] = df[float_columns].round(4)



    df_pivot = df.pivot(
    index=["model", "user_group"],
    columns="method",
    values=["ndcg", "arp", "poplift", "gini"]
    )

    # Apply formatting for each metric group
    metric_groups = ["ndcg", "arp", "poplift", "gini"]
    criteria = {"ndcg": "highest", "arp": "lowest", "poplift": "closest_to_zero", "gini": "lowest"}

    for metric in metric_groups:
        method_cols = [col for col in df_pivot.columns if col[0] == metric]
        df_pivot[method_cols] = df_pivot.apply(
            lambda row: format_best(row, method_cols, criteria[metric]), axis=1
        )


    # Reset the index to include 'model' and 'user_group' in the DataFrame
    df_reset = df_pivot.reset_index()

    # Export to LaTeX while keeping multicolumn and multirow formatting
    latex = df_reset.to_latex(
        escape=False,  # To allow LaTeX formatting
        index=False,   # Don't write the default DataFrame index
        multicolumn=True,
        multirow=True,
        column_format="llcccccccccccc",  # Adjust for the number of columns
        header=True
    )

    # Modify LaTeX for custom formatting
    latex = latex.replace(
        "\\toprule", 
        "\\toprule\n\\multicolumn{2}{c}{} & \\multicolumn{3}{c}{\\textbf{ndcg}} & \\multicolumn{3}{c}{\\textbf{arp}} & \\multicolumn{3}{c}{\\textbf{poplift}} & \\multicolumn{3}{c}{\\textbf{gini}} \\\\ \\cmidrule(lr){3-5} \\cmidrule(lr){6-8} \\cmidrule(lr){9-11} \\cmidrule(lr){12-14}"
    )
    latex = latex.replace("\\midrule", "\\hline")
    latex = latex.replace("\\bottomrule", "\\hline")

    # Save

    with open(f"{BASE_DIR}/{dataset}_dataset/{dataset}_eval_test_if_okay.tex", "w") as f:
        f.write(latex)

    
   

    

  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  best_value = values[sorted_indices[0]]
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"
  formatted[idx] = f"\\textbf{{{formatted[idx]}}}"

In [14]:
# for dataset in available_datasets:
#     df = pd.read_csv(f"{BASE_DIR}/{dataset}_dataset/evaluation_results.csv")
    
#     df["user_group"] = df["user_group"].apply(lambda x: x.replace("high", "g1").replace("medium", "g2").replace("low", "g3"))

#     float_columns = df.select_dtypes(include="float").columns
#     df[float_columns] = df[float_columns].round(4)


#     df_pivot = df.pivot(
#     index=["model", "user_group"],
#     columns="method",
#     values=["ndcg", "arp", "poplift"]
#     )

#     # Apply formatting for each metric group
#     metric_groups = ["ndcg", "arp", "poplift"]
#     criteria = {"ndcg": "highest", "arp": "lowest", "poplift": "closest_to_zero"}

#     for metric in metric_groups:
#         method_cols = [col for col in df_pivot.columns if col[0] == metric]
#         df_pivot[method_cols] = df_pivot.apply(
#             lambda row: format_best_second(row, method_cols, criteria[metric]), axis=1
#         )


#     df_pivot_js = df.pivot(
#     index=["model", "user_group"],
#     columns="method",
#     values=["ndcg", "arp", "poplift", "js"]
#     )
#     # ----- Extra Table for JS -----
#     # Filter JS columns
#     js_cols = [col for col in df_pivot_js.columns if col[0] == "js"]
#     js_table = df_pivot_js[js_cols].reset_index()

#     # Apply formatting for JS (highlight the lowest value)
#     js_table[js_cols] = js_table.apply(
#         lambda row: format_best_second(row, js_cols, criteria["js"]), axis=1
#     )

#     # Export JS-only table to LaTeX
#     js_latex = js_table.to_latex(
#         escape=False,
#         index=False,
#         multicolumn=True,
#         multirow=True,
#         column_format="llcccc",  # Adjust for JS columns
#         header=True
#     )

#     # Custom LaTeX formatting for JS table
#     js_latex = js_latex.replace(
#         "\\toprule", 
#         "\\toprule\n\\multicolumn{2}{c}{} & \\multicolumn{4}{c}{\\textbf{Jensen-Shannon Divergence (JSD)}}\\\\ \\cmidrule(lr){3-6}"
#     )
#     js_latex = js_latex.replace("\\midrule", "\\hline")
#     js_latex = js_latex.replace("\\bottomrule", "\\hline")

#     # Save JS-only table
#     with open(f"{BASE_DIR}/{dataset}_dataset/{dataset}_js_eval.tex", "w") as f:
#         f.write(js_latex)