In [35]:
import os
import pandas as pd
import glob


In [36]:
files = glob.glob('*.csv')
# Create global pandas table with batchsize and model for each fingerprint size
# No fingeprint

no_fp = pd.DataFrame(columns=['Model', 32, 64, 128, 256, 512])
no_fp['Model'] = ['CategoryEmbedding', 'AutoInt', 'TabTransformer']
for batch_size in [32, 64, 128, 256, 512]:
    path = f'./pytorch_tabular_no_fp_batch_{batch_size}_seed_0.csv'
    df = pd.read_csv(path)
    # Check the model_names
    model_names = list(df['Model']) 
    wpc = list(df['LB WPC'])
    # Add the WPC for each model, if the model does not exist, add -
    for i, model in enumerate(no_fp['Model']):
        if model in model_names:
            no_fp.loc[i, batch_size] = wpc[model_names.index(model)]
        else:
            no_fp.loc[i, batch_size] = '-'

     

no_fp

Unnamed: 0,Model,32,64,128,256,512
0,CategoryEmbedding,0.184725,0.177091,0.148471,0.161178,0.193829
1,AutoInt,0.258447,0.274583,0.266002,0.281492,0.276155
2,TabTransformer,0.210695,0.239527,0.202102,0.167703,0.148261


In [37]:
files = glob.glob('*.csv')
# Create global pandas table with batchsize and model for each fingerprint size
# No fingeprint

fp_1024 = pd.DataFrame(columns=['Model', 32, 64, 128, 256, 512])
fp_1024['Model'] = ['CategoryEmbedding', 'AutoInt', 'TabTransformer']
for batch_size in [32, 64, 128, 256, 512]:
    path = f'./pytorch_tabular_fp_1024_batch_{batch_size}_seed_0.csv'
    df = pd.read_csv(path)
    # Check the model_names
    model_names = list(df['Model']) 
    wpc = list(df['LB WPC'])
    # Add the WPC for each model, if the model does not exist, add -
    for i, model in enumerate(no_fp['Model']):
        if model in model_names:
            fp_1024.loc[i, batch_size] = wpc[model_names.index(model)]
        else:
            fp_1024.loc[i, batch_size] = '-'

     

fp_1024

Unnamed: 0,Model,32,64,128,256,512
0,CategoryEmbedding,0.206052,0.184729,0.164263,0.151062,0.119197
1,AutoInt,-,-,-,-,-
2,TabTransformer,0.22761,0.174891,0.14291,0.208954,0.19893


In [38]:
files = glob.glob('*.csv')
# Create global pandas table with batchsize and model for each fingerprint size
# No fingeprint

fp_2048 = pd.DataFrame(columns=['Model', 32, 64, 128, 256, 512])
fp_2048['Model'] = ['CategoryEmbedding', 'AutoInt', 'TabTransformer']
for batch_size in [32, 64, 128, 256, 512]:
    path = f'./pytorch_tabular_fp_1024_batch_{batch_size}_seed_0.csv'
    df = pd.read_csv(path)
    # Check the model_names
    model_names = list(df['Model']) 
    wpc = list(df['LB WPC'])
    # Add the WPC for each model, if the model does not exist, add -
    for i, model in enumerate(no_fp['Model']):
        if model in model_names:
            fp_2048.loc[i, batch_size] = wpc[model_names.index(model)]
        else:
            fp_2048.loc[i, batch_size] = '-'

     

fp_2048

Unnamed: 0,Model,32,64,128,256,512
0,CategoryEmbedding,0.206052,0.184729,0.164263,0.151062,0.119197
1,AutoInt,-,-,-,-,-
2,TabTransformer,0.22761,0.174891,0.14291,0.208954,0.19893


In [None]:
import pandas as pd
import glob
import os

benchmark_dir = "."


# Models you're interested in
models = ['CategoryEmbedding', 'AutoInt', 'TabTransformer']

# Fingerprint types
fp_types = {
    "No fingerprints": "pytorch_tabular_no_fp_batch_{}_seed_0.csv",
    "Morgan 1024 bits":  "pytorch_tabular_fp_1024_batch_{}_seed_0.csv",
    "Morgan 2048 bits":  "pytorch_tabular_fp_2048_batch_{}_seed_0.csv"
}

# Batch sizes to include
batch_sizes = [32, 64, 128, 256, 512]

# Create a dict to hold DataFrames per model
model_tables = {model: pd.DataFrame(index=fp_types.keys(), columns=batch_sizes) for model in models}

# Fill the tables
for fp_label, file_template in fp_types.items():
    for batch_size in batch_sizes:
        file = file_template.format(batch_size)
        if not os.path.exists(file):
            continue
        df = pd.read_csv(file)
        for model in models:
            if model in df['Model'].values:
                val = df[df['Model'] == model]['LB WPC'].values[0]
                model_tables[model].loc[fp_label, batch_size] = val
            else:
                model_tables[model].loc[fp_label, batch_size] = '-'

# Convert to LaTeX
for model, table in model_tables.items():
    # Format and bold best per row
    for idx, row in table.iterrows():
        numeric_row = row.apply(pd.to_numeric, errors='coerce')
        if numeric_row.notna().any():
            best_col = numeric_row.idxmax()
            for col in table.columns:
                val = table.loc[idx, col]
                if val == '-' or pd.isna(val):
                    table.loc[idx, col] = '-'
                elif col == best_col:
                    table.loc[idx, col] = f"\\textbf{{{float(val):.3f}}}"
                else:
                    table.loc[idx, col] = f"{float(val):.3f}"
        else:
            table.loc[idx] = ['-'] * len(table.columns)

    print(f"\n% === {model} ===")
    print(table.to_latex(
        escape=False,
        column_format="l" + "c" * len(table.columns),
        caption=f"LB WPC results for {model} across batch sizes and fingerprint settings.",
        label=f"tab:lbwpc_{model.lower()}"
    ))



% === CategoryEmbedding ===
\begin{table}
\caption{LB WPC results for CategoryEmbedding across batch sizes and fingerprint settings.}
\label{tab:lbwpc_categoryembedding}
\begin{tabular}{lccccc}
\toprule
 & 32 & 64 & 128 & 256 & 512 \\
\midrule
no_fp & 0.185 & 0.177 & 0.148 & 0.161 & \textbf{0.194} \\
1024 & \textbf{0.206} & 0.185 & 0.164 & 0.151 & 0.119 \\
2048 & 0.022 & \textbf{0.114} & 0.093 & 0.062 & 0.071 \\
\bottomrule
\end{tabular}
\end{table}


% === AutoInt ===
\begin{table}
\caption{LB WPC results for AutoInt across batch sizes and fingerprint settings.}
\label{tab:lbwpc_autoint}
\begin{tabular}{lccccc}
\toprule
 & 32 & 64 & 128 & 256 & 512 \\
\midrule
no_fp & 0.258 & 0.275 & 0.266 & \textbf{0.281} & 0.276 \\
1024 & - & - & - & - & - \\
2048 & - & - & - & - & - \\
\bottomrule
\end{tabular}
\end{table}


% === TabTransformer ===
\begin{table}
\caption{LB WPC results for TabTransformer across batch sizes and fingerprint settings.}
\label{tab:lbwpc_tabtransformer}
\begin{tabular

In [41]:
import pandas as pd
import os

benchmark_dir = "."

# Models you're interested in
models = ['CategoryEmbedding', 'AutoInt', 'TabTransformer']

# Fingerprint types
fp_types = {
    "No fingerprints": "pytorch_tabular_no_fp_batch_{}_seed_0.csv",
    "Morgan 1024 bits":  "pytorch_tabular_fp_1024_batch_{}_seed_0.csv",
    "Morgan 2048 bits":  "pytorch_tabular_fp_2048_batch_{}_seed_0.csv"
}

# Batch sizes to include
batch_sizes = [32, 64, 128, 256, 512]

# Create a dict to hold DataFrames per model
model_tables = {model: pd.DataFrame(index=fp_types.keys(), columns=batch_sizes) for model in models}

# Fill the tables
for fp_label, file_template in fp_types.items():
    for batch_size in batch_sizes:
        file = file_template.format(batch_size)
        path = os.path.join(benchmark_dir, file)
        if not os.path.exists(path):
            continue
        df = pd.read_csv(path)
        for model in models:
            if model in df['Model'].values:
                val = df[df['Model'] == model]['LB WPC'].values[0]
                model_tables[model].loc[fp_label, batch_size] = val
            else:
                model_tables[model].loc[fp_label, batch_size] = '-'

# Start writing LaTeX file
latex_lines = [r"\documentclass{article}",
               r"\usepackage{booktabs}",
               r"\usepackage[margin=1in]{geometry}",
               r"\begin{document}"]

# Generate each model's table
for model, table in model_tables.items():
    # Format and bold best per row
    for idx, row in table.iterrows():
        numeric_row = row.apply(pd.to_numeric, errors='coerce')
        if numeric_row.notna().any():
            best_col = numeric_row.idxmax()
            for col in table.columns:
                val = table.loc[idx, col]
                if val == '-' or pd.isna(val):
                    table.loc[idx, col] = '-'
                elif col == best_col:
                    table.loc[idx, col] = f"\\textbf{{{float(val):.3f}}}"
                else:
                    table.loc[idx, col] = f"{float(val):.3f}"
        else:
            table.loc[idx] = ['-'] * len(table.columns)

    # Build LaTeX table with extra batch size header
    col_format = "l" + "c" * len(table.columns)
    batch_header = " & " + " & ".join(str(b) for b in table.columns) + r" \\"
    midrule = r"\cmidrule(lr){2-" + str(len(table.columns)+1) + "}"

    latex_lines += [
        r"\begin{table}[h!]",
        r"\centering",
        rf"\caption{{LB WPC results for {model} across batch sizes and fingerprint settings.}}",
        rf"\label{{tab:lbwpc_{model.lower()}}}",
        rf"\begin{{tabular}}{{{col_format}}}",
        r"\toprule",
        r"& \multicolumn{" + str(len(table.columns)) + r"}{c}{Batch size} \\",
        midrule,
        r"Fingerprint & " + " & ".join(str(b) for b in table.columns) + r" \\",
        r"\midrule"
    ]

    for idx, row in table.iterrows():
        latex_lines.append(f"{idx} & " + " & ".join(str(row[col]) for col in table.columns) + r" \\")

    latex_lines += [
        r"\bottomrule",
        r"\end{tabular}",
        r"\end{table}",
        ""
    ]

# Close LaTeX document
latex_lines.append(r"\end{document}")

# Write to file
with open("lbwpc_tables.tex", "w") as f:
    f.write("\n".join(latex_lines))

print("✅ LaTeX file 'lbwpc_tables.tex' created successfully.")


✅ LaTeX file 'lbwpc_tables.tex' created successfully.
