# Tables

## Setup

In [20]:
import pandas as pd
import numpy as np
from pathlib import Path

DATA_DIR = Path('../data/')

In [21]:
models = {
    'codegen-350M-multi': 'CodeGen-350M-multi',
    'codegen2-1B_P': 'CodeGen2-1B',
    'starcoder2-3b': 'StarCoder2-3B',
    'Qwen2.5-Coder-3B': 'Qwen2.5-Coder-3B',
    'codegen2-3_7B_P': 'CodeGen2-3.7B',
    'CodeLlama-7b-hf': 'CodeLlama-7B',
    'codegen2-7B_P': 'CodeGen2-7B',
    'starcoder2-7b': 'StarCoder2-7B',
    'Qwen2.5-Coder-7B': 'Qwen2.5-Coder-7B',
    'Qwen2.5-Coder-14B': 'Qwen2.5-Coder-14B',
    'starcoderbase': 'StarCoderBase',
    'starcoder2-15b': 'StarCoder2-15B',
    'codegen2-16B_P': 'CodeGen2-16B',
}


datasets = {
    'methods2test_runnable': '\\textsc{Methods2Test\\textsubscript{runnable}}',
    'humaneval-x': '\\textsc{HumanEval-X\\textsubscript{java}}',
}

columns = {"methods2test_runnable": ['valid_syntax', 'codebleu_scores', 'passing_rate', 'coverage_instruction', 'coverage_branch', 'mutation_score'],
           "humaneval-x": ['valid_syntax', 'codebleu_scores', 'passing_rate']}

methods = {
    'pre-trained': 'None',
    'fine-tuning': 'Fine-tuning',
    'lora': 'LoRA',
    'ia3': '(IA)\\textsuperscript{3}',
    'prompt-tuning': 'Prompt tuning',
}

## Table 2

In [22]:
model_trainable_params =  pd.read_csv(DATA_DIR / 'params_data.csv', index_col=0)
model_trainable_params.index = model_trainable_params.index.str.split('/').str[1]
model_trainable_params = model_trainable_params.loc[models.keys()]
model_trainable_params["pre-trained"] = '0'

In [23]:
import pandas as pd
from pathlib import Path

data = {}

for dataset in datasets.keys():
    dataset_dir = DATA_DIR / dataset
    dataset_data = {}
    dataset_columns = []

    for name in columns[dataset]:
        file_path = dataset_dir / f"{name}.csv"
        try:
            print(file_path)
            df = pd.read_csv(file_path, index_col=0)
            df.index = df.index.str.split('/').str[1]
            print(df)
            df = df.loc[models.keys()]
            dataset_data[name] = df
            dataset_columns.append(name)
        except FileNotFoundError:
            print(f"Missing file: {file_path} â€” skipping.")

    if dataset_data:
        data[dataset] = pd.concat(
            [dataset_data[col] for col in dataset_columns],
            axis=1,
            keys=dataset_columns
        )

# Combine all datasets (outer concat across dataset names)
if data:
    data = pd.concat(data, axis=1)
    data = data.astype(float).round(2)
else:
    data = pd.DataFrame()


../data/methods2test_runnable/valid_syntax.csv
                    pre-trained  fine-tuning       ia3      lora  \
model                                                              
codegen2-7B_P          0.987090     0.978484  0.984773  0.986759   
codegen2-16B_P         0.976167     0.986759  0.977160  0.989738   
CodeLlama-7b-hf        0.977424     0.978420  0.980744  0.977424   
codegen2-1B_P          0.000000     0.749421  0.019530  0.383979   
starcoder2-7b          0.918367     0.969388  0.951942  0.970046   
Qwen2.5-Coder-3B       0.969787     0.980624  0.973071  0.978325   
codegen2-3_7B_P        0.000000     0.405826  0.000000  0.415094   
starcoder2-15b         0.928242     0.979263  0.928242  0.980579   
codegen-350M-multi     0.962595     0.980801  0.954651  0.963588   
Qwen2.5-Coder-14B      0.978654     0.958621  0.975452  0.971429   
starcoder2-3b          0.926926     0.963792  0.940421  0.978934   
Qwen2.5-Coder-7B       0.972085     0.943186  0.975041  0.976026   
s

In [24]:
best_method_data = {}

for dataset in datasets.keys():
    for column in columns[dataset]:
        for model in models.keys():
            model_data = data[dataset, column].loc[model]
            # Find max value and what method it corresponds to
            max_value = model_data.max()
            max_indices = model_data[model_data == max_value].index.tolist()
            # If all values are the same, max_indices will be empty
            #if set(max_indices) == set(model_data.index.tolist()):
            #    print(f"All values are the same for {dataset}, {column}, {model}.")
            #    continue
            
            for method_index in max_indices:
                best_method_data.setdefault(model, [])
                best_method_data[model].append((dataset, column, method_index))


  model_data = data[dataset, column].loc[model]


In [25]:
decreased_performance_data = {}

for dataset in datasets.keys():
    for column in columns[dataset]:
        for model in models.keys():
            model_data = data[dataset, column].loc[model]
            # Find max value and what method it corresponds to
            
            baseline_value = model_data.loc['pre-trained']
            
            decreased_indices = model_data[model_data < baseline_value].index.tolist()
            
            for method_index in decreased_indices:
                decreased_performance_data.setdefault(model, [])
                decreased_performance_data[model].append((dataset, column, method_index))

  model_data = data[dataset, column].loc[model]


In [26]:
bad_syntactical_performance_data = {}

for dataset in datasets.keys():
    for method in methods.keys():
        for model in models.keys():
            valid_syntax = data[dataset, "valid_syntax", method].loc[model]
            if valid_syntax < 0.5:
                for column in columns[dataset]:  # Skip 'valid_syntax'
                    bad_syntactical_performance_data.setdefault(model, [])
                    bad_syntactical_performance_data[model].append((dataset, column, method))

In [27]:
def escape_latex(text):
    """Escape LaTeX special characters."""
    return text.replace('_', '\\_').replace('%', '\\%').replace('&', '\\&').replace('$', '\\$')

In [28]:
def int_to_letter(n):
    return chr(ord('a') + n - 1)

In [29]:
table = []

#table.append("\\begin{table*}[htbp]")
table.append("\\begin{ThreePartTable}")
table.append("    \\newcolumntype{Y}{>{\\centering\\arraybackslash}X}")
table.append("    \\newcolumntype{R}{>{\\raggedright\\arraybackslash}X}")
table.append("    \\newcolumntype{L}{>{\\raggedleft\\arraybackslash}X}")
table.append("    \\centering")
table.append("    \\renewcommand{\\arraystretch}{1.25}")
table.append("    \\footnotesize")
#table.append("    \\caption{Evaluation metrics experiment results using different tuning methods across various models.}\\label{tab:eval-summary}")

table.append("    \\begin{TableNotes}[flushleft, para]\\small")
table.append("      \\item \\textbf{Bold}: best-performing training method per model. (Parentheses): decreased performance compared to baseline. \\colorbox{red!10}{Red}: $<$ 50\\% syntactical valid samples.")
table.append("    \\end{TableNotes}")

row = "    \\begin{xltabular}{\\textwidth}{lr!{\\color{white}\\hspace{.5em}}"

for i, dataset in enumerate(datasets, start=1):
    
    row += "Y" * len(columns[dataset])  # 5 columns for each dataset
    if i < len(datasets):
        row += "!{\\color{white}\\hspace{1em}}"
row += "}"
table.append(row)

for i in range(2):
    print
    if i == 0:
        table.append("    \\caption{Evaluation metrics experiment results using different tuning methods across various models.}\\label{tab:eval-summary}\\\\")
    else:
        table.append("            \\caption{(continued) Evaluation metrics experiment results using different tuning methods across various models.}\\\\")
    
    #table.append("    \\begin{tabularx}{\\textwidth}{lrLLLLL!{\\color{white}\\ }LLLLL}")
    #\multicolumn{2}{c}{\normalsize\textbf{(a)}} & \multicolumn{5}{c}{\normalsize\textbf{(b)}} & \multicolumn{3}{c}{\normalsize\textbf{(c)}}\\[.5em]

    row = "        \\multicolumn{2}{c}{\\normalsize\\textbf{(a)}} & "
    for j, dataset in enumerate(datasets, start=2):
        print(dataset)
        row += "\\multicolumn{" + str(len(columns[dataset])) + "}{c}{\\normalsize\\textbf{(" + str(int_to_letter(j)) + ")}}"
        if j < len(datasets) + 1:
            print(f"Adding dataset {dataset} with {len(columns[dataset])} columns")
            row += " & "
    row += "\\\\[.5em]"
    table.append(row)

    table.append("        \\cmidrule(lr){1-2}\\cmidrule(lr){3-8}\\cmidrule(lr){9-11}")

    row = "        \\multirow{2}{*}{\\textbf{Method}} & \\multirow{2}{*}{\\parbox[t]{1cm}{\\centering \\textbf{Trainable\\\\params}}}"

    for _, dataset in enumerate(datasets):
        row += " & \\multicolumn{" + str(len(columns[dataset])) + "}{c}{\\textbf{" + escape_latex(datasets[dataset]) + "}}"
    #row += " & \\multicolumn{5}{c}{\\textbf{" + datasets[dataset] + "}}"
    #row += " & \\multicolumn{3}{c}{\\textbf{" + datasets[dataset] + "}}"
    row += "\\\\"
    table.append(row)

    #table.append("        \\hline")
    #\cmidrule(lr){3-7}\cmidrule(lr){8-12}
    row = "        "
    index = 3
    for _, dataset in enumerate(datasets):
        row += "\\cmidrule(lr){" + f"{index}-{index + len(columns[dataset])-1}" + "}"
        index += len(columns[dataset])
    table.append(row)
    
    row  = "        &"
    row += " & \\rotatebox[origin=l]{90}{Valid syntax} & \\rotatebox[origin=l]{90}{CodeBLEU} & \\rotatebox[origin=l]{90}{pass@1} & \\rotatebox[origin=l]{90}{Instr. Cov.} & \\rotatebox[origin=l]{90}{Branch Cov.} & \\rotatebox[origin=l]{90}{Mutation Score}"
    row += " & \\rotatebox[origin=l]{90}{Valid syntax} & \\rotatebox[origin=l]{90}{CodeBLEU} & \\rotatebox[origin=l]{90}{pass@1}"
    row += "\\\\"
    table.append(row)

    table.append("        \\hline")
    
    print(i)
    if i == 0:
        print("OKOKOKOk")
        table.append("        \\endfirsthead")
    else:
        table.append("        \\endhead")

table.append("        \\bottomrule")
table.append("        \\multicolumn{11}{r}{to be continued on the next page}")
table.append("        \\endfoot")
table.append("        \\bottomrule")
table.append("        \\insertTableNotes")
table.append("        \\endlastfoot")



for model in models.keys():
    table.append("        \\multicolumn{" + str(2+6+3) + "}{l}{\\cellcolor{gray!10}{\\textbf{" + models[model] + "}}} \\bigstrut \\\\*")
    for method in methods:
        col = []
        for dataset in datasets:
            for column in columns[dataset]:
                try:
                    value = data[dataset, column, method].loc[model]
                except KeyError:
                    value = "N/A"
            
                if (dataset, column, method) in decreased_performance_data.get(model, []):
                    value = f"({value})"
                    
                if (dataset, column, method) in best_method_data.get(model, []):
                    value = f"\\textbf{{{value}}}"
                    
                if (dataset, column, method) in bad_syntactical_performance_data.get(model, []):
                    value = f"\\cellcolor{{red!10}}{{{value}}}"
                    
                col.append(f"{value}")
        row = " & ".join(col)
        params = model_trainable_params[method].loc[model]
        row = "        " + methods[method] + " & " + params + " & " + row + " \\\\*"
        table.append(row)
    table[-1] = table[-1][:-1]
    table.append("")

table[-2] = table[-2][:-3]  # Remove last extra newline

#table.append("       \\bottomrule")
table.append("    \\end{xltabular}")
table.append("\\end{ThreePartTable}")
#table.append("\\end{table*}")
#print("\n".join(table))

methods2test_runnable
Adding dataset methods2test_runnable with 6 columns
humaneval-x
0
OKOKOKOk
methods2test_runnable
Adding dataset methods2test_runnable with 6 columns
humaneval-x
1


In [30]:
result = "\n".join(table)
table_path = Path.cwd().parent / 'tables' / 'eval_summary.tex'
table_path.parent.mkdir(parents=True, exist_ok=True)
with open(table_path, 'w') as f:
    f.write(result)

## Table 3

In [31]:
keywords = ["assert", "verify", "fail"]

def contains_keyword(text):
    for keyword in keywords:
        if keyword in text.lower():
            return True
    return False

In [32]:
statuses = {
    "success": "Succ.",
    "failed": "Failed",
    "error": "Interrupt",
    "compilation error": "CompErr",
    "no_assertions": "NoAssert"
}

In [33]:
import pandas as pd

table = []

table.append("\\begin{table}[htbp]")
table.append("    \\newcolumntype{Y}{>{\\centering\\arraybackslash}X}")
table.append("    \\centering")
table.append("    \\caption{Test execution statuses for StarCode2-7B.}")
table.append("    \\label{tab:test-statuses}")
table.append("    \\small")
table.append("    \\begin{tabularx}{\\columnwidth}{lYYYYYY}")
table.append("    \\toprule")
table.append("    \\textbf{Method} & \\textbf{Succ.} & \\textbf{Failed} & \\textbf{Interrupt} & \\textbf{CompErr} & \\textbf{NoAssert} \\\\")
table.append("    \\midrule")

for method in methods:
    col = []
    
    path = DATA_DIR / "methods2test_runnable/executed" / method / "bigcode/starcoder2-7b/jacoco.jsonl"
    df = pd.read_json(path, lines=True, dtype=False).set_index("id")
    df = df[df["status"] != "exception"]
    
    gen_data_path = Path(str(path).replace("executed", "fixed").replace("jacoco.jsonl", "00001-of-00001.jsonl"))
    gen_df = pd.read_json(gen_data_path, lines=True, dtype=False).set_index("id")
    ids = gen_df[~gen_df["prediction"].apply(contains_keyword)].index.tolist()
    keys = df.index.intersection(ids)
    no_assertions_df = df.loc[(df.index.isin(keys)) & (df["status"] == "success")]
    
    values = df['status'].value_counts().sort_index()
    values.loc['success'] = values.loc['success'] - no_assertions_df.shape[0]
    values.loc['no_assertions'] = no_assertions_df.shape[0]

    percentages = (values / values.sum())
    
    for status in statuses.keys():
        value = str(int(percentages.loc[status].round(2) * 100)) + "\\%"
        col.append(value)

    
    row = " & ".join(col)
    row = methods[method] + " & " + row
    table.append("    " + row + " \\\\")
    
table.append("    \\bottomrule")
table.append("    \\end{tabularx}")
table.append("\\end{table}")

#print("\n".join(table))

In [34]:
result = "\n".join(table)
table_path = Path.cwd().parent / 'tables' / 'test_execution_example.tex'
table_path.parent.mkdir(parents=True, exist_ok=True)
with open(table_path, 'w') as f:
    f.write(result)

# Model Downloads

In [35]:
import pandas as pd

table = []

table.append("\\begin{table}[htbp]")
table.append("    \\newcolumntype{Y}{>{\\centering\\arraybackslash}X}")
table.append("    \\newcolumntype{R}{>{\\raggedleft\\arraybackslash}X}")
table.append("    \\centering")
table.append("    \\caption{HuggingFace model download statistics from cfahlgren1/hub-stats as of 13 November 2025.}")
table.append("    \\label{tab:model-downloads}")
table.append("    \\small")
table.append("    \\begin{tabularx}{\\columnwidth}{lRRY}")
table.append("    \\toprule")
table.append("    \\textbf{Model} & \\textbf{Downloads last month} & \\textbf{Downloads all time} & \\textbf{Created at} \\\\")
table.append("    \\midrule")

path = DATA_DIR / "model_downloads.csv"
df = pd.read_csv(path).set_index("id")

for model_name, row in df.iterrows():
    col = []
    
    col.append(escape_latex(model_name))
    col.append(str(row["downloads"]))
    col.append(str(row["downloadsAllTime"]))
    col.append(str(row["createdAt"].split()[0]))

    row = " & ".join(col)
    table.append("    " + row + " \\\\")
    
table.append("    \\bottomrule")
table.append("    \\end{tabularx}")
table.append("\\end{table}")

#print("\n".join(table))

In [36]:
result = "\n".join(table)
table_path = Path.cwd().parent / 'tables' / 'model_downloads.tex'
table_path.parent.mkdir(parents=True, exist_ok=True)
with open(table_path, 'w') as f:
    f.write(result)