In [1]:
# Standard library imports
import os

# Third-party library imports
import pandas as pd
from tqdm import tqdm
tqdm.pandas()

In [29]:
TASK = ("code_translation", "code_repair", "assert_generation")
NAME_MAPPING = {
    "codet5p-220m": "CodeT5+ 220M",
    "codet5p-770m": "CodeT5+ 770M"
}
RENAME_TUNING_METHOD_DICT = {
    "full-finetuning": "Full Fine-Tuning",
    "no-gnn": "Linear Adapter",
    "concatpervector": "GVE + ABF",
    "lora": "LoRA",
    "prompt-tuning": "Prompt-Tuning",
    "prefix-tuning": "Prefix-Tuning",
    "no-finetuning": "No Fine-Tuning",
    "concatpervector_linear": "Linear",
    "concatpervector_no_gve": "No GVE",
    "concatpervector_no_abf": "No ABF"
}

SEEDS = ("seed_18_1", "seed_99_1")
DATASET_BASEPATH = "/data/datasets/fix/"

RENAME_COLUMNS = {
    "model": "Model",
    "tuning_method": "Tuning Method",

    "assert_generation_mean": "Assert Generation",
    "assert_generation_std": "Assert Generation (std)",
    
    "code_translation_mean": "Code Translation",
    "code_translation_std": "Code Translation (std)",
    
    "code_repair_mean": "Code Repair",
    "code_repair_std": "Code Repair (std)",

    "summarization_mean": "Summarization",
    "summarization_std": "Summarization (std)",

    "average": "Average"
}

# Read Results

In [30]:
def read_csv(root_path: str)->list:
    output_paths = []
    for filename in os.listdir(root_path):
        filepath = os.path.join(root_path, filename)
        output_paths.append(filepath)
    return output_paths

output_paths = []
for task in TASK:
    output_paths += read_csv(task)
output_paths

['code_translation/codet5p-770m_concatpervector_no_gve.csv',
 'code_translation/codet5p-220m_concatpervector_no_gve.csv',
 'code_translation/codet5p-220m_concatpervector.csv',
 'code_translation/codet5p-770m_concatpervector_no_abf.csv',
 'code_translation/codet5p-770m_concatpervector.csv',
 'code_translation/codet5p-220m_concatpervector_no_abf.csv',
 'code_translation/codet5p-770m_concatpervector_linear.csv',
 'code_translation/codet5p-220m_concatpervector_linear.csv',
 'code_repair/codet5p-770m_concatpervector_no_gve.csv',
 'code_repair/codet5p-220m_concatpervector_no_gve.csv',
 'code_repair/codet5p-220m_concatpervector.csv',
 'code_repair/codet5p-770m_concatpervector_no_abf.csv',
 'code_repair/codet5p-770m_concatpervector.csv',
 'code_repair/codet5p-220m_concatpervector_no_abf.csv',
 'code_repair/codet5p-770m_concatpervector_linear.csv',
 'code_repair/codet5p-220m_concatpervector_linear.csv',
 'assert_generation/codet5p-770m_concatpervector_no_gve.csv',
 'assert_generation/codet5p-22

In [31]:
def create_df(paths: list)->pd.DataFrame:
    temp_list = []
    for path in paths:
        if "ipynb_checkpoints" not in path:
            filename = os.path.basename(path)
            model = filename.split("_")
            model = model[0]
            if model in NAME_MAPPING.keys():
                
                df = pd.read_csv(path)

                task = path.split("/")[0]
                df["task"] = task

                # filter seed and similar ids
                temp_task = task if task != "code_repair" else "code_repair_long"
                ids_path = os.path.join(DATASET_BASEPATH, f"{temp_task}/included_ids.csv")
                included_ids = pd.read_csv(ids_path)
                mask_ids = df["idx.1"].isin(included_ids["idx"])
                mask_seed = df["seed"].isin(SEEDS)
                df = df[(mask_seed) & (mask_ids)].copy()
                df["model"] = model

                tuning_method = "_".join(os.path.splitext(filename)[0].split("_")[1:])
                df["tuning_method"] = tuning_method

                if task != "summarization":
                    # df.drop(columns=["codebleu_stat"], inplace=True)
                    df = df[["model", "tuning_method", "task", "seed", "codebleu-cn"]].copy()
                else:
                    df = df[["model", "tuning_method", "task",  "seed", "bleu-cn"]].copy().round(2)

                temp_list.append(df)

    df = pd.concat(temp_list)
    return df

In [32]:
df = create_df(output_paths)

In [33]:
df.tuning_method.value_counts()

tuning_method
concatpervector_no_gve    26244
concatpervector           26244
concatpervector_no_abf    26244
concatpervector_linear    26244
Name: count, dtype: int64

In [34]:
temp_df = df.groupby(["model", "tuning_method", "task", "seed"], as_index=False).mean()
temp_std = temp_df.groupby(["model", "tuning_method", "task"], as_index=False)["codebleu-cn"].std().round(2)

df.drop(columns=["seed"], inplace=True)

# Calculate the mean and standard deviation for each group
temp_mean = df.groupby(["model", "tuning_method", "task"], as_index=False).mean().round(2)

# Add a suffix to the columns to distinguish between mean and std
temp_mean = temp_mean.add_suffix('_mean')
temp_std = temp_std.add_suffix('_std')

# Merge mean and std DataFrames
temp1 = pd.merge(temp_mean, temp_std, left_on=["model_mean", "tuning_method_mean", "task_mean"], 
                         right_on=["model_std", "tuning_method_std", "task_std"])

# Drop redundant columns after merge
temp1.drop(columns=["model_std", "tuning_method_std", "task_std"], inplace=True)

# Rename columns for clarity
temp1.rename(columns={"model_mean": "model", "tuning_method_mean": "tuning_method", "task_mean": "task"}, inplace=True)

# Melt the combined DataFrame to have mean and std in the metrics column
temp1 = pd.melt(temp1, 
                      id_vars=["model", "tuning_method", "task"], 
                      var_name="metrics", 
                      value_name="value")

# Pivot the DataFrame to organize tasks as columns and keep both mean and std under the metrics
temp1 = temp1.pivot_table(index=['model', 'tuning_method', 'metrics'], 
                                       columns='task', values='value').reset_index()

# Drop the "metrics" column if needed or leave it to distinguish between mean and std
# temp_pivoted.drop(columns=["metrics"], inplace=True)  # Uncomment if you don't want to keep "metrics"

# Optional: Clean up column names
temp1.columns.name = None

# Fill NaN values with 0.0
temp1.fillna(0.0, inplace=True)


df_mean = temp1[temp1['metrics'].str.contains('_mean')].copy()
df_std = temp1[temp1['metrics'].str.contains('_std')].copy()

# Remove the '_mean' and '_std' suffixes from the 'metrics' column
df_mean['metrics'] = df_mean['metrics'].str.replace('_mean', '')
df_std['metrics'] = df_std['metrics'].str.replace('_std', '')

# Merge the mean and std DataFrames on 'model', 'tuning_method', and 'metrics'
temp1 = pd.merge(df_mean, df_std, on=['model', 'tuning_method', 'metrics'], suffixes=('_mean', '_std'))
temp1 = temp1[["model", "tuning_method", "assert_generation_mean", "assert_generation_std", "code_translation_mean", "code_translation_std", "code_repair_mean", "code_repair_std"]].copy()
temp1

Unnamed: 0,model,tuning_method,assert_generation_mean,assert_generation_std,code_translation_mean,code_translation_std,code_repair_mean,code_repair_std
0,codet5p-220m,concatpervector,82.32,0.3,96.6,1.31,98.1,0.39
1,codet5p-220m,concatpervector_linear,83.08,0.07,91.76,0.36,99.45,0.59
2,codet5p-220m,concatpervector_no_abf,83.14,0.04,96.03,0.0,99.37,0.09
3,codet5p-220m,concatpervector_no_gve,77.07,8.62,92.53,2.65,97.48,3.38
4,codet5p-770m,concatpervector,81.16,0.71,94.88,0.08,96.75,3.94
5,codet5p-770m,concatpervector_linear,79.31,4.64,90.4,0.02,93.27,0.88
6,codet5p-770m,concatpervector_no_abf,78.79,2.51,97.78,0.0,99.54,0.12
7,codet5p-770m,concatpervector_no_gve,83.16,0.01,91.11,0.33,94.33,4.16


In [35]:
output_paths = read_csv("summarization")
output_paths[:4]

['summarization/codet5p-770m_concatpervector_no_gve.csv',
 'summarization/codet5p-220m_concatpervector_no_gve.csv',
 'summarization/codet5p-220m_concatpervector.csv',
 'summarization/codet5p-770m_concatpervector_no_abf.csv']

In [36]:
df = create_df(output_paths)

temp_df = df.groupby(["model", "tuning_method", "task", "seed"], as_index=False).mean()
temp_std = temp_df.groupby(["model", "tuning_method", "task"], as_index=False)["bleu-cn"].std().round(2)

df.drop(columns=["seed"], inplace=True)

# Calculate the mean and standard deviation for each group
temp_mean = df.groupby(["model", "tuning_method", "task"], as_index=False).mean().round(2)

# Add a suffix to the columns to distinguish between mean and std
temp_mean = temp_mean.add_suffix('_mean')
temp_std = temp_std.add_suffix('_std')

# Merge mean and std DataFrames
temp2 = pd.merge(temp_mean, temp_std, left_on=["model_mean", "tuning_method_mean", "task_mean"], 
                         right_on=["model_std", "tuning_method_std", "task_std"])

# Drop redundant columns after merge
temp2.drop(columns=["model_std", "tuning_method_std", "task_std"], inplace=True)

# Rename columns for clarity
temp2.rename(columns={"model_mean": "model", "tuning_method_mean": "tuning_method", "task_mean": "task"}, inplace=True)

# Melt the combined DataFrame to have mean and std in the metrics column
temp2 = pd.melt(temp2, 
                      id_vars=["model", "tuning_method", "task"], 
                      var_name="metrics", 
                      value_name="value")

# Pivot the DataFrame to organize tasks as columns and keep both mean and std under the metrics
temp2 = temp2.pivot_table(index=['model', 'tuning_method', 'metrics'], 
                                       columns='task', values='value').reset_index()

# Drop the "metrics" column if needed or leave it to distinguish between mean and std
# temp_pivoted.drop(columns=["metrics"], inplace=True)  # Uncomment if you don't want to keep "metrics"

# Optional: Clean up column names
temp2.columns.name = None

# Fill NaN values with 0.0
temp2.fillna(0.0, inplace=True)

df_mean = temp2[temp2['metrics'].str.contains('_mean')].copy()
df_std = temp2[temp2['metrics'].str.contains('_std')].copy()

# Remove the '_mean' and '_std' suffixes from the 'metrics' column
df_mean['metrics'] = df_mean['metrics'].str.replace('_mean', '')
df_std['metrics'] = df_std['metrics'].str.replace('_std', '')

# Merge the mean and std DataFrames on 'model', 'tuning_method', and 'metrics'
temp2 = pd.merge(df_mean, df_std, on=['model', 'tuning_method', 'metrics'], suffixes=('_mean', '_std'))
temp2 = temp2[["model", "tuning_method", "summarization_mean", "summarization_std"]].copy()
temp2


Unnamed: 0,model,tuning_method,summarization_mean,summarization_std
0,codet5p-220m,concatpervector,99.84,0.21
1,codet5p-220m,concatpervector_linear,99.54,0.09
2,codet5p-220m,concatpervector_no_abf,99.31,0.06
3,codet5p-220m,concatpervector_no_gve,94.33,7.32
4,codet5p-770m,concatpervector,98.11,1.61
5,codet5p-770m,concatpervector_linear,99.17,0.42
6,codet5p-770m,concatpervector_no_abf,96.23,1.55
7,codet5p-770m,concatpervector_no_gve,98.64,0.52


In [37]:
merge = pd.merge(temp2, temp1, on=["model", "tuning_method"])
merge['average'] = merge[['assert_generation_mean', 'code_repair_mean', 'code_translation_mean', 'summarization_mean']].mean(axis=1).round(2)
merge["tuning_method"] = merge["tuning_method"].apply(lambda x: RENAME_TUNING_METHOD_DICT[x])
merge.rename(columns=RENAME_COLUMNS, inplace=True)
merge

Unnamed: 0,Model,Tuning Method,Summarization,Summarization (std),Assert Generation,Assert Generation (std),Code Translation,Code Translation (std),Code Repair,Code Repair (std),Average
0,codet5p-220m,GVE + ABF,99.84,0.21,82.32,0.3,96.6,1.31,98.1,0.39,94.22
1,codet5p-220m,Linear,99.54,0.09,83.08,0.07,91.76,0.36,99.45,0.59,93.46
2,codet5p-220m,No ABF,99.31,0.06,83.14,0.04,96.03,0.0,99.37,0.09,94.46
3,codet5p-220m,No GVE,94.33,7.32,77.07,8.62,92.53,2.65,97.48,3.38,90.35
4,codet5p-770m,GVE + ABF,98.11,1.61,81.16,0.71,94.88,0.08,96.75,3.94,92.72
5,codet5p-770m,Linear,99.17,0.42,79.31,4.64,90.4,0.02,93.27,0.88,90.54
6,codet5p-770m,No ABF,96.23,1.55,78.79,2.51,97.78,0.0,99.54,0.12,93.08
7,codet5p-770m,No GVE,98.64,0.52,83.16,0.01,91.11,0.33,94.33,4.16,91.81


In [38]:
merge.to_csv("table_1.csv", index=False)