In [1]:
# Standard library imports
import os

# Third-party library imports
import pandas as pd
from tqdm import tqdm
tqdm.pandas()

In [13]:
TASK = ("code_translation", "code_repair", "assert_generation")
NAME_MAPPING = {
    "codet5p-220m": "CodeT5+ 220M",
    "codet5p-770m": "CodeT5+ 770M"
}
RENAME_TUNING_METHOD_DICT = {
    "full-finetuning": "Full Fine-Tuning",
    "no-gnn": "Linear Adapter",
    "concatpervector": "Transducers Tuning",
    "lora": "LoRA",
    "prompt-tuning": "Prompt-Tuning",
    "prefix-tuning": "Prefix-Tuning",
    "no-finetuning": "No Fine-Tuning"
}

SEEDS = ("seed_18_1", "seed_99_1")
DATASET_BASEPATH = "/data/datasets/fix/"

RENAME_COLUMNS = {
    "model": "Model",
    "tuning_method": "Tuning Method",

    "assert_generation_mean": "Assert Generation",
    "assert_generation_std": "Assert Generation (std)",
    
    "code_translation_mean": "Code Translation",
    "code_translation_std": "Code Translation (std)",
    
    "code_repair_mean": "Code Repair",
    "code_repair_std": "Code Repair (std)",

    "summarization_mean": "Summarization",
    "summarization_std": "Summarization (std)",

    "average": "Average"
}

# Read Results

In [5]:
def read_csv(root_path: str)->list:
    output_paths = []
    for filename in os.listdir(root_path):
        filepath = os.path.join(root_path, filename)
        output_paths.append(filepath)
    return output_paths

output_paths = []
for task in TASK:
    output_paths += read_csv(task)
output_paths[:5]

['code_translation/codet5p-220m_prompt-tuning.csv',
 'code_translation/codet5p-220m_concatpervector.csv',
 'code_translation/codet5p-770m_full-finetuning.csv',
 'code_translation/codet5p-770m_lora.csv',
 'code_translation/codet5p-220m_prefix-tuning.csv']

In [8]:
def create_df(paths: list)->pd.DataFrame:
    temp_list = []
    for path in paths:
        if "ipynb_checkpoints" not in path:
            filename = os.path.basename(path)
            filename = filename.split("_")
            model = filename[0]
            if model in NAME_MAPPING.keys():
                
                df = pd.read_csv(path)

                task = path.split("/")[0]
                df["task"] = task

                # filter seed and similar ids
                temp_task = task if task != "code_repair" else "code_repair_long"
                ids_path = os.path.join(DATASET_BASEPATH, f"{temp_task}/included_ids.csv")
                included_ids = pd.read_csv(ids_path)
                mask_ids = df["idx.1"].isin(included_ids["idx"])
                mask_seed = df["seed"].isin(SEEDS)
                df = df[(mask_seed) & (mask_ids)].copy()
                df["model"] = model

                tuning_method = os.path.splitext(filename[1])[0]
                df["tuning_method"] = tuning_method

                if task != "summarization":
                    # df.drop(columns=["codebleu_stat"], inplace=True)
                    df = df[["model", "tuning_method", "task", "seed", "codebleu-cn"]].copy()
                else:
                    df = df[["model", "tuning_method", "task",  "seed", "bleu-cn"]].copy().round(2)

                temp_list.append(df)

    df = pd.concat(temp_list)
    return df

In [9]:
df = create_df(output_paths)

In [10]:
temp_df = df.groupby(["model", "tuning_method", "task", "seed"], as_index=False).mean()
temp_std = temp_df.groupby(["model", "tuning_method", "task"], as_index=False)["codebleu-cn"].std().round(2)

df.drop(columns=["seed"], inplace=True)

# Calculate the mean and standard deviation for each group
temp_mean = df.groupby(["model", "tuning_method", "task"], as_index=False).mean().round(2)

# Add a suffix to the columns to distinguish between mean and std
temp_mean = temp_mean.add_suffix('_mean')
temp_std = temp_std.add_suffix('_std')

# Merge mean and std DataFrames
temp1 = pd.merge(temp_mean, temp_std, left_on=["model_mean", "tuning_method_mean", "task_mean"], 
                         right_on=["model_std", "tuning_method_std", "task_std"])

# Drop redundant columns after merge
temp1.drop(columns=["model_std", "tuning_method_std", "task_std"], inplace=True)

# Rename columns for clarity
temp1.rename(columns={"model_mean": "model", "tuning_method_mean": "tuning_method", "task_mean": "task"}, inplace=True)

# Melt the combined DataFrame to have mean and std in the metrics column
temp1 = pd.melt(temp1, 
                      id_vars=["model", "tuning_method", "task"], 
                      var_name="metrics", 
                      value_name="value")

# Pivot the DataFrame to organize tasks as columns and keep both mean and std under the metrics
temp1 = temp1.pivot_table(index=['model', 'tuning_method', 'metrics'], 
                                       columns='task', values='value').reset_index()

# Drop the "metrics" column if needed or leave it to distinguish between mean and std
# temp_pivoted.drop(columns=["metrics"], inplace=True)  # Uncomment if you don't want to keep "metrics"

# Optional: Clean up column names
temp1.columns.name = None

# Fill NaN values with 0.0
temp1.fillna(0.0, inplace=True)


df_mean = temp1[temp1['metrics'].str.contains('_mean')].copy()
df_std = temp1[temp1['metrics'].str.contains('_std')].copy()

# Remove the '_mean' and '_std' suffixes from the 'metrics' column
df_mean['metrics'] = df_mean['metrics'].str.replace('_mean', '')
df_std['metrics'] = df_std['metrics'].str.replace('_std', '')

# Merge the mean and std DataFrames on 'model', 'tuning_method', and 'metrics'
temp1 = pd.merge(df_mean, df_std, on=['model', 'tuning_method', 'metrics'], suffixes=('_mean', '_std'))
temp1 = temp1[["model", "tuning_method", "assert_generation_mean", "assert_generation_std", "code_translation_mean", "code_translation_std", "code_repair_mean", "code_repair_std"]].copy()
temp1

Unnamed: 0,model,tuning_method,assert_generation_mean,assert_generation_std,code_translation_mean,code_translation_std,code_repair_mean,code_repair_std
0,codet5p-220m,concatpervector,82.32,0.3,96.6,1.31,98.1,0.39
1,codet5p-220m,full-finetuning,83.16,0.01,97.78,0.0,99.87,0.0
2,codet5p-220m,lora,83.17,0.0,97.78,0.0,99.87,0.0
3,codet5p-220m,no-finetuning,76.85,0.0,94.47,0.0,96.0,0.0
4,codet5p-220m,no-gnn,82.48,0.02,97.7,0.12,99.31,0.71
5,codet5p-220m,prefix-tuning,83.17,0.0,97.78,0.0,99.87,0.0
6,codet5p-220m,prompt-tuning,83.17,0.0,94.4,0.27,97.46,1.13
7,codet5p-770m,concatpervector,81.16,0.71,94.88,0.08,96.75,3.94
8,codet5p-770m,full-finetuning,83.16,0.01,97.78,0.0,99.87,0.0
9,codet5p-770m,lora,83.17,0.0,97.78,0.0,99.87,0.0


In [11]:
output_paths = read_csv("summarization")
output_paths[:4]

['summarization/codet5p-220m_prompt-tuning.csv',
 'summarization/codet5p-220m_concatpervector.csv',
 'summarization/codet5p-770m_full-finetuning.csv',
 'summarization/codet5p-770m_lora.csv']

In [12]:
df = create_df(output_paths)

temp_df = df.groupby(["model", "tuning_method", "task", "seed"], as_index=False).mean()
temp_std = temp_df.groupby(["model", "tuning_method", "task"], as_index=False)["bleu-cn"].std().round(2)

df.drop(columns=["seed"], inplace=True)

# Calculate the mean and standard deviation for each group
temp_mean = df.groupby(["model", "tuning_method", "task"], as_index=False).mean().round(2)

# Add a suffix to the columns to distinguish between mean and std
temp_mean = temp_mean.add_suffix('_mean')
temp_std = temp_std.add_suffix('_std')

# Merge mean and std DataFrames
temp2 = pd.merge(temp_mean, temp_std, left_on=["model_mean", "tuning_method_mean", "task_mean"], 
                         right_on=["model_std", "tuning_method_std", "task_std"])

# Drop redundant columns after merge
temp2.drop(columns=["model_std", "tuning_method_std", "task_std"], inplace=True)

# Rename columns for clarity
temp2.rename(columns={"model_mean": "model", "tuning_method_mean": "tuning_method", "task_mean": "task"}, inplace=True)

# Melt the combined DataFrame to have mean and std in the metrics column
temp2 = pd.melt(temp2, 
                      id_vars=["model", "tuning_method", "task"], 
                      var_name="metrics", 
                      value_name="value")

# Pivot the DataFrame to organize tasks as columns and keep both mean and std under the metrics
temp2 = temp2.pivot_table(index=['model', 'tuning_method', 'metrics'], 
                                       columns='task', values='value').reset_index()

# Drop the "metrics" column if needed or leave it to distinguish between mean and std
# temp_pivoted.drop(columns=["metrics"], inplace=True)  # Uncomment if you don't want to keep "metrics"

# Optional: Clean up column names
temp2.columns.name = None

# Fill NaN values with 0.0
temp2.fillna(0.0, inplace=True)

df_mean = temp2[temp2['metrics'].str.contains('_mean')].copy()
df_std = temp2[temp2['metrics'].str.contains('_std')].copy()

# Remove the '_mean' and '_std' suffixes from the 'metrics' column
df_mean['metrics'] = df_mean['metrics'].str.replace('_mean', '')
df_std['metrics'] = df_std['metrics'].str.replace('_std', '')

# Merge the mean and std DataFrames on 'model', 'tuning_method', and 'metrics'
temp2 = pd.merge(df_mean, df_std, on=['model', 'tuning_method', 'metrics'], suffixes=('_mean', '_std'))
temp2 = temp2[["model", "tuning_method", "summarization_mean", "summarization_std"]].copy()
temp2


Unnamed: 0,model,tuning_method,summarization_mean,summarization_std
0,codet5p-220m,concatpervector,99.84,0.21
1,codet5p-220m,full-finetuning,99.91,0.01
2,codet5p-220m,lora,99.91,0.0
3,codet5p-220m,no-finetuning,95.49,0.0
4,codet5p-220m,no-gnn,98.05,0.88
5,codet5p-220m,prefix-tuning,99.93,0.01
6,codet5p-220m,prompt-tuning,99.91,0.01
7,codet5p-770m,concatpervector,98.11,1.61
8,codet5p-770m,full-finetuning,99.81,0.01
9,codet5p-770m,lora,99.79,0.02


In [14]:
merge = pd.merge(temp2, temp1, on=["model", "tuning_method"])
merge['average'] = merge[['assert_generation_mean', 'code_repair_mean', 'code_translation_mean', 'summarization_mean']].mean(axis=1).round(2)
merge["tuning_method"] = merge["tuning_method"].apply(lambda x: RENAME_TUNING_METHOD_DICT[x])
merge.rename(columns=RENAME_COLUMNS, inplace=True)
merge

Unnamed: 0,Model,Tuning Method,Summarization,Summarization (std),Assert Generation,Assert Generation (std),Code Translation,Code Translation (std),Code Repair,Code Repair (std),Average
0,codet5p-220m,Transducers Tuning,99.84,0.21,82.32,0.3,96.6,1.31,98.1,0.39,94.22
1,codet5p-220m,Full Fine-Tuning,99.91,0.01,83.16,0.01,97.78,0.0,99.87,0.0,95.18
2,codet5p-220m,LoRA,99.91,0.0,83.17,0.0,97.78,0.0,99.87,0.0,95.18
3,codet5p-220m,No Fine-Tuning,95.49,0.0,76.85,0.0,94.47,0.0,96.0,0.0,90.7
4,codet5p-220m,Linear Adapter,98.05,0.88,82.48,0.02,97.7,0.12,99.31,0.71,94.38
5,codet5p-220m,Prefix-Tuning,99.93,0.01,83.17,0.0,97.78,0.0,99.87,0.0,95.19
6,codet5p-220m,Prompt-Tuning,99.91,0.01,83.17,0.0,94.4,0.27,97.46,1.13,93.73
7,codet5p-770m,Transducers Tuning,98.11,1.61,81.16,0.71,94.88,0.08,96.75,3.94,92.72
8,codet5p-770m,Full Fine-Tuning,99.81,0.01,83.16,0.01,97.78,0.0,99.87,0.0,95.16
9,codet5p-770m,LoRA,99.79,0.02,83.17,0.0,97.78,0.0,99.87,0.0,95.15


In [19]:
merge.to_csv("table_1.csv", index=False)