In [1]:
import pandas as pd
import re
import os
import numpy as np

# Comment settings that are iterated over below; each name is used as a sub-directory
# of both ./fold0/ (input CSVs) and ./latex/ (generated tables).
comment_types = ["no_comments", "comments", "added_comments"]

In [2]:
# import numpy as np
# import random
# random.seed(42)

# path = f"./fold0/no_comments"
# project = 'commons-jexl3-3.2.1-src'
# project_data = pd.read_csv(f"{path}/{project}/test_stats.csv")
# pass_rate = 0.7634
# df = project_data.copy()
# num_rows = len(df)
# coin_accuracy = []
# coin_f1 = []

# iterations = int(np.ceil(10000 / num_rows))

# for i in range(iterations):
#     coin_simulation = [1 if (random.random() < pass_rate) else 0 for i in range(num_rows)]
#     df["coin_simulation"] = coin_simulation
#     coin_tp = 0
#     coin_fn = 0
#     coin_tn = 0
#     coin_fp = 0

#     for i in range(num_rows):

#         # "Actual Label" is the ground truth; "coin_simulation" is the simulated prediction
#         # predicted and actual pass
#         if df.loc[i, "Actual Label"] == 1 and df.loc[i, "coin_simulation"] == 1:
#             coin_tp += 1
#         # predicted fail and actual pass
#         elif df.loc[i, "Actual Label"] == 1 and df.loc[i, "coin_simulation"] == 0:
#             coin_fn += 1
#         # predicted and actual fail
#         elif df.loc[i, "Actual Label"] == 0 and df.loc[i, "coin_simulation"] == 0:
#             coin_tn += 1
#         # predicted pass and actual fail
#         elif df.loc[i, "Actual Label"] == 0 and df.loc[i, "coin_simulation"] == 1:
#             coin_fp += 1
#     accuracy = (coin_tp + coin_tn) / num_rows if num_rows > 0 else 0
#     f1 = (2 * coin_tp) / (2 * coin_tp + coin_fp + coin_fn) if (2 * coin_tp + coin_fp + coin_fn) > 0 else 0
#     coin_accuracy.append(accuracy)
#     coin_f1.append(f1)

# average_coin_accuracy = np.round(np.mean(coin_accuracy), 4)
# average_coin_f1 = np.round(np.mean(coin_f1), 4)

# Overall Stats

In [3]:
# Build the per-project LaTeX tables for every comment setting and vocabulary filter.
# For each (comment_type, val) pair this reads ./fold0/{comment_type}/project_stats_{val}.csv
# and writes two files under ./latex/{comment_type}/:
#   toga_dataset_stats_{val}.tex  - dataset composition and out-of-vocabulary ratios
#   toga_results_{val}.tex        - model metrics and the coin-flip baseline

# CSV column name -> column header used in the tables. Raw strings keep the literal
# backslash of "\Delta" without relying on Python's invalid-escape fallback.
column_labels = {
    "accuracy": "acc.",
    "pass_accuracy": "pass_acc.",
    "fail_accuracy": "fail_acc.",
    "pass_rate": "dataset_pass_%",
    "fail_rate": "dataset_fail_%",
    "accuracy_improvement": r"acc_\Delta",
    "fail_accuracy_improvement": r"fail_acc_\Delta",
    # Previously mapped onto itself, so selecting "f1_\Delta" below raised a KeyError.
    "f1_improvement": r"f1_\Delta",
    "coin_accuracy": "coin_acc.",
    "out_vocab_C_ratio": "missing_C_%",
    "out_vocab_T_ratio": "missing_T_%",
    "out_vocab_combined_ratio": "missing_token_%",
}

dataset_stat_columns = [
    "project",
    "N",
    "dataset_pass_%",
    "dataset_fail_%",
    "missing_C_%",
    "missing_T_%",
    "missing_token_%",
]

result_metric_columns = [
    r"fail_acc_\Delta",
    r"acc_\Delta",
    r"f1_\Delta",
    "acc.",
    "pass_acc.",
    "fail_acc.",
    "f1",
    "coin_acc.",
    "coin_f1",
    "tp",
    "fn",
    "tn",
    "fp",
]

for comment_type in comment_types:
    output_dir = f"./latex/{comment_type}"
    # to_latex does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    for val in ["all", "50", "25", "10", "05"]:
        df = pd.read_csv(f"./fold0/{comment_type}/project_stats_{val}.csv")
        # Simplify project names: keep the text before the first "-<digit>"
        df["project"] = df["project"].map(lambda name: re.split(r"-\d", name)[0])
        df = df.rename(columns=column_labels)

        table1 = df[dataset_stat_columns]

        if val == "all":
            table2 = df[["project"] + result_metric_columns]
            stats_caption = f"TOGA* Dataset Statistics ({comment_type})"
            results_caption = (
                rf"SEER Results on TOGA* ({comment_type}), sorted by failure accuracy $\Delta$"
            )
        else:
            # Filtered runs also report the number of remaining samples per project.
            table2 = df[["project", "N"] + result_metric_columns]
            # NOTE(review): the two captions below describe the same filter with different
            # percentages ({val} vs. 100 - {val}) - confirm which one is intended.
            stats_caption = (
                rf"TOGA* Dataset Statistics ({comment_type}), "
                rf"restricted to minimum {val}\% of tokens present"
            )
            results_caption = (
                rf"SEER Results on TOGA* ({comment_type}), "
                rf"restricted to minimum {100 - int(val)}\% of tokens present"
            )

        table1.to_latex(
            f"{output_dir}/toga_dataset_stats_{val}.tex",
            index=False,
            caption=stats_caption,
            label=f"tab:toga_stats_{val}",
        )
        table2.to_latex(
            f"{output_dir}/toga_results_{val}.tex",
            index=False,
            caption=results_caption,
            label=f"tab:toga_results_{val}",
        )

In [131]:
# Fixing some LaTeX issues in the generated tables of every comment setting.
# All substitutions are literal, so str.replace is used instead of re.sub: a replacement
# template containing "\D" makes re.sub raise "bad escape" on Python >= 3.7.
for comment_type in comment_types:
    latex_dir = f"./latex/{comment_type}"

    for filename in os.listdir(latex_dir):
        # Only touch LaTeX files; other directory entries are left alone.
        if not filename.endswith(".tex"):
            continue

        with open(f"{latex_dir}/{filename}", "r+") as f:
            text = f.read()
            # The escaped column header "\textbackslash Delta" becomes a math-mode Delta.
            text = text.replace(r"\textbackslash Delta", r"$\Delta$")
            # Switch to the starred table environment. Anchoring on the braces keeps the
            # substitution idempotent, so re-running the cell does not produce "table**".
            text = text.replace("{table}", "{table*}")
            text = text.replace("_comments", " comments")
            # Overwrite the file in place and drop any leftover tail.
            f.seek(0)
            f.write(text)
            f.truncate()

# Vocab analysis

In [117]:
# Collect the improvement metrics of every vocabulary threshold into one wide table
# (one row per project, one group of "<metric>_<threshold>" columns per threshold)
# and save it as vocab_analysis.csv.
comment_type = "no_comments"
thresholds = ["all", "50", "25", "20", "15", "10"]
merged_metrics = ["N", "fail_accuracy_improvement", "accuracy_improvement", "f1_improvement"]

df_merge = None
for val in thresholds:
    df = pd.read_csv(f"./fold0/{comment_type}/project_stats_{val}.csv")
    # Simplify project names: keep the text before the first "-<digit>"
    df["project"] = df["project"].map(lambda name: re.split(r"-\d", name)[0])

    # Suffix the metric columns with the threshold *before* merging. Relying on the
    # "_x"/"_y" suffixes pandas adds on a name clash only works for every second merge,
    # so it silently breaks when the number of thresholds is odd.
    table2 = df[["project"] + merged_metrics].rename(
        columns={metric: f"{metric}_{val}" for metric in merged_metrics}
    )

    if df_merge is None:
        df_merge = table2
    else:
        # Left join keeps the projects of the first threshold; projects that are absent
        # from a later threshold file get NaN in that threshold's columns.
        df_merge = df_merge.merge(table2, on="project", how="left")

df_merge.to_csv("vocab_analysis.csv")
# df_merge[df_merge['project']=='all']

In [118]:
# Display the merged per-threshold table for inspection.
df_merge


Unnamed: 0,project,N_all,fail_accuracy_improvement_all,accuracy_improvement_all,f1_improvement_all,N_50,fail_accuracy_improvement_50,accuracy_improvement_50,f1_improvement_50,N_25,...,accuracy_improvement_20,f1_improvement_20,N_15,fail_accuracy_improvement_15,accuracy_improvement_15,f1_improvement_15,N_10,fail_accuracy_improvement_10,accuracy_improvement_10,f1_improvement_10
0,commons-pool2,11244,0.1647,-0.0611,-0.0321,4383,0.1677,-0.1528,-0.0844,319,...,-0.0009,-0.0003,3.0,-0.0359,-0.923,-0.9598,1.0,-0.0235,-0.9849,-0.9924
1,commons-collections4,1389,0.0574,-0.3932,-0.3289,1126,0.0899,-0.4262,-0.3692,17,...,-1.0,0.0,,,,,,,,
2,commons-numbers,39866,0.0482,0.0055,0.0028,39036,0.0482,0.0054,0.0028,18974,...,0.0149,0.0077,941.0,-0.01,-0.0288,-0.0154,100.0,0.0476,-0.1975,-0.1123
3,JSON-java,12911,0.0199,0.0149,0.0076,10813,0.0206,0.0122,0.0062,391,...,0.0289,0.0309,38.0,-0.0772,-0.2392,-0.2067,17.0,0.495,-0.9093,-0.9838
4,spark,5280,0.0114,0.0517,0.0292,3872,-0.0052,0.0488,0.0277,201,...,0.0203,0.1349,37.0,-0.4545,0.0143,0.1409,,,,
5,joda-time,27480,0.0096,0.0475,0.0264,18505,0.0098,0.0133,0.0074,2523,...,0.1336,0.1144,340.0,-0.1453,0.0732,0.0692,79.0,0.1522,-0.312,-0.2515
6,http-request,4069,-0.0185,0.0017,0.0008,3987,-0.0185,0.0018,0.0009,301,...,,,,,,,,,,
7,jsoup,8002,-0.029,0.0398,0.0222,7930,-0.0282,0.0408,0.0228,3546,...,0.0761,0.0447,175.0,-0.064,-0.2084,-0.1436,58.0,-0.0143,-0.2939,-0.2027
8,commons-lang3,12118,-0.044,0.0619,0.0382,12046,-0.0442,0.062,0.0383,4369,...,0.0752,0.0476,684.0,-0.0356,0.0098,0.0061,176.0,-0.0642,-0.077,-0.0507
9,bcel,15379,-0.0487,0.0901,0.0609,14767,-0.0465,0.0963,0.0647,1414,...,0.0523,0.0348,49.0,0.1167,-0.1735,-0.179,5.0,-0.1655,-0.7215,-0.8328


In [127]:
# Summarise the vocabulary-threshold experiment in one small table: for each threshold,
# the sample count and metrics of one fixed df_merge row ("*_total" columns) and the mean
# of each metric over the individual projects with more than `min_sample` samples
# at that threshold ("*_avg" columns).
project_only_df = df_merge[df_merge["project"] != "all"]
min_sample = 20

# NOTE(review): row label 25 is presumably the aggregate "all" row of df_merge - confirm.
summary_row = 25
threshold_keys = ["50", "25", "20", "15"]
improvement_metrics = ["fail_accuracy_improvement", "accuracy_improvement", "f1_improvement"]


def _mean_over_projects(metric, key):
    """Mean of `metric` at threshold `key` over projects with N above `min_sample`."""
    eligible = project_only_df[project_only_df[f"N_{key}"] > min_sample].index
    return df_merge.loc[eligible, f"{metric}_{key}"].mean()


# Insertion order defines the column order of the resulting table.
vocab_columns = {
    "thresholds": [f"{key}%" for key in threshold_keys],
    "N": [df_merge.loc[summary_row, f"N_{key}"] for key in threshold_keys],
}
for metric in improvement_metrics:
    vocab_columns[f"{metric}_total"] = [
        df_merge.loc[summary_row, f"{metric}_{key}"] for key in threshold_keys
    ]
for metric in improvement_metrics:
    vocab_columns[f"{metric}_avg"] = [_mean_over_projects(metric, key) for key in threshold_keys]

table_vocab_analysis = pd.DataFrame(vocab_columns)
table_vocab_analysis = table_vocab_analysis.astype({"N": int})

In [129]:
# Express every metric relative to its unfiltered ("_all") counterpart, rounded to three
# decimals. The columns are overwritten in place, so running this cell again without
# rebuilding table_vocab_analysis subtracts the baseline a second time.

# "*_total" columns: baseline is the "_all" value of df_merge row 25.
for col in ["fail_accuracy_improvement_total", "accuracy_improvement_total", "f1_improvement_total"]:
    metric = col.rsplit("_", 1)[0]
    baseline = df_merge.loc[25, f"{metric}_all"]
    table_vocab_analysis[col] = (table_vocab_analysis[col] - baseline).round(3)

# "*_avg" columns: baseline is the mean "_all" value over the per-project rows.
for col in ["fail_accuracy_improvement_avg", "accuracy_improvement_avg", "f1_improvement_avg"]:
    metric = col.rsplit("_", 1)[0]
    baseline = project_only_df[f"{metric}_all"].mean()
    table_vocab_analysis[col] = (table_vocab_analysis[col] - baseline).round(3)

table_vocab_analysis

Unnamed: 0,thresholds,N,fail_accuracy_improvement_total,accuracy_improvement_total,f1_improvement_total,fail_accuracy_improvement_avg,accuracy_improvement_avg,f1_improvement_avg
0,50%,145474,-0.0,-0.01,-0.005,-0.0,-0.005,-0.003
1,25%,35918,-0.016,0.011,0.006,-0.006,-0.026,-0.021
2,20%,10156,-0.05,-0.007,-0.002,-0.064,-0.001,0.024
3,15%,2643,-0.022,-0.074,-0.045,0.014,-0.07,-0.054


In [130]:
# Write the vocabulary-analysis table to LaTeX, then shorten its column headers.
vocab_tex_path = "./latex/vocab_analysis.tex"

table_vocab_analysis.to_latex(
    vocab_tex_path,
    index=False,
    # Raw string: "\%" must reach LaTeX verbatim.
    caption=r"Performance of SEER on TOGA* with varying minimum \% of tokens in-vocab threshold.",
    label="tab:vocab_analysis",
)

# Literal (old, new) substitutions, applied in order. str.replace is used instead of
# re.sub because a replacement template containing "\D" makes re.sub raise "bad escape"
# on Python >= 3.7.
header_fixes = [
    ("{table}", "{table*}"),
    ("accuracy", "acc."),
    ("_improvement", r"_$\Delta$"),
    ("_total", "_all"),
    ("_avg", r"_project\_avg"),
]

with open(vocab_tex_path, "r+") as f:
    text = f.read()
    for old, new in header_fixes:
        text = text.replace(old, new)
    # Overwrite the file in place and drop any leftover tail.
    f.seek(0)
    f.write(text)
    f.truncate()

# Similarity

In [None]:
# Export the unique-methods-under-test comparison as a LaTeX table.
df_common_unique = pd.read_csv("./similarity_analysis/similarity_unique_mut.csv")

# Simplify project names: keep the text before the first "-<digit>"
df_common_unique["triplets"] = [
    re.split(r"-\d", name)[0] for name in df_common_unique["triplets"]
]

# Column headers as they should appear in the table.
df_common_unique = df_common_unique.rename(
    columns={
        "phase2": "SEER",
        "triplets": "TOGA*",
        "triplets_unique_count": "TOGA*_count",
        "phase2_unique_count": "SEER_count",
    }
)

exported_columns = ["SEER", "TOGA*", "SEER_count", "TOGA*_count"]
df_common_unique[exported_columns].to_latex(
    "./latex/common_projects_unique.tex",
    index=False,
    caption="Unique Methods Under Test",
    label="tab:common_unique_MUT",
)