In [82]:
import pandas as pd
import re
import os
import numpy as np

# Comment-handling variants that are analysed. Each name is also used verbatim as a
# sub-directory: input statistics are read from ./fold0/<name>/ and the generated
# tables are written to ./latex/<name>/ by the cells below.
comment_types = ["no_comments", "comments", "added_test_comments", "added_code_comments", "added_CT_comments"]

# Overall Stats

In [83]:
# Export the per-project statistics of every comment type / vocabulary threshold as
# LaTeX tables under ./latex/<comment_type>/.
#
# Backslashes in the labels are written as "\\" on purpose: "\Delta" and "\%" are
# invalid escape sequences in a normal string literal (a warning today, slated to
# become an error), while the resulting runtime text is exactly the same.
column_labels = {
    "accuracy": "Accuracy",
    "pass_accuracy": "Pass Class Accuracy",
    "fail_accuracy": "Fail Class Accuracy",
    "pass_rate": "Dataset Pass Rate",
    "fail_rate": "Dataset Fail Rate",
    "accuracy_improvement": "Accuracy \\Delta",
    "fail_accuracy_improvement": "Fail Accuracy \\Delta",
    "f1": "F1",
    "f1_improvement": "F1 \\Delta",
    "coin_accuracy": "Coin Accuracy",
    "out_vocab_C_ratio": "Missing MUT Token Rate",
    "out_vocab_T_ratio": "Missing Test Token Rate",
    "out_vocab_combined_ratio": "Missing Overall Token Rate",
}

dataset_columns = [
    "project",
    "N",
    "Dataset Pass Rate",
    "Dataset Fail Rate",
    "Missing MUT Token Rate",
    "Missing Test Token Rate",
    "Missing Overall Token Rate",
]

result_columns = [
    "Fail Accuracy \\Delta",
    "Accuracy \\Delta",
    "F1 \\Delta",
    "Accuracy",
    "Pass Class Accuracy",
    "Fail Class Accuracy",
    "F1",
    "Coin Accuracy",
    "tp",
    "fn",
    "tn",
    "fp",
]

for comment_type in comment_types:
    # to_latex() does not create missing directories, so make sure the target exists.
    os.makedirs(f"./latex/{comment_type}", exist_ok=True)

    for val in ["all", "50", "25", "10", "05"]:
        df = pd.read_csv(f"./fold0/{comment_type}/project_stats_{val}.csv")
        # Simplify project names: keep only the part before the first "-<digit>".
        df["project"] = df["project"].map(lambda name: re.split(r"-\d", name)[0])
        df = df.rename(columns=column_labels)

        table1 = df[dataset_columns]

        if val == "all":
            # Unrestricted data: dataset statistics plus a results table without N.
            table2 = df[["project"] + result_columns]
            table1.to_latex(
                f"./latex/{comment_type}/dataset_stats_{val}.tex",
                index=False,
                caption=f"New Dataset Statistics ({comment_type})",
                label=f"tab:stats_{val}",
            )
            table2.to_latex(
                f"./latex/{comment_type}/results_{val}.tex",
                index=False,
                caption=f"SEER Results on New Data ({comment_type}), sorted by failure accuracy $\\Delta$",
                label=f"tab:results_{val}",
            )
        else:
            # Thresholded data: only the results table is exported, and it carries N
            # because the sample size changes with the threshold.
            table2 = df[["project", "N"] + result_columns]
            table2.to_latex(
                f"./latex/{comment_type}/results_{val}.tex",
                index=False,
                caption=f"SEER Results on New Data ({comment_type}), restricted to minimum {100 - int(val)}\\% of tokens present",
                label=f"tab:results_{val}",
            )

In [84]:
# Fixing some LaTeX issues
# Every file in ./latex/<comment_type>/ is rewritten in place:
#   * escaped "\textbackslash Delta/delta" headers become math-mode $\Delta$ / $\delta$
#   * the "table" environment becomes the starred "table*" variant
#   * "_comments" becomes " comments"
for comment_type in comment_types:
    latex_dir = f"./latex/{comment_type}"

    for filename in os.listdir(latex_dir):
        with open(f"{latex_dir}/{filename}", "r+") as f:
            text = f.read()
            # Literal replacement via str.replace: as an re.sub() replacement template,
            # "\D" / "\d" are unknown escapes and raise re.error on Python 3.7+.
            text = text.replace("\\textbackslash Delta", "$\\Delta$")
            text = text.replace("\\textbackslash delta", "$\\delta$")
            # The lookahead skips occurrences that are already starred, so running
            # this cell twice does not produce "table**".
            text = re.sub(r"table(?!\*)", "table*", text)
            text = text.replace("_comments", " comments")
            f.seek(0)
            f.write(text)
            f.truncate()

# Vocabulary threshold analysis

In [85]:
# Build one wide frame (df_merge) holding, per project, the sample size and the three
# improvement metrics for every vocabulary threshold, then save it as CSV.
comment_type = "no_comments"
thresholds = ["all", "50", "25", "20", "15", "10"]
metric_columns = ["N", "fail_accuracy_improvement", "accuracy_improvement", "f1_improvement"]

df_merge = None
for val in thresholds:
    df = pd.read_csv(f"./fold0/{comment_type}/project_stats_{val}.csv")
    # Simplify project names: keep only the part before the first "-<digit>".
    df["project"] = df["project"].map(lambda name: re.split(r"-\d", name)[0])

    # Suffix the columns with the threshold *before* merging. Renaming pandas' "_x"/"_y"
    # collision suffixes afterwards only works on every second merge (the other merges
    # have no name collision), i.e. only for an even number of thresholds.
    table2 = df[["project"] + metric_columns].rename(
        columns={name: f"{name}_{val}" for name in metric_columns}
    )

    if df_merge is None:
        df_merge = table2.copy()
    else:
        df_merge = df_merge.merge(table2, on="project", how="left")

df_merge.to_csv("vocab_analysis.csv")

In [86]:
# Summarise df_merge per vocabulary threshold: sample size, the metrics of the aggregate
# row ("*_total") and the mean over individual projects with more than min_sample
# samples ("*_avg").
project_only_df = df_merge[df_merge["project"] != "all"]
min_sample = 20

# Locate the aggregate row by its label instead of a hard-coded row number.
# NOTE(review): this replaces the literal row label 25 — confirm that row 25 is the
# "all" row in the input files.
total_rows = df_merge.index[df_merge["project"] == "all"]
assert len(total_rows) == 1, "expected exactly one aggregate 'all' row in df_merge"
total_row = total_rows[0]

reported_thresholds = ["50", "25", "20", "15"]
metrics = ["fail_accuracy_improvement", "accuracy_improvement", "f1_improvement"]

vocab_table_data = {
    "thresholds": [f"{t}%" for t in reported_thresholds],
    "N": [df_merge.loc[total_row, f"N_{t}"] for t in reported_thresholds],
}
for metric in metrics:
    vocab_table_data[f"{metric}_total"] = [
        df_merge.loc[total_row, f"{metric}_{t}"] for t in reported_thresholds
    ]
for metric in metrics:
    # Mean over the projects that still have more than min_sample samples at threshold t.
    vocab_table_data[f"{metric}_avg"] = [
        project_only_df.loc[project_only_df[f"N_{t}"] > min_sample, f"{metric}_{t}"].mean()
        for t in reported_thresholds
    ]

table_vocab_analysis = pd.DataFrame(vocab_table_data).astype({"N": int})

In [87]:
# Turn the aggregate metrics into differences against the unrestricted ("all") data and
# give the columns their presentation names.
# This cell consumes the "*_total" / "*_avg" columns, so table_vocab_analysis has to be
# rebuilt before the cell can be run a second time.

# Locate the aggregate row by its label instead of a hard-coded row number.
# NOTE(review): this replaces the literal row label 25 — confirm that row 25 is the
# "all" row in the input files.
total_rows = df_merge.index[df_merge["project"] == "all"]
assert len(total_rows) == 1, "expected exactly one aggregate 'all' row in df_merge"
total_row = total_rows[0]

for col in ["fail_accuracy_improvement_total", "accuracy_improvement_total", "f1_improvement_total"]:
    metric = col.rsplit("_", 1)[0]  # strip the trailing "_total"
    baseline = df_merge.loc[total_row, f"{metric}_all"]
    table_vocab_analysis[col] = (table_vocab_analysis[col] - baseline).round(3)

# The per-project averages are not reported, so they are dropped without further
# processing. Backslashes are doubled because "\d" is an invalid escape sequence in a
# normal string literal; the runtime text is unchanged.
table_vocab_analysis = table_vocab_analysis.drop(
    columns=["fail_accuracy_improvement_avg", "accuracy_improvement_avg", "f1_improvement_avg"]
).rename(
    columns={
        "thresholds": "Max % Missing Vocabulary",
        "fail_accuracy_improvement_total": "Fail Accuracy \\delta",
        "accuracy_improvement_total": "Accuracy \\delta",
        "f1_improvement_total": "F1 \\delta",
    }
)

table_vocab_analysis

Unnamed: 0,Maximum % Missing Vocabulary,N,Fail Accuracy \delta,Accuracy \delta,F1 \delta
0,50%,145474,-0.0,-0.01,-0.005
1,25%,35918,-0.016,0.011,0.006
2,20%,10156,-0.05,-0.007,-0.002
3,15%,2643,-0.022,-0.074,-0.045


In [88]:
# Write the vocabulary-threshold table to ./latex/vocab_analysis.tex and turn the escaped
# "\textbackslash delta" headers into math-mode $\delta$.
table_vocab_analysis.to_latex(
    "./latex/vocab_analysis.tex",
    index=False,
    # "\\%" keeps the runtime text "\%" without an invalid escape sequence.
    caption="Performance of SEER on New Data with varying minimum \\% of tokens in-vocab threshold.",
    label="tab:vocab_analysis",
)

with open("./latex/vocab_analysis.tex", "r+") as f:
    text = f.read()
    # Literal replacement via str.replace: as an re.sub() replacement template, "\d" is
    # an unknown escape and raises re.error on Python 3.7+.
    text = text.replace("\\textbackslash delta", "$\\delta$")
    f.seek(0)
    f.write(text)
    f.truncate()

# Robustness analysis: effect of comment type

In [89]:
# Gather the unrestricted ("all") statistics of every comment type into a single wide
# frame, df_merge_comments, keyed by project. Each metric column carries the comment
# type as a suffix so that the merges never collide on column names.
comment_metric_names = ["N", "fail_accuracy_improvement", "accuracy_improvement", "f1_improvement"]

for comment_type in comment_types:
    stats_all = pd.read_csv(f"./fold0/{comment_type}/project_stats_all.csv")
    # Simplify project names
    stats_all["project"] = stats_all.apply(lambda row: re.split(r"-\d", row["project"])[0], axis=1)

    suffixed_names = {name: f"{name}_{comment_type}" for name in comment_metric_names}
    suffixed_stats = stats_all[["project"] + comment_metric_names].rename(columns=suffixed_names)

    if comment_type != comment_types[0]:
        # Later comment types are attached to the projects of the first one.
        df_merge_comments = df_merge_comments.merge(suffixed_stats, on="project", how="left")
    else:
        df_merge_comments = suffixed_stats.copy()


In [90]:
# Summarise df_merge_comments per comment type: sample size, the metrics of the aggregate
# row ("*_total") and the mean over individual projects with more than min_sample
# samples ("*_avg").
project_only_comments_df = df_merge_comments[df_merge_comments["project"] != "all"]
min_sample = 20

# Locate the aggregate row by its label instead of a hard-coded row number.
# NOTE(review): this replaces the literal row label 25 — confirm that row 25 is the
# "all" row in the input files.
total_rows = df_merge_comments.index[df_merge_comments["project"] == "all"]
assert len(total_rows) == 1, "expected exactly one aggregate 'all' row in df_merge_comments"
total_row = total_rows[0]

metrics = ["fail_accuracy_improvement", "accuracy_improvement", "f1_improvement"]

comment_table_data = {
    # Display labels, in the same order as comment_types.
    "comment_types": ["No Comments", "Preserved Comments", "Added Test Comment", "Added MUT Comment", "Added MUT/Test Comments"],
    "N": [df_merge_comments.loc[total_row, f"N_{ct}"] for ct in comment_types],
}
for metric in metrics:
    comment_table_data[f"{metric}_total"] = [
        df_merge_comments.loc[total_row, f"{metric}_{ct}"] for ct in comment_types
    ]
for metric in metrics:
    # Mean over the projects that have more than min_sample samples for comment type ct.
    comment_table_data[f"{metric}_avg"] = [
        project_only_comments_df.loc[project_only_comments_df[f"N_{ct}"] > min_sample, f"{metric}_{ct}"].mean()
        for ct in comment_types
    ]

table_comment_analysis = pd.DataFrame(comment_table_data).astype({"N": int})

In [91]:
# Turn the aggregate metrics into differences against the "no_comments" baseline and give
# the columns their presentation names.
# This cell consumes the "*_total" / "*_avg" / "N" columns, so table_comment_analysis has
# to be rebuilt before the cell can be run a second time.

# Locate the aggregate row by its label instead of a hard-coded row number.
# NOTE(review): this replaces the literal row label 25 — confirm that row 25 is the
# "all" row in the input files.
total_rows = df_merge_comments.index[df_merge_comments["project"] == "all"]
assert len(total_rows) == 1, "expected exactly one aggregate 'all' row in df_merge_comments"
total_row = total_rows[0]

for col in ["fail_accuracy_improvement_total", "accuracy_improvement_total", "f1_improvement_total"]:
    metric = col.rsplit("_", 1)[0]  # strip the trailing "_total"
    baseline = df_merge_comments.loc[total_row, f"{metric}_no_comments"]
    table_comment_analysis[col] = (table_comment_analysis[col] - baseline).round(4)

# Per-project averages and the sample size are not reported. Backslashes are doubled
# because "\d" is an invalid escape sequence in a normal string literal; the runtime
# text is unchanged.
table_comment_analysis = table_comment_analysis.drop(
    columns=["fail_accuracy_improvement_avg", "accuracy_improvement_avg", "f1_improvement_avg", "N"]
).rename(
    columns={
        "comment_types": "Comment Type",
        "fail_accuracy_improvement_total": "Fail Accuracy \\delta",
        "accuracy_improvement_total": "Accuracy \\delta",
        "f1_improvement_total": "F1 \\delta",
    }
)
table_comment_analysis

Unnamed: 0,Comment Type,Fail Accuracy \delta,Accuracy \delta,F1 \delta
0,No Comments,0.0,0.0,0.0
1,Preserved Comments,0.0055,-0.0002,-0.0002
2,Added Test Comment,-0.0086,0.0005,0.0003
3,Added MUT Comment,-0.0446,0.0096,0.0055
4,Added MUT/Test Comments,-0.0558,0.0135,0.0077


In [92]:
# Write the comment-type table to ./latex/comment_analysis.tex and clean up the generated
# LaTeX in place.
table_comment_analysis.to_latex(
    "./latex/comment_analysis.tex",
    index=False,
    caption="Performance of SEER on New Data with different comment types (compared to a no-comment baseline).",
    label="tab:comment_analysis",
)

with open("./latex/comment_analysis.tex", "r+") as f:
    text = f.read()
    # All substitutions are literal, so str.replace is used. As an re.sub() replacement
    # template, "\d" is an unknown escape and raises re.error on Python 3.7+.
    text = text.replace("\\textbackslash delta", "$\\delta$")
    text = text.replace("accuracy", "Accuracy")
    text = text.replace("\\_improvement", "")
    text = text.replace("_total", "_all")
    text = text.replace("_avg", "_project\\_avg")
    f.seek(0)
    f.write(text)
    f.truncate()

# Similarity analysis

In [93]:
# Export the unique-methods-under-test comparison as a LaTeX table: load the CSV, shorten
# the names in the "triplets" column, relabel the columns for presentation and write the
# four reported columns to ./latex/common_projects_unique.tex.
df_common_unique = pd.read_csv("./similarity_analysis/similarity_unique_mut.csv")

# Simplify project names
df_common_unique["triplets"] = df_common_unique.apply(lambda row: re.split(r"-\d", row["triplets"])[0], axis=1)

presentation_headers = {
    "phase2": "SEER",
    "triplets": "New Data",
    "triplets_unique_count": "New Data Count",
    "phase2_unique_count": "SEER Count",
}
df_common_unique = df_common_unique.rename(columns=presentation_headers)

reported_columns = ["SEER", "New Data", "SEER Count", "New Data Count"]
df_common_unique[reported_columns].to_latex(
    "./latex/common_projects_unique.tex",
    index=False,
    caption="Unique Methods Under Test",
    label="tab:common_unique_MUT",
)