In [4]:
import os
def find_file_paths(data_dir, suffix="jacoco.jsonl"):
    """Recursively collect files below ``data_dir`` whose name ends with ``suffix``.

    Args:
        data_dir: Directory to walk (str or path-like). A directory that does
            not exist yields an empty result because ``os.walk`` produces
            nothing for it.
        suffix: File-name ending to match. Defaults to ``"jacoco.jsonl"``.

    Returns:
        list[str]: Matching paths, each formed by joining the walked directory
        with the file name, sorted so the order does not depend on the
        filesystem's directory-listing order.
    """
    file_paths = []
    for root, _dirs, files in os.walk(data_dir):
        file_paths.extend(
            os.path.join(root, name) for name in files if name.endswith(suffix)
        )

    # os.walk gives no ordering guarantee; sort for reproducible downstream output.
    return sorted(file_paths)

In [5]:

# Substrings looked for (case-insensitively) by contains_keyword.
keywords = ["assert", "verify", "fail"]

def contains_keyword(text):
    """Return True if ``text`` contains any entry of ``keywords``, ignoring case.

    Args:
        text: The string to inspect. Any non-string value (for example ``None``
            or a float NaN standing in for a missing value) is treated as
            containing no keyword.

    Returns:
        bool: True when at least one keyword occurs as a substring of the
        lower-cased text, otherwise False.
    """
    if not isinstance(text, str):
        return False
    # Lower-case once instead of once per keyword.
    lowered = text.lower()
    return any(keyword in lowered for keyword in keywords)

In [None]:
import os
from pathlib import Path
from tqdm.auto import tqdm
import pandas as pd

import os

# Every immediate sub-directory of the data root is treated as one dataset.
root, dirs, files = next(os.walk("../data"))
datasets = dirs

for dataset in datasets:
    dataset_path = Path(root, dataset, "executed")

    paths = find_file_paths(dataset_path)

    # Per-dataset accumulators, each shaped {model: {method: value}}.
    runnable_data = {}
    instruction_data = {}
    branch_data = {}
    mutation_data = {}
    # (result column, accumulator) pairs that are averaged over passing tests.
    averaged_metrics = (
        ("instruction_coverage", instruction_data),
        ("branch_coverage", branch_data),
        ("mutation_score", mutation_data),
    )

    # total= lets tqdm show real progress; a bare map() has no length.
    for path in tqdm(map(Path, paths), total=len(paths), desc=f"Processing {dataset}"):
        # Layout below "executed": <method>/<model part>/<model part>/...
        # Working relative to dataset_path keeps the indices independent of
        # how deep the data root itself is.
        rel_parts = path.relative_to(dataset_path).parts
        if len(rel_parts) < 3:
            # Too shallow to name a model: report the file and skip it rather
            # than filing its numbers under a stale or undefined model.
            print(path)
            continue
        method = rel_parts[0]
        model = rel_parts[1] + "/" + rel_parts[2]

        df = pd.read_json(path, lines=True, dtype=False)
        if df.empty:
            # Nothing recorded for this (model, method); it becomes 0 via fillna below.
            continue
        df = df.set_index("id")

        if dataset == "methods2test_runnable":
            # Drop rows whose status is "exception" or "skipped". The "status"
            # column is assumed to exist; a missing column raises KeyError.
            # .copy() so the assignment below targets an independent frame.
            df = df[~df["status"].isin(["exception", "skipped"])].copy()

            # Generated predictions live in the sibling "fixed" tree under the
            # same relative directory. Build the path from its components so
            # only the intended directory and file name are substituted.
            gen_data_path = Path(
                root,
                dataset,
                "fixed",
                *rel_parts[:-1],
                path.name.replace("jacoco.jsonl", "00001-of-00001.jsonl"),
            )
            gen_df = pd.read_json(gen_data_path, lines=True, dtype=False).set_index("id")
            # Ids whose prediction contains none of the keywords are marked
            # failed; every other row keeps its existing status.
            ids = gen_df[~gen_df["prediction"].apply(contains_keyword)].index.tolist()

            keys = df.index.intersection(ids)
            df.loc[keys, "status"] = "failed"

        # Rows with status "success" count as passing tests.
        passing = df[df["status"] == "success"]
        runnable_data.setdefault(model, {})[method] = (
            len(passing) / len(df) if len(df) > 0 else 0
        )

        # Mean of each metric over the passing rows; 0 when there are no
        # passing rows or the column is absent from the results file.
        for column, accumulator in averaged_metrics:
            value = 0
            if not passing.empty and column in passing.columns:
                value = passing[column].mean()
            accumulator.setdefault(model, {})[method] = value

    print(f"Dataset: {dataset}")

    # Columns are models, rows are methods; missing combinations become 0.
    runnable_df = pd.DataFrame(runnable_data).fillna(0)
    instruction_df = pd.DataFrame(instruction_data).fillna(0)
    branch_df = pd.DataFrame(branch_data).fillna(0)
    mutation_df = pd.DataFrame(mutation_data).fillna(0)

    # NOTE(review): each printed figure is the mean across methods of the
    # median across models, although the label reads "Avg. mean." -- confirm
    # which statistic is intended before changing either side.
    runnable_df.T.to_csv(Path(root, dataset, "passing_rate.csv"), index_label = 'model')
    print("Avg. mean. runnable tests: ", runnable_df.T.median().mean())

    instruction_df.T.to_csv(Path(root, dataset, "coverage_instruction.csv"), index_label = 'model')
    print("Avg. mean. instruction coverage: ", instruction_df.T.median().mean())

    branch_df.T.to_csv(Path(root, dataset, "coverage_branch.csv"), index_label = 'model')
    print("Avg. mean. branch coverage: ", branch_df.T.median().mean())

    mutation_df.T.to_csv(Path(root, dataset, "mutation_score.csv"), index_label = 'model')
    print("Avg. mean. mutation score: ", mutation_df.T.median().mean())
    
    
    

Processing humaneval-x: 0it [00:00, ?it/s]

Dataset: humaneval-x
Avg. mean. runnable tests:  0.19829713498225643
Avg. mean. instruction coverage:  0.0
Avg. mean. branch coverage:  0.0
Avg. mean. mutation score:  0.0


Processing methods2test_runnable: 0it [00:00, ?it/s]

Dataset: methods2test_runnable
Avg. mean. runnable tests:  0.18534436780934227
Avg. mean. instruction coverage:  0.5440336713736302
Avg. mean. branch coverage:  0.17453014968906266
Avg. mean. mutation score:  0.36223431439874
