In [37]:
import os
def find_file_paths(data_dir):
    """Collect every file below ``data_dir`` whose name ends with ``jacoco.jsonl``.

    The tree is traversed with ``os.walk``. Each match is returned as the
    string ``os.path.join(directory, filename)``, in traversal order. An empty
    list is returned when nothing matches.
    """
    return [
        os.path.join(current_dir, name)
        for current_dir, _subdirs, names in os.walk(data_dir)
        for name in names
        if name.endswith("jacoco.jsonl")
    ]

In [51]:





import os
from pathlib import Path
from tqdm.auto import tqdm
import pandas as pd
import fnmatch
import os

def _usable_baseline(value, description):
    """Return ``value`` if it can serve as a normalisation divisor, else 1.

    A baseline that is NaN or not strictly positive would make the divisions
    in the main loop raise ``ZeroDivisionError`` or yield NaN/inf. Such a
    value is replaced by the neutral divisor 1 and a notice is printed.
    """
    if pd.notna(value) and value > 0:
        return value
    print(f"Baseline {description} is {value}; results are left un-normalised")
    return 1


def _normalised_mean(frame, column, baseline):
    """Return ``frame[column].mean() / baseline``.

    Returns 0 when ``frame`` is empty or does not have ``column``.
    """
    if frame.empty or column not in frame.columns:
        return 0
    return frame[column].mean() / baseline


def _write_summary(data, csv_path, label):
    """Turn ``{model: {method: value}}`` into a frame, save it and report it.

    Missing model/method combinations are filled with 0. The frame is written
    transposed (one row per model) to ``csv_path`` and the mean of the
    per-method medians is printed after ``label``. The un-transposed frame is
    returned.
    """
    summary_df = pd.DataFrame(data).fillna(0)
    summary_df.T.to_csv(csv_path, index_label="model")
    # NOTE(review): the labels say "Avg. mean." but the statistic is the mean
    # of per-method medians -- confirm which one is intended.
    print(label, summary_df.T.median().mean())
    return summary_df


# Every immediate sub-directory of ../data is treated as one dataset.
root, dirs, files = next(os.walk("../data"))
datasets = dirs

for dataset in datasets:
    dataset_path = Path(root, dataset, "coverage")

    paths = find_file_paths(dataset_path)

    # Baseline divisors default to 1, i.e. values stay un-normalised when the
    # dataset has no (usable) runnable_test.jsonl.
    baseline_path = Path(root, dataset, "coverage", "runnable_test.jsonl")
    baseline_runnable_res = 1
    baseline_instruction_coverage = 1
    baseline_branch_coverage = 1
    if baseline_path.exists():
        baseline_df = pd.read_json(baseline_path, lines=True)
        # An empty baseline file has no "status" column to filter on.
        if not baseline_df.empty:
            baseline_runnable_df = baseline_df[baseline_df["status"].isin(["success"])]

            baseline_runnable_res = _usable_baseline(
                len(baseline_runnable_df) / len(baseline_df),
                f"runnable ratio of {dataset}",
            )
            baseline_instruction_coverage = _usable_baseline(
                baseline_runnable_df["instruction_coverage"].mean(),
                f"instruction coverage of {dataset}",
            )
            baseline_branch_coverage = _usable_baseline(
                baseline_runnable_df["branch_coverage"].mean(),
                f"branch coverage of {dataset}",
            )

    # Each mapping has the shape {model: {method: normalised value}}.
    runnable_data = {}
    instruction_data = {}
    branch_data = {}
    # Iterating the list itself (not a map object) lets tqdm show a total.
    for path in tqdm(paths, desc=f"Processing {dataset}"):
        path = Path(path)

        # Index relative to the coverage directory so the lookup does not
        # depend on how many components the data root has. The first three
        # components are used as <method>/<model part 1>/<model part 2>.
        relative_parts = path.relative_to(dataset_path).parts
        if len(relative_parts) < 3:
            # Too shallow to carry method and model; skip it instead of
            # attributing the file to a stale or undefined model.
            print(f"Skipping {path}: cannot derive method/model from its location")
            continue
        method = relative_parts[0]
        model = relative_parts[1] + "/" + relative_parts[2]

        df = pd.read_json(path, lines=True)

        # Registered before the emptiness check, so a model whose result file
        # is empty still gets an entry in the runnable mapping.
        runnable_data.setdefault(model, {})
        if df.empty:
            continue

        # Only rows with status "success" count as runnable tests.
        success_df = df[df["status"].isin(["success"])]
        runnable_res = len(success_df) / len(df) / baseline_runnable_res
        instruction_coverage = _normalised_mean(
            success_df, "instruction_coverage", baseline_instruction_coverage
        )
        branch_coverage = _normalised_mean(
            success_df, "branch_coverage", baseline_branch_coverage
        )

        runnable_data[model][method] = round(runnable_res, 4)
        instruction_data.setdefault(model, {})[method] = round(instruction_coverage, 4)
        branch_data.setdefault(model, {})[method] = round(branch_coverage, 4)

    print(f"Dataset: {dataset}")

    runnable_df = _write_summary(
        runnable_data,
        Path(root, dataset, "coverage_runnable.csv"),
        "Avg. mean. runnable tests: ",
    )
    instruction_df = _write_summary(
        instruction_data,
        Path(root, dataset, "coverage_instruction.csv"),
        "Avg. mean. instruction coverage: ",
    )
    branch_df = _write_summary(
        branch_data,
        Path(root, dataset, "coverage_branch.csv"),
        "Avg. mean. branch coverage: ",
    )
    
    

Processing humaneval-x: 0it [00:00, ?it/s]

Dataset: humaneval-x
Avg. mean. runnable tests:  0.23181000000000002
Avg. mean. instruction coverage:  0.98805
Avg. mean. branch coverage:  0.78431


Processing methods2test_small: 0it [00:00, ?it/s]

Dataset: methods2test_small
Avg. mean. runnable tests:  nan
Avg. mean. instruction coverage:  nan
Avg. mean. branch coverage:  nan


Processing methods2test_runnable: 0it [00:00, ?it/s]

Dataset: methods2test_runnable
Avg. mean. runnable tests:  0.36439
Avg. mean. instruction coverage:  0.45285000000000003
Avg. mean. branch coverage:  0.39166
