In [19]:
from evalplus.data import (
    get_human_eval_plus,
    get_human_eval_plus_hash,
    get_mbpp_plus,
    get_mbpp_plus_hash,
    load_solutions,
)
import os
import json
from os.path import join
from tqdm import tqdm
# --- Run configuration -------------------------------------------------------
# Root directory of the evalplus outputs; results live under
# <work_dir>/<dataset>/<gen_dir>/.
work_dir = "/mnt/scratch-artemis/haausing/code_reranking/evalplus_outputs"
dataset = "humaneval"
model_name = "deepseek-coder-6.7b-instruct_temp_1.2"
#model_name = "deepseek-coder-7b-instruct-v1.5_temp_1.2"
#model_name = "code-llama-7b-instruct_temp_1.6"
debug_info = "_debug1_sd-ut"
#debug_info = ""
gen_dir = f"{model_name}{debug_info}"

# --- Load evaluation results ---------------------------------------------------
eval_results_path = f"{work_dir}/{dataset}/{gen_dir}/eval_results.json"
with open(eval_results_path, "r") as f:
    eval_results = json.load(f)

# Order every task's generations by solution id; int() makes the comparison
# numeric rather than lexicographic.
for task_id, task_solutions in eval_results["eval"].items():
    task_solutions.sort(key=lambda entry: int(entry["solution_id"]))

# --- Load benchmark problems ---------------------------------------------------
# Any dataset name other than "humaneval" falls through to MBPP+.
problems = get_human_eval_plus() if dataset == "humaneval" else get_mbpp_plus()

In [20]:
# Collapse each task's generations into its distinct solutions, remembering
# which solution ids produced each distinct piece of code.
saved_data = []
for task_id in eval_results["eval"]:
    # Group ids by solution text in a single pass. A dict keeps insertion order
    # (Python 3.7+), so unique solutions are emitted in order of first
    # appearance. Iterating a set of str instead would give an order that
    # changes between interpreter runs (string hash randomization), making the
    # written files and the downstream pairing non-reproducible.
    solution_ids = {}
    for entry in eval_results["eval"][task_id]:
        solution_ids.setdefault(entry["solution"], []).append(int(entry["solution_id"]))
    for solution, ids in solution_ids.items():
        saved_data.append({
            "task_id": task_id,
            "solution_ids": sorted(ids),
            "solution": solution
        })

# Persist the deduplicated solutions as JSON Lines: one JSON object per line.
with open(f"{work_dir}/{dataset}/{gen_dir}/mbr_neural_metric_data.jsonl", "w") as f:
    f.writelines(json.dumps(data) + "\n" for data in saved_data)

In [21]:
def process_prompt(prompt:str) -> str:
    """Flatten the docstring part of a prompt into a single line.

    Everything before the first docstring delimiter (typically the function
    signature) is dropped, the remaining delimiters are removed, each line is
    stripped of surrounding whitespace and the lines are joined with single
    spaces. Blank lines therefore show up as consecutive spaces.

    Args:
        prompt: Raw prompt text, normally a function signature followed by a
            triple-quoted docstring.

    Returns:
        The flattened text. If the prompt contains no triple-quote delimiter
        at all, the whole prompt is flattened instead of returning "".
    """
    # Prefer '"""' so prompts that already worked keep producing the exact same
    # output; fall back to "'''" for prompts whose docstring uses single quotes.
    # Without the fallback such prompts were reduced to an empty string.
    for delimiter in ('"""', "'''"):
        if delimiter in prompt:
            prompt = ''.join(prompt.split(delimiter)[1:])
            break
    lines = [line.strip() for line in prompt.split('\n')]
    return " ".join(lines)

In [22]:
# Number of unique (task, solution) records collected in saved_data.
len(saved_data)

27122

In [23]:
import concurrent.futures
from tqdm import tqdm

def process_task(task_id):
    """Build the reference-free records for one task.

    Every unique generated solution of the task (read from the module-level
    ``saved_data``) is paired with the flattened task prompt and the task's
    canonical solution from ``problems``.

    Args:
        task_id: Key into ``problems``; also matched against the "task_id"
            field of the ``saved_data`` entries.

    Returns:
        A list of dicts with the keys "source", "generated_code",
        "golden_code", "generated_code_solution_ids" and "task_id", one per
        unique solution. Empty when the task has no saved solution.
    """
    solutions = [e for e in saved_data if e["task_id"] == task_id]
    if not solutions:
        # Nothing to emit; skip the problem lookup entirely.
        return []

    # Prompt and reference depend only on the task, so compute them once
    # instead of once per solution.
    raw_prompt = problems[task_id]["prompt"]
    if dataset == "humaneval":
        prompt = process_prompt(raw_prompt)
    else:
        # Mirrors the dataset loading, which treats every non-HumanEval dataset
        # as MBPP; this also keeps `prompt` from being left unbound.
        prompt = raw_prompt.replace('"""', "").strip()
    golden_code = problems[task_id]["canonical_solution"]

    return [
        {
            "source": prompt,
            "generated_code": solution["solution"],
            "golden_code": golden_code,
            "generated_code_solution_ids": solution["solution_ids"],
            "task_id": task_id
        }
        for solution in solutions
    ]

# Run process_task for every problem on a thread pool. Executor.map yields
# results in the order of its inputs, so records stay grouped per task in the
# order `problems` is iterated.
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    per_task_records = executor.map(process_task, problems)
    results = list(tqdm(per_task_records, total=len(problems)))

# Flatten the per-task lists into one list of records.
reference_free_final_data = [record for task_records in results for record in task_records]

# Write the reference-free records as JSON Lines: one JSON object per line.
with open(f"{work_dir}/{dataset}/{gen_dir}/reference_free_neural_metric_data_final.jsonl", "w") as f:
    f.writelines(json.dumps(data) + "\n" for data in reference_free_final_data)



100%|██████████| 164/164 [00:00<00:00, 263146.85it/s]


In [29]:
import concurrent.futures
from tqdm import tqdm

def process_task(task_id):
    """Build the pairwise (MBR) records for one task.

    Every unordered pair of unique solutions of the task is emitted once,
    including each solution paired with itself: solution ``i`` is the
    "generated_code" and solution ``j >= i`` is the "golden_code". Solutions
    are read from the module-level ``saved_data``.

    Args:
        task_id: Key into ``problems``; also matched against the "task_id"
            field of the ``saved_data`` entries.

    Returns:
        A list of dicts with the keys "source", "generated_code",
        "golden_code", "generated_code_solution_ids",
        "golden_code_solution_ids" and "task_id". Empty when the task has no
        saved solution.
    """
    solutions = [e for e in saved_data if e["task_id"] == task_id]
    if not solutions:
        # Nothing to pair; skip the problem lookup entirely.
        return []

    # The prompt depends only on the task: compute it once rather than once
    # per pair (the pair count grows quadratically with the solution count).
    raw_prompt = problems[task_id]["prompt"]
    if dataset == "humaneval":
        prompt = process_prompt(raw_prompt)
    else:
        # Mirrors the dataset loading, which treats every non-HumanEval dataset
        # as MBPP; this also keeps `prompt` from being left unbound.
        prompt = raw_prompt.replace('"""', "").strip()

    local_final_data = []
    for i, solution in enumerate(solutions):
        # solutions[i:] visits exactly the partners with index j >= i, in the
        # same order as scanning all j and skipping those below i.
        for other_solution in solutions[i:]:
            local_final_data.append({
                "source": prompt,
                "generated_code": solution["solution"],
                "golden_code": other_solution["solution"],
                "generated_code_solution_ids": solution["solution_ids"],
                "golden_code_solution_ids": other_solution["solution_ids"],
                "task_id": task_id
            })
    return local_final_data

# Run process_task for every problem on a thread pool. Executor.map yields
# results in the order of its inputs, so records stay grouped per task in the
# order `problems` is iterated.
with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    per_task_records = executor.map(process_task, problems)
    results = list(tqdm(per_task_records, total=len(problems)))

# Flatten the per-task lists into one list of records.
final_data = [record for task_records in results for record in task_records]


100%|██████████| 164/164 [00:18<00:00,  8.88it/s]


In [30]:
# Write the pairwise records as JSON Lines: one JSON object per line.
with open(f"{work_dir}/{dataset}/{gen_dir}/mbr_neural_metric_data_final.jsonl", "w") as f:
    f.writelines(json.dumps(data) + "\n" for data in final_data)



In [31]:
# Total number of pairwise records in final_data across all tasks.
len(final_data)

2145704