In [1]:
import os
import pandas as pd
import numpy as np
import json
import textwrap
import ast
import importlib
import sys

#### Helper functions

In [2]:
def read_csv(file_path, delimiter=';'):
    """Load a delimited text file into a pandas DataFrame.

    Args:
        file_path: Path of the file to read.
        delimiter: Field separator passed to ``pd.read_csv`` (``';'`` by default).

    Returns:
        The parsed DataFrame, or ``None`` (after printing a notice) when
        ``file_path`` does not exist.
    """
    if os.path.exists(file_path):
        return pd.read_csv(file_path, delimiter=delimiter)

    print("File not found: {}".format(file_path))
    return None

def write_csv(df, file_path):
    """Write ``df`` to ``file_path`` as CSV, leaving out the index column."""
    df.to_csv(path_or_buf=file_path, index=False)

def read_json(filename):
    """Parse the JSON document stored in ``filename`` and return the decoded object."""
    with open(filename, "r") as handle:
        return json.load(handle)
    
def read_jsonl(filename):
    """Read a JSON Lines file and return one decoded object per non-blank line.

    Args:
        filename: Path of the ``.jsonl`` file to read.

    Returns:
        A list with the decoded value of every non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(filename, "r") as file:
        # Skip whitespace-only lines (e.g. a trailing empty line), which
        # json.loads would otherwise reject; iterate lazily instead of
        # materialising every line with readlines().
        return [json.loads(line) for line in file if line.strip()]

def extract_main_block(complete_code):
    """Return the source of the ``if __name__ == "__main__":`` block, if any.

    The code is dedented before parsing, so uniformly indented snippets are
    accepted. The returned text is regenerated with ``ast.unparse`` and may
    therefore differ from the original formatting (quotes, comments, spacing).

    Args:
        complete_code: Python source code as a string.

    Returns:
        The unparsed ``if`` statement (including any ``else`` branch) as a
        string, or ``None`` when no such block exists.

    Raises:
        SyntaxError: If ``complete_code`` cannot be parsed.
    """
    parsed_code = ast.parse(textwrap.dedent(complete_code))

    for node in ast.walk(parsed_code):
        if not (isinstance(node, ast.If) and isinstance(node.test, ast.Compare)):
            continue

        comparison = node.test
        left_operand = comparison.left
        right_operand = comparison.comparators[0]

        # Require an equality test so that `if __name__ != "__main__":` is not
        # mistaken for the main block.
        if not isinstance(comparison.ops[0], ast.Eq):
            continue
        if not (isinstance(left_operand, ast.Name) and left_operand.id == "__name__"):
            continue
        # Read the literal through `.value`; the legacy `.s` alias on
        # ast.Constant is deprecated.
        if isinstance(right_operand, ast.Constant) and right_operand.value == "__main__":
            return ast.unparse(node)

    return None


def extract_function_from_code(complete_code, function_to_extract):
    """Return the source of the first function named ``function_to_extract``.

    The code is dedented, parsed, and searched with ``ast.walk``; only
    ``ast.FunctionDef`` nodes are considered. The returned text is regenerated
    with ``ast.unparse`` rather than sliced from the input.

    Returns:
        The unparsed function definition as a string, or ``None`` when no
        function with that name is found.
    """
    tree = ast.parse(textwrap.dedent(complete_code))

    # Lazily unparse matches so only the first hit is ever reconstructed.
    matching_sources = (
        ast.unparse(candidate)
        for candidate in ast.walk(tree)
        if isinstance(candidate, ast.FunctionDef) and candidate.name == function_to_extract
    )
    return next(matching_sources, None)


def get_tests(test_name, question_id, all_tests):
    """Extract the test code for one benchmark task.

    Args:
        test_name: Name of the test function to extract; an empty string means
            the tests live in ``main()`` or in the ``__main__`` guard.
        question_id: Key used to look up the test file in ``all_tests``.
        all_tests: Mapping from question id to the test file's source code.

    Returns:
        The extracted test source as a string, or ``None`` (after printing a
        notice) when nothing could be extracted.
    """
    test_file_content = all_tests[question_id]

    if test_name == "":
        # No named test: look for a main() function first, then fall back to
        # the `if __name__ == "__main__"` block.
        extracted = extract_function_from_code(test_file_content, "main")
        if extracted is None:
            extracted = extract_main_block(test_file_content)
        if extracted is None:
            print("Test in main could not be extracted question_id: ", question_id)
        return extracted

    # A named test exists: pull out exactly that function.
    extracted = extract_function_from_code(test_file_content, test_name)
    if extracted is None:
        print("Test " , test_name, " could not be extracted question_id: ", question_id)
    return extracted


In [3]:

def get_combinations():
    """Enumerate the prompt-technique flag combinations.

    Returns:
        A list of 6-tuples ``(is_zero_shot, is_few_shot, is_chain_of_thought,
        is_persona, is_packages, is_signature)`` covering every True/False
        assignment in which exactly one of ``is_zero_shot`` / ``is_few_shot``
        is True.
    """
    flag_values = [True, False]
    return [
        (is_zero_shot, is_few_shot, is_chain_of_thought, is_persona, is_packages, is_signature)
        for is_zero_shot in flag_values
        for is_few_shot in flag_values
        # zero-shot and few-shot are mutually exclusive, and one must be set
        if is_zero_shot != is_few_shot
        for is_chain_of_thought in flag_values
        for is_persona in flag_values
        for is_packages in flag_values
        for is_signature in flag_values
    ]


def get_persona():
    """Return the persona phrase that can be prepended to a prompt."""
    # Alternative wording (disabled):
    # persona = "As an expert " + programming_language + " developer,"
    return "As a software developer who follows best coding practices for maintainability such as avoiding code smells and writing simple and clean code"

def get_chain_of_thought():
    """Return the chain-of-thought instruction that can be added to a prompt."""
    return "Think carefully and logically, explaining your answer step by step."


In [10]:
# Run configuration: dataset version, target language and all input/output paths.
version = 14
language = 'Python'

# Inputs
benchmark_data_path = "../../benchmarks/CoderEval/codereval_with_fewshot.csv"
benchmark_code_data_path = f"../../benchmarks/CoderEval/CoderEval4{language}.json"
benchmark_test_data_path = f"../../benchmarks/CoderEval/tests/record_testcases_map_{language.lower()}.json"

# Outputs
complete_codereval_path = "../../benchmarks/CoderEval/codereval_benchmark.csv"
combination_dataset_path = f"../../datasets/CoderEval/{language}/codereval_combination_dataset_v{version}.csv"


#### Read the dataset

In [81]:
# Load the few-shot benchmark table, keep only the tasks whose ground truth
# passed, and add empty placeholder columns for the merge step to fill in.
benchmark_data = read_csv(benchmark_data_path)
if benchmark_data is None:
    # read_csv only prints a notice for a missing file; fail here with a clear
    # error instead of a TypeError on None further down.
    raise FileNotFoundError(benchmark_data_path)

# .copy() detaches the filtered rows from the original frame, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
benchmark_data = benchmark_data[benchmark_data['groundtruth_passed'] == True].copy()

for placeholder_column in [
    "groundtruth_code",
    "packages",
    "tests",
    "function_name",
    "test_name",
    "level",
    "file_content",
    "test_file_path",
]:
    benchmark_data[placeholder_column] = ""

# rename docstring column to zero-shot
benchmark_data = benchmark_data.rename(columns={"docstring": "zero_shot"})
print("Loaded {} datapoints from benchmark data".format(len(benchmark_data)))

# Task records (code, names, context) and the per-task test files.
benchmark_code_data = read_json(benchmark_code_data_path)["RECORDS"]
benchmark_test_data = read_json(benchmark_test_data_path)

# benchmark_data

Loaded 221 datapoints from benchmark data


#### Merge all datasets in one file (updated CoderEval)

In [82]:
# Copy the per-task fields from the CoderEval records into the benchmark table,
# matching rows on question_id == record["_id"].
for record in benchmark_code_data:
    task_id = record["_id"]
    matching_rows = benchmark_data.index[benchmark_data["question_id"] == task_id]
    if matching_rows.empty:
        print(f"Task {task_id} not found in benchmark data")
        continue

    benchmark_data.loc[matching_rows, "groundtruth_code"] = record["code"]
    benchmark_data.loc[matching_rows, "function_name"] = record["name"]
    benchmark_data.loc[matching_rows, "test_name"] = record["test_name"]
    benchmark_data.loc[matching_rows, "level"] = record["level"]
    benchmark_data.loc[matching_rows, "file_content"] = record["file_content"]
    benchmark_data.loc[matching_rows, "tests"] = get_tests(record["test_name"], task_id, benchmark_test_data)

    # The package list is taken as the 4th piece of all_context when split on
    # double quotes.
    # NOTE(review): this relies on the exact layout of all_context - confirm.
    packages_field = record["all_context"].split("\"")[3].strip()
    benchmark_data.loc[matching_rows, "packages"] = packages_field

    # Test file path: the project name with "/" replaced by "---", joined with
    # the source path where ".py" becomes "_test_<task id>.py".
    project_dir = record["project"].replace("/", "---")
    test_file_name = record["file_path"].replace(".py", f"_test_{task_id}.py")
    benchmark_data.loc[matching_rows, "test_file_path"] = os.path.join(project_dir, test_file_name)



Task 62b87d23d292efb640a5566b not found in benchmark data
Task 62b45b396decaeff903e1001 not found in benchmark data
Task 62ece4982e6aefcf4aabbd60 not found in benchmark data
Task 62ece4982e6aefcf4aabbd6b not found in benchmark data
Task 62ece4982e6aefcf4aabbd6f not found in benchmark data
Task 62ece4982e6aefcf4aabbd74 not found in benchmark data
Task 62ece4992e6aefcf4aabbd88 not found in benchmark data
Task 6306092373426c38ae68acfa not found in benchmark data
Task 62ece4982e6aefcf4aabbd64 not found in benchmark data


In [83]:
# write_csv(benchmark_data, complete_codereval_path)

#### Create dataset with combinations

In [88]:
# Enumerate the prompt-technique combinations and load the merged benchmark
# file (comma-separated), then preview its first two rows.
combinations = get_combinations()
data = read_csv(complete_codereval_path, delimiter=',')

data.head(2)


Unnamed: 0,question_id,input,signature,zero_shot,project,file_path,few_shot,groundtruth_passed,groundtruth_code,packages,tests,function_name,test_name,level,file_content,test_file_path
0,62ece4992e6aefcf4aabbd86,"def _dump_string(obj, dumper=None):\n\t""""""\n\t...","def _dump_string(obj, dumper=None):","Given a python object, serialize it into a YAM...",ansible-security/ansible_collections.ibm.qradar,tests/unit/mock/yaml_helper.py,"If the input is {""cats"": 2, ""dogs"": 4} and dum...",True,"import yaml\ndef _dump_string(obj, dumper=None...",yaml,"def test__dump_string():\n """"""Check the cor...",_dump_string,test__dump_string,plib_runnable,"from __future__ import absolute_import, divisi...",ansible-security---ansible_collections.ibm.qra...
1,62ece4992e6aefcf4aabbd8b,"def force_string(obj):\n\t""""""\n\tForce decodin...",def force_string(obj):,Force decoding obj with 'utf-8' if it is a num...,awsteiner/o2sclpy,o2sclpy/utils.py,"If the input is b'hello darkness', then the ou...",True,"import numpy\ndef force_string(obj):\n """"""\...",numpy,"def test_force_string():\n """"""Check the cor...",force_string,test_force_string,plib_runnable,# -------------------------------------------...,awsteiner---o2sclpy/o2sclpy/utils_test_62ece49...


In [98]:

# Build the combination dataset: one row per (flag combination, benchmark task),
# each holding the assembled prompt plus the task metadata.

# Column order of the resulting frame. "zero_shot_prompt" is listed last
# because every record carries it as well.
combination_columns = [
    "task_id", "combination_id", "prompt", "project", "file_path", "groundtruth_code", "tests",
    "packages", "function_name", "test_name", "level", "file_content", "test_file_path",
    "original_prompt_length", "is_zero_shot", "is_few_shot", "is_chain_of_thought", "is_persona",
    "is_packages", "is_signature", "zero_shot_prompt",
]

# The output-format constraint is the same for every prompt, so build it once.
constraint = "Respond with a " + language + " function in one code block."

# Collect plain dicts and create the DataFrame once at the end; appending to a
# DataFrame row by row copies the whole frame on every iteration.
combination_records = []

for combination in combinations:
    print("Combination: ", combination)
    is_zero_shot, is_few_shot, is_chain_of_thought, is_persona, is_packages, is_signature = combination

    for _, row in data.iterrows():
        # Prompt layout: constraint, [persona], [chain of thought], task
        # description, [few-shot examples], [signature], [packages].
        prompt_parts = [constraint + " "]

        if is_persona:
            prompt_parts.append(get_persona() + ", ")

        if is_chain_of_thought:
            prompt_parts.append(get_chain_of_thought() + " ")

        # The content of zero_shot should always be present
        prompt_parts.append(row["zero_shot"] + " ")

        if is_few_shot:
            prompt_parts.append(row["few_shot"] + " ")

        if is_signature:
            prompt_parts.append("The function signature is: " + row["signature"] + ". ")

        if is_packages:
            # str() turns a missing value (NaN) into "nan", which is treated
            # the same as an empty package list.
            packages = str(row["packages"])
            if packages == "" or packages == "nan":
                prompt_parts.append("The function does not use any libraries. ")
            else:
                prompt_parts.append("The function has access (but does not necessarily use) the following packages: " + packages + ". ")

        combination_records.append({
            "task_id": row["question_id"],
            "combination_id": combination,
            "prompt": "".join(prompt_parts),
            "project": row["project"],
            "file_path": row["file_path"],
            "groundtruth_code": row["groundtruth_code"],
            "tests": row["tests"],
            "level": row["level"],
            "zero_shot_prompt": row["zero_shot"],
            "packages": row["packages"],
            "function_name": row["function_name"],
            "test_name": row["test_name"],
            "file_content": row["file_content"],
            "test_file_path": row["test_file_path"],
            # Length of the task description in whitespace-separated words.
            "original_prompt_length": len(row["zero_shot"].split()),
            "is_zero_shot": is_zero_shot,
            "is_few_shot": is_few_shot,
            "is_chain_of_thought": is_chain_of_thought,
            "is_persona": is_persona,
            "is_packages": is_packages,
            "is_signature": is_signature
        })

combination_data = pd.DataFrame(combination_records, columns=combination_columns)

# write_csv(combination_data, combination_dataset_path)
len(combination_data)

Combination:  (True, False, True, True, True, True)
Combination:  (True, False, True, True, True, False)
Combination:  (True, False, True, True, False, True)
Combination:  (True, False, True, True, False, False)
Combination:  (True, False, True, False, True, True)
Combination:  (True, False, True, False, True, False)
Combination:  (True, False, True, False, False, True)
Combination:  (True, False, True, False, False, False)
Combination:  (True, False, False, True, True, True)
Combination:  (True, False, False, True, True, False)
Combination:  (True, False, False, True, False, True)
Combination:  (True, False, False, True, False, False)
Combination:  (True, False, False, False, True, True)
Combination:  (True, False, False, False, True, False)
Combination:  (True, False, False, False, False, True)
Combination:  (True, False, False, False, False, False)
Combination:  (False, True, True, True, True, True)
Combination:  (False, True, True, True, True, False)
Combination:  (False, True, Tru

In [17]:
# Reload the saved combination dataset, drop the "file_content" column and
# write the result back to the same file.
combination_data = read_csv(combination_dataset_path, delimiter=',')

# errors='ignore' keeps this cell re-runnable: the file is overwritten below,
# so on a second run the column is already gone and a plain drop would raise
# KeyError.
combination_data = combination_data.drop(columns=['file_content'], errors='ignore')
write_csv(combination_data, combination_dataset_path)


In [18]:
# Only the last expression of a cell is displayed, so the row count is printed
# explicitly before the two-row preview.
print(f"{len(combination_data)} rows in combination dataset")
combination_data.head(2)

Unnamed: 0,task_id,combination_id,prompt,project,file_path,groundtruth_code,tests,packages,function_name,test_name,level,test_file_path,original_prompt_length,is_zero_shot,is_few_shot,is_chain_of_thought,is_persona,is_packages,is_signature,zero_shot_prompt
0,62ece4992e6aefcf4aabbd86,"(True, False, True, True, True, True)",Respond with a Python function in one code blo...,ansible-security/ansible_collections.ibm.qradar,tests/unit/mock/yaml_helper.py,"import yaml\ndef _dump_string(obj, dumper=None...","def test__dump_string():\n """"""Check the cor...",yaml,_dump_string,test__dump_string,plib_runnable,ansible-security---ansible_collections.ibm.qra...,10,True,False,True,True,True,True,"Given a python object, serialize it into a YAM..."
1,62ece4992e6aefcf4aabbd8b,"(True, False, True, True, True, True)",Respond with a Python function in one code blo...,awsteiner/o2sclpy,o2sclpy/utils.py,"import numpy\ndef force_string(obj):\n """"""\...","def test_force_string():\n """"""Check the cor...",numpy,force_string,test_force_string,plib_runnable,awsteiner---o2sclpy/o2sclpy/utils_test_62ece49...,30,True,False,True,True,True,True,Force decoding obj with 'utf-8' if it is a num...
