In [1]:
from datasets import load_dataset
import os
import json
import subprocess
import re, ast, astunparse
import random


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Download the HumanEval benchmark from the Hugging Face hub.
dataset = load_dataset("openai_humaneval")

# Directory that receives one JSON file per problem.
save_path = "HumanEval"
os.makedirs(save_path, exist_ok=True)


# Files are named by position inside the split, so a second split would
# overwrite the files of the first one.
for split in dataset.keys():
    for i in range(len(dataset[split])):
        # A context manager flushes and closes each handle immediately instead
        # of leaving one open file per problem for the garbage collector.
        with open(f"{save_path}/{i}.json", "w") as out:
            json.dump(dataset[split][i], out, indent=4)

print(f"HumanEval dataset saved in {save_path}")

Downloading readme: 100%|██████████| 6.52k/6.52k [00:00<00:00, 51.0MB/s]
Downloading data: 100%|██████████| 83.9k/83.9k [00:00<00:00, 88.9kB/s]
Generating test split: 100%|██████████| 164/164 [00:00<00:00, 19413.14 examples/s]

HumanEval dataset saved in HumanEval





## HumanEvalCoverageParsing

In [3]:
# Output directory for the per-problem JSON files produced in this section;
# created up front so later writes cannot fail on a missing folder.
save_parsing_path = "HumanEvalCoverage"
os.makedirs(save_parsing_path, exist_ok=True)

In [4]:
# Run the complete benchmark test of every problem under coverage and record
# the percentage reported for method.py in the problem's JSON file.
for i in range(len(dataset['test'])):
    with open(f'HumanEval/{i}.json') as src:
        data = json.load(src)

    # The reference implementation goes to method.py so test.py can import it.
    method = data['prompt'] + data['canonical_solution']
    method_name = data['entry_point']
    with open('method.py', 'w') as out:
        out.write(method)

    # test.py = one import per top-level function of the solution, the
    # benchmark's test code, and a main guard calling check() on the entry point.
    import_lines = [
        f"from method import {node.name}\n"
        for node in ast.parse(method).body
        if isinstance(node, ast.FunctionDef)
    ]
    test = "".join(import_lines) + "\n\n" + data['test']
    test += f"\nif __name__ == '__main__':\n    check({method_name})"
    with open('test.py', 'w') as out:
        out.write(test)

    subprocess.run(["coverage", "run", "test.py"], check=True)
    result = subprocess.run(["coverage", "report", "-m"], capture_output=True, text=True, check=True)

    # Extract the percentage from the method.py row of the textual report.
    match = re.search(r'method.py.* (\d+%)', result.stdout)
    if not match:
        coverage_percentage = None
        print("Could not find coverage percentage.")
    else:
        coverage_percentage = float(match.group(1)[:-1])
        # Only problems that are not fully covered are echoed to the output.
        if coverage_percentage != 100:
            print(i)
            print(result.stdout)
            print(f"Coverage: {coverage_percentage}")

    data['coverage'] = coverage_percentage
    with open(f'{save_parsing_path}/{i}.json', 'w') as out:
        json.dump(data, out, indent=4)

46
Name        Stmts   Miss  Cover   Missing
-----------------------------------------
method.py       8      1    88%   20
test.py         9      0   100%
-----------------------------------------
TOTAL          17      1    94%

Coverage: 88.0
59
Name        Stmts   Miss  Cover   Missing
-----------------------------------------
method.py      13      1    92%   12
test.py        10      0   100%
-----------------------------------------
TOTAL          23      1    96%

Coverage: 92.0
81
Name        Stmts   Miss  Cover   Missing
-----------------------------------------
method.py      29      3    90%   34, 42, 44
test.py        11      0   100%
-----------------------------------------
TOTAL          40      3    92%

Coverage: 90.0
89
Name        Stmts   Miss  Cover   Missing
-----------------------------------------
method.py       8      1    88%   19
test.py        12      0   100%
-----------------------------------------
TOTAL          20      1    95%

Coverage: 88.0
99
Name 

## HumanEvalCoverageTestBase

In [3]:
# Output directory for the per-problem JSON files produced in this section;
# created up front so later writes cannot fail on a missing folder.
save_parsing_testbase_path = "HumanEvalCoverageTestBase"
os.makedirs(save_parsing_testbase_path, exist_ok=True)

In [4]:
class RandomTest(ast.NodeTransformer):
    """Reduce the ``check`` function to one randomly chosen statement.

    Functions with any other name are returned untouched.
    """

    def visit_FunctionDef(self, node):
        """Replace the body of ``check`` with a single random statement.

        Args:
            node: The ``ast.FunctionDef`` being visited.

        Returns:
            The same node, modified in place when it is named ``check``.
        """
        if node.name == "check":
            # A function body must be a list of statements. Storing the bare
            # node only worked with unparsers that tolerate a non-list body
            # and breaks compile() and other AST consumers.
            node.body = [random.choice(node.body)]
        return node

In [5]:
class FirstTest(ast.NodeTransformer):
    """Trim the ``check`` function down to its first two statements."""

    def visit_FunctionDef(self, node):
        """Keep only the two leading statements when the function is ``check``.

        Any other function definition is returned unchanged.
        """
        if node.name != "check":
            return node
        node.body = node.body[0:2]
        return node

In [6]:
import astor
def generate_split_test_files(generated_test_path, filtered_dir, method_name, method):
    """Write one runnable test script per top-level assertion of a test file.

    Args:
        generated_test_path: Path of the Python file holding the test function.
        filtered_dir: Existing directory that receives the generated
            ``assertion_<k>.py`` files, numbered from 1.
        method_name: Name passed to ``check`` in each script's main guard.
        method: Source code of the module under test; every top-level function
            it defines is imported from ``method`` at the top of each script.

    Raises:
        StopIteration: If the test file has no top-level function definition.
    """
    # Read through a context manager so the handle is closed deterministically.
    with open(generated_test_path) as src:
        code = src.read()
    tree = ast.parse(code)

    # The first top-level function definition is used as the test function.
    func_def = next(node for node in tree.body if isinstance(node, ast.FunctionDef))

    # Only assertions sitting directly in the function body are split out;
    # asserts nested inside loops or branches are not collected.
    asserts = [stmt for stmt in func_def.body if isinstance(stmt, ast.Assert)]

    # The import header is identical for every generated file, so build it
    # once. Names are emitted last-defined first, matching the order produced
    # by prepending one import line at a time.
    method_tree = ast.parse(method)
    functions = [node.name for node in method_tree.body if isinstance(node, ast.FunctionDef)]
    import_header = "".join(f"from method import {name}\n" for name in reversed(functions))

    # ast.parse returns a Module; splice in its statements rather than nesting
    # a Module inside another Module's body, which is not a valid tree.
    main_guard = ast.parse(f"if __name__ == '__main__':\n    check({method_name})").body

    # Split into separate files
    for i, assertion in enumerate(asserts, 1):
        new_tree = ast.Module(body=[
            ast.FunctionDef(
                name="check",
                args=func_def.args,
                body=[assertion],
                decorator_list=[]
            ),
            *main_guard
        ], type_ignores=[])

        # Convert the AST back to source code
        new_code = import_header + astor.to_source(new_tree)

        # Write to a new file
        filename = f"{filtered_dir}/assertion_{i}.py"
        with open(filename, "w") as f:
            f.write(new_code)

In [7]:
# Derive a reduced 'testbase' test for every problem and save the augmented
# record in the testbase directory.
for i in range(len(dataset['test'])):
    with open(f'HumanEval/{i}.json') as f:
        data = json.load(f)
    test = data['test']

    if i in (32, 38, 50):
        # These problems keep their complete test unchanged.
        data['testbase'] = test
    else:
        # 44, 53 and 151 keep the first two statements of check(); every other
        # problem keeps a single randomly chosen statement.
        transformer = FirstTest() if i in (44, 53, 151) else RandomTest()
        new_tree = transformer.visit(ast.parse(test))
        data['testbase'] = astunparse.unparse(new_tree)

    with open(f'{save_parsing_testbase_path}/{i}.json', 'w') as out:
        json.dump(data, out, indent=4)

In [8]:
# Re-run the complete benchmark test of every problem under coverage and store
# the percentage reported for method.py back into the testbase JSON record.
for i in range(len(dataset['test'])):
    record_path = f'{save_parsing_testbase_path}/{i}.json'
    with open(record_path) as src:
        data = json.load(src)

    # The reference implementation goes to method.py so test.py can import it.
    method = data['prompt'] + data['canonical_solution']
    method_name = data['entry_point']
    with open('method.py', 'w') as out:
        out.write(method)

    # test.py = one import per top-level function of the solution, the
    # benchmark's test code, and a main guard calling check() on the entry point.
    test = "".join(
        f"from method import {node.name}\n"
        for node in ast.parse(method).body
        if isinstance(node, ast.FunctionDef)
    )
    test = test + "\n\n" + data['test']
    test = test + f"\nif __name__ == '__main__':\n    check({method_name})"
    with open('test.py', 'w') as out:
        out.write(test)

    subprocess.run(["coverage", "run", "test.py"], check=True)
    result = subprocess.run(["coverage", "report", "-m"], capture_output=True, text=True, check=True)

    # Extract the percentage from the method.py row of the textual report.
    match = re.search(r'method.py.* (\d+%)', result.stdout)
    if match is None:
        coverage_percentage = None
        print("Could not find coverage percentage.")
    else:
        coverage_percentage = float(match.group(1)[:-1])
        # Only problems that are not fully covered are echoed to the output.
        if coverage_percentage != 100:
            print(i)
            print(result.stdout)
            print(f"Coverage: {coverage_percentage}")

    data['coverage'] = coverage_percentage
    with open(record_path, 'w') as out:
        json.dump(data, out, indent=4)

46
Name        Stmts   Miss  Cover   Missing
-----------------------------------------
method.py       8      1    88%   20
test.py         9      0   100%
-----------------------------------------
TOTAL          17      1    94%

Coverage: 88.0
59
Name        Stmts   Miss  Cover   Missing
-----------------------------------------
method.py      13      1    92%   12
test.py        10      0   100%
-----------------------------------------
TOTAL          23      1    96%

Coverage: 92.0
81
Name        Stmts   Miss  Cover   Missing
-----------------------------------------
method.py      29      3    90%   34, 42, 44
test.py        11      0   100%
-----------------------------------------
TOTAL          40      3    92%

Coverage: 90.0
89
Name        Stmts   Miss  Cover   Missing
-----------------------------------------
method.py       8      1    88%   19
test.py        12      0   100%
-----------------------------------------
TOTAL          20      1    95%

Coverage: 88.0
99
Name 

In [9]:
import shutil

# For every problem outside the hard-coded exclusion list, split check() into
# one script per top-level assertion, run each script under coverage, and keep
# the one that covers the least of the canonical solution as 'testbase'.
for i in range(len(dataset['test'])):
    if i not in [32, 38, 50, 44, 53, 151]:
        with open(f'{save_parsing_testbase_path}/{i}.json') as f:
            data = json.load(f)

        method = data['prompt'] + data['canonical_solution']
        method_name = data['entry_point']
        with open('method.py', 'w') as out:
            out.write(method)
        # with open(f'method/method_{i}.py', 'w') as out:
        #     out.write(method)

        # generate_split_test_files reads the test from a file path.
        with open('test.py', 'w') as out:
            out.write(data['test'])

        filtered_dir = 'filtered'
        # Start from an empty directory so files left behind by an interrupted
        # run cannot be mistaken for assertions of this problem.
        shutil.rmtree(filtered_dir, ignore_errors=True)
        os.makedirs(filtered_dir, exist_ok=True)

        with open(f'{filtered_dir}/method.py', 'w') as out:
            out.write(method)

        generate_split_test_files('test.py', filtered_dir, method_name, method)

        # Count the generated scripts directly. The directory holds method.py
        # plus one file per assertion, so the previous `len(listdir) - 2` bound
        # never evaluated the last assertion (and none at all when there was
        # only one).
        assertion_count = sum(
            1 for name in os.listdir(filtered_dir) if name.startswith('assertion_')
        )

        lowest_coverage = 100
        testbase = ''
        for j in range(1, assertion_count + 1):
            unit_test_path = f'{filtered_dir}/assertion_{j}.py'
            subprocess.run(["coverage", "run", unit_test_path], check=True)
            result = subprocess.run(["coverage", "report", "-m"], capture_output=True, text=True, check=True)
            match = re.search(r'method.py.* (\d+%)', result.stdout)
            # A report without a method.py row is treated as 0% coverage.
            coverage_percentage = float(match.group(1)[:-1]) if match else 0

            # `<=` means the latest assertion wins when coverage is tied.
            if coverage_percentage <= lowest_coverage:
                lowest_coverage = coverage_percentage
                # Distinct handle name: the JSON handle above is not shadowed.
                with open(unit_test_path) as unit_test:
                    testbase = unit_test.read()

        data['coverage_testbase'] = lowest_coverage
        data['testbase'] = testbase
        # with open(f'testbase/testbase_{i}.py', 'w') as out:
        #     test_ = testbase.replace('from method import', f'from method.method_{i} import')
        #     out.write(test_)

        shutil.rmtree(filtered_dir)

        with open(f'{save_parsing_testbase_path}/{i}.json', 'w') as out:
            json.dump(data, out, indent=4)

In [10]:
# Measure the coverage of the stored check()-style 'testbase' for the six
# listed problems and record it as 'coverage_testbase'.
for i in [32, 38, 50, 44, 53, 151]:
    record_path = f'{save_parsing_testbase_path}/{i}.json'
    with open(record_path) as src:
        data = json.load(src)

    method = data['prompt'] + data['canonical_solution']
    method_name = data['entry_point']
    with open('method.py', 'w') as out:
        out.write(method)
    # with open(f'method/method_{i}.py', 'w') as out:
    #     out.write(method)

    # testbase.py = one import per top-level function of the solution, the
    # stored testbase, and a main guard calling check() on the entry point.
    import_lines = [
        f"from method import {node.name}\n"
        for node in ast.parse(method).body
        if isinstance(node, ast.FunctionDef)
    ]
    test = "".join(import_lines) + "\n\n" + data['testbase']
    test += f"\nif __name__ == '__main__':\n    check({method_name})"
    print(test)
    with open('testbase.py', 'w') as out:
        out.write(test)
    # with open(f'testbase/testbase_{i}.py', 'w') as out:
    #     test_ = test.replace('from method import', f'from method.method_{i} import')
    #     out.write(test_)

    subprocess.run(["coverage", "run", "testbase.py"], check=True)
    result = subprocess.run(["coverage", "report", "-m"], capture_output=True, text=True, check=True)

    # Extract the percentage from the method.py row of the textual report.
    match = re.search(r'method.py.* (\d+%)', result.stdout)
    if not match:
        coverage_percentage = None
        print("Could not find coverage percentage.")
    else:
        coverage_percentage = float(match.group(1)[:-1])
        # Only problems that are not fully covered are echoed to the output.
        if coverage_percentage != 100:
            print(i)
            print(result.stdout)
            print(f"Coverage: {coverage_percentage}")

    data['coverage_testbase'] = coverage_percentage
    with open(record_path, 'w') as out:
        json.dump(data, out, indent=4)

from method import poly
from method import find_zero




METADATA = {}


def check(candidate):
    import math
    import random
    rng = random.Random(42)
    import copy
    for _ in range(100):
        ncoeff = 2 * rng.randint(1, 4)
        coeffs = []
        for _ in range(ncoeff):
            coeff = rng.randint(-10, 10)
            if coeff == 0:
                coeff = 1
            coeffs.append(coeff)
        solution = candidate(copy.deepcopy(coeffs))
        assert math.fabs(poly(coeffs, solution)) < 1e-4


if __name__ == '__main__':
    check(find_zero)
from method import encode_cyclic
from method import decode_cyclic




METADATA = {}


def check(candidate):
    from random import randint, choice
    import string

    letters = string.ascii_lowercase
    for _ in range(100):
        str = ''.join(choice(letters) for i in range(randint(10, 20)))
        encoded_str = encode_cyclic(str)
        assert candidate(encoded_str) == str


if __name__ == '__main__':
    check(de

In [None]:
import os
import json

# Export one Python file per problem containing the solution, the full test
# and the testbase, separated by blank lines.
os.makedirs("HumanEvalMethodAndTest", exist_ok=True)

# The number of problems is taken from the number of entries in the directory.
num_samples = len(os.listdir('HumanEvalCoverageTestBase'))

for i in range(num_samples):
    with open(f'HumanEvalCoverageTestBase/{i}.json') as f:
        data = json.load(f)
        with open(f'HumanEvalMethodAndTest/_{i}.py', 'w') as out:
            sections = (
                data['prompt'] + data['canonical_solution'],
                data['test'],
                data['testbase'],
            )
            out.write('\n\n\n'.join(sections))