**Goal**: use the HumanEval data in the reverse direction: given the correct function, predict the next assert (i.e. a test).

In [44]:
import project_path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from termcolor import colored
import re
import pathlib
from typing import List, Tuple, Dict, Any, Union

In [42]:
from human_eval.data import write_jsonl, read_problems

# Load the benchmark problems; later cells look problems up in this mapping by task id.
problems = read_problems()

In [45]:
# Destination folder for the generated prompt (.py), reference (.ref) and original (.original) files.
OUTPUT_FOLDER = "../data/prompts_collection/reverse_human_eval"

In [8]:
# Pick the first problem as a sample to inspect.
task_id = list(problems.keys())[0]
print("task id:", task_id)
c_problem = problems[task_id]
# Display the fields available on a single problem record.
c_problem.keys()

task id: HumanEval/0


dict_keys(['task_id', 'prompt', 'entry_point', 'canonical_solution', 'test'])

In [11]:
# Show the prompt text of the sample problem.
print(c_problem["prompt"])

from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """



In [12]:
# Show the reference solution stored with the sample problem.
print(c_problem["canonical_solution"])

    for idx, elem in enumerate(numbers):
        for idx2, elem2 in enumerate(numbers):
            if idx != idx2:
                distance = abs(elem - elem2)
                if distance < threshold:
                    return True

    return False



In [10]:
# Show the test code stored with the sample problem.
print(c_problem["test"])



METADATA = {
    'author': 'jt',
    'dataset': 'test'
}


def check(candidate):
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False
    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True
    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False
    assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0], 0.1) == True
    assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 1.0) == True
    assert candidate([1.1, 2.2, 3.1, 4.1, 5.1], 0.5) == False




## Generate a new prompt dataset

**Input prompt**:
1. function (without doctests)
2. assert function_name(...

**Expected Output**: 
1. completion of the test (HUMAN and MACHINE)

In [75]:
def remove_doctests(text: str) -> str:
    """Cut the text at the first `>>>` marker and close the docstring again.

    Everything from the first `>>>` onwards is discarded and replaced by a
    docstring delimiter followed by a newline. The delimiter is three single
    quotes when that sequence occurs anywhere in `text`, otherwise three
    double quotes. Text without a `>>>` marker is returned unchanged.
    """
    cut_at = text.find(">>>")
    if cut_at < 0:
        return text
    closing_quotes = "'''" if "'''" in text else '"""'
    return f"{text[:cut_at]}{closing_quotes}\n"

def remove_examples(text: str) -> str:
    """Truncate the text at the first line that mentions "example".

    The search is case-insensitive. All lines from the first matching one
    onwards are dropped and an indented docstring delimiter is appended in
    their place (three single quotes if that sequence occurs in `text`,
    three double quotes otherwise). Text that never mentions "example" is
    returned unchanged.
    """
    closing_quotes = "'''" if "'''" in text else '"""'
    rows = text.split("\n")
    cut_at = next(
        (idx for idx, row in enumerate(rows) if "example" in row.lower()),
        None,
    )
    if cut_at is None:
        return text
    kept = "\n".join(rows[:cut_at])
    return kept + f'\n    {closing_quotes}\n'

def remove_example_of_api_usage(text: str) -> str:
    """Remove the example of API usage from the docstring.

    Note that this includes examples which are not in the form of doctest,
    neither prefaced by 'example' string.

    A line is dropped when it contains ``<function_name>(`` (compared
    case-insensitively) and it is located on or after the first line that
    contains the docstring delimiter. Lines before the docstring, such as
    the `def` line itself, are always kept.

    Args:
        text: source of a function, starting with its `def` line.

    Returns:
        The text without the lines that call the function.

    Raises:
        ValueError: propagated from `get_function_name` when no function
            name can be found in `text`.
    """
    docstring_opening_sequence = "'''" if "'''" in text else '"""'
    # The lines are compared in lower case, so the needle must be lower-cased
    # too; otherwise a function name containing upper-case letters would
    # never match and its usage examples would survive.
    call_pattern = f"{get_function_name(text)}(".lower()

    # Becomes True at the first docstring delimiter and stays True afterwards.
    docstring_reached = False
    kept_lines = []
    for line in text.split("\n"):
        if docstring_opening_sequence in line:
            docstring_reached = True
        if docstring_reached and call_pattern in line.lower():
            continue
        kept_lines.append(line)
    return "\n".join(kept_lines)

def remove_evety_thing_before_def(text: str) -> str:
    """Remove everything before the first line that starts a `def`.

    Args:
        text: source code, typically imports followed by a function.

    Returns:
        The text starting at the first line whose first token is `def`
        (leading indentation of that line is preserved). When no such line
        exists the text is returned unchanged.
    """
    lines = text.split("\n")
    for i, line in enumerate(lines):
        # Compare the whole first token so that words which merely start
        # with "def" (e.g. "default", "define") are not taken for a definition.
        if line.split()[:1] == ["def"]:
            return "\n".join(lines[i:])
    # No definition found: return the input instead of an implicit None,
    # which would make the following string-processing steps fail.
    return text

def get_function_name(function_text: str) -> str:
    """Return the text between the first `def ` and the following `(`.

    Raises:
        ValueError: if `function_text` contains no `def ` followed by at
            least one character other than `(`.
    """
    name_pattern = re.compile(r"def ([^\(]+)")
    found = name_pattern.search(function_text)
    if found:
        return found.group(1)
    raise ValueError(f"Could not find function name in: {function_text}")

# Scratch call kept for manual inspection of `remove_examples`:
#print(remove_examples(problems["HumanEval/163"]["prompt"]))


# Manual check: print one prompt after dropping everything that precedes its `def` line.
print(remove_evety_thing_before_def(problems["HumanEval/64"]["prompt"]))

def vowels_count(s):
    """Write a function vowels_count which takes a string representing
    a word as input and returns the number of vowels in the string.
    Vowels in this case are 'a', 'e', 'i', 'o', 'u'. Here, 'y' is also a
    vowel, but only when it is at the end of the given word.

    Example:
    >>> vowels_count("abcde")
    2
    >>> vowels_count("ACEDY")
    3
    """



In [76]:
# Text appended after the solution; the placeholder is replaced by the function name.
COMPLETION_HOOK = "\n# Write a test for the function [[FUNCTION_NAME]] below\nassert [[FUNCTION_NAME]]("
# Number of problems whose generated input/output is echoed for inspection.
INPSECT_FIRST = 3

# Make sure the destination folder is there before any file is written.
if not os.path.exists(OUTPUT_FOLDER):
    pathlib.Path(OUTPUT_FOLDER).mkdir(parents=True, exist_ok=True)

# Build one reverse-task prompt per problem and store it with its references.
for i, task_id in enumerate(list(problems.keys())):
    print("=" * 80)
    print("task id:", task_id)
    c_problem = problems[task_id]

    # Clean the prompt step by step: keep only the function, then strip
    # doctests, "example" sections and remaining calls to the function.
    clean_prompt = remove_evety_thing_before_def(c_problem["prompt"])
    clean_prompt = remove_doctests(clean_prompt)
    clean_prompt = remove_examples(clean_prompt)
    clean_prompt = remove_example_of_api_usage(clean_prompt)

    canonical_solution = c_problem["canonical_solution"]
    function_name = get_function_name(clean_prompt)
    completion_hook = COMPLETION_HOOK.replace("[[FUNCTION_NAME]]", function_name)

    # Model input = cleaned signature/docstring + solution body + open assert.
    reverse_task_prompt = clean_prompt + canonical_solution + completion_hook

    if i < INPSECT_FIRST:
        print("INPUT: To the model: ")
        print(colored(reverse_task_prompt, 'blue'))
        print("OUTPUT: From the model: ")
        print(colored(c_problem["test"], 'magenta'))

    # All three files of a problem share the same stem; "/" is not usable in a file name.
    escaped_task_id = task_id.replace("/", "_")
    file_stem = f"{escaped_task_id}_{function_name}"

    # the prompt handed to the model
    output_file = os.path.join(OUTPUT_FOLDER, file_stem + ".py")
    with open(output_file, "w") as f:
        f.write(reverse_task_prompt)

    # the reference tests
    output_reference_file = os.path.join(OUTPUT_FOLDER, file_stem + ".ref")
    with open(output_reference_file, "w") as f:
        f.write(c_problem["test"])

    # the original, unmodified prompt together with its solution
    output_original_prompt_file = os.path.join(OUTPUT_FOLDER, file_stem + ".original")
    with open(output_original_prompt_file, "w") as f:
        f.write(c_problem["prompt"] + c_problem["canonical_solution"])

task id: HumanEval/0
INPUT: To the model: 
[34mdef has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    """
    for idx, elem in enumerate(numbers):
        for idx2, elem2 in enumerate(numbers):
            if idx != idx2:
                distance = abs(elem - elem2)
                if distance < threshold:
                    return True

    return False

# Write a test for the function has_close_elements below
assert has_close_elements([0m
OUTPUT: From the model: 
[35m

METADATA = {
    'author': 'jt',
    'dataset': 'test'
}


def check(candidate):
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False
    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True
    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False
    assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0

# Test with coverage

A sandbox is needed to run the tests.