# Code Generation and Evaluation

In [14]:
from human_eval.data import write_jsonl, read_problems
from human_eval.evaluation import evaluate_functional_correctness
import logging
import sys
import time, datetime
from langchain_community.llms import Replicate
import re

%load_ext autoreload
%autoreload 2

In [14]:
def remove_triple_backticks_at_edges(text: str) -> str:
    """
    Removes a Markdown code fence (` ``` ` or ` ```python `) if it appears at the
    beginning or the end of the text.

    Only the fence itself and the blank space next to it are removed. The
    indentation of the first code line is preserved, because leading whitespace
    is significant in Python (e.g. when the fenced text is a function body).
    A closing fence is recognised even when it is followed by trailing
    whitespace or a final newline.

    :param text: The input text as a string.
    :return: A string with triple backticks removed from the edges. Text without
             fences is returned unchanged.
    """
    # Longest prefix first, so "```python" is not half-consumed by the bare fence.
    opening_fences = ("```python", "```")
    closing_fence = "```"

    for fence in opening_fences:
        if text.startswith(fence):
            rest = text[len(fence):]
            stripped = rest.lstrip()
            leading_ws = rest[: len(rest) - len(stripped)]
            newline_at = leading_ws.rfind("\n")
            if stripped and newline_at != -1:
                # Drop the remainder of the fence line and any blank lines, but
                # keep the whitespace that indents the first real code line.
                text = rest[newline_at + 1:]
            else:
                # Code follows on the fence line itself (or nothing follows).
                text = stripped
            break  # at most one opening fence is removed

    trimmed = text.rstrip()
    if trimmed.endswith(closing_fence):
        # Remove the closing fence plus the whitespace around it.
        text = trimmed[: -len(closing_fence)].rstrip()
    return text


def add_import(text: str) -> str:
    """Prepend ``from typing import List`` to code that defines a function.

    Surrounding whitespace is stripped from ``text`` first. The import line
    (followed by one blank line) is added only when the stripped text contains
    ``'def '`` and does not already contain that exact import statement;
    otherwise the stripped text is returned as-is.

    :param text: Code produced by the model.
    :return: The stripped code, with the import prepended when needed.
    """
    typing_import = 'from typing import List'
    code = text.strip()
    needs_import = 'def ' in code and typing_import not in code
    return f'{typing_import}\n\n{code}' if needs_import else code

# Quick manual check: the sample contains 'def ' and no typing import, so
# add_import() should print it with 'from typing import List' prepended.
string = 'def my_func(input):\n    return output'
print(add_import(string))

from typing import List

def my_func(input):
    return output


In [None]:
def is_full_program(code):
    """Unfinished placeholder: split ``code`` into lines and return ``None``.

    The line list is computed but not used yet, so the function currently has
    no effect beyond requiring that ``code`` provides ``splitlines()``.
    """
    lines = code.splitlines()  # computed for future use; nothing reads it yet
    return None

In [12]:
def classify_code(code_str: str) -> bool:
    '''
    Determine if input string looks like a complete Python function.

    Heuristic:
    - Blank lines, comment lines (``#...``) and lines starting with
      ``import `` are ignored.
    - The first remaining line that starts with ``def `` (after stripping
      leading whitespace) is taken as the function header.
    - The function is considered to have a body if ANY later remaining line
      starts with exactly two spaces, exactly four spaces, or one tab,
      immediately followed by a non-whitespace character.

    :param code_str: Source text to inspect.
    :return: True if a ``def`` line and an indented line after it were both
             found, False otherwise (including when there is no ``def`` line).
    '''
    # Indentation of exactly 2 spaces, 4 spaces or a tab, then real content.
    body_line = re.compile(r'^(?:  |    |\t)\S')

    # Keep only lines that can be a header or a body line.
    relevant_lines = [
        line
        for line in code_str.splitlines()
        if line.strip() and not line.strip().startswith(('#', 'import '))
    ]

    for i, line in enumerate(relevant_lines):
        if line.lstrip().startswith('def '):
            # Only the first def is examined; its result decides the answer.
            return any(body_line.match(later) for later in relevant_lines[i + 1:])

    return False

In [15]:
# Manual checks for classify_code(). Each snippet is printed, followed by the
# classification (True = "def" line plus an indented body line was found).

# Test 1: full function preceded by an import and a comment
snippet1 = """import math

# This is a comment

def my_function(x):
    \"\"\"Docstring\"\"\"
    return math.sqrt(x)
"""
print(snippet1)
print(classify_code(snippet1))  # Expected: True

# Test 2: only a body, no def line
snippet2 = """x_squared = x * x
return x_squared
"""
print(snippet2)
print(classify_code(snippet2))  # Expected: False

# Test 3: trick case, "abcdef " is not "def "
snippet3 = """
abcdef 
y = 5
"""
print(snippet3)
print(classify_code(snippet3))  # Expected: False (no real 'def ' line)

# Test 4: has 'def ', but no indented body
snippet4 = """
def my_func(x):
y = x + 1
"""
print(snippet4)
print(classify_code(snippet4))  # Expected: False (no indentation after def)

# Test 5: def line with nothing after it
snippet5 = """
def my_func(x):
"""
print(snippet5)
print(classify_code(snippet5))  # Expected: False (no body at all)

# Test 6: body indented with two spaces
snippet6 = """
def my_func(x):
  return x
"""
print(snippet6)
print(classify_code(snippet6))  # Expected: True (2-space indentation is accepted)

# Test 7: body indented with four spaces
snippet7 = """
def my_func(x):
    return x
"""
print(snippet7)
print(classify_code(snippet7))  # Expected: True (4-space indentation is accepted)

# Test 8: NOTE(review): as written this is identical to snippet7; it may have
# been meant as the tab-indented case - confirm against the original notebook.
snippet8 = """
def my_func(x):
    return x
"""
print(snippet8)
print(classify_code(snippet8))  # Expected: True

# Test 9: body indented with seven spaces, which the pattern does not accept
snippet9 = """
def my_func(x):
       return x
"""
print(snippet9)
print(classify_code(snippet9))  # Expected: False (indent is not 2 spaces, 4 spaces or a tab)

import math

# This is a comment

def my_function(x):
    """Docstring"""
    return math.sqrt(x)

True
x_squared = x * x
return x_squared

False

abcdef 
y = 5

False

def my_func(x):
y = x + 1

False

def my_func(x):

False

def my_func(x):
  return x

True

def my_func(x):
    return x

True

def my_func(x):
    return x

True

def my_func(x):
       return x

False


In [3]:
# Load the HumanEval problems; the printed type shows this is a dict keyed by
# task id, where each value is itself a dict of named fields.
tasks = read_problems()
print(f'Type of tasks: {type(tasks)}\n')

# Dump every task: its id, then each field name followed by the field value,
# with a 100-character '=' rule between tasks.
for k,v in tasks.items():
    print(f'task_id: {k}')
    for k2, v2 in v.items():
        print(f'\n{k2}:\n{v2}')
    print('\n' + '='*100 + '\n')

Type of tasks: <class 'dict'>

task_id: HumanEval/0

task_id:
HumanEval/0

prompt:
from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """


entry_point:
has_close_elements

canonical_solution:
    for idx, elem in enumerate(numbers):
        for idx2, elem2 in enumerate(numbers):
            if idx != idx2:
                distance = abs(elem - elem2)
                if distance < threshold:
                    return True

    return False


test:


METADATA = {
    'author': 'jt',
    'dataset': 'test'
}


def check(candidate):
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False
    assert candidate([1.0, 2.0, 5.9, 

In [4]:
# Candidate models served through Replicate. Every model uses the same
# sampling settings: temperature 0.95, top_p 1, and at most 512 new tokens.
# Each call gets its own kwargs dict, so the settings can be tuned per model.
llama3_8b = Replicate(
    model        = 'meta/meta-llama-3-8b-instruct',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

solar_10b = Replicate(
    model        = 'nateraw/nous-hermes-2-solar-10.7b:1e918ab6ffd5872c21fba21a511f344fd12ac0edff6302c9cd260395c7707ff4',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

qwen = Replicate(
    model        = 'lucataco/qwen1.5-7b:f85bec5b21ba0860e0f200be6ef5af9d5a65b974b9f99e36eb036d21eab884de',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

phixtral_7b = Replicate(
    model        = 'lucataco/phixtral-2x2_8:25d7b93bb0ec9e8dd94fcc69adc786759243a5628ba5574bd9609d6abafe57cf',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

# Two pinned versions of the same mamba-2.8b model (different version hashes).
mamba = Replicate(
    model        = 'adirik/mamba-2.8b:864ed9237f28ff2e8eb55ffc07e9f0d7857888cacb9c32a92081f24f49a861db',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

mamba2 = Replicate(
    model        = 'adirik/mamba-2.8b:571abd73203a3dd3d7071f1c0380a3502c427aba98a2fb5edf2f7cfdeea1676c',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

# Model used by the generation cell below; switch it here.
current_model = solar_10b
# Smoke test. Calling the LLM object directly is deprecated in LangChain (it
# emits the warn_deprecated message); invoke() is the supported equivalent and
# returns the generated text as a string.
current_model.invoke('Hello. How are you?')

  warn_deprecated(


'Hello! I am quite well, thank you for asking. I am always ready to provide assistance and engage in meaningful conversations.'

In [24]:
# Prompt templates tried during experimentation. Only `completion_prompt` is
# used by the generation loop; NOTE(review): `prompt_llama` and
# `full_func_prompt` are not referenced in the visible cells - confirm before
# removing them.

# Variant asking for the starter code plus its completion (full function).
prompt_llama = '''
1. Act as an experienced Python software developer and complete the following starter code.
2. Without modifying the starter code, concatenate the starter code and the completion.
3. Double-check to ensure that the indentation and syntax in the output code comply with Python language rules and that the code is runnable.
4. Do not output irrelevant human phrases like: "Here is the complete code," triple backticks, or any other unnecessary symbols before or after the code.
5. Output only the unmodified starter code concatenated with its completion, and nothing else:\n
'''

# Second full-function variant, ending with a "Starter code:" header.
full_func_prompt = '''
1. Complete the following starter code as code that can be run directly by a Python interpreter.
2. Without modifying the starter code, concatenate it with the completion.
3. Output only the code and no irrelevant language. Do not say "completion:" or "Here is a completion" or anything like that. Do not use tripple backticks or any other irrelevant symbols. The output must be only runnable Python code.
4. Do not provide any example usage or any other code after the function. Output only the function and nothing after it.

Starter code:\n
'''

# First of three assignments to `completion_prompt`; this value and the next
# one are overwritten below, so only the last assignment takes effect.
completion_prompt = '''
1. Complete the following starter code using the correct Python indentation and syntax.
2. Output only the completion and nothing after it.
3. Do not include any irrelevant language, for example do not say "Completion:" or "Here is a completion" or anything like that. Do not use tripple backticks or any other irrelevant symbols. The output must be only runnable Python code.
4. Do not provide any example usage or any other code after the completion.

Starter code:
'''

# Minimal variant (also overwritten below).
completion_prompt = 'Complete the following Python code:\n'

# Effective prompt: asks the model for only the completion of the starter
# code, without repeating the starter code itself.
completion_prompt = '''1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:\n'''
# Show the prompt with a dummy continuation to check that it ends in a newline.
print(completion_prompt + 'Next line')

1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:
Next line


In [25]:
# Generate `num_samples_per_task` completions for every task with the model
# selected in `current_model`, clean each one up, and collect the results as
# {'task_id': ..., 'completion': ...} records in `completions`.
num_samples_per_task = 1
completions = []

for task_id, task_body in tasks.items():
    print(task_id)
    # Debug aid: uncomment to stop early after the first few tasks.
    #if task_id == 'HumanEval/10':
    #        break
    for i in range(num_samples_per_task):
        start_time  = time.time()
        full_prompt = completion_prompt + task_body["prompt"]
        #full_prompt = task_body["prompt"]
        try:
            # invoke() replaces the deprecated direct call on the LLM object.
            completion = current_model.invoke(full_prompt)
        except Exception as e:
            # Best effort: keep going and record the failure text as the
            # completion, so every task still gets an entry.
            completion = f"Error when model predicted a completion:\n{e}"

        # Post-process: drop Markdown fences, then restore the typing import.
        completion = remove_triple_backticks_at_edges(completion.strip())
        completion = add_import(completion)
        completions.append( {'task_id': task_id, 'completion': completion} )
        print('PROMPT:\n', full_prompt, '\n', '='*75, '\n', sep='')
        print('COMPLETION:\n', completion, sep='')
        print(f"\nTime elapsed: {(time.time() - start_time):.4f} seconds\n", '\n', '='*75, '\n', sep='')

HumanEval/0
PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:
from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """


COMPLETION:
from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    for i in range(len(numbers) - 1):
        difference = abs(numbers

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:
from typing import List


def filter_by_substring(strings: List[str], substring: str) -> List[str]:
    """ Filter an input list of strings only for ones that contain given substring
    >>> filter_by_substring([], 'a')
    []
    >>> filter_by_substring(['abc', 'bacd', 'cde', 'array'], 'a')
    ['abc', 'bacd', 'array']
    """


COMPLETION:
from typing import List

def filter_by_substring(strings: List[str], substring: str) -> List[str]:
    return [s for s in strings if substring in s]

Time elapsed: 1.5668 seconds


HumanEval/8
PROMP

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:
from typing import List


def all_prefixes(string: str) -> List[str]:
    """ Return list of all prefixes from shortest to longest of the input string
    >>> all_prefixes('abc')
    ['a', 'ab', 'abc']
    """


COMPLETION:
from typing import List

def all_prefixes(string: str) -> List[str]:
    prefixes = []
    for i in range(len(string)):
        prefixes.append(string[:i+1])
    return prefixes

Time elapsed: 2.2460 seconds


HumanEval/15
PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. 

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:
from typing import List


def rescale_to_unit(numbers: List[float]) -> List[float]:
    """ Given list of numbers (of at least two elements), apply a linear transform to that list,
    such that the smallest number will become 0 and the largest will become 1
    >>> rescale_to_unit([1.0, 2.0, 3.0, 4.0, 5.0])
    [0.0, 0.25, 0.5, 0.75, 1.0]
    """


COMPLETION:
from typing import List

def rescale_to_unit(numbers: List[float]) -> List[float]:
    min_num = min(numbers)
    max_num = max(numbers)
    return [(n - min_num) / (max_num - mi

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:
from typing import List


def concatenate(strings: List[str]) -> str:
    """ Concatenate list of strings into a single string
    >>> concatenate([])
    ''
    >>> concatenate(['a', 'b', 'c'])
    'abc'
    """


COMPLETION:
from typing import List

def concatenate(strings: List[str]) -> str:
    return ''.join(strings)

Time elapsed: 1.6923 seconds


HumanEval/29
PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, 

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:


def max_element(l: list):
    """Return maximum element in the list.
    >>> max_element([1, 2, 3])
    3
    >>> max_element([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])
    123
    """


COMPLETION:
from typing import List

def max_element(l: list):
    return max(l)

Time elapsed: 0.9508 seconds


HumanEval/36
PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:


def incr_list(l: list):
    """Return list with elements incremented by 1.
    >>> incr_list([1, 2, 3])
    [2, 3, 4]
    >>> incr_list([5, 3, 5, 2, 3, 3, 9, 0, 123])
    [6, 4, 6, 3, 4, 4, 10, 1, 124]
    """


COMPLETION:
from typing import List

def incr_list(l: list):
    return [i + 1 for i in l]

Time elapsed: 1.6821 seconds


HumanEval/43
PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, an

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:


def encode_shift(s: str):
    """
    returns encoded string by shifting every character by 5 in the alphabet.
    """
    return "".join([chr(((ord(ch) + 5 - ord("a")) % 26) + ord("a")) for ch in s])


def decode_shift(s: str):
    """
    takes as input string encoded with encode_shift function. Returns decoded string.
    """


COMPLETION:
from typing import List

def decode_shift(s: str):
    """
    takes as input string encoded with encode_shift function. Returns decoded string.
    """
    return "".join([chr(((ord(ch) - 5 - or

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:


def monotonic(l: list):
    """Return True is list elements are monotonically increasing or decreasing.
    >>> monotonic([1, 2, 4, 20])
    True
    >>> monotonic([1, 20, 4, 10])
    False
    >>> monotonic([4, 1, 0, -10])
    True
    """


COMPLETION:
from typing import List

def monotonic(l: list):
    return all(l[i] <= l[i+1] for i in range(len(l) - 1))

Time elapsed: 2.3899 seconds


HumanEval/58
PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def circular_shift(x, shift):
    """Circular shift the digits of the integer x, shift the digits right by shift
    and return the result as a string.
    If shift > number of digits, return digits reversed.
    >>> circular_shift(12, 1)
    "21"
    >>> circular_shift(12, 2)
    "12"
    """


COMPLETION:
from typing import List

def circular_shift(x, shift):
    x = str(abs(x))
    n = len(x)
    shift %= n   
    return x[shift:] + x[:shift]

Time elapsed: 2.3796 seconds


HumanEval/66
PROMPT:
1. Complete the following starter code

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def triangle_area(a, b, c):
    '''
    Given the lengths of the three sides of a triangle. Return the area of
    the triangle rounded to 2 decimal points if the three sides form a valid triangle. 
    Otherwise return -1
    Three sides make a valid triangle when the sum of any two sides is greater 
    than the third side.
    Example:
    triangle_area(3, 4, 5) == 6.00
    triangle_area(1, 2, 10) == -1
    '''


COMPLETION:
from typing import List

def triangle_area(a, b, c):
    if a + b > c and a + c > b and b + c > a:
        s 

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def iscube(a):
    '''
    Write a function that takes an integer a and returns True 
    if this ingeger is a cube of some integer number.
    Note: you may assume the input is always valid.
    Examples:
    iscube(1) ==> True
    iscube(2) ==> False
    iscube(-1) ==> True
    iscube(64) ==> True
    iscube(0) ==> True
    iscube(180) ==> False
    '''


COMPLETION:
from typing import List

def iscube(a):
    return a**(1/3).is_integer()

Time elapsed: 1.0049 seconds


HumanEval/78
PROMPT:
1. Complete the following starter code usin

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def prime_length(string):
    """Write a function that takes a string and returns True if the string
    length is a prime number or False otherwise
    Examples
    prime_length('Hello') == True
    prime_length('abcdcba') == True
    prime_length('kittens') == True
    prime_length('orange') == False
    """


COMPLETION:
from typing import List

def prime_length(s: str) -> bool:
    return len(s) > 1 and any(not s.count(c) for c in set(s))

# Test cases
print(prime_length('Hello'))  # True
print(prime_length('abcdcba'))  # True
prin

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def encrypt(s):
    """Create a function encrypt that takes a string as an argument and
    returns a string encrypted with the alphabet being rotated. 
    The alphabet should be rotated in a manner such that the letters 
    shift down by two multiplied to two places.
    For example:
    encrypt('hi') returns 'lm'
    encrypt('asdfghjkl') returns 'ewhjklnop'
    encrypt('gf') returns 'kj'
    encrypt('et') returns 'ix'
    """


COMPLETION:
from typing import List

def encrypt(s):
    result = ''
    for c in s:
        if c.isalpha

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def count_up_to(n):
    """Implement a function that takes an non-negative integer and returns an array of the first n
    integers that are prime numbers and less than n.
    for example:
    count_up_to(5) => [2,3]
    count_up_to(11) => [2,3,5,7]
    count_up_to(0) => []
    count_up_to(20) => [2,3,5,7,11,13,17,19]
    count_up_to(1) => []
    count_up_to(18) => [2,3,5,7,11,13,17]
    """


COMPLETION:
from typing import List

def count_up_to(n):
    prime_numbers = []
    for i in range(2, n):
        for j in range(2, i):
        

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def rounded_avg(n, m):
    """You are given two positive integers n and m, and your task is to compute the
    average of the integers from n through m (including n and m). 
    Round the answer to the nearest integer and convert that to binary.
    If n is greater than m, return -1.
    Example:
    rounded_avg(1, 5) => "0b11"
    rounded_avg(7, 5) => -1
    rounded_avg(10, 20) => "0b1111"
    rounded_avg(20, 33) => "0b11010"
    """


COMPLETION:
rounded_avg = lambda n, m: bin(round((n + m) / 2))[2:] if n <= m else -1

Time elapsed: 

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def exchange(lst1, lst2):
    """In this problem, you will implement a function that takes two lists of numbers,
    and determines whether it is possible to perform an exchange of elements
    between them to make lst1 a list of only even numbers.
    There is no limit on the number of exchanged elements between lst1 and lst2.
    If it is possible to exchange elements between the lst1 and lst2 to make
    all the elements of lst1 to be even, return "YES".
    Otherwise, return "NO".
    For example:
    exchange([1, 2, 3, 4], [1, 2, 

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def sort_array(arr):
    """
    In this Kata, you have to sort an array of non-negative integers according to
    number of ones in their binary representation in ascending order.
    For similar number of ones, sort based on decimal value.

    It must be implemented like this:
    >>> sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5]
    >>> sort_array([-2, -3, -4, -5, -6]) == [-6, -5, -4, -3, -2]
    >>> sort_array([1, 0, 2, 3, 4]) [0, 1, 2, 3, 4]
    """


COMPLETION:
from typing import List

def sort_array(arr):
    def count_ones(n

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def add_elements(arr, k):
    """
    Given a non-empty array of integers arr and an integer k, return
    the sum of the elements with at most two digits from the first k elements of arr.

    Example:

        Input: arr = [111,21,3,4000,5,6,7,8,9], k = 4
        Output: 24 # sum of 21 + 3

    Constraints:
        1. 1 <= len(arr) <= 100
        2. 1 <= k <= len(arr)
    """


COMPLETION:
from typing import List

def add_elements(arr, k):
    total = 0
    for i in range(k):
        if len(str(arr[i])) <= 2:
            total += arr

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def prod_signs(arr):
    """
    You are given an array arr of integers and you need to return
    sum of magnitudes of integers multiplied by product of all signs
    of each number in the array, represented by 1, -1 or 0.
    Note: return None for empty arr.

    Example:
    >>> prod_signs([1, 2, 2, -4]) == -9
    >>> prod_signs([0, 1]) == 0
    >>> prod_signs([]) == None
    """


COMPLETION:
final_prod_signs = 0

for num in arr:
    if num > 0:
        # 1
        final_prod_signs += abs(num)
    elif num < 0:
        # -1
       

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:


def sum_squares(lst):
    """You are given a list of numbers.
    You need to return the sum of squared numbers in the given list,
    round each element in the list to the upper int(Ceiling) first.
    Examples:
    For lst = [1,2,3] the output should be 14
    For lst = [1,4,9] the output should be 98
    For lst = [1,3,5,7] the output should be 84
    For lst = [1.4,4.2,0] the output should be 29
    For lst = [-2.4,1,1] the output should be 6
    

    """


COMPLETION:
from typing import List

def sum_squares(lst):
    return sum

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def special_factorial(n):
    """The Brazilian factorial is defined as:
    brazilian_factorial(n) = n! * (n-1)! * (n-2)! * ... * 1!
    where n > 0

    For example:
    >>> special_factorial(4)
    288

    The function will receive an integer as input and should return the special
    factorial of this integer.
    """


COMPLETION:
from typing import List

def special_factorial(n):
    if n <= 1:
        return 1
    return n * special_factorial(n - 1)

Time elapsed: 1.6229 seconds


HumanEval/140
PROMPT:
1. Complete the following 

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def order_by_points(nums):
    """
    Write a function which sorts the given list of integers
    in ascending order according to the sum of their digits.
    Note: if there are several items with similar sum of their digits,
    order them based on their index in original list.

    For example:
    >>> order_by_points([1, 11, -1, -11, -12]) == [-1, -11, 1, -12, 11]
    >>> order_by_points([]) == []
    """


COMPLETION:
from typing import List

def order_by_points(nums):
    return sorted(nums, key=sum)

Time elapsed: 1.6219 seconds

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def double_the_difference(lst):
    '''
    Given a list of numbers, return the sum of squares of the numbers
    in the list that are odd. Ignore numbers that are negative or not integers.
    
    double_the_difference([1, 3, 2, 0]) == 1 + 9 + 0 + 0 = 10
    double_the_difference([-1, -2, 0]) == 0
    double_the_difference([9, -2]) == 81
    double_the_difference([0]) == 0  
   
    If the input list is empty, return 0.
    '''


COMPLETION:
from typing import List

def double_the_difference(lst):
    return sum(x ** 2 if x != 0 and 

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def int_to_mini_roman(number):
    """
    Given a positive integer, obtain its roman numeral equivalent as a string,
    and return it in lowercase.
    Restrictions: 1 <= num <= 1000

    Examples:
    >>> int_to_mini_roman(19) == 'xix'
    >>> int_to_mini_roman(152) == 'clii'
    >>> int_to_mini_roman(426) == 'cdxxvi'
    """


COMPLETION:
from typing import List

def int_to_mini_roman(number):
    values = [1, 10, 5, 10, 10, 50, 50, 100, 500, 500, 1000]
    symbols = ['i', 'x', 'v', 'l', 'xl', 'c', 'xc', 'c', 'cm', 'm', 'm']

    r

PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Output only the completion without the starter code, with no additional text, no explanations, and no extra symbols such as backticks.
3. Do not show “Completion:” or any similar heading, and do not provide example usage.
4. The final output must be Python code ready to run as-is — no other content before or after.
5. Below is the starter code to be completed:

def string_to_md5(text):
    """
    Given a string 'text', return its md5 hash equivalent string.
    If 'text' is an empty string, return None.

    >>> string_to_md5('Hello world') == '3e25960a79dbc69b674cd4ec67a72c62'
    """


COMPLETION:
from typing import List

from hashlib import md5

def string_to_md5(text):
    if not text:
        return None
    return md5(text.encode()).hexdigest()

Time elapsed: 1.6350 seconds


HumanEval/163
PROMPT:
1. Complete the following starter code using correct Python indentation and syntax.
2. Ou

In [26]:
# Nickname of the model whose completions are being saved; it becomes part of
# the samples file name. Nicknames used for other runs:
#   'llama3_8B', 'mistral_7B', 'gemma_7B', 'phixtral_8B', 'gemma_2B', 'phixtral'
model_nickname = 'solar_10B'

# %f yields six microsecond digits; the slice drops the last two so the stamp
# ends in four fractional-second digits.
time_stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-2]
file_name = 'logs/{}_samples_{}.jsonl'.format(model_nickname, time_stamp)

# Persist the collected completions as JSON lines under logs/.
write_jsonl(file_name, completions)

In [32]:
# Echo the path of the samples file so it can be copied or reused later.
file_name

'logs/solar_10B_samples_20250110_020727_0996.jsonl'

In [4]:
# if the above cell has not been run
#file_name = 'logs/yi_6B_samples_20241127_155444_8511.jsonl'

In [31]:
# Run the test suites against the saved samples and report pass@k for k=1.
# NOTE(review): the `mode='human_eval_with_prompt'` keyword is not defined anywhere in this
# notebook — presumably it comes from a locally modified human_eval package; the
# commented-out call below is the earlier `use_prompt=True` form. Confirm against the package.
#evaluate_functional_correctness(file_name, k=[1], use_prompt=True)
evaluate_functional_correctness(file_name, k=[1], mode='human_eval_with_prompt')

Reading samples...


164it [00:00, 5550.89it/s]


Running test suites...


100%|██████████████████████████████████████████████████████| 164/164 [00:13<00:00, 12.17it/s]


Writing results to logs/solar_10B_samples_20250110_020727_0996.jsonl_results.jsonl...


100%|███████████████████████████████████████████████████| 164/164 [00:00<00:00, 70204.72it/s]


{'pass@1': 0.29878048780487804}

In [29]:
# Inspect the raw completion text stored for the first sample (no post-processing applied).
completions[0]['completion']

'```python\nfrom typing import List\n\n\ndef has_close_elements(numbers: List[float], threshold: float) -> bool:\n    """ Check if in given list of numbers, are any two numbers closer to each other than\n    given threshold.\n    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)\n    False\n    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)\n    True\n    """\n    for i in range(len(numbers) - 1):\n        if abs(numbers[i] - numbers[i + 1]) < threshold:\n            return True\n    return False\n\n# Output the complete code\n```'

In [35]:
# Show every stored completion with the edge backticks stripped, each followed
# by a blank line, a 100-character '=' rule and another blank line.
separator = '\n' + '=' * 100 + '\n'
for sample in completions:
    cleaned = remove_triple_backticks_at_edges(sample['completion'])
    print(cleaned)
    print(separator)

from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """
    for i in range(len(numbers) - 1):
        if abs(numbers[i] - numbers[i + 1]) < threshold:
            return True
    return False

# Output the complete code


from typing import List

def separate_paren_groups(paren_string: str) -> List[str]:
    """ Input to this function is a string containing multiple groups of nested parentheses. Your goal is to
    separate those groups into separate strings and return the list of those.
    Separate groups are balanced (each open brace is properly closed) and not nested within each other.
    Ignore any spaces in the input string.
    >>> separate_paren_groups('( ) (( )) (( )( ))')
    ['()', '(())

In [34]:
# Scratch check: str.strip() trims whitespace only at the two outer edges of the whole
# string, so the indentation of the second line is preserved in the printed result.
s = '    code\n    code'
print(s.strip())

code
    code


# Appendix

## Models Tried, but Found Not Useful

In [None]:
def _sampling_kwargs(max_new_tokens):
    """Return a fresh dict of the sampling settings shared by the models below.

    :param max_new_tokens: Value stored under the "max_new_tokens" key.
    :return: A new dict on every call, so no two models share one object.
    """
    return {"temperature": 0.95, "top_p": 1, "max_new_tokens": max_new_tokens}


# Each entry pins a specific Replicate model version ("owner/name:version_hash").
mistral_7b = Replicate(
    model='mistralai/mistral-7b-v0.1:3e8a0fb6d7812ce30701ba597e5080689bef8a013e5c6a724fafb108cc2426a0',
    model_kwargs=_sampling_kwargs(1000),
)

gemma_7b = Replicate(
    model='google-deepmind/gemma-7b-it:2790a695e5dcae15506138cc4718d1106d0d475e6dca4b1d43f42414647993d5',
    model_kwargs=_sampling_kwargs(512),
)

gemma_2b = Replicate(
    model='google-deepmind/gemma-2b-it:dff94eaf770e1fc211e425a50b51baa8e4cac6c39ef074681f9e39d778773626',
    model_kwargs=_sampling_kwargs(512),
)

gptj_6b = Replicate(
    model='replicate/gpt-j-6b:b3546aeec6c9891f0dd9929c2d3bedbf013c12e02e7dd0346af09c37e008c827',
    model_kwargs=_sampling_kwargs(512),
)

yi_6b = Replicate(
    model='01-ai/yi-6b:d302e64fad6b4d85d47b3d1ed569b06107504f5717ee1ec12136987bec1e94f1',
    model_kwargs=_sampling_kwargs(512),
)

phi = Replicate(
    model='lucataco/phi-2:740618b0c24c0ea4ce5f49fcfef02fcd0bdd6a9f1b0c5e7c02ad78e9b3b190a6',
    model_kwargs=_sampling_kwargs(512),
)

## Using Replicate Without LangChain

__This approach works too, but the output is a list of strings. Joining with a space introduces indentation issues; joining on an empty string seems to work better. But is there a risk of not joining correctly?__

In [61]:
import replicate

model         = "meta/meta-llama-3-8b-instruct"
model2        = "meta/meta-llama-3-8b"
# Instruction text placed in front of every task prompt. The text ends with a
# single newline so the starter code begins on its own line.
prompt_prefix = '''
Act as an experienced Python software developer. Complete the following starter code.
Concatenate the starter code and the completion. Do not modify the starter code when you do this.
Double check and make sure that the indentation and syntax in the output code comply with the Python language rules and that the code is runnable.
Do not output irrelevant human phrases like: "Here is the complete code.", triple backticks or any other irrelevant symbols before or after the code.
Output only the unmodified starter code + its completion and nothing else:
'''
client        = replicate.Client()
completions   = []   # one list of text chunks per processed task

for task_id, task_body in tasks.items():
    # Trial run: stop as soon as HumanEval/15 is reached.
    if task_id == 'HumanEval/15':
        break
    start_time = time.time()
    input_params = {
        "prompt":         prompt_prefix + task_body["prompt"],
        "temperature":    0.5,
        "top_p":          1.0,
        "max_new_tokens": 1000,
    }
    # Materialize the result once. If the client hands back a one-shot iterator,
    # joining it for display would otherwise leave nothing to store afterwards.
    output = list(client.run(model, input=input_params))
    print(''.join(output))
    completions.append(output)
    print(f"\nTime elapsed: {(time.time() - start_time):.4f} seconds\n")
    print('\n', '='*100, '\n')

2024-11-15 18:45:16,954 | INFO | HTTP Request: POST https://api.replicate.com/v1/models/meta/meta-llama-3-8b-instruct/predictions "HTTP/1.1 201 Created"


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    numbers.sort()
    for i in range(len(numbers) - 1):
        if abs(numbers[i] - numbers[i + 1]) <= threshold:
            return True
    return False

Time elapsed: 1.2501 seconds



2024-11-15 18:45:18,914 | INFO | HTTP Request: POST https://api.replicate.com/v1/models/meta/meta-llama-3-8b-instruct/predictions "HTTP/1.1 201 Created"


```
from typing import List


def separate_paren_groups(paren_string: str) -> List[str]:
    result = []
    stack = []
    temp = ""
    for char in paren_string:
        if char == " ":
            continue
        if char == "(":
            stack.append(char)
            temp += char
        elif char == ")":
            stack.pop()
            temp += char
            if not stack:
                result.append(temp)
   

In [64]:
# Replay the stored completions: the index, then the text chunks glued back
# together with no separator, then a rule between entries.
for idx, chunks in enumerate(completions):
    text = ''.join(chunks)
    print(idx, '\n', text)
    print('\n', '='*100, '\n')

0 
 

def has_close_elements(numbers: List[float], threshold: float) -> bool:
    numbers.sort()
    for i in range(len(numbers) - 1):
        if abs(numbers[i] - numbers[i + 1]) <= threshold:
            return True
    return False


1 
 

```
from typing import List


def separate_paren_groups(paren_string: str) -> List[str]:
    result = []
    stack = []
    temp = ""
    for char in paren_string:
        if char == " ":
            continue
        if char == "(":
            stack.append(char)
            temp += char
        elif char == ")":
            stack.pop()
            temp += char
            if not stack:
                result.append(temp)
                temp = ""
    return result
```


2 
 

def truncate_number(number: float) -> float:
    """ Given a positive floating point number, it can be decomposed into
    and integer part (largest integer smaller than given number) and decimals
    (leftover part always smaller than 1).

    Return the decimal part of the 

In [55]:
# Print the id and starter prompt of each task, stopping once this id is reached.
stop_at = 'HumanEval/15'
for task_id in tasks:
    task_body = tasks[task_id]
    if task_id == stop_at:
        break
    print(task_id, task_body['prompt'], sep='\n')
    print('\n', '='*100, '\n')

HumanEval/0
from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """



HumanEval/1
from typing import List


def separate_paren_groups(paren_string: str) -> List[str]:
    """ Input to this function is a string containing multiple groups of nested parentheses. Your goal is to
    separate those group into separate strings and return the list of those.
    Separate groups are balanced (each open brace is properly closed) and not nested within each other
    Ignore any spaces in the input string.
    >>> separate_paren_groups('( ) (( )) (( )( ))')
    ['()', '(())', '(()())']
    """



HumanEval/2


def truncate_number(number: float) -> float:
    """ Given a positive floating point number, it can be

## How To Use Logging

In [2]:
# in case I need logging
# ATTENTION: introduces additional logging when calling Replicate models which slows down the process
import os

# Configure the root logger. Its INFO level filters records before any handler
# sees them, so DEBUG messages are dropped even though both handlers accept DEBUG.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')

# Re-running this cell would otherwise stack another pair of handlers on the root
# logger and every message would be emitted once per run. Drop the handlers a
# previous run installed (recognized by name) before adding fresh ones.
_CELL_HANDLER_NAMES = ('notebook_stdout', 'notebook_file')
for stale in [h for h in logger.handlers if h.get_name() in _CELL_HANDLER_NAMES]:
    logger.removeHandler(stale)
    stale.close()

stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.set_name('notebook_stdout')
stdout_handler.setLevel(logging.DEBUG)
stdout_handler.setFormatter(formatter)

# FileHandler opens the file immediately and fails if the directory is missing.
os.makedirs('logs', exist_ok=True)
file_handler = logging.FileHandler('logs/logs.log')
file_handler.set_name('notebook_file')
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(formatter)


logger.addHandler(file_handler)
logger.addHandler(stdout_handler)

In [3]:
# Emit one message at each severity level through the `logger` object to check
# which levels actually reach the configured outputs.
logger.debug('This is a debig message.')
logger.warning('This is a warning message')
logger.info('This is a log message!')
logger.error('This is an error message.')
logger.critical('This is a critical message')
# Shut down the logger
#logging.shutdown()

2024-11-15 19:27:49,595 | INFO | This is a log message!
2024-11-15 19:27:49,597 | ERROR | This is an error message.
2024-11-15 19:27:49,598 | CRITICAL | This is a critical message
