# Code Generation and Evaluation

In [30]:
from human_eval.data import write_jsonl, read_problems
from human_eval.evaluation import evaluate_functional_correctness
import os
import logging
import sys
import random, time, datetime
from datasets import load_dataset
from mistralai import Mistral
import json

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
from prompts import ( complete_code_prompt_basic, complete_code_prompt, complete_code_prompt_full,
                      complete_task_prompt_basic, complete_task_prompt, complete_task_prompt_full, )
from helpers import get_tokenizer, get_model, generate_response, clean_code, write_jsonl

In [32]:
# select model, propmt, temperature, top_p
models = [
    ('open-mistral-7b', 'mistral_7b'),
    ('ministral-3b-latest', 'mistral_3b'),
    ('ministral-8b-latest', 'mistral_8B'),
    ('open-mistral-nemo', 'nemo'),
    ('open-codestral-mamba', 'codestral_mamba'),
    ]
# actual prompt & prompt name for log file name
prompts_and_names = [
    (complete_code_prompt_basic, 'complete_code_prompt_basic'),            
    (complete_code_prompt,       'complete_code_prompt'),
    (complete_code_prompt_full,  'complete_code_prompt_full'),
    (complete_task_prompt_basic, 'complete_task_prompt_basic'),
    (complete_task_prompt,       'complete_task_prompt'),
    (complete_task_prompt_full,  'complete_task_prompt_full'), ]
# temperature and top_k values and labels for file names
temperature_values = [
    (1.0,  'temperature1.0'),
    (0.75, 'temperature0.75'),
    (0.5,  'temperature0.5'), ]
top_p_values = [
    (1.0,  'topP1.0'),
    (0.75, 'topP0.75'),
    (0.5,  'topP0.5'), ]

model_idx       = 4
prompt_idx      = 0
temperature_idx = 0
top_p_idx       = 0

model_name, model_nickname     = models[ model_idx ]
my_prompt, my_prompt_label     = prompts_and_names[ prompt_idx ]
TEMPERATURE, temperature_label = temperature_values[ temperature_idx ]
TOP_P, top_p_label             = top_p_values[ top_p_idx ]

SPECIAL_MESSAGE = f'Model temperature: {TEMPERATURE}. Model top_p: {TOP_P}. Model top_k: 50. Model dtype: torch.float32'

print(f'Model name:     {model_name}')
print(f'Model nickname: {model_nickname}')
print(f'Prompt name:    {my_prompt_label}')
print(f'Prompt:\n{my_prompt.strip().format("Starter Code")}\n')
print(f'Temperature: {TEMPERATURE}. Temperature label: {temperature_label}')
print(f'Top p: {TOP_P}. Top p label: {top_p_label}')
print(f'Special message: {SPECIAL_MESSAGE}')

Model name:     open-codestral-mamba
Model nickname: codestral_mamba
Prompt name:    complete_code_prompt_basic
Prompt:
Complete the following Python code:
Starter Code

Temperature: 1.0. Temperature label: temperature1.0
Top p: 1.0. Top p label: topP1.0
Special message: Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32


In [33]:
# save results
time_stamp     = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-2]
results_file   = f'logs_local/{model_nickname}_{my_prompt_label}_{temperature_label}_{top_p_label}_completions_{time_stamp}.jsonl'
log_file       = f'logs_local/logs/{model_nickname}_{my_prompt_label}_{temperature_label}_{top_p_label}_log_{time_stamp}.log'

# log results
for handler in logging.root.handlers[:]:            # overwrite any previous handlers with different formats
    logging.root.removeHandler(handler)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s  %(levelname)s  %(message)s',
    handlers=[
        logging.FileHandler(log_file),              # Log to a file
        logging.StreamHandler(sys.stdout)           # Log to console (default - sys.stderr (red background)
    ]
)

# print job config
logging.info('*****     JOB CONFIGURATION     *****')
logging.info(f'MODEL: {model_name}')
logging.info(f'MODEL NICKNAME: {model_nickname}')
logging.info(SPECIAL_MESSAGE)
logging.info(f'GENERATED CODE SAVED IN: {results_file}')
logging.info(f'THIS LOG FILE: {log_file}')
if my_prompt in [complete_code_prompt_basic, complete_code_prompt, complete_code_prompt_full,]:
    logging.info(f'PROMPT:\n{my_prompt.lstrip().format("Starter Code")}')
else:
    logging.info(f'\nPROMPT:\n{my_prompt.format("Task Description", "Test Cases")}')
logging.info('*****     END OF JOB CONFIGURATION     *****')

2025-03-30 05:09:39,797  INFO  *****     JOB CONFIGURATION     *****
2025-03-30 05:09:39,797  INFO  MODEL: open-codestral-mamba
2025-03-30 05:09:39,798  INFO  MODEL NICKNAME: codestral_mamba
2025-03-30 05:09:39,798  INFO  Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32
2025-03-30 05:09:39,799  INFO  GENERATED CODE SAVED IN: logs_local/codestral_mamba_complete_code_prompt_basic_temperature1.0_topP1.0_completions_20250330_050939_7966.jsonl
2025-03-30 05:09:39,799  INFO  THIS LOG FILE: logs_local/logs/codestral_mamba_complete_code_prompt_basic_temperature1.0_topP1.0_log_20250330_050939_7966.log
2025-03-30 05:09:39,799  INFO  PROMPT:
Complete the following Python code:
Starter Code

2025-03-30 05:09:39,800  INFO  *****     END OF JOB CONFIGURATION     *****


In [34]:
dataset = load_dataset("openai/openai_humaneval")
counter = 0
for item in dataset["test"]:
    if counter == 1: break
    counter += 1
    for k,v in item.items():
        print(f'{k}:\n{v}\n')
    print('\n', '='*75, '\n', sep='')

task_id:
HumanEval/0

prompt:
from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """


canonical_solution:
    for idx, elem in enumerate(numbers):
        for idx2, elem2 in enumerate(numbers):
            if idx != idx2:
                distance = abs(elem - elem2)
                if distance < threshold:
                    return True

    return False


test:


METADATA = {
    'author': 'jt',
    'dataset': 'test'
}


def check(candidate):
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False
    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True
    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False

In [35]:
def api_call(model, prompt):
    ''' One API call to a Mistral model '''
    chat_response = client.chat.complete(
                model=model,
                messages=[{
                        "role": "user",
                        "content": prompt, 
                    }]
                )
    return chat_response.choices[0].message.content

def generate_response_with_backoff(model, prompt, max_retries=10, initial_delay=1, max_delay=16):
    '''
    Make API calls with exponential backoff on errors.
    Args:
        model (str): The model identifier.
        prompt (str): The prompt to send.
        max_retries (int): Maximum number of retries before giving up.
        initial_delay (int or float): Starting delay in seconds.
        max_delay (int or float): Maximum delay between retries.
    Returns:
        str: The chat response content OR f'Error generating a completion:{e}'
        if the max number of attemps has been reached
    '''
    retries = 0
    delay   = initial_delay
    while True:
        try:
            return api_call(model, prompt)
        except Exception as e:
            retries += 1
            if retries > max_retries:
                return f"Error generating a completion after {retries} retries:\n{e}"
            sleep_time = delay + random.uniform(0, 1)    # add jitter
            time.sleep(sleep_time)
            delay = min(delay * 2, max_delay)

In [36]:
api_key = os.environ["MISTRAL_API_KEY"]
client = Mistral(api_key=api_key)

In [37]:
input_prompt = "What is the capital of California?"
print(input_prompt)
print(generate_response_with_backoff(model_name, input_prompt))

What is the capital of California?
2025-03-30 05:09:42,231  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
The capital of California is Sacramento. It serves as the state capital and the seat of the California State Legislature.


In [38]:
num_samples_per_task = 1
completions  = []

for item in dataset["test"]:
    logging.info(item['task_id'])
    #if item['task_id'] == 'HumanEval/5':
    #        break
    for i in range(num_samples_per_task):
        start_time  = time.time()
        full_prompt = my_prompt.lstrip().format( item['prompt'] )
        completion = generate_response_with_backoff( model_name, full_prompt )
        completions.append( {'task_id': item['task_id'], 'completion': completion} )
        write_jsonl(results_file, completions)
        
        logging.info('PROMPT:\n' + full_prompt + '\n')
        logging.info('COMPLETION:\n' + completion + '\n')
        logging.info(f"Time elapsed: {(time.time() - start_time):.4f} seconds\n")

2025-03-30 05:09:42,298  INFO  HumanEval/0
2025-03-30 05:09:45,301  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:09:45,313  INFO  PROMPT:
Complete the following Python code:
from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """



2025-03-30 05:09:45,314  INFO  COMPLETION:
from typing import List

def has_close_elements(numbers: List[float], threshold: float) -> bool:
    for i in range(len(numbers)):
        for j in range(i+1, len(numbers)):
            if abs(numbers[i] - numbers[j]) < threshold:
                return True
    return False

print(has_close_elements([1.0, 2.0, 3.0], 0.5))
print(has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2

2025-03-30 05:10:04,571  INFO  COMPLETION:
def parse_nested_parens(paren_string: str) -> List[int]:
    def get_max_depth(paren_string: str) -> int:
        max_depth = 0
        depth = 0
        for char in paren_string:
            if char == '(':
                depth += 1
                max_depth = max(max_depth, depth)
            elif char == ')':
                depth -= 1
        return max_depth

    return [get_max_depth(group) for group in paren_string.split()]

print(parse_nested_parens('(()()) ((())) () ((())()())'))

2025-03-30 05:10:04,571  INFO  Time elapsed: 1.9238 seconds

2025-03-30 05:10:04,573  INFO  HumanEval/7
2025-03-30 05:10:06,843  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:10:06,857  INFO  PROMPT:
Complete the following Python code:
from typing import List


def filter_by_substring(strings: List[str], substring: str) -> List[str]:
    """ Filter an input list of strings only for ones that contain give

2025-03-30 05:10:25,046  INFO  Time elapsed: 1.9237 seconds

2025-03-30 05:10:25,047  INFO  HumanEval/14
2025-03-30 05:10:27,182  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:10:27,193  INFO  PROMPT:
Complete the following Python code:
from typing import List


def all_prefixes(string: str) -> List[str]:
    """ Return list of all prefixes from shortest to longest of the input string
    >>> all_prefixes('abc')
    ['a', 'ab', 'abc']
    """



2025-03-30 05:10:27,194  INFO  COMPLETION:
def all_prefixes(string: str) -> List[str]:
    return [string[:i] for i in range(1, len(string) + 1)]

# Test the function
print(all_prefixes('abc'))  # Output: ['a', 'ab', 'abc']

2025-03-30 05:10:27,196  INFO  Time elapsed: 1.0461 seconds

2025-03-30 05:10:27,197  INFO  HumanEval/15
2025-03-30 05:10:30,255  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:10:30,269  INFO  PROMPT:
Complete the fol

2025-03-30 05:10:47,061  INFO  Time elapsed: 2.3710 seconds

2025-03-30 05:10:47,063  INFO  HumanEval/21
2025-03-30 05:10:50,534  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:10:50,547  INFO  PROMPT:
Complete the following Python code:
from typing import List


def rescale_to_unit(numbers: List[float]) -> List[float]:
    """ Given list of numbers (of at least two elements), apply a linear transform to that list,
    such that the smallest number will become 0 and the largest will become 1
    >>> rescale_to_unit([1.0, 2.0, 3.0, 4.0, 5.0])
    [0.0, 0.25, 0.5, 0.75, 1.0]
    """



2025-03-30 05:10:50,549  INFO  COMPLETION:
Sure, here is a Python code that solves your problem:

```python
from typing import List

def rescale_to_unit(numbers: List[float]) -> List[float]:
    min_val = min(numbers)
    max_val = max(numbers)

    if min_val == max_val:
        return [0.5] * len(numbers)

    return [(num - min_val) / (max_val - min_v

2025-03-30 05:11:10,103  INFO  COMPLETION:
def concatenate(strings: List[str]) -> str:
    """ Concatenate list of strings into a single string
    >>> concatenate([])
    ''
    >>> concatenate(['a', 'b', 'c'])
    'abc'
    """
    return ''.join(strings)

2025-03-30 05:11:10,104  INFO  Time elapsed: 1.0451 seconds

2025-03-30 05:11:10,105  INFO  HumanEval/29
2025-03-30 05:11:12,320  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:11:12,334  INFO  PROMPT:
Complete the following Python code:
from typing import List


def filter_by_prefix(strings: List[str], prefix: str) -> List[str]:
    """ Filter an input list of strings only for ones that start with a given prefix.
    >>> filter_by_prefix([], 'a')
    []
    >>> filter_by_prefix(['abc', 'bcd', 'cde', 'array'], 'a')
    ['abc', 'array']
    """



2025-03-30 05:11:12,335  INFO  COMPLETION:
from typing import List

def filter_by_prefix(strings: List[str], prefix: str) -> List[str]:

2025-03-30 05:11:33,052  INFO  Time elapsed: 2.4910 seconds

2025-03-30 05:11:33,053  INFO  HumanEval/35
2025-03-30 05:11:35,996  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:11:36,010  INFO  PROMPT:
Complete the following Python code:


def max_element(l: list):
    """Return maximum element in the list.
    >>> max_element([1, 2, 3])
    3
    >>> max_element([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])
    123
    """



2025-03-30 05:11:36,012  INFO  COMPLETION:
Here is the complete Python code:

```python
def max_element(l: list):
    """Return maximum element in the list.
    >>> max_element([1, 2, 3])
    3
    >>> max_element([5, 3, -5, 2, -3, 3, 9, 0, 123, 1, -10])
    123
    """
    return max(l)
```

This function uses the built-in max function in Python to find and return the maximum element in the list. The max function goes through all the elements in the list and returns the largest one.

2025-03-30 05:11:36,013  INFO  

2025-03-30 05:11:58,709  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:11:58,719  INFO  PROMPT:
Complete the following Python code:


def car_race_collision(n: int):
    """
    Imagine a road that's a perfectly straight infinitely long line.
    n cars are driving left to right;  simultaneously, a different set of n cars
    are driving right to left.   The two sets of cars start out being very far from
    each other.  All cars move in the same speed.  Two cars are said to collide
    when a car that's moving left to right hits a car that's moving right to left.
    However, the cars are infinitely sturdy and strong; as a result, they continue moving
    in their trajectory as if they did not collide.

    This function outputs the number of such collisions.
    """



2025-03-30 05:11:58,720  INFO  COMPLETION:
Here is a Python code that calculates the number of collisions in the described scenario:

```python
def car_race_collisi

2025-03-30 05:12:19,533  INFO  COMPLETION:
def is_palindrome(text: str):
    """
    Checks if given string is a palindrome
    >>> is_palindrome('')
    True
    >>> is_palindrome('aba')
    True
    >>> is_palindrome('aaaaa')
    True
    >>> is_palindrome('zbcd')
    False
    """
    text = text.lower()
    return text == text[::-1]

print(is_palindrome(''))
print(is_palindrome('aba'))
print(is_palindrome('aaaaa'))
print(is_palindrome('zbcd'))

2025-03-30 05:12:19,533  INFO  Time elapsed: 1.8822 seconds

2025-03-30 05:12:19,535  INFO  HumanEval/49
2025-03-30 05:12:23,121  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:12:23,134  INFO  PROMPT:
Complete the following Python code:


def modp(n: int, p: int):
    """Return 2^n modulo p (be aware of numerics).
    >>> modp(3, 5)
    3
    >>> modp(1101, 101)
    2
    >>> modp(0, 101)
    1
    >>> modp(3, 11)
    8
    >>> modp(100, 101)
    1
    """



2025-03-30 05:12:23,135  INFO

2025-03-30 05:12:43,965  INFO  PROMPT:
Complete the following Python code:


def fib(n: int):
    """Return n-th Fibonacci number.
    >>> fib(10)
    55
    >>> fib(1)
    1
    >>> fib(8)
    21
    """



2025-03-30 05:12:43,966  INFO  COMPLETION:
def fib(n: int):
    if n <= 0:
        return "Input should be a positive integer"
    elif n == 1:
        return 1
    elif n == 2:
        return 1
    else:
        fib_values = [0, 1]
        for i in range(2, n):
            fib_values.append(fib_values[i-1] + fib_values[i-2])
        return fib_values[n-1]

# Test cases
print(fib(10))  # 55
print(fib(1))   # 1
print(fib(8))   # 21

2025-03-30 05:12:43,967  INFO  Time elapsed: 1.8241 seconds

2025-03-30 05:12:43,968  INFO  HumanEval/56
2025-03-30 05:12:46,579  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:12:46,594  INFO  PROMPT:
Complete the following Python code:


def correct_bracketing(brackets: str):
    """ brackets is a st

2025-03-30 05:13:07,762  INFO  COMPLETION:
Here is the Python code to solve the FibFib sequence.

```python
def fibfib(n: int):
    if n == 0:
        return 0
    elif n == 1:
        return 0
    elif n == 2:
        return 1

    fibfib_seq = [0, 0, 1]
    for i in range(3, n+1):
        fibfib_seq.append(fibfib_seq[i-1] + fibfib_seq[i-2] + fibfib_seq[i-3])

    return fibfib_seq[n]
```

This code uses an iterative approach to generate the fibfib sequence up to the nth term. The fibfib sequence is stored in a list, fibfib_seq, and the nth term is simply the last element in this list. The time complexity of this code is O(n), as it performs a single loop from 3 to n.

2025-03-30 05:13:07,763  INFO  Time elapsed: 2.6628 seconds

2025-03-30 05:13:07,764  INFO  HumanEval/64
2025-03-30 05:13:12,357  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:13:12,376  INFO  PROMPT:
Complete the following Python code:

FIX = """
Add more test cases

2025-03-30 05:13:25,410  INFO  Time elapsed: 2.7998 seconds

2025-03-30 05:13:25,412  INFO  HumanEval/69
2025-03-30 05:13:28,637  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:13:28,651  INFO  PROMPT:
Complete the following Python code:

def search(lst):
    '''
    You are given a non-empty list of positive integers. Return the greatest integer that is greater than 
    zero, and has a frequency greater than or equal to the value of the integer itself. 
    The frequency of an integer is the number of times it appears in the list.
    If no such a value exist, return -1.
    Examples:
        search([4, 1, 2, 2, 3, 1]) == 2
        search([1, 2, 2, 3, 3, 3, 4, 4, 4]) == 3
        search([5, 5, 4, 4, 4]) == -1
    '''



2025-03-30 05:13:28,652  INFO  COMPLETION:
def search(lst):
    freq_dict = {}
    for i in lst:
        if i > 0:
            if i not in freq_dict:
                freq_dict[i] = 1
            else:
              

2025-03-30 05:13:48,109  INFO  COMPLETION:
def is_multiply_prime(a):
    def is_prime(n):
        if n < 2:
            return False
        for i in range(2, int(n**0.5) + 1):
            if n % i == 0:
                return False
        return True

    prime_factors = []
    for i in range(2, a + 1):
        while a % i == 0 and is_prime(i):
            prime_factors.append(i)
            a //= i
        if a == 1:
            break

    return len(prime_factors) == 3

# Test cases
print(is_multiply_prime(30))  # True
print(is_multiply_prime(51))  # False
print(is_multiply_prime(99))  # True
print(is_multiply_prime(100)) # False

2025-03-30 05:13:48,110  INFO  Time elapsed: 2.5810 seconds

2025-03-30 05:13:48,111  INFO  HumanEval/76
2025-03-30 05:13:51,123  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:13:51,137  INFO  PROMPT:
Complete the following Python code:

def is_simple_power(x, n):
    """Your task is to write a functio

2025-03-30 05:14:06,235  INFO  Time elapsed: 3.1436 seconds

2025-03-30 05:14:06,236  INFO  HumanEval/82
2025-03-30 05:14:09,290  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:14:09,304  INFO  PROMPT:
Complete the following Python code:

def prime_length(string):
    """Write a function that takes a string and returns True if the string
    length is a prime number or False otherwise
    Examples
    prime_length('Hello') == True
    prime_length('abcdcba') == True
    prime_length('kittens') == True
    prime_length('orange') == False
    """



2025-03-30 05:14:09,305  INFO  COMPLETION:
def prime_length(string):
    def is_prime(n):
        if n < 2:
            return False
        for i in range(2, int(n**0.5) + 1):
            if n % i == 0:
                return False
        return True

    length = len(string)
    return is_prime(length)

print(prime_length('Hello'))
print(prime_length('abcdcba'))
print(prime_length('kitte

2025-03-30 05:14:30,406  INFO  COMPLETION:
def sort_array(array):
    if not array:
        return []
    if (array[0] + array[-1]) % 2 == 0:
        return sorted(array, reverse=True)
    else:
        return sorted(array)

# Test cases
print(sort_array([]))       # []
print(sort_array([5]))      # [5]
print(sort_array([2, 4, 3, 0, 1, 5])) # [0, 1, 2, 3, 4, 5]
print(sort_array([2, 4, 3, 0, 1, 5, 6])) # [6, 5, 4, 3, 2, 1, 0]

2025-03-30 05:14:30,407  INFO  Time elapsed: 2.1132 seconds

2025-03-30 05:14:30,408  INFO  HumanEval/89
2025-03-30 05:14:35,505  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:14:35,520  INFO  PROMPT:
Complete the following Python code:

def encrypt(s):
    """Create a function encrypt that takes a string as an argument and
    returns a string encrypted with the alphabet being rotated. 
    The alphabet should be rotated in a manner such that the letters 
    shift down by two multiplied to two places.
    For

2025-03-30 05:14:54,056  INFO  Time elapsed: 5.0283 seconds

2025-03-30 05:14:54,057  INFO  HumanEval/95
2025-03-30 05:14:57,934  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:14:57,948  INFO  PROMPT:
Complete the following Python code:

def check_dict_case(dict):
    """
    Given a dictionary, return True if all keys are strings in lower 
    case or all keys are strings in upper case, else return False.
    The function should return False is the given dictionary is empty.
    Examples:
    check_dict_case({"a":"apple", "b":"banana"}) should return True.
    check_dict_case({"a":"apple", "A":"banana", "B":"banana"}) should return False.
    check_dict_case({"a":"apple", 8:"banana", "a":"apple"}) should return False.
    check_dict_case({"Name":"John", "Age":"36", "City":"Houston"}) should return False.
    check_dict_case({"STATE":"NC", "ZIP":"12345" }) should return True.
    """



2025-03-30 05:14:57,949  INFO  COMPLETION:
```

2025-03-30 05:15:15,797  INFO  Time elapsed: 1.2063 seconds

2025-03-30 05:15:15,798  INFO  HumanEval/101
2025-03-30 05:15:17,899  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:15:17,912  INFO  PROMPT:
Complete the following Python code:

def words_string(s):
    """
    You will be given a string of words separated by commas or spaces. Your task is
    to split the string into words and return an array of the words.
    
    For example:
    words_string("Hi, my name is John") == ["Hi", "my", "name", "is", "John"]
    words_string("One, two, three, four, five, six") == ["One", "two", "three", "four", "five", "six"]
    """



2025-03-30 05:15:17,913  INFO  COMPLETION:
def words_string(s):
    return s.replace(",", "").split()

print(words_string("Hi, my name is John"))
print(words_string("One, two, three, four, five, six"))

2025-03-30 05:15:17,914  INFO  Time elapsed: 1.0101 seconds

2025-03-30 05:15:17,915  INFO  HumanEval/102
20

2025-03-30 05:15:38,600  INFO  COMPLETION:
def even_odd_palindrome(n):
    even_count = 0
    odd_count = 0

    for num in range(1, n):
        if str(num) == str(num)[::-1]:  # check if the number is a palindrome
            if num % 2 == 0:
                even_count += 1
            else:
                odd_count += 1

    return (even_count, odd_count)

print(even_odd_palindrome(3))  # Output: (1, 2)
print(even_odd_palindrome(12))  # Output: (4, 6)

2025-03-30 05:15:38,601  INFO  Time elapsed: 1.9831 seconds

2025-03-30 05:15:38,602  INFO  HumanEval/108
2025-03-30 05:15:41,453  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:15:41,473  INFO  PROMPT:
Complete the following Python code:

def count_nums(arr):
    """
    Write a function count_nums which takes an array of integers and returns
    the number of elements which has a sum of digits > 0.
    If a number is negative, then its first signed digit will be negative:
    e.g.

2025-03-30 05:15:57,242  INFO  COMPLETION:
def reverse_delete(s,c):
    result = ''.join([char for char in s if char not in c])
    return (result, result == result[::-1])

print(reverse_delete("abcde","ae"))
print(reverse_delete("abcdef","b"))
print(reverse_delete("abcdedcba","ab"))

2025-03-30 05:15:57,244  INFO  Time elapsed: 1.4618 seconds

2025-03-30 05:15:57,245  INFO  HumanEval/113
2025-03-30 05:16:00,260  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:16:00,276  INFO  PROMPT:
Complete the following Python code:

def odd_count(lst):
    """Given a list of strings, where each string consists of only digits, return a list.
    Each element i of the output should be "the number of odd elements in the
    string i of the input." where all the i's should be replaced by the number
    of odd digits in the i'th string of the input.

    >>> odd_count(['1234567'])
    ["the number of odd elements 4n the str4ng 4 of the 4nput."]
    >>

2025-03-30 05:16:19,021  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:16:19,035  INFO  PROMPT:
Complete the following Python code:

def get_closest_vowel(word):
    """You are given a word. Your task is to find the closest vowel that stands between 
    two consonants from the right side of the word (case sensitive).
    
    Vowels in the beginning and ending doesn't count. Return empty string if you didn't
    find any vowel met the above condition. 

    You may assume that the given string contains English letter only.

    Example:
    get_closest_vowel("yogurt") ==> "u"
    get_closest_vowel("FULL") ==> "U"
    get_closest_vowel("quick") ==> ""
    get_closest_vowel("ab") ==> ""
    """



2025-03-30 05:16:19,036  INFO  COMPLETION:
def get_closest_vowel(word):
    vowels = 'aeiouAEIOU'
    closest_vowel = ''

    # Iterate from the right side of the word
    for i in range(len(word)-1, -1, -1):
        # Check if the current 

2025-03-30 05:16:35,637  INFO  Time elapsed: 1.6528 seconds

2025-03-30 05:16:35,638  INFO  HumanEval/124
2025-03-30 05:16:40,946  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:16:40,967  INFO  PROMPT:
Complete the following Python code:

def valid_date(date):
    """You have to write a function which validates a given date string and
    returns True if the date is valid otherwise False.
    The date is valid if all of the following rules are satisfied:
    1. The date string is not empty.
    2. The number of days is not less than 1 or higher than 31 days for months 1,3,5,7,8,10,12. And the number of days is not less than 1 or higher than 30 days for months 4,6,9,11. And, the number of days is not less than 1 or higher than 29 for the month 2.
    3. The months should not be less than 1 or higher than 12.
    4. The date should be in the format: mm-dd-yyyy

    for example: 
    valid_date('03-11-2000') => True

    valid_date('15

2025-03-30 05:16:56,537  INFO  Time elapsed: 4.7770 seconds

2025-03-30 05:16:56,538  INFO  HumanEval/128
2025-03-30 05:16:59,481  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:16:59,502  INFO  PROMPT:
Complete the following Python code:

def prod_signs(arr):
    """
    You are given an array arr of integers and you need to return
    sum of magnitudes of integers multiplied by product of all signs
    of each number in the array, represented by 1, -1 or 0.
    Note: return None for empty arr.

    Example:
    >>> prod_signs([1, 2, 2, -4]) == -9
    >>> prod_signs([0, 1]) == 0
    >>> prod_signs([]) == None
    """



2025-03-30 05:16:59,503  INFO  COMPLETION:
def prod_signs(arr):
    if not arr:
        return None
    prod_sign = 1
    sum_magnitude = 0
    for num in arr:
        if num < 0:
            prod_sign *= -1
        elif num > 0:
            prod_sign *= 1
        sum_magnitude += abs(num)
    return sum_magnitude * 

2025-03-30 05:17:16,407  INFO  Time elapsed: 1.7739 seconds

2025-03-30 05:17:16,408  INFO  HumanEval/133
2025-03-30 05:17:20,307  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:17:20,325  INFO  PROMPT:
Complete the following Python code:


def sum_squares(lst):
    """You are given a list of numbers.
    You need to return the sum of squared numbers in the given list,
    round each element in the list to the upper int(Ceiling) first.
    Examples:
    For lst = [1,2,3] the output should be 14
    For lst = [1,4,9] the output should be 98
    For lst = [1,3,5,7] the output should be 84
    For lst = [1.4,4.2,0] the output should be 29
    For lst = [-2.4,1,1] the output should be 6
    

    """



2025-03-30 05:17:20,326  INFO  COMPLETION:
Here is the Python code that completes the given task:

```python
import math

def sum_squares(lst):
    total_sum = 0
    for num in lst:
        # Round the number up to the nearest integer
   

2025-03-30 05:17:37,429  INFO  HumanEval/139
2025-03-30 05:17:39,624  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:17:39,639  INFO  PROMPT:
Complete the following Python code:

def special_factorial(n):
    """The Brazilian factorial is defined as:
    brazilian_factorial(n) = n! * (n-1)! * (n-2)! * ... * 1!
    where n > 0

    For example:
    >>> special_factorial(4)
    288

    The function will receive an integer as input and should return the special
    factorial of this integer.
    """



2025-03-30 05:17:39,640  INFO  COMPLETION:
def special_factorial(n):
    if n == 0:
        return 1
    else:
        result = 1
        for i in range(n, 0, -1):
            result *= special_factorial(i)
        return result

print(special_factorial(4))  # Output: 288

2025-03-30 05:17:39,641  INFO  Time elapsed: 1.1061 seconds

2025-03-30 05:17:39,642  INFO  HumanEval/140
2025-03-30 05:17:43,524  INFO  HTTP Request: POST https://api

2025-03-30 05:17:56,922  INFO  Time elapsed: 3.1521 seconds

2025-03-30 05:17:56,923  INFO  HumanEval/144
2025-03-30 05:18:02,150  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:18:02,173  INFO  PROMPT:
Complete the following Python code:

def simplify(x, n):
    """Your task is to implement a function that will simplify the expression
    x * n. The function returns True if x * n evaluates to a whole number and False
    otherwise. Both x and n, are string representation of a fraction, and have the following format,
    <numerator>/<denominator> where both numerator and denominator are positive whole numbers.

    You can assume that x, and n are valid fractions, and do not have zero as denominator.

    simplify("1/5", "5/1") = True
    simplify("1/6", "2/1") = False
    simplify("7/10", "10/2") = False
    """



2025-03-30 05:18:02,174  INFO  COMPLETION:
To solve this problem, we need to convert the string representation of the f

2025-03-30 05:18:20,273  INFO  COMPLETION:
def bf(planet1, planet2):
    solar_system = {
        "Mercury": 0, "Venus": 1, "Earth": 2, "Mars": 3, "Jupiter": 4,
        "Saturn": 5, "Uranus": 6, "Neptune": 7
    }

    try:
        start = solar_system[planet1]
        end = solar_system[planet2]
    except KeyError:
        return ()

    if start > end:
        start, end = end, start

    return tuple(planet for planet, distance in solar_system.items() if start < distance < end)

print(bf("Jupiter", "Neptune"))
print(bf("Earth", "Mercury"))
print(bf("Mercury", "Uranus"))
print(bf("Earth", "Pluto"))

2025-03-30 05:18:20,274  INFO  Time elapsed: 2.6894 seconds

2025-03-30 05:18:20,276  INFO  HumanEval/149
2025-03-30 05:18:23,143  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:18:23,158  INFO  PROMPT:
Complete the following Python code:

def sorted_list_sum(lst):
    """Write a function that accepts a list of strings as a parameter,


2025-03-30 05:18:37,903  INFO  COMPLETION:
Here is the Python code to solve this problem:

```python
def cycpattern_check(a, b):
    for i in range(len(b)):
        temp = b[i:] + b[:i]
        if temp in a:
            return True
    return False
```
This function works by generating all possible rotations of the string `b`, and then checks if any of these rotations are substrings of `a`. If it finds a match, it returns `True`; otherwise, it returns `False`.

2025-03-30 05:18:37,904  INFO  Time elapsed: 1.5495 seconds

2025-03-30 05:18:37,905  INFO  HumanEval/155
2025-03-30 05:18:40,752  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:18:40,766  INFO  PROMPT:
Complete the following Python code:

def even_odd_count(num):
    """Given an integer. return a tuple that has the number of even and odd digits respectively.

     Example:
        even_odd_count(-12) ==> (1, 1)
        even_odd_count(123) ==> (1, 2)
    """



2025-03-30 05:1

2025-03-30 05:18:57,981  INFO  COMPLETION:
def do_algebra(operator, operand):
    expression = str(operand[0])
    for i in range(len(operator)):
        expression += ' ' + operator[i] + ' ' + str(operand[i+1])
    return eval(expression)

operator = ['+', '*', '-']
operand = [2, 3, 4, 5]
print(do_algebra(operator, operand)) # Output: 9

2025-03-30 05:18:57,981  INFO  Time elapsed: 1.3934 seconds

2025-03-30 05:18:57,983  INFO  HumanEval/161
2025-03-30 05:19:00,520  INFO  HTTP Request: POST https://api.mistral.ai/v1/chat/completions "HTTP/1.1 200 OK"
2025-03-30 05:19:00,534  INFO  PROMPT:
Complete the following Python code:

def solve(s):
    """You are given a string s.
    if s[i] is a letter, reverse its case from lower to upper or vise versa, 
    otherwise keep it as it is.
    If the string contains no letters, reverse the string.
    The function should return the resulted string.
    Examples
    solve("1234") = "4321"
    solve("ab") = "AB"
    solve("#a@C") = "#A@c"
    """


In [39]:
unknown_variable

NameError: name 'unknown_variable' is not defined

# Appendix

In [None]:
# read a jsonl file
file_name = 'logs/mistral_7b_samples_20241201_005244_0462.jsonl'
data = read_jsonl(file_name)

In [None]:
data[0]['completion']

In [None]:
# modify and write back
data2 = []
for i in data:
    new_i = dict()
    new_i['task_id'] = i['task_id']
    new_i['completion'] = add_import(i['completion'])
    data2.append(new_i)

In [None]:
file_name = 'logs/mistral_7b_samples_20241201_005244_0462_v2.jsonl'
write_jsonl(file_name, data2)