In [2]:
from datasets import load_dataset

mbpp = load_dataset("mbpp")

In [138]:
mbpp['test'][0]

{'task_id': 11,
 'text': 'Write a python function to remove first and last occurrence of a given character from the string.',
 'code': 'def remove_Occ(s,ch): \r\n    for i in range(len(s)): \r\n        if (s[i] == ch): \r\n            s = s[0 : i] + s[i + 1:] \r\n            break\r\n    for i in range(len(s) - 1,-1,-1):  \r\n        if (s[i] == ch): \r\n            s = s[0 : i] + s[i + 1:] \r\n            break\r\n    return s ',
 'test_list': ['assert remove_Occ("hello","l") == "heo"',
  'assert remove_Occ("abcda","a") == "bcd"',
  'assert remove_Occ("PHP","P") == "H"'],
 'test_setup_code': '',
 'challenge_test_list': ['assert remove_Occ("hellolloll","l") == "helollol"',
  'assert remove_Occ("","l") == ""']}

In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from transformers import BitsAndBytesConfig


nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

def load_model_and_tokenizer(model_name = "google/gemma-2b-it", device = None):
    
    if device is None:
      device = 'cpu'
    
    tokenizer = AutoTokenizer.from_pretrained(model_name,
                                              trust_remote_code=True,
                                              )
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, torch_dtype=torch.bfloat16)
    model.eval()
    
    return tokenizer, model

In [4]:
import warnings

import numpy as np

import torch

tokenizer, model = load_model_and_tokenizer('meta-llama/Llama-3.2-1B-Instruct')

In [116]:
import ast

def predict_code(model, tokenizer, problem_description):

    prompt = [{"role":"user", "content": f"Problem: {problem_description}\n\n"
            "Input:\nWrite a single Python function to solve the problem above.\nOutput:\n"}] 

    inputs = tokenizer.apply_chat_template(prompt, return_tensors="pt", add_generation_prompt=True)

    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        num_return_sequences=1
    )
    print(outputs)
    predicted_code = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return predicted_code[len(prompt):]

def parse_test_case(test_case):
    """
    Parse an assertion like:
    assert max_chain_length([Pair(1, 2), Pair(3, 4)], 4) == 4
    to extract:
    - Function call as a string: "max_chain_length([Pair(1, 2), Pair(3, 4)], 4)"
    - Expected output: 4
    """
    # Parse the assertion string
    assertion_node = ast.parse(test_case, mode='exec').body[0]
    if isinstance(assertion_node, ast.Assert):
        test_call = assertion_node.test.left  # The function call (left of ==)
        # i want to take out the name of the function too
        test_call = test_call.args
        expected_output = assertion_node.test.comparators[0]  # The value (right of ==)

        # Convert AST back to source code for evaluation
        test_call_code = ast.unparse(test_call)
        expected_output_value = eval(ast.unparse(expected_output))
        return test_call_code, expected_output_value
    else:
        raise ValueError("Test case is not a valid assertion.")
    

def evaluate_problem(function, test_cases):

    success_count = 0
    local_env = {}
    exec(function, {}, local_env)
    function_handle = next(iter(local_env.values()))
    for test_case in test_cases:
        input_data, expected_output = parse_test_case(test_case)
        try:
            result = function_handle(eval(input_data))
            if result == expected_output:
                success_count += 1
        except Exception as e:
            print("not a python code")
            continue

    # Return results
    accuracy = success_count / len(test_cases)
    print(accuracy)
    return accuracy




In [94]:
def clean_code(predicted_code):
    """
    Finds the first occurrence of 'def' in the predicted code and removes everything before it.
    If 'def' is not found, returns the code unchanged.
    """
    def_index = predicted_code.find("def")  # Find the index of the first 'def'
    if def_index != -1:
        return predicted_code[def_index:]  # Return the substring starting from 'def'
    return predicted_code  
for i in range(0, 10):
    print("Problem", i)
    sample_problem = mbpp['train'][i] 
    problem_description = sample_problem["text"]
    ground_truth_code = sample_problem["code"]
    test_cases = sample_problem["test_list"]

    predicted_code = predict_code(model, tokenizer, problem_description)
    print("pred:", predicted_code)
    print("ground:", ground_truth_code)
    cleaned_code = clean_code(predicted_code)
    print("cleaned:", cleaned_code)
    accuracy = evaluate_problem(cleaned_code, test_cases)
    print(accuracy)
# sample_problem = mbpp['train'][12] 
# problem_description = sample_problem["text"]
# ground_truth_code = sample_problem["code"]
# test_cases = sample_problem["test_list"]


# predicted_code = predict_code(model, tokenizer, problem_description)
# accuracy = evaluate_problem(predicted_code, test_cases)
# print(f"Problem Description:\n{problem_description}")
# print(f"Accuracy: {accuracy * 100:.2f}%")
# print(f"Predicted Code:\n{predicted_code}")
# print(f"Ground Truth Code:\n{ground_truth_code}")

Problem 0


TypeError: transformers.generation.utils.GenerationMixin.generate() argument after ** must be a mapping, not Tensor

In [None]:
i = 12
print("Problem", i)
sample_problem = mbpp['train'][i] 
problem_description = sample_problem["text"]
ground_truth_code = sample_problem["code"]
test_cases = sample_problem["test_list"]

# predicted_code = predict_code(model, tokenizer, problem_description)
# print("pred:", predicted_code)
# print("ground:", ground_truth_code)

# extract code inside ```python
import re
def extract_function(predicted_code):
    pattern = r"```python\n(.*)\n```"
    match = re.findall(pattern, predicted_code, re.DOTALL)
    return match[-1]

print("cleaned:\n\n", extract_function(predicted_code))

#execute the code
#result = evaluate_problem(ground_truth_code, test_cases)
#print("v")
#result = evaluate_problem(extract_function(predicted_code), test_cases)

print(result)

# cleaned_code = extract_function(predicted_code)
# print("cleaned:", cleaned_code)
# accuracy = evaluate_problem(cleaned_code, test_cases)
# print(accuracy)

Problem 12
cleaned:

 def max_record_value(tuples_list):
    """
    This function finds the maximum value in a list of tuples.

    Args:
    tuples_list (list): A list of tuples.

    Returns:
    tuple: The maximum value in the list of tuples.
    """
    if not tuples_list:
        return None  # Return None if the list is empty

    max_value = max(tuples_list[0])  # Initialize max_value with the first tuple
    for tuple in tuples_list[1:]:  # Iterate over the rest of the tuples
        if tuple[0] > max_value:  # Compare the first element of the current tuple with max_value
            max_value = tuple[0]  # Update max_value if the current tuple's first element is larger

    return max_value  # Return the maximum value


# Example usage
tuples_list = [(10, 20, 30), (40, 50, 60), (70, 80, 90)]
max_value = max_record_value(tuples_list)
print("Maximum value:", max_value)  # Output: Maximum value: (90, 60, 70)
1.0
v
Maximum value: 70
not a python code
not a python code
not a pytho

In [132]:
print(pred)

def max_record_value(tuples_list):
    """
    This function finds the maximum value in a list of tuples.

    Args:
    tuples_list (list): A list of tuples.

    Returns:
    tuple: The maximum value in the list of tuples.
    """
    if not tuples_list:
        return None  # Return None if the list is empty

    max_value = max(tuples_list[0])  # Initialize max_value with the first tuple
    for tuple in tuples_list[1:]:  # Iterate over the rest of the tuples
        if tuple[0] > max_value:  # Compare the first element of the current tuple with max_value
            max_value = tuple[0]  # Update max_value if the current tuple's first element is larger

    return max_value  # Return the maximum value


# Example usage
tuples_list = [(10, 20, 30), (40, 50, 60), (70, 80, 90)]
max_value = max_record_value(tuples_list)
print("Maximum value:", max_value)  # Output: Maximum value: (90, 60, 70)


In [3]:
from vllm import SamplingParams
from EvalUtils.MathUtils.python_executor import PythonExecutor
from tqdm import trange
import re
from timeout_decorator import timeout

executor = PythonExecutor()

@timeout(5, timeout_exception=ValueError)
def run_code(code):
    exec(code)

def extract_python(predicted_code):
    pattern = r"```python\n(.*)\n```"
    match = re.findall(pattern, predicted_code, re.DOTALL)
    if len(match) == 0:
        return ""
    return match[-1]

def run_mbpp_benchmark(model, prompt_type, max_tokens=1024):
    import datasets
    data = datasets.load_dataset("mbpp")['test']
    
    inputs = []
    test_cases = []
    for i in range(len(data)):
        inputs.append([{ "role": "user", "content": data[i]['text']}])
        test_cases.append("\n".join(data[i]['test_list']))

    if prompt_type == 'chat':
        inputs = model.get_tokenizer().apply_chat_template(inputs, add_generation_prompt=True, tokenize=False)
    elif prompt_type == 'plain':
        pass
    else:
        raise NotImplementedError('problem_prompt should be either "chat" or "plain"')
    

    sampling = SamplingParams(n=1, temperature=0.0, max_tokens=max_tokens, top_p=1.0)
    
    results = model.generate(inputs, sampling_params=sampling)
    results = extract_outputs(results)

    accuracy = 0
    total_codes = []
    for i in trange(len(results)):
        predicted_code = results[i]        
        cleaned_code = extract_python(predicted_code)
        
        try:
            function_name = cleaned_code.split('def ')[1].split('(')[0]
            # add a timeout to the execution
            test_function_name = test_cases[i].split('assert ')[1].split('(')[0]
            test = test_cases[i].replace(test_function_name, function_name)
            total_codes.append(cleaned_code+'\n'+test)
            # _,r = executor.execute(cleaned_code+'\n'+test)
            run_code(cleaned_code+'\n'+test)
            
            accuracy += 1
        except Exception as e:
            continue
    
    return accuracy / len(results), results, test_cases, total_codes

def extract_outputs(outputs):
    return [output.outputs[0].text for output in outputs]

In [None]:
import datasets
data = datasets.load_dataset("mbpp")['test']

In [1]:
from EvalUtils import load_model

model = load_model('meta-llama/Llama-3.1-8B-Instruct')

INFO 12-06 23:00:05 config.py:1021] Chunked prefill is enabled with max_num_batched_tokens=512.
INFO 12-06 23:00:05 llm_engine.py:237] Initializing an LLM engine (v0.6.3.post1) with config: model='meta-llama/Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=39000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=meta-l

Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]


INFO 12-06 23:00:16 model_runner.py:1067] Loading model weights took 14.9888 GB
INFO 12-06 23:00:16 gpu_executor.py:122] # GPU blocks: 2488, # CPU blocks: 2048
INFO 12-06 23:00:16 gpu_executor.py:126] Maximum concurrency for 39000 tokens per request: 1.02x
INFO 12-06 23:00:18 model_runner.py:1395] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 12-06 23:00:18 model_runner.py:1399] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 12-06 23:00:27 model_runner.py:1523] Graph capturing finished in 9 secs.


In [4]:
acc,results, test_cases, total_codes = run_mbpp_benchmark(model, 'chat', max_tokens=1024)

Processed prompts:   0%|          | 0/500 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]



Processed prompts:  21%|██        | 104/500 [00:19<00:41,  9.57it/s, est. speed input: 272.60 toks/s, output: 1975.85 toks/s]



Processed prompts:  31%|███       | 154/500 [00:25<00:45,  7.57it/s, est. speed input: 309.74 toks/s, output: 2442.47 toks/s]



Processed prompts:  37%|███▋      | 184/500 [00:29<00:50,  6.27it/s, est. speed input: 324.02 toks/s, output: 2596.82 toks/s]



Processed prompts:  42%|████▏     | 210/500 [00:32<00:38,  7.46it/s, est. speed input: 332.58 toks/s, output: 2678.30 toks/s]



Processed prompts:  54%|█████▎    | 268/500 [00:39<00:19, 12.19it/s, est. speed input: 354.11 toks/s, output: 2890.92 toks/s]



Processed prompts:  55%|█████▌    | 277/500 [00:41<00:44,  5.02it/s, est. speed input: 347.02 toks/s, output: 2829.92 toks/s]



Processed prompts:  63%|██████▎   | 313/500 [00:45<00:22,  8.26it/s, est. speed input: 355.65 toks/s, output: 2935.24 toks/s]



Processed prompts:  67%|██████▋   | 337/500 [00:49<00:21,  7.53it/s, est. speed input: 355.89 toks/s, output: 2945.98 toks/s]



Processed prompts: 100%|██████████| 500/500 [01:10<00:00,  7.09it/s, est. speed input: 369.16 toks/s, output: 3122.09 toks/s]
  0%|          | 0/500 [00:00<?, ?it/s]

Hell, Wold!
Pytho is fun
Misissppi

abc
The volume of the triangular prism is: 37.5 cubic units
True
False
[2, 4, 6, 8, 10]
29
[1, 4, 7]
[2, 5, 8]
['dog', 'cat']
Original array: [4, 2, 2, 8, 3, 3, 1]
Sorted array: [1, 2, 2, 3, 3, 4, 8]
3
Original array: [64, 34, 25, 12, 22, 11, 90]
Sorted array: [11, 12, 22, 25, 34, 64, 90]
True
False
There are 10 squares of size 2 that can fit in a rectangle of length 10 and width 5.
-3
20
['3', '6', '9']
Triplet found: [1, 5, 9]
 8  1  6
 3  5  7
 4  9  2
-12
Even
Total bill: $500.00
Total bill with taxes: $550.00
Original list: [23, 10, 20, 11, 12, 6, 7]
Sorted list: [6, 7, 10, 11, 12, 20, 23]
3
ASCII sum: 1129


100%|██████████| 500/500 [00:01<00:00, 283.73it/s]

2863311544
2863311533
Invalid date
Summer
Summer
Invalid date
{1: 0, 2: 0, 3: 0, 4: 1, 5: 1, 6: 0, 7: 0, 8: 1, 9: 0, 10: 0, 11: 0, 12: 3, 13: 0, 14: 0, 15: 2, 16: 0, 17: 0, 18: 1, 19: 0, 20: 0, 21: 0, 22: 0, 23: 1, 24: 0, 25: 0, 26: 0}
0
1
0
fl

inter
The focus of the parabola is: (0.0, 1.25)
The decimal equivalent of 12 is 10
True
True
True
False
True
True
True
False
False
False
Original heap: [4, 2, 9, 6, 5, 1, 8, 3, 7]
Smallest element: 4
Heap after deleting smallest element: [2, 5, 9, 6, 7, 1, 8, 3]
Inserting new item: 0
True
False
False
True
Maximum k elements: [8, 9, 10]
Minimum k elements: [1, 2, 3]
Hello_World
2863311530
2863311550
18
5
10.5
21.0
33
0
(5, 15, 25)
None
Element 23 found at index 5.
[5, 15, 25]
True
False
False
False
False
False
The number of odd days in 2024 is: 366
[3, 5, 6, 8, 9]
105
[(1, 2), (2, 3), (3, 4), (4, 5)]
True
False
40.0
There are 10 squares of size 2 that fit in the rectangle.
[1, 0, 0, 4]
True
True
Lateral surface area: 62
Original list: [64, 34, 2




In [5]:
acc

0.054

In [50]:
print(test_cases[0], '\n', total_codes[0])

assert remove_Occ("hello","l") == "heo"
assert remove_Occ("abcda","a") == "bcd"
assert remove_Occ("PHP","P") == "H" 
 def remove_first_and_last_occurrence(input_string, char):
    """
    Removes the first and last occurrence of a given character from the string.

    Args:
        input_string (str): The input string.
        char (str): The character to be removed.

    Returns:
        str: The modified string with the first and last occurrence of the character removed.
    """
    # Find the index of the first occurrence of the character
    first_occurrence_index = input_string.find(char)
    
    # If the character is not found, return the original string
    if first_occurrence_index == -1:
        return input_string
    
    # Find the index of the last occurrence of the character
    last_occurrence_index = input_string.rfind(char)
    
    # If the character is not found in the string, return the original string
    if last_occurrence_index == -1:
        return input_string

In [8]:
print(results[0])

Here's a Python function that removes the first and last occurrence of a given character from a string:

```python
def remove_first_and_last_occurrence(input_string, char):
    """
    Removes the first and last occurrence of a given character from the string.

    Args:
        input_string (str): The input string.
        char (str): The character to be removed.

    Returns:
        str: The modified string with the first and last occurrence of the character removed.
    """
    # Find the index of the first occurrence of the character
    first_occurrence_index = input_string.find(char)
    
    # If the character is not found, return the original string
    if first_occurrence_index == -1:
        return input_string
    
    # Find the index of the last occurrence of the character
    last_occurrence_index = input_string.rfind(char)
    
    # If the character is not found in the string, return the original string
    if last_occurrence_index == -1:
        return input_string
  

In [133]:
full_code = pred + "\n" + "\n".join(test_cases)

# run the code
try:
    exec(full_code)
except Exception as e:
    print(e)
    print("error")

Maximum value: 70


In [119]:
pred = extract_function(predicted_code)

local_env = {}
exec(pred, {}, local_env)
function_handle = next(iter(local_env.values()))
test_case = test_cases[0]
input_data, expected_output = parse_test_case(test_case)
print(function_handle)
result = function_handle(eval(input_data))
print(result)

Maximum value: 70
<function max_record_value at 0x797d148c85e0>


TypeError: '>' not supported between instances of 'list' and 'str'

In [52]:
prompt = (f"Problem: {problem_description}\n\n""Input:\nWrite a Python function to solve the problem above.\nOutput:\n")   

In [29]:
def extract_python_code_block(predicted_code):
    """
    Extracts the Python code block enclosed between ```python and ```.
    """
    start_tag = "```python"
    end_tag = "```"
    
    # Find the first Python code block
    start_index = predicted_code.find(start_tag)
    end_index = predicted_code.find(end_tag, start_index + len(start_tag))

    if start_index != -1 and end_index != -1:
        # Extract and return the content inside the code block
        return predicted_code[start_index + len(start_tag):end_index].strip()
    
    return "No Python code block found."

In [92]:
def evaluate_problem(function, test_cases):

    success_count = 0
    local_env = {}
    exec(function, {}, local_env)
    function_handle = next(iter(local_env.values()))
    for test_case in test_cases:
        input_data, expected_output = parse_test_case(test_case)
        try:
            result = function_handle(eval(input_data))
            print("result:", result)
            if result == expected_output:
                success_count += 1
        except Exception as e:
            print("not a python code")
            continue

    # Return results
    accuracy = success_count / len(test_cases)
    return accuracy


evaluate_problem(ground_truth_code, test_cases)

result: [('key1', 5), ('key2', 4), ('key3', 9)]
result: [('key1', 6), ('key2', 5), ('key3', 10)]
result: [('key1', 7), ('key2', 6), ('key3', 11)]


1.0

In [None]:


def parse_test_case(test_case):
    """
    Parse an assertion like:
    assert max_chain_length([Pair(1, 2), Pair(3, 4)], 4) == 4
    to extract:
    - Function call as a string: "max_chain_length([Pair(1, 2), Pair(3, 4)], 4)"
    - Expected output: 4
    """
    # Parse the assertion string
    assertion_node = ast.parse(test_case, mode='exec').body[0]
    if isinstance(assertion_node, ast.Assert):
        test_call = assertion_node.test.left  # The function call (left of ==)
        # i want to take out the name of the function too
        test_call = test_call.args
        expected_output = assertion_node.test.comparators[0]  # The value (right of ==)

        # Convert AST back to source code for evaluation
        test_call_code = ast.unparse(test_call)
        expected_output_value = eval(ast.unparse(expected_output))
        return test_call_code, expected_output_value
    else:
        raise ValueError("Test case is not a valid assertion.")

("[('key1', [3, 4, 5]), ('key2', [1, 4, 2]), ('key3', [9, 3])]", [('key1', 5), ('key2', 4), ('key3', 9)])
("[('key1', [4, 5, 6]), ('key2', [2, 5, 3]), ('key3', [10, 4])]", [('key1', 6), ('key2', 5), ('key3', 10)])
("[('key1', [5, 6, 7]), ('key2', [3, 6, 4]), ('key3', [11, 5])]", [('key1', 7), ('key2', 6), ('key3', 11)])


In [36]:
cleaned_code = extract_python_code_block(predicted_code)
print(cleaned_code)
accuracy = evaluate_problem(cleaned_code, test_cases)
print(accuracy)

def find_max_value_in_list(tuple_list):
    """
    This function finds the maximum value in a list of tuples.

    Parameters:
    tuple_list (list): A list of tuples.

    Returns:
    tuple: The maximum value in the list of tuples.
    """
    if not tuple_list:
        return None
    max_value = max(tuple_list, key=lambda x: x[1])
    return max_value

# Example usage:
tuple_list = [(1, 2), (3, 4), (5, 6), (7, 8)]
max_value = find_max_value_in_list(tuple_list)
print(max_value)  # Output: (7, 8)
(7, 8)
not a python code
(7, 8)
not a python code
(7, 8)
not a python code
0.0


In [None]:
c = """def find_max_value_in_list(tuple_list):
    \"\"\"
    This function finds the maximum value in a list of tuples.

    Parameters:
    tuple_list (list): A list of tuples.

    Returns:
    tuple: The maximum value in the list of tuples.
    \"\"\"
    if not tuple_list:
        return None
    max_value = max(tuple_list, key=lambda x: x[1])
    return max_value
    
    """

In [96]:
i = 12
print("Problem", i)
sample_problem = mbpp['train'][i] 
problem_description = sample_problem["text"]
ground_truth_code = sample_problem["code"]
test_cases = sample_problem["test_list"]
input_data, expected_output = parse_test_case(test_cases[0])
print(input_data, expected_output)

Problem 12
[('key1', [3, 4, 5]), ('key2', [1, 4, 2]), ('key3', [9, 3])] [('key1', 5), ('key2', 4), ('key3', 9)]


In [49]:
print(ground_truth_code)

def maximum_value(test_list):
  res = [(key, max(lst)) for key, lst in test_list]
  return (res) 


In [58]:



for test_case in test_cases:
    input_data = '''[('key1', [3, 4, 5]), ('key2', [1, 4, 2]), ('key3', [9, 3])]'''
    expected_output = "[('key1', 5), ('key2', 4), ('key3', 9)]"
    #input_data, expected_output = parse_test_case(test_case)
    # Use `exec` to run the code and check the output
    local_env = {}

    exec(ground_truth_code, {}, local_env)
    result = local_env[input_data]
    print(result)

    exec(c, {}, local_env)
    result = local_env[input_data]
    print(result)


KeyError: "[('key1', [3, 4, 5]), ('key2', [1, 4, 2]), ('key3', [9, 3])]"

In [61]:
print(ground_truth_code)

def maximum_value(test_list):
  res = [(key, max(lst)) for key, lst in test_list]
  return (res) 


In [None]:
local_env = {}

try:
    # Dynamically execute the function string
    exec(ground_truth_code, {}, local_env)

    # Extract the function name (assumes it's the first 'def' in the string)
    function_name = ground_truth_code.split("def ")[1].split("(")[0].strip()

    # Call the function dynamically with the input
    result = local_env[function_name](input_data)
    print(result)
except Exception as e:
    print(f"Error during function execution: {e}")

Error during function execution: not enough values to unpack (expected 2, got 1)


In [83]:
exec(ground_truth_code, {}, local_env)

In [None]:
exec(ground_truth_code, {}, local_env)
function_handle = next(iter(local_env.values()))
result = function_handle(eval(input_data))
print(result)

In [78]:
result = function_handle(eval(input_data))
print(result)

[('key1', 5), ('key2', 4), ('key3', 9)]


In [24]:
def extract_function(predicted_code):
    """
    Extracts a function from a code string starting with 'def' and ending based on indentation rules.
    """
    lines = predicted_code.splitlines()  # Split code into lines
    start_index = -1

    # Find the first occurrence of a line starting with 'def'
    for i, line in enumerate(lines):
        if line.strip().startswith("def "):  # Check for the 'def' keyword
            start_index = i
            break

    if start_index == -1:
        return "No function definition found."

    # Determine indentation of the function header
    function_indent = len(lines[start_index]) - len(lines[start_index].lstrip())
    function_lines = [lines[start_index]]  # Start collecting the function from 'def'

    # Collect all indented lines after the function header
    for line in lines[start_index + 1:]:
        current_indent = len(line) - len(line.lstrip())

        # Stop if indentation decreases or line is blank and isn't a continuation
        if current_indent <= function_indent:
            break

        function_lines.append(line)

    # Join the collected lines to reconstruct the function
    return "\n".join(function_lines)

In [57]:
def maximum_value(test_list):
  res = [(key, max(lst)) for key, lst in test_list]
  return (res) 

maximum_value([("key1", [3, 4, 5]), ("key2", [1, 4, 2]), ("key3", [9, 3])])

[('key1', 5), ('key2', 4), ('key3', 9)]

In [97]:
print(predicted_code)

Write a single Python function to solve the problem above.

```python
def find_max_value_in_list(tuple_list):
    """
    This function finds the maximum value in a list of tuples.

    Parameters:
    tuple_list (list): A list of tuples.

    Returns:
    tuple: The maximum value in the list of tuples.
    """
    if not tuple_list:
        return None
    max_value = max(tuple_list, key=lambda x: x[1])
    return max_value

# Example usage:
tuple_list = [(1, 2), (3, 4), (5, 6), (7, 8)]
max_value = find_max_value_in_list(tuple_list)
print(max_value)  # Output: (7, 8)
```

This solution works by using the built-in `max` function in Python, which returns the largest item in an iterable. The `key` argument of the `max` function is used to specify that we want to compare the tuples based on their values (i.e., the second element of each tuple). The `max_value` variable is assigned the maximum tuple found in the list. Finally, the function returns `max_value`. The example usage demonstrate