In [None]:
# Required packages to download, you only need to run this once!
!pip3 install google.generativeai
!pip3 install datasets

In [1]:
import os
import re
import time

import google.generativeai as genai

# The Google API key is read from the GOOGLE_API_KEY environment variable so that the
# secret is never stored in (or shared with) the notebook file.
# Set it before starting Jupyter, e.g. `export GOOGLE_API_KEY=<your key>`.
api_key = os.environ.get("GOOGLE_API_KEY")
if not api_key:
    # Fail early with an actionable message instead of a later authentication error.
    raise RuntimeError("GOOGLE_API_KEY is not set; export it before running this notebook.")
genai.configure(api_key=api_key)
model = genai.GenerativeModel(model_name='gemini-1.5-flash')

def call_google_api(prompt, my_model):
    """
    Method for getting a response from the Gemini API.

    The request is sent with temperature 0 and a 6000-token output cap. Errors raised
    by the request itself are NOT caught here; they propagate to the caller.

    Args:
        - prompt (str): The input instruction for the language model.
        - my_model: The Gemini model instance.
    Returns:
        str: The generated response, or None if no response text is available.
    """
    google_model_config = genai.types.GenerationConfig(temperature=0, max_output_tokens=6000)
    completion = my_model.generate_content(prompt, generation_config=google_model_config)

    # Fast path: the `.text` accessor; it raises when the response carries no usable text.
    try:
        return completion.text
    except Exception as e:
        print("Gemini response error: " + str(e))

    # Fallback: `completion.parts` is a sequence of parts (not a single object with a
    # `.text` attribute), so collect the text of every part that has some.
    try:
        part_texts = [part.text for part in completion.parts if getattr(part, "text", None)]
    except Exception:
        # Accessing the parts can itself fail (e.g. no candidates were returned).
        return None
    return "".join(part_texts) if part_texts else None

def clean_generated_code(generated_code, language):
    """
    Strip docstrings and Markdown code-fence lines from LLM-generated code.

    Args:
        - generated_code (str): Raw text returned by the LLM (may be None or empty).
        - language (str): Language tag used on the opening fence (e.g., "python3").
    Returns:
        str: The cleaned code; an empty string when `generated_code` is falsy.
    """
    if not generated_code:
        return ""

    # Drop a triple-quoted docstring that directly follows a `def ...:` header,
    # once for each quote style.
    stripped = generated_code
    for docstring_pattern in (
        r"(def[^\n]+:\s*)('''[\s\S]*?''')",
        r'(def[^\n]+:\s*)("""[\s\S]*?""")',
    ):
        stripped = re.sub(docstring_pattern, r"\1", stripped)

    # Collapse the blank line(s) left right after a `def ...:` header.
    stripped = re.sub(r"(def[^\n]+:\n)\s*\n", r"\1", stripped)

    # Keep every line except those mentioning the language fence anywhere in the line,
    # or starting (ignoring indentation) with a bare fence.
    fence_with_language = f"```{language}"
    kept_lines = [
        line
        for line in stripped.split('\n')
        if fence_with_language not in line and not line.strip().startswith("```")
    ]
    return "\n".join(kept_lines)

def get_llm_response(prompt):
    """
    Wrapper method for retrieving and cleaning LLM-generated code using the two functions above.
    1. call_google_api: Gets a response from the Gemini model via the Google API.
    2. clean_generated_code: Cleans the generated code by removing docstrings and code block markers.

    A failed attempt (an exception from the API call, or a missing / 'none' response)
    is reported and retried after a 5-second pause.

    Args:
        - prompt (str): The code generation prompt.

    Returns:
        str: Cleaned code if successful, otherwise None (all attempts failed).
    """
    # Maximum 5 attempts (this number can be adjusted as needed).
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        try:
            res = call_google_api(prompt, model)
        except Exception as e:
            # Report the failure instead of silently swallowing it, and treat it
            # like a missing response so `res` is always bound below.
            print(f"llm call failed on attempt {attempt}/{max_attempts}: {e}")
            res = None

        if res is not None and res.lower() != 'none':
            return clean_generated_code(res, 'python3')

        print("llm did not respond for problem")
        if attempt < max_attempts:
            # Back off before retrying; no need to wait after the final attempt.
            time.sleep(5)
    return None

# Do NOT change this prompt template
# Prompt sent to the model for every code-generation request. It has two `str.format`
# placeholders: {language} (target language name) and {problem_stmt} (the function
# signature plus docstring to implement).
# NOTE(review): the example code fence under "Output Format" has no closing fence line;
# left untouched because the template must not be changed.
CODE_GENERATION_PROMPT_TEMPLATE = """
System:
## Persona
- You are a code generation assistant who specializes in {language}.
- You follow strict guidelines for producing high-quality, readable, and correct code.

## Instructions
- You will be given a coding question specification, which consists of function signatures, and docstrings.
- Your task is to **generate the complete, correct {language} code** based on the provided docstring and requirements.
- You must think step by step when generating the {language} code.

## Output Format
- Your **final code** should be enclosed in a code block, for example:
  ```{language}
  # your code here
- Do not add additional text or commentary outside of the code block.

User:
### Coding Question Specification
{problem_stmt}
"""

In [2]:
# Keep the function name as: order_by_points
# Baseline problem statement (signature + docstring only, no body). It is substituted
# for {problem_stmt} in CODE_GENERATION_PROMPT_TEMPLATE to produce the "original" code.

original_stmt = """def order_by_points(nums):
    '''
    Write a function which sorts the given list of integers
    in ascending order according to the sum of their digits.
    Note: if there are several items with similar sum of their digits,
    order them based on their index in original list.

    Examples:
    >>> order_by_points([1, 11, -1, -11, -12]) == [-1, -11, 1, -12, 11]
    >>> order_by_points([]) == []
    '''"""

In [3]:
"""Generate code using the Gemini model. Here we build the prompt using the `original_stmt`"""

# Specify the function name and Canvas group number
function_name = 'order_by_points'
canvas_group_number = '83'       #TODO:you can change this to your Canvas group name

# LLM-generated code using original_stmt
my_prompt = CODE_GENERATION_PROMPT_TEMPLATE.format(problem_stmt=original_stmt, language="python3")
llm_code_original = get_llm_response(my_prompt)
# get_llm_response returns None when every attempt fails. Stop here with a clear message
# rather than truncating the output file and then failing inside file.write().
if llm_code_original is None:
    raise RuntimeError("No code was generated for original_stmt; re-run this cell.")
print(llm_code_original)

# Save the Python file for testing
filename_original = f"hw5-{function_name}-group{canvas_group_number}-original.py"
with open(filename_original, "w") as file:
    file.write(llm_code_original)

def order_by_points(nums):
    if not nums:
        return []

    def sum_digits(n):
        s = 0
        n = abs(n)
        while n:
            s += n % 10
            n //= 10
        return s

    with_indices = list(enumerate(nums))
    with_indices.sort(key=lambda x: (sum_digits(x[1]), x[0]))
    return [x[1] for x in with_indices]




In [6]:
import json
import importlib.util
# Load the JSON file for the test cases
with open(f'test_case_{function_name}.json', 'r') as f:
    test_cases = json.load(f)["test_case_improved"]

# Load the saved Python file using function name and file_name
spec = importlib.util.spec_from_file_location(function_name, filename_original)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
function = getattr(module, function_name)

# Run test cases. Every case is reported as passed or failed; an exception raised by the
# generated code counts as a failure of that case instead of aborting the whole run.
for idx, case in enumerate(test_cases):
    inputs = case["input"]
    try:
        if isinstance(inputs, (list, tuple)):
            # Presumably a case may store either a list of positional arguments or the
            # single list argument itself: try unpacking first, and pass the value
            # whole when the call rejects it with a TypeError.
            try:
                result = function(*inputs)
            except TypeError:
                result = function(inputs)
        else:
            result = function(inputs)
    except Exception as e:
        print(f"Test {idx+1} failed: input={inputs}, raised {type(e).__name__}: {e}")
        continue

    if result == case["expected"]:
        print(f"Test case {idx+1} passed.")
    else:
        print(f"Test {idx+1} failed: input={inputs}, expected={case['expected']}, got={result}")

Test 1 failed: input=[1, 11, -1, -11, -12], expected=[-1, -11, 1, -12, 11], got=[1, -1, 11, -11, -12]
Test case 2 passed.
Test case 3 passed.
Test case 4 passed.
Test case 5 passed.
Test case 6 passed.
Test case 7 passed.
Test case 8 passed.
Test case 9 passed.
Test case 10 passed.


In [None]:
# TODO
# 1. Generate code using `original_stmt`.
# 2. Write test cases (in the provided JSON file) to evaluate the LLM-generated code based on `original_stmt`.
# 3. Run your test cases and demonstrate examples of both failing and passing cases.
#    (You do not have to follow the exact implementation shown in the demo_same_chars file, but you are welcome to reference or reuse parts of it.)
# 4. Write a `new_stmt` that improves the prompt to enhance the accuracy of LLM-generated code.
# 5. Generate code using `new_stmt` and run your test cases, ensuring that the code passes all of them.
# 6. Ensure you write as many test cases as needed to cover all edge cases.
#    We will run the autograder against your final LLM-generated code (using `new_stmt`), ensure that it can pass all autograder test cases.

# Improved problem statement: spells out how the digit sum of a negative number is
# computed (only the leading digit is negative) and adds more worked examples.
# The last example passes a flat list, matching the function's documented input.
new_stmt = """def order_by_points(nums):
    '''
    Write a function which sorts the given list of integers
    in ascending order according to the sum of their digits.
    For each number in the list nums, calculate their given sum based on the sum of digits template that I will be providing below.
    Negative numbers should only count the first digit of the number as negative when calculating the sum.
    You may write a helper function to calculate the sum of digits of a given integer.
    For calculating the sum of digits, you may turn the given integer into a string first for purposes of parsing, but your helper MUST return an integer.
    Note: if there are several items with similar sum of their digits,
    order them based on their index in original list.

    Examples of sum of digits:
    11 == 2
    -11 == -1 + 1 == 0

    

    Examples:
    >>> order_by_points([1, 11, -1, -11, -12]) == [-1, -11, 1, -12, 11]
    >>> order_by_points([]) == []
    >>> order_by_points([7]) == [7]
    >>> order_by_points([0, 1, 100, 10]) == [0, 1, 100, 10]
    >>> order_by_points([15, 6, 12, 3, 9]) == [12, 3, 15, 6, 9]
    >>> order_by_points([0, -0, 1, -1]) == [-1, 0, -0, 1]

    '''"""

In [32]:
"""Generate code using the Gemini model. Here we build the prompt using the `new_stmt`"""

# Specify the function name and Canvas group number
function_name = 'order_by_points'
canvas_group_number = '83'       #TODO:you can change this to your Canvas group name

# LLM-generated code using new_stmt
my_prompt = CODE_GENERATION_PROMPT_TEMPLATE.format(problem_stmt=new_stmt, language="python3")
llm_code_improved = get_llm_response(my_prompt)
# get_llm_response returns None when every attempt fails. Stop here with a clear message
# rather than truncating the output file and then failing inside file.write().
if llm_code_improved is None:
    raise RuntimeError("No code was generated for new_stmt; re-run this cell.")
print(llm_code_improved)

# Save the Python file for testing
filename_improved = f"hw5-{function_name}-group{canvas_group_number}-improved.py"
with open(filename_improved, "w") as file:
    file.write(llm_code_improved)

def sum_digits(n):
    s = 0
    sign = -1 if n < 0 else 1
    n = abs(n)
    s += sign * (n // (10 ** (len(str(n)) - 1)))
    n %= (10 ** (len(str(n)) - 1))
    while n > 0:
        s += n % 10
        n //= 10
    return s

def order_by_points(nums):
    if not nums:
        return []
    
    with_sums = []
    for i, num in enumerate(nums):
        with_sums.append((sum_digits(num), i, num))
    
    with_sums.sort()
    
    result = [num for _, _, num in with_sums]
    return result




In [33]:
import json
import importlib.util
# Load the JSON file for the test cases
with open(f'test_case_{function_name}.json', 'r') as f:
    test_cases = json.load(f)["test_case_improved"]

# Load the saved Python file using function name and file_name
spec = importlib.util.spec_from_file_location(function_name, filename_improved)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
function = getattr(module, function_name)

# Run test cases. Every case is reported as passed or failed; an exception raised by the
# generated code counts as a failure of that case instead of aborting the whole run.
for idx, case in enumerate(test_cases):
    inputs = case["input"]
    try:
        if isinstance(inputs, (list, tuple)):
            # Presumably a case may store either a list of positional arguments or the
            # single list argument itself: try unpacking first, and pass the value
            # whole when the call rejects it with a TypeError.
            try:
                result = function(*inputs)
            except TypeError:
                result = function(inputs)
        else:
            result = function(inputs)
    except Exception as e:
        print(f"Test {idx+1} failed: input={inputs}, raised {type(e).__name__}: {e}")
        continue

    if result == case["expected"]:
        print(f"Test case {idx+1} passed.")
    else:
        print(f"Test {idx+1} failed: input={inputs}, expected={case['expected']}, got={result}")

Test case 1 passed.
Test case 2 passed.
Test case 3 passed.
Test case 4 passed.
Test case 5 passed.
Test case 6 passed.
Test case 7 passed.
Test case 8 passed.
Test case 9 passed.
Test case 10 passed.
