In [None]:
# Required packages to download, you only need to run this once!
!pip3 install google.generativeai
!pip3 install datasets

In [1]:
import importlib.util
import json
import os
import re
import time

import google.generativeai as genai

# TODO: export your Google API key as the GOOGLE_API_KEY environment variable.
# The key is read from the environment so the secret is never stored in the notebook.
# NOTE(review): never paste a literal key here; any key that was ever saved in this
# file should be treated as leaked and revoked.
api_key = os.environ.get('GOOGLE_API_KEY')
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set. Export your Google API key as the "
        "GOOGLE_API_KEY environment variable before running this cell."
    )
genai.configure(api_key=api_key)
model = genai.GenerativeModel(model_name='gemini-1.5-flash')

def call_google_api(prompt, my_model):
    """
    Method for getting a response from the Gemini API.

    Generation is requested with temperature 0 and at most 6000 output tokens.
    Args:
        - prompt (str): The input instruction for the language model.
        - my_model: The Gemini model instance.
    Returns:
        str: The generated response, or None if no response is available.
    Raises:
        Whatever `my_model.generate_content` raises; the request itself is not
        wrapped in a try block, so those errors reach the caller.
    """
    google_model_config = genai.types.GenerationConfig(temperature=0, max_output_tokens=6000)
    completion = my_model.generate_content(prompt, generation_config=google_model_config)
    try:
        return completion.text
    except Exception as e:
        print("Gemini response error: " + str(e))

    # Fallback: `completion.parts` is a sequence of parts, so the text has to be
    # collected from each part rather than looked up on the container itself.
    try:
        part_texts = [getattr(part, 'text', '') for part in completion.parts]
    except Exception:
        return None
    combined = "".join(text for text in part_texts if text)
    # Keep the documented contract: no usable text means None, not "".
    return combined or None

def clean_generated_code(generated_code, language):
    """
    Strip docstrings and Markdown code-fence lines from LLM-generated code.

    Docstrings that directly follow a `def ...:` line (triple single or triple
    double quotes) are removed, a blank line left right after the `def` line is
    collapsed, and every line that contains "```<language>" or starts with
    "```" is dropped.
    Args:
        - generated_code (str): The raw code output from the LLM.
        - language (str): A string indicating the language of the code (e.g., "python3").
    Returns:
        str: Cleaned LLM-generated code ("" when the input is empty or None).
    """
    if not generated_code:
        return ""

    # Applied in order: '''-docstrings, """-docstrings, then the blank line after `def`.
    substitutions = (
        (r"(def[^\n]+:\s*)('''[\s\S]*?''')", r"\1"),
        (r'(def[^\n]+:\s*)("""[\s\S]*?""")', r"\1"),
        (r"(def[^\n]+:\n)\s*\n", r"\1"),
    )
    stripped = generated_code
    for pattern, replacement in substitutions:
        stripped = re.sub(pattern, replacement, stripped)

    fence_with_language = f"```{language}"

    def is_fence(line):
        return fence_with_language in line or line.strip().startswith("```")

    return "\n".join(line for line in stripped.split('\n') if not is_fence(line))

def get_llm_response(prompt, max_attempts=5, retry_delay=5):
    """
    Wrapper method for retrieving and cleaning LLM-generated code using the two functions above.
    1. call_google_api: Gets a response from the Gemini 1.5 model via the Google API.
    2. clean_generated_code: Cleans the generated code by removing code block markers.

    The request is retried when the API call raises or when the model returns
    no usable text (None or the literal string "none").
    Args:
        - prompt (str): The code generation prompt.
        - max_attempts (int): Maximum number of attempts (default 5).
        - retry_delay (float): Seconds to wait between attempts (default 5).

    Returns:
        str: Cleaned code if successful, otherwise None.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            res = call_google_api(prompt, model)
        except Exception as e:
            # Report the failure instead of swallowing it; `res` is set so the
            # check below never sees an unbound name.
            print(f"Gemini API call failed (attempt {attempt}/{max_attempts}): {e}")
            res = None

        if res is not None and res.lower() != 'none':
            return clean_generated_code(res, 'python3')

        print("llm did not respond for problem")
        # No point in waiting after the final attempt.
        if attempt < max_attempts:
            time.sleep(retry_delay)
    return None

# Do NOT change this prompt template
# Placeholders: `{language}` and `{problem_stmt}` are filled in with `str.format`
# when a prompt is built for a problem statement.
# NOTE(review): the example code fence opened under "Output Format" has no closing
# fence line; it is left untouched because the template must not be modified.
CODE_GENERATION_PROMPT_TEMPLATE = """
System:
## Persona
- You are a code generation assistant who specializes in {language}.
- You follow strict guidelines for producing high-quality, readable, and correct code.

## Instructions
- You will be given a coding question specification, which consists of function signatures, and docstrings.
- Your task is to **generate the complete, correct {language} code** based on the provided docstring and requirements.
- You must think step by step when generating the {language} code.

## Output Format
- Your **final code** should be enclosed in a code block, for example:
  ```{language}
  # your code here
- Do not add additional text or commentary outside of the code block.

User:
### Coding Question Specification
{problem_stmt}
"""

In [4]:
"""Example prompt for instructing an LLM to generate a function named `same_chars`. Describes the intended functionality and expected behavior of the code."""

# Problem statement (function signature plus a docstring with doctest-style examples)
# that is substituted into the prompt template as `problem_stmt`.
original_stmt = '''def same_chars(s0: str, s1: str):
    """
    Check if the words have the same characters.
    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddeddabc')
    True
    >>> same_chars('abcd', 'dddddddabc')
    True
    >>> same_chars('dddddddabc', 'abcd')
    True
    >>> same_chars('eabcd', 'dddddddabc')
    False
    >>> same_chars('abcd', 'dddddddabce')
    False
    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddddabc')
    False
    """'''

In [5]:
"""Generate code using the Gemini model. Here we build the prompt using the `original_stmt`"""

# Specify the function name and Canvas group number
function_name = 'same_chars'
canvas_group_number = '1'       #TODO:you can change this to your Canvas group name

# LLM-generated code using original_stmt
my_prompt = CODE_GENERATION_PROMPT_TEMPLATE.format(problem_stmt=original_stmt, language="python3")
llm_code_original = get_llm_response(my_prompt)
# Stop here if nothing was generated: writing None raises a TypeError, and an
# empty file would only fail later, when the function is looked up for testing.
if not llm_code_original:
    raise RuntimeError("No code was generated for the original prompt; re-run this cell.")
print(llm_code_original)

# Save the Python file for testing
# UTF-8 is set explicitly so non-ASCII characters in the generated code are written
# the same way on every platform (Python reads source files as UTF-8 by default).
filename_original = f"hw5-{function_name}-group{canvas_group_number}-original.py"
with open(filename_original, "w", encoding="utf-8") as file:
    file.write(llm_code_original)

def same_chars(s0: str, s1: str):
    dict0 = {}
    dict1 = {}
    for char in s0:
        if char in dict0:
            dict0[char] += 1
        else:
            dict0[char] = 1
    for char in s1:
        if char in dict1:
            dict1[char] += 1
        else:
            dict1[char] = 1
    for key in dict0:
        if key not in dict1 or dict0[key] != dict1[key]:
            return False
    for key in dict1:
        if key not in dict0 or dict1[key] != dict0[key]:
            return False
    return True




In [6]:
"""Example of running test cases on LLM-generated code. You do not need to follow this exact implementation for your code."""

# Load the JSON file for the test cases
with open(f'test_case_{function_name}.json', 'r', encoding='utf-8') as f:
    test_cases = json.load(f)["test_case"]

# Load the saved Python file using function name and file_name
spec = importlib.util.spec_from_file_location(function_name, filename_original)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
function = getattr(module, function_name)

# Run test cases
for idx, case in enumerate(test_cases):
    inputs = case["input"]
    try:
        if isinstance(inputs, (list, tuple)):
            # Try the inputs as positional arguments first; fall back to passing
            # the whole sequence as a single argument.
            try:
                result = function(*inputs)
            except TypeError:
                result = function(inputs)
        else:
            result = function(inputs)
    except Exception as e:
        # An error inside the generated function counts as a failed case
        # instead of aborting the remaining test cases.
        print(f"Test {idx+1} failed: input={inputs}, raised {type(e).__name__}: {e}")
        continue

    if result == case["expected"]:
        print(f"Test case {idx+1} passed.")
    else:
        print(f"Test {idx+1} failed: input={inputs}, expected={case['expected']}, got={result}")

Test 1 failed: input=['eabcdzzzz', 'dddzzzzzzzddeddabc'], expected=True, got=False
Test 2 failed: input=['abcd', 'dddddddabc'], expected=True, got=False
Test 3 failed: input=['dddddddabc', 'abcd'], expected=True, got=False
Test case 4 passed.
Test case 5 passed.
Test case 6 passed.
Test case 7 passed.


In [7]:
"""Example of modifying a prompt to improve LLM-generated code. Compare with `original_stmt` to see what has been added or changed."""

# Revised problem statement: only the summary sentence differs from `original_stmt`
# ("the same set of unique characters" instead of "the same characters");
# the doctest examples are unchanged.
new_stmt = '''def same_chars(s0: str, s1: str):
    """
    Check if two words have the same set of unique characters.
    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddeddabc')
    True
    >>> same_chars('abcd', 'dddddddabc')
    True
    >>> same_chars('dddddddabc', 'abcd')
    True
    >>> same_chars('eabcd', 'dddddddabc')
    False
    >>> same_chars('abcd', 'dddddddabce')
    False
    >>> same_chars('eabcdzzzz', 'dddzzzzzzzddddabc')
    False
    """'''


In [8]:
"""Generate code using the Gemini model. Here we build the prompt using the `new_stmt`"""

# LLM-generated code using new_stmt
my_prompt = CODE_GENERATION_PROMPT_TEMPLATE.format(problem_stmt=new_stmt, language="python3")
llm_code_improved = get_llm_response(my_prompt)
# Stop here if nothing was generated: writing None raises a TypeError, and an
# empty file would only fail later, when the function is looked up for testing.
if not llm_code_improved:
    raise RuntimeError("No code was generated for the improved prompt; re-run this cell.")
print(llm_code_improved)

# Save the Python file for testing
# UTF-8 is set explicitly so non-ASCII characters in the generated code are written
# the same way on every platform (Python reads source files as UTF-8 by default).
filename_improved = f"hw5-{function_name}-group{canvas_group_number}-improved.py"
with open(filename_improved, "w", encoding="utf-8") as file:
    file.write(llm_code_improved)

def same_chars(s0: str, s1: str):
    set0 = set(s0)
    set1 = set(s1)
    return set0 == set1




In [9]:
"""Running test cases for the LLM-generated code using the modified prompt"""

# Load the JSON file for the test cases
with open(f'test_case_{function_name}.json', 'r', encoding='utf-8') as f:
    test_cases = json.load(f)["test_case"]

# Load the saved Python file using function name and file_name
spec = importlib.util.spec_from_file_location(function_name, filename_improved)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
function = getattr(module, function_name)

# Run test cases
for idx, case in enumerate(test_cases):
    inputs = case["input"]
    try:
        if isinstance(inputs, (list, tuple)):
            # Try the inputs as positional arguments first; fall back to passing
            # the whole sequence as a single argument.
            try:
                result = function(*inputs)
            except TypeError:
                result = function(inputs)
        else:
            result = function(inputs)
    except Exception as e:
        # An error inside the generated function counts as a failed case
        # instead of aborting the remaining test cases.
        print(f"Test {idx+1} failed: input={inputs}, raised {type(e).__name__}: {e}")
        continue

    if result == case["expected"]:
        print(f"Test case {idx+1} passed.")
    else:
        print(f"Test {idx+1} failed: input={inputs}, expected={case['expected']}, got={result}")

Test case 1 passed.
Test case 2 passed.
Test case 3 passed.
Test case 4 passed.
Test case 5 passed.
Test case 6 passed.
Test case 7 passed.
