# Code Generator

Generate documented Python code and pytest unit tests for a given code snippet or description, then run the tests.


In [None]:
! uv pip install pytest==9.0.2

In [None]:
# imports

import os
import io
import sys
import tempfile
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
import subprocess
import pytest
from IPython.display import Markdown, display


In [None]:
load_dotenv(override=True)


def _report_key(found_prefix, missing_message, key, shown):
    """Print whether an API key is set, revealing only its first `shown` characters."""
    print(f"{found_prefix}{key[:shown]}" if key else missing_message)


# Read every provider key from the environment (values in .env take precedence).
openai_api_key = os.getenv('OPENAI_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')
grok_api_key = os.getenv('GROK_API_KEY')
groq_api_key = os.getenv('GROQ_API_KEY')
openrouter_api_key = os.getenv('OPENROUTER_API_KEY')

# Only the OpenAI key is mandatory; the other providers are optional.
_report_key("OpenAI API Key exists and begins ", "OpenAI API Key not set", openai_api_key, 8)
_report_key("Google API Key exists and begins ", "Google API Key not set (and this is optional)", google_api_key, 2)
_report_key("Grok API Key exists and begins ", "Grok API Key not set (and this is optional)", grok_api_key, 4)
_report_key("Groq API Key exists and begins ", "Groq API Key not set (and this is optional)", groq_api_key, 4)
_report_key("OpenRouter API Key exists and begins ", "OpenRouter API Key not set (and this is optional)", openrouter_api_key, 6)



In [None]:
# Connect to client libraries

# The OpenAI client falls back to the OPENAI_API_KEY environment variable when
# api_key is None. Passing a placeholder for unset optional keys prevents the
# OpenAI key from being sent to a different provider's endpoint; requests to
# that provider then simply fail authentication.
_MISSING_API_KEY = "missing-api-key"

# Default client: reads OPENAI_API_KEY from the environment.
openai = OpenAI()

# OpenAI-compatible endpoints of the other providers.
gemini_url = "https://generativelanguage.googleapis.com/v1beta/openai/"
grok_url = "https://api.x.ai/v1"
groq_url = "https://api.groq.com/openai/v1"
openrouter_url = "https://openrouter.ai/api/v1"

gemini = OpenAI(api_key=google_api_key or _MISSING_API_KEY, base_url=gemini_url)
grok = OpenAI(api_key=grok_api_key or _MISSING_API_KEY, base_url=grok_url)
groq = OpenAI(api_key=groq_api_key or _MISSING_API_KEY, base_url=groq_url)
openrouter = OpenAI(api_key=openrouter_api_key or _MISSING_API_KEY, base_url=openrouter_url)



In [None]:
# Model IDs offered in the UI dropdown; the first entry is the default selection.
models = ["gpt-5-mini", "grok-3-fast", "gemini-2.5-flash", "qwen/qwen3-coder:free", "llama-3.3-70b-versatile", "google/gemma-3-27b-it:free"]

# Map each model to its API client (use your own model IDs if needed).
# Keys must match the entries of `models`; the OpenAI entry previously used a
# different ID ("gpt-4o-mini") than the one offered in the dropdown.
clients = {
    "gpt-5-mini": openai,
    "grok-3-fast": grok,
    "gemini-2.5-flash": gemini,
    "qwen/qwen3-coder:free": openrouter,
    "llama-3.3-70b-versatile": groq,
    "google/gemma-3-27b-it:free": openrouter,
}

def get_client(model):
    """Look up the OpenAI-compatible client registered for `model`.

    Models without an entry in `clients` are routed to the default `openai` client.
    """
    try:
        return clients[model]
    except KeyError:
        return openai

In [None]:
# System prompts for code+docstrings and for unit tests

# Interpolated twice into SYSTEM_PROMPT_UNIT_TEST below when that f-string is evaluated.
NUM_UNIT_TESTS = 5  # Fixed number of tests the model must generate

# System prompt for the code-generation step. It asks for raw Python only
# (no markdown fences or surrounding explanations).
SYSTEM_PROMPT_CODE_AND_DOCSTRINGS = """You are an expert Python developer. Your task is to:
1. Generate clean, correct Python code from the user's description or code stub.
2. Add PEP-257 style docstrings (summary, Args, Returns, Raises where relevant).
3. Add brief inline comments for non-obvious logic only.

Output only the Python code. No markdown fences, no explanations before or after. If the user provides a description, implement it; if they provide code, add docstrings and comments to it."""

# System prompt for the test-generation step. The module name `generated_code`
# named here must match the file name (generated_code.py) that run_unit_test
# writes before invoking pytest.
SYSTEM_PROMPT_UNIT_TEST = f"""You are an expert in Python testing. Your task is to generate exactly {NUM_UNIT_TESTS} unit tests for the given Python code.
- Use pytest.
- Import the code from a module named `generated_code` (the code will be in generated_code.py).
- Write exactly {NUM_UNIT_TESTS} test functions. Cover normal cases, edge cases, and expected errors.
- Use clear test names (e.g. test_add_positive_numbers, test_add_negative_raises).
- Output only the test code. No markdown fences, no explanations. The tests will be run in the same directory as generated_code.py."""

In [None]:
def _strip_code_block(text):
    """Remove markdown code fences if present."""
    if not text or not text.strip():
        return text
    s = text.strip()
    for start in ("```python", "```"):
        if s.startswith(start):
            s = s[len(start):].lstrip()
        if s.endswith("```"):
            s = s[:-3].rstrip()
    return s

def generate_code_with_docstrings(user_input, model):
    """Stream Python code with docstrings and comments from the selected model.

    Args:
        user_input: Code stub or natural-language description to implement.
        model: Model ID; selects the client and is passed to the API call.

    Yields:
        The accumulated model output so far with markdown fences stripped.
        Yields a single prompt message when `user_input` is empty, and an
        error message if the API call or the stream raises.
    """
    if not user_input or not user_input.strip():
        yield "Please provide a code snippet or description."
        return
    client = get_client(model)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT_CODE_AND_DOCSTRINGS},
        {"role": "user", "content": user_input.strip()},
    ]
    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
        )
        output = ""
        for chunk in stream:
            # A streamed chunk may carry an empty `choices` list (for example a
            # usage-only chunk); skip it instead of failing with IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            output += delta
            # Yield the whole text so far so the UI shows a growing result.
            yield _strip_code_block(output)
    except Exception as e:
        # UI boundary: surface the failure in the output box instead of raising.
        yield f"Error calling model: {e}"

In [None]:
def generate_unit_test(generated_code, model):
    """Stream pytest unit tests for the given Python code from the selected model.

    Args:
        generated_code: Python source to write tests for.
        model: Model ID; selects the client and is passed to the API call.

    Yields:
        The accumulated test code so far with markdown fences stripped.
        Yields a single prompt message when `generated_code` is empty, and an
        error message if the API call or the stream raises.
    """
    if not generated_code or not generated_code.strip():
        yield "Generate code with docstrings first, then click Generate unit test."
        return
    client = get_client(model)
    user_content = f"""Generate pytest unit tests for this code. The code will be saved in generated_code.py, so import from `generated_code` and test its functions/classes.

Code to test:

{generated_code.strip()}"""
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT_UNIT_TEST},
        {"role": "user", "content": user_content},
    ]
    try:
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
        )
        output = ""
        for chunk in stream:
            # A streamed chunk may carry an empty `choices` list (for example a
            # usage-only chunk); skip it instead of failing with IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            output += delta
            # Yield the whole text so far so the UI shows a growing result.
            yield _strip_code_block(output)
    except Exception as e:
        # UI boundary: surface the failure in the output box instead of raising.
        yield f"Error calling model: {e}"

In [None]:
import re

def _parse_pytest_summary(out):
    """Parse pytest stdout+stderr for passed/failed counts and time. Returns (total, passed, failed, time_str) or None."""
    combined = (out or "").strip()
    passed = 0
    failed = 0
    m_pass = re.search(r"(\d+)\s+passed", combined)
    m_fail = re.search(r"(\d+)\s+failed", combined)
    m_time = re.search(r"in\s+([\d.]+)s", combined)
    if m_pass:
        passed = int(m_pass.group(1))
    if m_fail:
        failed = int(m_fail.group(1))
    total = passed + failed
    time_str = m_time.group(1) + "s" if m_time else None
    if total == 0 and ("error" in combined.lower() or "Error" in combined):
        return None
    return (total, passed, failed, time_str)

def run_unit_test(generated_code, test_code):
    """Write generated code and test to temp files, run pytest, return minimal summary."""
    if not generated_code or not generated_code.strip():
        return "No generated code. Generate code with docstrings first."
    if not test_code or not test_code.strip():
        return "No test code. Click 'Generate unit test' first."
    with tempfile.TemporaryDirectory(prefix="unit_test_") as tmpdir:
        code_path = os.path.join(tmpdir, "generated_code.py")
        test_path = os.path.join(tmpdir, "test_generated.py")
        with open(code_path, "w", encoding="utf-8") as f:
            f.write(_strip_code_block(generated_code))
        with open(test_path, "w", encoding="utf-8") as f:
            f.write(_strip_code_block(test_code))
        try:
            result = subprocess.run(
                [sys.executable, "-m", "pytest", test_path, "-q", "--tb=no"],
                capture_output=True,
                text=True,
                timeout=30,
                cwd=tmpdir,
            )
            out = result.stdout + result.stderr
            summary = _parse_pytest_summary(out)
            if summary is not None:
                total, passed, failed, time_str = summary
                lines = [
                    f"Total tests: {total}",
                    f"Passed: {passed}",
                    f"Failed: {failed}",
                    f"Time taken: {time_str}" if time_str else "Time taken: —",
                    "",
                    "All tests passed." if failed == 0 else "Some tests failed.",
                ]
                return "\n".join(lines)
            return f"Run failed (could not get counts):\n{out[:500]}"
        except subprocess.TimeoutExpired:
            return "Test run timed out (30s)."
        except Exception as e:
            return f"Error running tests: {e}"

In [None]:
# Example inputs: click to load into the code stub / description box
# The list mixes code stubs with plain-English descriptions; each entry is
# passed to gr.Examples as a one-element row in the UI definition.
EXAMPLE_CODES = [
    """def add(a, b):
    return a + b""",
    """A function that takes a list of numbers and returns the mean. Raise ValueError if the list is empty.""",
    """def is_palindrome(s: str) -> bool:
    return s == s[::-1]""",
    """A function factorial(n: int) that returns n! for non-negative n; raise ValueError for n < 0.""",
    """def divide(a: float, b: float) -> float:
    return a / b""",
]

# Gradio UI: model selection → generate code & docstrings → generate unit test → run tests
# Components are laid out in the order they are created inside the Blocks
# context, so the statement order below defines the page layout.

with gr.Blocks(title="Code & Unit Test Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Code & Unit Test Generator\nGenerate Python code with docstrings, then unit tests, and run them.")

    # The selected model ID is passed to both generation callbacks.
    model_dropdown = gr.Dropdown(choices=models, value=models[0], label="Select model")

    # Top row: user input (left column) and generated code (right column).
    with gr.Row():
        with gr.Column():
            user_input = gr.Code(
                label="Code stub or description",
                language="python",
                lines=12,
            )
            # Each example is wrapped in a one-element list: one row per example,
            # one column for the single input component.
            gr.Examples(
                examples=[[ex] for ex in EXAMPLE_CODES],
                inputs=user_input,
                label="Example codes (click to load)",
            )
            gen_code_btn = gr.Button("Generate code & docstrings", variant="primary")
        with gr.Column():
            generated_code = gr.Code(
                label="Generated code (with docstrings/comments)",
                language="python",
                lines=20,
            )

    # generate_code_with_docstrings is a generator, so the output box is
    # updated with each yielded value.
    gen_code_btn.click(
        fn=generate_code_with_docstrings,
        inputs=[user_input, model_dropdown],
        outputs=generated_code,
    )

    # Middle row: buttons for the test-generation and test-run steps.
    with gr.Row():
        gen_test_btn = gr.Button("Generate unit test", variant="secondary")
        run_test_btn = gr.Button("Run unit test", variant="stop")

    # Bottom row: generated test code and the text summary of the pytest run.
    with gr.Row():
        test_code = gr.Code(
            label="Generated unit test",
            language="python",
            lines=25,
        )
        test_result = gr.Textbox(
            label="Test run result",
            lines=15,
            interactive=False,
        )

    # Tests are generated from the current contents of the generated-code box.
    gen_test_btn.click(
        fn=generate_unit_test,
        inputs=[generated_code, model_dropdown],
        outputs=test_code,
    )
    # Runs the current contents of both code boxes through run_unit_test.
    run_test_btn.click(
        fn=run_unit_test,
        inputs=[generated_code, test_code],
        outputs=test_result,
    )

    gr.Markdown("**Flow:** 1) Select model, enter code/description → 2) Generate code & docstrings → 3) Generate unit test → 4) Run unit test to verify.")

# Start the app; inbrowser=True asks Gradio to open it in a browser tab.
demo.launch(inbrowser=True)