# Python Code Documentation Assistant

The requirement: use a frontier model to add docstrings and comments to your Python code.


## Imports

In [None]:
!pip install -U -q "google-genai"

In [None]:
import os
import io
import sys
from dotenv import load_dotenv
from openai import OpenAI
from google import genai
from google.genai import types
import anthropic
from IPython.display import Markdown, display, update_display
import gradio as gr
import subprocess

## Environment

In [None]:
# Load variables from a .env file (called with override=True) and read the
# three provider keys from the process environment.
load_dotenv(override=True)
openai_api_key = os.getenv('OPENAI_API_KEY')
anthropic_api_key = os.getenv('ANTHROPIC_API_KEY')
google_api_key = os.getenv('GOOGLE_API_KEY')


def _report_key(provider, key, prefix_len):
    """Print whether *key* is set, echoing only its first *prefix_len* characters."""
    if key:
        print(f"{provider} API Key exists and begins with: {key[:prefix_len]}")
    else:
        print(f"{provider} API Key not set")


_report_key("OpenAI", openai_api_key, 8)
_report_key("Anthropic", anthropic_api_key, 7)
_report_key("Google", google_api_key, 4)

In [None]:
# One client object per provider. No credentials are passed explicitly here;
# presumably each SDK reads its key from the environment — TODO confirm.
openai = OpenAI()
claude = anthropic.Anthropic()
gemini = genai.Client()

# Model identifiers used by the annotate_with_* and stream_* helpers.
OPENAI_MODEL = "o4-mini"
CLAUDE_MODEL = "claude-3-7-sonnet-latest"
GEMINI_MODEL = "gemini-2.5-flash"

## Prompts

In [None]:
# System prompt shared by all three providers: it asks for docstrings and
# comments only, with no change to logic, and a code-only reply.
system_message = """
You are an assistant that documents Python code.  
Your task:  
- Add concise, clear, and informative docstrings to functions, classes, and modules.  
- Add inline comments only where they improve readability or clarify intent.  
- Do not modify the code logic or structure.  
- Respond with Python code only.  
"""

In [None]:
def user_prompt_for(python):
    """Build the user-turn prompt asking the model to document *python*.

    Args:
        python: Source code to annotate, as a string.

    Returns:
        The fixed instruction line followed by the code, unmodified.
    """
    instruction = "Add docstrings and comments to the following Python code:\n"
    return instruction + python

In [None]:
def messages_for(python):
    """Return the two-message chat transcript used for the OpenAI call.

    Args:
        python: Source code to annotate, as a string.

    Returns:
        A list with the system message followed by the user message built
        by ``user_prompt_for``.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": user_prompt_for(python)}
    return [system_turn, user_turn]

## Helper functions

In [None]:
def write_output(python, filename_suffix):
    """Strip Markdown code fences from a model reply and save it as a .py file.

    Args:
        python: The model's reply text, possibly wrapped in ``` fences.
        filename_suffix: Text inserted into the file name after "annotated_".

    Returns:
        The name of the file written (relative to the working directory).
    """
    target = f"annotated_{filename_suffix}.py"

    # Remove the opening fence first so its "```" prefix is not left with a
    # dangling "python" word, then any remaining bare fences.
    cleaned = python
    for fence in ("```python", "```"):
        cleaned = cleaned.replace(fence, "")

    with open(target, "w") as handle:
        handle.write(cleaned)

    print(f"\nWritten code to {target}")
    return target

In [None]:
def annotate_with_gpt(python, task_name):
    """Annotate *python* with the OpenAI model, echoing the reply as it streams.

    Args:
        python: Source code to annotate, as a string.
        task_name: Label used in the output file name ("<task_name>_gpt").

    Returns:
        The file name returned by ``write_output``.
    """
    response_stream = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages_for(python),
        stream=True,
    )
    pieces = []
    for event in response_stream:
        # A chunk's delta content may be None; treat that as empty text.
        piece = event.choices[0].delta.content or ""
        pieces.append(piece)
        print(piece, end='', flush=True)
    return write_output("".join(pieces), f"{task_name}_gpt")

In [None]:
def annotate_with_claude(python, task_name):
    """Annotate *python* with the Claude model, echoing the reply as it streams.

    Args:
        python: Source code to annotate, as a string.
        task_name: Label used in the output file name ("<task_name>_claude").

    Returns:
        The file name returned by ``write_output``.
    """
    request = claude.messages.stream(
        model=CLAUDE_MODEL,
        max_tokens=2000,
        system=system_message,
        messages=[{"role": "user", "content": user_prompt_for(python)}],
    )
    pieces = []
    with request as stream:
        for piece in stream.text_stream:
            pieces.append(piece)
            print(piece, end="", flush=True)
    return write_output("".join(pieces), f"{task_name}_claude")

In [None]:
def annotate_with_gemini(python, task_name):
    """Annotate *python* with the Gemini model in a single, non-streamed call.

    Args:
        python: Source code to annotate, as a string.
        task_name: Label used in the output file name ("<task_name>_gemini").

    Returns:
        The file name returned by ``write_output``.
    """
    generation_config = types.GenerateContentConfig(
        system_instruction=system_message,
    )
    response = gemini.models.generate_content(
        model=GEMINI_MODEL,
        contents=user_prompt_for(python),
        config=generation_config,
    )

    print(response.text)
    return write_output(response.text, f"{task_name}_gemini")

# Run the Annotator

## Pi example

In [None]:
# Sample input for the annotator: a small script that evaluates an
# alternating series over 100 million iterations and times the run.
pi = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

In [None]:
# Annotate the pi script with the OpenAI model; gpt_pi holds the name of
# the file the annotated code was written to.
gpt_pi = annotate_with_gpt(pi, "pi")

In [None]:
# Check that the annotated script still runs.
# NOTE: this executes model-generated code in the notebook's namespace;
# read the annotated file before running it.
with open(gpt_pi) as annotated_file:
    exec(annotated_file.read())

In [None]:
# Annotate the same pi script with Claude; claude_pi holds the name of the
# file the annotated code was written to.
claude_pi = annotate_with_claude(pi, "pi")

In [None]:
# Check that Claude's annotated script still runs.
# NOTE: this executes model-generated code in the notebook's namespace;
# read the annotated file before running it.
with open(claude_pi) as annotated_file:
    exec(annotated_file.read())

In [None]:
# Annotate the same pi script with Gemini; gemini_pi holds the name of the
# file the annotated code was written to.
gemini_pi = annotate_with_gemini(pi, "pi")

In [None]:
# Check that Gemini's annotated script still runs.
# NOTE: this executes model-generated code in the notebook's namespace;
# read the annotated file before running it.
with open(gemini_pi) as annotated_file:
    exec(annotated_file.read())

## Hard example

In [None]:
# Harder sample input for the annotator: an LCG-driven, brute-force
# maximum-subarray benchmark summed over 20 runs. Also used as the default
# contents of the code editor in the Gradio app.
python_hard = """# Be careful to support large number sizes

def lcg(seed, a=1664525, c=1013904223, m=2**32):
    value = seed
    while True:
        value = (a * value + c) % m
        yield value
        
def max_subarray_sum(n, seed, min_val, max_val):
    lcg_gen = lcg(seed)
    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
    max_sum = float('-inf')
    for i in range(n):
        current_sum = 0
        for j in range(i, n):
            current_sum += random_numbers[j]
            if current_sum > max_sum:
                max_sum = current_sum
    return max_sum

def total_max_subarray_sum(n, initial_seed, min_val, max_val):
    total_sum = 0
    lcg_gen = lcg(initial_seed)
    for _ in range(20):
        seed = next(lcg_gen)
        total_sum += max_subarray_sum(n, seed, min_val, max_val)
    return total_sum

# Parameters
n = 10000         # Number of random numbers
initial_seed = 42 # Initial seed for the LCG
min_val = -10     # Minimum value of random numbers
max_val = 10      # Maximum value of random numbers

# Timing the function
import time
start_time = time.time()
result = total_max_subarray_sum(n, initial_seed, min_val, max_val)
end_time = time.time()

print("Total Maximum Subarray Sum (20 runs):", result)
print("Execution Time: {:.6f} seconds".format(end_time - start_time))
"""

In [None]:
# Run the original, un-annotated script for a baseline result and timing.
# Because this runs at cell top level, the names the script defines (lcg,
# max_subarray_sum, total_max_subarray_sum, n, result, ...) end up in the
# notebook namespace.
exec(python_hard)

In [None]:
# Annotate the hard example with the OpenAI model; gpt_hard holds the name
# of the file the annotated code was written to.
gpt_hard = annotate_with_gpt(python_hard, "hard")

In [None]:
# Check that the OpenAI-annotated hard example still runs.
# NOTE: this executes model-generated code in the notebook's namespace;
# read the annotated file before running it.
with open(gpt_hard) as annotated_file:
    exec(annotated_file.read())

In [None]:
# Annotate the hard example with Gemini; gemini_hard holds the name of the
# file the annotated code was written to.
gemini_hard = annotate_with_gemini(python_hard, "hard")

In [None]:
# Check that the Gemini-annotated hard example still runs.
# NOTE: this executes model-generated code in the notebook's namespace;
# read the annotated file before running it.
with open(gemini_hard) as annotated_file:
    exec(annotated_file.read())

In [None]:
# Annotate the hard example with Claude; claude_hard holds the name of the
# file the annotated code was written to.
claude_hard = annotate_with_claude(python_hard, "hard")

In [None]:
# Check that the Claude-annotated hard example still runs.
# NOTE: this executes model-generated code in the notebook's namespace;
# read the annotated file before running it.
with open(claude_hard) as annotated_file:
    exec(annotated_file.read())

In [None]:
"""
This module implements a Linear Congruential Generator (LCG) and uses it
to generate random numbers for calculating the maximum subarray sum.
It includes functions for the LCG, finding the maximum subarray sum, and
aggregating results over multiple runs.
"""

def lcg(seed, a=1664525, c=1013904223, m=2**32):
    """
    Endless Linear Congruential Generator.

    Each value is derived from the previous one as
    ``next = (a * previous + c) % m``, starting from ``seed``. The seed
    itself is never yielded.

    Args:
        seed (int): Starting state of the generator.
        a (int, optional): Multiplier. Defaults to 1664525.
        c (int, optional): Increment. Defaults to 1013904223.
        m (int, optional): Modulus. Defaults to 2**32.

    Yields:
        int: The next state of the sequence.
    """
    state = (a * seed + c) % m
    while True:
        yield state
        # Advance only after the consumer has taken the current value.
        state = (a * state + c) % m

def max_subarray_sum(n, seed, min_val, max_val):
    """
    Return the largest sum of any contiguous run in a pseudorandom list.

    ``n`` numbers are drawn from ``lcg(seed)`` and folded into the inclusive
    range [min_val, max_val]. Every (start, end) pair is then examined, so
    the work grows quadratically with ``n``.

    Args:
        n (int): How many numbers to generate.
        seed (int): Seed passed to ``lcg``.
        min_val (int): Smallest value a generated number may take.
        max_val (int): Largest value a generated number may take.

    Returns:
        The best contiguous-subarray sum, or ``float('-inf')`` when no
        numbers are generated.
    """
    source = lcg(seed)
    span = max_val - min_val + 1  # count of distinct values in the range
    values = [next(source) % span + min_val for _ in range(n)]

    # -inf guarantees the first real sum replaces it, even if all values are negative.
    best = float('-inf')

    for start in range(n):
        running = 0
        # Extend the window one element at a time from `start`.
        for end in range(start, n):
            running += values[end]
            if running > best:
                best = running
    return best

def total_max_subarray_sum(n, initial_seed, min_val, max_val):
    """
    Add up the results of 20 ``max_subarray_sum`` runs.

    The seed for each run is the next value drawn from ``lcg(initial_seed)``.

    Args:
        n (int): How many numbers each run generates.
        initial_seed (int): Seed for the generator that produces per-run seeds.
        min_val (int): Smallest value a generated number may take.
        max_val (int): Largest value a generated number may take.

    Returns:
        The sum of the 20 per-run maximum subarray sums.
    """
    seed_source = lcg(initial_seed)
    return sum(
        max_subarray_sum(n, next(seed_source), min_val, max_val)
        for _ in range(20)
    )

# Wall-clock timing comes from the standard library.
import time

# Simulation parameters
n = 10000          # numbers generated per run
initial_seed = 42  # seed for the generator that produces per-run seeds
min_val = -10      # lower bound of the generated numbers
max_val = 10       # upper bound of the generated numbers

# Time only the computation itself.
start_time = time.time()
result = total_max_subarray_sum(n, initial_seed, min_val, max_val)
end_time = time.time()

# Report the aggregate result, then the elapsed time to six decimals.
print("Total Maximum Subarray Sum (20 runs):", result)
print("Execution Time: {:.6f} seconds".format(end_time - start_time))

# Streaming

In [None]:
def stream_gpt(python):
    """Stream an OpenAI annotation of *python* for the UI.

    Args:
        python: Source code to annotate, as a string.

    Yields:
        The full reply received so far, with Markdown code fences removed,
        once per streamed chunk.
    """
    chunks = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages_for(python),
        stream=True,
    )
    accumulated = ""
    for chunk in chunks:
        # A chunk's delta content may be None; treat that as empty text.
        accumulated += chunk.choices[0].delta.content or ""
        without_opening_fence = accumulated.replace('```python\n', '')
        yield without_opening_fence.replace('```', '')

In [None]:
def stream_claude(python):
    """Stream a Claude annotation of *python* for the UI.

    Args:
        python: Source code to annotate, as a string.

    Yields:
        The full reply received so far, with Markdown code fences removed,
        once per streamed text fragment.
    """
    request = claude.messages.stream(
        model=CLAUDE_MODEL,
        max_tokens=2000,
        system=system_message,
        messages=[{"role": "user", "content": user_prompt_for(python)}],
    )
    accumulated = ""
    with request as stream:
        for piece in stream.text_stream:
            accumulated += piece
            without_opening_fence = accumulated.replace('```python\n', '')
            yield without_opening_fence.replace('```', '')

In [None]:
def stream_gemini(python):
    """Stream a Gemini annotation of *python* for the UI.

    Args:
        python: Source code to annotate, as a string.

    Yields:
        The full reply received so far, with Markdown code fences removed,
        once per streamed chunk.
    """
    stream = gemini.models.generate_content_stream(
        model=GEMINI_MODEL,
        contents=user_prompt_for(python),
        config=types.GenerateContentConfig(
            system_instruction=system_message,
        ),
    )
    reply = ""
    for chunk in stream:
        # A chunk without text parts reports text as None; treat it as empty
        # (as stream_gpt does) instead of failing on str + None.
        reply += chunk.text or ""
        yield reply.replace('```python\n', '').replace('```', '')

In [None]:
def annotate(python, model):
    """Stream an annotation of *python* from the provider named by *model*.

    Args:
        python: Source code to annotate, as a string.
        model: Provider label: "GPT" (or the UI label "GPT-4"), "Claude",
            or "Gemini".

    Yields:
        The cleaned reply accumulated so far, as produced by the matching
        ``stream_*`` generator.

    Raises:
        ValueError: If *model* is not a recognised label. Because this is a
            generator, the error surfaces on the first iteration.
    """
    # "GPT-4" is accepted as well because the model dropdown offers that label.
    if model in ("GPT", "GPT-4"):
        result = stream_gpt(python)
    elif model == "Claude":
        result = stream_claude(python)
    elif model == "Gemini":
        result = stream_gemini(python)
    else:
        raise ValueError(f"Unknown model: {model!r}")
    yield from result

In [None]:
def execute_python(code):
    """Run *code* and return everything it printed to stdout.

    The code runs in a fresh namespace whose ``__name__`` is "__main__".
    A single dict serves as both globals and locals, so functions defined
    by the script can call one another. The script does not see, and cannot
    modify, the caller's variables.

    NOTE: this executes arbitrary code with the privileges of the current
    process; only use it on code you have read.

    Args:
        code: Python source to execute, as a string.

    Returns:
        The text written to stdout while the code ran.

    Raises:
        Exception: Whatever the executed code raises is propagated; stdout
            is restored first.
    """
    from contextlib import redirect_stdout

    buffer = io.StringIO()
    namespace = {"__name__": "__main__"}
    # redirect_stdout puts back whatever stream was active before the call,
    # not sys.__stdout__, so notebook and test-runner capture keep working.
    with redirect_stdout(buffer):
        exec(code, namespace)
    return buffer.getvalue()

# Gradio App

In [None]:
# Background colour rule for elements carrying the "python" CSS class.
# NOTE(review): nothing visible in this notebook passes `css` to Gradio;
# the Blocks layout supplies its own inline CSS. Confirm whether this is
# still needed.
css = """
.python {background-color: #306998;}
"""

In [None]:
import gradio as gr

# Editor sizing: both code panes are pinned to the same pixel height so the
# input and the annotated output line up side by side.
LINES = 25
LINE_HEIGHT = 20  # px, typical CodeMirror line height
PADDING = 10      # px, top + bottom padding

CODE_HEIGHT = LINES * LINE_HEIGHT + PADDING


with gr.Blocks(
    theme=gr.themes.Soft(),
    css=f"""
#code_input .cm-editor, #annotated_code .cm-editor {{
    height: {CODE_HEIGHT}px !important;
    overflow-y: auto !important;
}}
"""
) as demo_v2:
    gr.Markdown("## 🐍 Annotate Python Code with Docstrings and Comments")

    # Row 1: editable source on the left, read-only annotated result on the right.
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Python code:")
            code_input = gr.Code(
                language="python",
                value=python_hard,
                elem_id="code_input"
            )

        with gr.Column(scale=1):
            gr.Markdown("### Annotated code:")
            annotated_output = gr.Code(
                language="python",
                elem_id="annotated_code",
                interactive=False
            )

    # Row 2: model picker and action buttons.
    with gr.Row():
        with gr.Column(scale=1):
            # The labels must match the strings `annotate` dispatches on
            # ("GPT", "Claude", "Gemini"); any other label raises ValueError.
            model_dropdown = gr.Dropdown(
                choices=["Gemini", "GPT", "Claude"],
                value="Gemini",
                label="Select model"
            )
        with gr.Column(scale=1):
            annotate_btn = gr.Button("✨ Annotate code", variant="primary")
            run_btn = gr.Button("▶️ Run Python", variant="secondary")

    # Row 3: captured stdout of the executed code.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Python result:")
            result_output = gr.Textbox(
                lines=5,
                label="Output",
                interactive=False
            )

    # `annotate` is a generator, so the annotated pane updates as text streams in.
    annotate_btn.click(
        annotate,
        inputs=[code_input, model_dropdown],
        outputs=[annotated_output]
    )
    # Runs the annotated code (not the original input) and shows what it printed.
    run_btn.click(execute_python, inputs=[annotated_output], outputs=[result_output])


demo_v2.launch(inbrowser=True)
