In [14]:
import subprocess
import tempfile
import importlib
import time
import sys
import os
import shutil
import ctypes
from pathlib import Path

from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr

In [15]:
# Load variables from a .env file into the process environment
# (get_client reads HF_TOKEN from os.environ).
load_dotenv()

# Register models
# Keys are the labels offered in the UI dropdown; each entry holds the model
# "id" passed to the chat-completions API and the sampling "temperature"
# used when translating with that model.
MODELS = {
    "Kimi K2 Thinking": {"id": "moonshotai/Kimi-K2-Thinking", "temperature": 1.0},
    "GLM-4.6":          {"id": "zai-org/GLM-4.6",             "temperature": 0.7},
    "Qwen3 235B":       {"id": "Qwen/Qwen3-235B-A22B",        "temperature": 0.7},
    "DeepSeek-R1":      {"id": "deepseek-ai/DeepSeek-R1",     "temperature": 0.6},
    "DeepSeek-V3.2":    {"id": "deepseek-ai/DeepSeek-V3.2",   "temperature": 0.7},
}


In [16]:
def get_client() -> OpenAI:
    """Build an OpenAI-compatible client pointed at the Hugging Face router.

    The API key is taken from the ``HF_TOKEN`` environment variable; a
    missing variable raises ``KeyError``.
    """
    hf_token = os.environ["HF_TOKEN"]
    router_url = "https://router.huggingface.co/v1"
    return OpenAI(base_url=router_url, api_key=hf_token)


In [17]:
def strip_fences(code: str) -> str:
    """Extract source code from LLM output that may be wrapped in markdown fences.

    Behaviour:
    - ``None`` or empty input yields ``""``.
    - With no fence lines, the stripped text is returned unchanged.
    - With fence lines, only the lines *inside* fenced blocks are kept, so
      prose before/after the code ("Here is the code:") is dropped. Several
      fenced blocks are concatenated in order. An unterminated final fence
      (e.g. truncated output) keeps everything up to the end.
    - If the fenced region turns out to be empty (e.g. a stray closing fence
      after unfenced code), fall back to simply dropping the fence lines.
    """
    if not code:
        return ""

    lines = code.strip().splitlines()
    fence_flags = [line.strip().startswith("```") for line in lines]

    if not any(fence_flags):
        return "\n".join(lines)

    inside = False
    fenced: list = []
    for line, is_fence in zip(lines, fence_flags):
        if is_fence:
            inside = not inside  # every fence line toggles in/out of a block
        elif inside:
            fenced.append(line)

    if any(line.strip() for line in fenced):
        return "\n".join(fenced)

    # Nothing useful inside the fences: keep every non-fence line instead.
    return "\n".join(
        line for line, is_fence in zip(lines, fence_flags) if not is_fence
    )


In [18]:
def find_symbol(lib_path: str, function_name: str) -> str | None:
    """
    Use `nm` to find the real exported symbol name in a compiled library.
    Handles macOS leading-underscore mangling and minor name variations.
    """
    r = subprocess.run(["nm", "-gU", lib_path], capture_output=True, text=True)
    for line in r.stdout.splitlines():
        # Only look at text (T) symbols ‚Äî exported functions
        if " T " not in line and " t " not in line:
            continue
        parts = line.strip().split()
        if not parts:
            continue
        raw = parts[-1]
        # Strip leading underscore added by macOS linker
        clean = raw.lstrip("_")
        if clean == function_name:
            return clean
        # Fuzzy: symbol contains the function name
        if function_name in clean:
            return clean
    return None



In [19]:
def translate_code(client: OpenAI, model_id: str, temperature: float,
                   target: str, python_code: str, function_name: str) -> str:
    """Ask the model to translate `python_code` into the `target` language.

    Args:
        client: OpenAI-compatible client used for the chat-completions call.
        model_id: Model identifier sent to the API.
        temperature: Sampling temperature for the request.
        target: One of ``"cpp"``, ``"rust"`` or ``"cython"``.
        python_code: Source of the Python function to translate.
        function_name: Name the translated function must keep.

    Returns:
        The model's reply with markdown fences removed by ``strip_fences``.

    Raises:
        KeyError: If `target` is not a supported language.
        ValueError: If the model returns no text content.
    """
    base = f"The function MUST be named exactly `{function_name}`."
    prompts = {
        "cpp": f"""Convert this Python function to a standalone, high-performance C++ function.
Requirements:
- Include all necessary headers
- Expose via extern "C" so it is callable from Python via ctypes
- Use modern C++17
- {base}
- Optimize for speed: prefer stack allocation, avoid heap where possible
- Return ONLY valid C++ source code, no explanation, no markdown

Python code:
{python_code}""",

        "rust": f"""Convert this Python function to high-performance Rust.
Requirements:
- Write a single lib.rs file for a cdylib crate
- Annotate the function with #[no_mangle] and pub extern "C"
- {base}
- Use unsafe only if strictly necessary
- Optimize for speed
- Return ONLY valid Rust source code, no explanation, no markdown

Python code:
{python_code}""",

        "cython": f"""Convert this Python function to optimized Cython (.pyx file).
Requirements:
- Use static typing with cdef / cpdef throughout
- cimport C libraries where beneficial
- {base}
- Optimize for speed
- Return ONLY valid Cython source code, no explanation, no markdown

Python code:
{python_code}""",
    }

    if target not in prompts:
        raise KeyError(
            f"Unsupported target {target!r}; expected one of {sorted(prompts)}"
        )

    response = client.chat.completions.create(
        model=model_id,
        max_tokens=2000,
        temperature=temperature,
        messages=[{"role": "user", "content": prompts[target]}],
    )

    # The content field can be None or empty; fail with a clear message
    # instead of crashing inside strip_fences.
    content = response.choices[0].message.content
    if not content or not content.strip():
        raise ValueError(f"Model {model_id!r} returned no content for target {target!r}")
    return strip_fences(content)


In [20]:
# Benchmark helpers 

def benchmark_python(python_code: str, function_name: str, test_args: tuple, n: int = 1000) -> tuple:
    """Time the pure-Python baseline.

    Executes `python_code` in a fresh namespace, looks up `function_name`
    in it and calls it `n` times with `test_args`.

    Returns:
        ``(average_seconds_per_call, None)``; the second slot mirrors the
        ``(time, error)`` shape used by the other benchmark helpers.

    Raises:
        KeyError: If `python_code` does not define `function_name`.

    NOTE(security): ``exec`` runs whatever source it is given with full
    interpreter privileges; only feed it trusted code.
    """
    scope: dict = {}
    exec(python_code, scope)  # noqa: S102
    target = scope[function_name]

    t0 = time.perf_counter()
    for _ in range(n):
        target(*test_args)
    elapsed = time.perf_counter() - t0

    return elapsed / n, None


In [21]:
def benchmark_cpp(code: str, function_name: str, test_args: tuple, n: int = 1000) -> tuple:
    """Compile C++ source into a shared library and time `function_name` via ctypes.

    Args:
        code: C++ source expected to export `function_name`.
        function_name: Name of the function to look up in the built library.
        test_args: Positional arguments passed on every call.
        n: Number of timed calls.

    Returns:
        ``(average_seconds_per_call, None)`` on success, or ``(None, message)``
        when the compiler is missing, compilation fails, the symbol cannot be
        found, or loading/calling the function raises.

    NOTE(review): every argument is declared as ``c_long`` and the return type
    as ``c_double`` regardless of the real signature; the return value is
    discarded, but non-integer `test_args` will be rejected by ctypes.
    """
    if shutil.which("g++") is None:
        return None, "g++ not found - install a C++ compiler and make sure it is on PATH"

    with tempfile.TemporaryDirectory() as tmp:
        src = Path(tmp) / "perf.cpp"
        lib = Path(tmp) / "perf.so"
        src.write_text(code, encoding="utf-8")

        r = subprocess.run(
            ["g++", "-O3", "-march=native", "-shared", "-fPIC",
             "-std=c++17", "-o", str(lib), str(src)],
            capture_output=True, text=True,
        )
        if r.returncode != 0:
            return None, f"Compile error:\n{r.stderr[:400]}"

        # Symbol lookup, library loading and the calls themselves can all
        # raise (missing nm, unresolved symbol, wrong argument types); report
        # those through the (None, message) channel like benchmark_cython does.
        try:
            symbol = find_symbol(str(lib), function_name)
            if not symbol:
                exported = subprocess.run(
                    ["nm", "-gU", str(lib)], capture_output=True, text=True
                ).stdout[:300]
                return None, (
                    f"Symbol '{function_name}' not found. Exported symbols:\n" + exported
                )

            h = ctypes.CDLL(str(lib))
            fn = getattr(h, symbol)
            fn.restype = ctypes.c_double
            fn.argtypes = [ctypes.c_long] * len(test_args)

            start = time.perf_counter()
            for _ in range(n):
                fn(*test_args)
            return (time.perf_counter() - start) / n, None
        except Exception as exc:
            return None, str(exc) or type(exc).__name__


In [22]:
def benchmark_rust(code: str, function_name: str, test_args: tuple, n: int = 1000) -> tuple:
    """Build Rust source as a cdylib crate with cargo and time `function_name` via ctypes.

    Args:
        code: Contents of ``src/lib.rs``; expected to export `function_name`.
        function_name: Name of the function to look up in the built library.
        test_args: Positional arguments passed on every call.
        n: Number of timed calls.

    Returns:
        ``(average_seconds_per_call, None)`` on success, or ``(None, message)``
        when cargo is missing, the build fails, no library is produced, the
        symbol cannot be found, or loading/calling the function raises.

    NOTE(review): every argument is declared as ``c_long`` and the return type
    as ``c_double`` regardless of the real signature; the return value is
    discarded, but non-integer `test_args` will be rejected by ctypes.
    """
    cargo_bin_dir = os.path.expanduser("~/.cargo/bin")
    cargo_path = shutil.which("cargo") or os.path.join(cargo_bin_dir, "cargo")
    if not os.path.exists(cargo_path):
        return None, "cargo not found — run: curl https://sh.rustup.rs -sSf | sh"

    with tempfile.TemporaryDirectory() as tmp:
        crate_dir = Path(tmp)
        (crate_dir / "src").mkdir()
        (crate_dir / "src" / "lib.rs").write_text(code, encoding="utf-8")
        (crate_dir / "Cargo.toml").write_text(
            '[package]\nname = "perf_ext"\nversion = "0.1.0"\nedition = "2021"\n\n'
            '[lib]\ncrate-type = ["cdylib"]\n'
        )

        # Make sure the rustup-installed toolchain is reachable by cargo's
        # child processes even if ~/.cargo/bin is not on the kernel's PATH.
        env = os.environ.copy()
        env["PATH"] = cargo_bin_dir + os.pathsep + env.get("PATH", "")

        r = subprocess.run(
            [cargo_path, "build", "--release"],
            capture_output=True, text=True, cwd=crate_dir, env=env,
        )
        if r.returncode != 0:
            return None, f"Compile error:\n{r.stderr[:400]}"

        # macOS -> .dylib, Linux -> .so
        release_dir = crate_dir / "target" / "release"
        libs = list(release_dir.glob("*.dylib")) or list(release_dir.glob("*.so"))
        if not libs:
            return None, "No compiled library found in target/release"
        lib_path = str(libs[0])

        # Symbol lookup, library loading and the calls themselves can all
        # raise (missing nm, unresolved symbol, wrong argument types); report
        # those through the (None, message) channel like benchmark_cython does.
        try:
            symbol = find_symbol(lib_path, function_name)
            if not symbol:
                return None, f"Symbol '{function_name}' not found in Rust lib"

            h = ctypes.CDLL(lib_path)
            fn = getattr(h, symbol)
            fn.restype = ctypes.c_double
            fn.argtypes = [ctypes.c_long] * len(test_args)

            start = time.perf_counter()
            for _ in range(n):
                fn(*test_args)
            return (time.perf_counter() - start) / n, None
        except Exception as exc:
            return None, str(exc) or type(exc).__name__

In [23]:
def benchmark_cython(code: str, function_name: str, test_args: tuple, n: int = 1000) -> tuple:
    """Build Cython source as an extension module and time `function_name`.

    The source is written to ``<function_name>.pyx`` in a temporary directory,
    built in place with a generated ``setup.py``, imported as a module named
    `function_name`, and the attribute of the same name is called `n` times.

    Returns:
        ``(average_seconds_per_call, None)`` on success, or ``(None, message)``
        when the build fails or importing/calling the function raises.
    """
    with tempfile.TemporaryDirectory() as tmp:
        build_dir = Path(tmp)
        (build_dir / f"{function_name}.pyx").write_text(code, encoding="utf-8")
        (build_dir / "setup.py").write_text(
            "from setuptools import setup\n"
            "from Cython.Build import cythonize\n"
            f'setup(ext_modules=cythonize("{function_name}.pyx", '
            'compiler_directives={"language_level": 3}))\n'
        )

        r = subprocess.run(
            [sys.executable, "setup.py", "build_ext", "--inplace"],
            capture_output=True, text=True, cwd=build_dir,
        )
        if r.returncode != 0:
            # Prefer stderr, but fall back to stdout if stderr is empty.
            return None, f"Compile error:\n{(r.stderr or r.stdout)[:400]}"

        path_entry = str(build_dir)
        sys.path.insert(0, path_entry)
        # Set aside any module already registered under this name (a previous
        # run, or an unrelated module that happens to share the name) so the
        # freshly built extension is the one imported; restored afterwards.
        previous_module = sys.modules.pop(function_name, None)
        try:
            importlib.invalidate_caches()
            mod = importlib.import_module(function_name)
            fn = getattr(mod, function_name)
            start = time.perf_counter()
            for _ in range(n):
                fn(*test_args)
            return (time.perf_counter() - start) / n, None
        except Exception as exc:
            return None, str(exc)
        finally:
            # Remove exactly the entry added above rather than whatever is at
            # index 0, in case sys.path was modified during the import.
            if path_entry in sys.path:
                sys.path.remove(path_entry)
            # Do not leave a module whose backing file is about to be deleted.
            sys.modules.pop(function_name, None)
            if previous_module is not None:
                sys.modules[function_name] = previous_module


In [24]:
# Main Gradio implementation

def run_benchmark(python_code: str, function_name: str, test_args_str: str, model_label: str):
    """Translate a Python function to C++/Rust/Cython, benchmark all versions, and report.

    This is a generator used as a Gradio callback: every ``yield`` produces
    ``(markdown_log, table_rows_or_None)`` so the UI can show progress. The
    final yield carries the summary and the results table.

    Args:
        python_code: Source of the Python function to translate and time.
        function_name: Name of the function inside `python_code`.
        test_args_str: Comma-separated argument expressions; empty means the
            function is called with no arguments.
        model_label: Key into ``MODELS`` selecting the translation model.

    Failures in setup, translation or any single benchmark are reported in
    the log instead of aborting the callback with a traceback.
    """
    try:
        # NOTE(security): eval executes arbitrary expressions typed into the
        # UI. Acceptable for a local tool; do not expose this publicly.
        if (test_args_str or "").strip():
            test_args = eval(f"({test_args_str},)")  # noqa: S307
        else:
            test_args = ()
    except Exception as exc:
        yield f"❌ Could not parse test args: {exc}", None
        return

    try:
        model_cfg = MODELS[model_label]
        model_id = model_cfg["id"]
        temperature = model_cfg["temperature"]
        client = get_client()
    except Exception as exc:
        yield f"❌ Could not set up model client: {type(exc).__name__}: {exc}", None
        return

    log = f"🤖 Model: **{model_label}** (`{model_id}`)\n\n"
    yield log + "🔄 Translating to C++, Rust, Cython...\n", None

    # Translations
    translations: dict[str, str] = {}
    for target in ("cpp", "rust", "cython"):
        try:
            translations[target] = translate_code(
                client, model_id, temperature, target, python_code, function_name
            )
            log += f"  ✅ {target.upper()} translation done\n"
        except Exception as exc:
            log += f"  ❌ {target.upper()} translation failed: {exc}\n"
            translations[target] = ""
        yield log, None

    # Benchmarks
    log += "\n⏱️ Running benchmarks...\n"
    yield log, None

    results: dict[str, tuple] = {}

    try:
        results["python"] = benchmark_python(python_code, function_name, test_args)
    except Exception as exc:
        results["python"] = (None, f"{type(exc).__name__}: {exc}")

    baseline, baseline_err = results["python"]
    if baseline is None:
        # Without a Python baseline there is nothing to compare against.
        log += f"  ❌ PYTHON: FAILED — {baseline_err}\n"
        yield log, None
        return
    log += f"  ✅ PYTHON: {baseline*1e6:.2f} µs\n"
    yield log, None

    for target, bench_fn in [
        ("cpp",    benchmark_cpp),
        ("rust",   benchmark_rust),
        ("cython", benchmark_cython),
    ]:
        if not translations.get(target):
            results[target] = (None, "Translation failed")
        else:
            try:
                results[target] = bench_fn(translations[target], function_name, test_args)
            except Exception as exc:
                # One broken target must not abort the remaining benchmarks.
                results[target] = (None, f"{type(exc).__name__}: {exc}")

        t, err = results[target]
        ok = t is not None  # 0.0 is a valid (if unlikely) timing, not a failure
        status = f"{t*1e6:.2f} µs" if ok else f"FAILED — {err}"
        icon = "✅" if ok else "❌"
        log += f"  {icon} {target.upper()}: {status}\n"
        yield log, None

    def speedup_over_baseline(elapsed: float) -> float:
        """Ratio baseline/elapsed, guarding against a zero timing."""
        return baseline / elapsed if elapsed > 0 else float("inf")

    # Results table
    valid = {k: v[0] for k, v in results.items() if v[0] is not None}
    winner = min(valid, key=valid.get)
    winner_speedup = speedup_over_baseline(valid[winner])

    table = []
    for target, (t, _err) in results.items():
        if t is None:
            table.append([target.upper(), "FAILED", "—", "❌"])
        else:
            speedup = speedup_over_baseline(t)
            trophy = "🏆" if target == winner and target != "python" else ""
            table.append([target.upper(), f"{t*1e6:.2f}", f"{speedup:.2f}x", trophy])

    summary = log
    summary += f"\n---\n🏆 **Winner: {winner.upper()}** — {winner_speedup:.1f}x faster than Python\n"
    if winner in translations:
        summary += f"\n**{winner.upper()} code:**\n```\n{translations[winner]}\n```"

    yield summary, table



In [25]:
# Gradio UI 

# Two-column layout: inputs (model, code, function name, test args, run button)
# on the left; streamed progress log and results table on the right.
with gr.Blocks(title="⚡ Multi-Target Compiler Benchmark") as demo:
    gr.Markdown(
        "# ⚡ Python → C++ / Rust / Cython Benchmark\n"
        "Translate a Python function with an LLM, compile all three targets, and benchmark them."
    )

    with gr.Row():
        with gr.Column(scale=1):
            model_selector = gr.Dropdown(
                choices=list(MODELS.keys()),
                value="Kimi K2 Thinking",
                label="🤖 Translation Model",
            )
            code_input = gr.Code(
                label="Python Function",
                language="python",
                value=(
                    "def dot_product(n):\n"
                    "    a = list(range(n))\n"
                    "    b = list(range(n))\n"
                    "    return sum(x * y for x, y in zip(a, b))"
                ),
            )
            fn_name = gr.Textbox(label="Function Name", value="dot_product")
            test_args = gr.Textbox(
                label="Test Args (comma-separated)",
                value="1000",
                placeholder="e.g. 1000   or   500, 'hello'",
            )
            run_btn = gr.Button("🚀 Run Benchmark", variant="primary")

        with gr.Column(scale=1):
            log_output = gr.Markdown(label="Progress & Output")
            table_output = gr.Dataframe(
                headers=["Target", "Time (µs)", "Speedup", ""],
                label="Results",
                interactive=False,
            )

    # run_benchmark is a generator; each yielded (log, table) pair updates
    # the two output components in order.
    run_btn.click(
        fn=run_benchmark,
        inputs=[code_input, fn_name, test_args, model_selector],
        outputs=[log_output, table_output],
    )


In [26]:
# Start the Gradio app; the cell output below shows the local URL it serves on.
demo.launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


