In [None]:
!pip install -q gradio anthropic transformers accelerate torch

In [None]:
!pip install langdetect

In [None]:
# basic imports for everything
import os
import re       # for cleaning up text
import gradio as gr  # UI library
from transformers import pipeline
from langdetect import detect     # simple lang detection

In [None]:
# Build the text-generation pipeline once; add_comments_to_code reuses it for
# every request. Any Hugging Face causal LM can be substituted for phi-2
# (for example phi-1.5 if a lighter model is preferred).
local_model = pipeline(
    task="text-generation",
    model="microsoft/phi-2",
    device_map="auto",
    torch_dtype="auto",
    # Default generation settings; the call site may override them per request.
    do_sample=True,
    temperature=0.3,
    max_new_tokens=250,
)


In [None]:
# detecting the language
def detect_language(code):
    """Guess the programming language of ``code`` from keyword markers.

    Each candidate language is scored by how many of its marker substrings
    occur in ``code``; the language with the most hits wins. Scoring (rather
    than returning on the first match) keeps a JavaScript or C++ snippet that
    merely contains ``import `` or ``print(`` from being labelled Python.

    Args:
        code: Source text to inspect. ``None`` or an empty string is accepted.

    Returns:
        One of ``"python"``, ``"cpp"``, ``"javascript"`` or ``"unknown"``
        (no marker of any language was found).
    """
    if not code:
        return "unknown"

    # Insertion order is the tie-break priority: python, then cpp, then javascript.
    markers = {
        "python": ("def ", "import ", "print(", "lambda"),
        "cpp": ("#include", "int main", "cout"),
        "javascript": ("function ", "console.log"),
    }

    best_language, best_hits = "unknown", 0
    for language, keywords in markers.items():
        hits = sum(keyword in code for keyword in keywords)
        # Strict ">" keeps the earlier (higher-priority) language on a tie.
        if hits > best_hits:
            best_language, best_hits = language, hits
    return best_language


In [None]:
def _build_comment_prompt(code, language):
    """Return the instruction prompt asking the model to annotate ``code``."""
    return (
        "You are a coding assistant. "
        f"Add concise inline comments and docstrings to this {language} code. "
        "Explain all assignments, exec calls, and dictionary manipulations. "
        "Do NOT change the logic, only add comments to make it understandable.\n\n"
        f"Code:\n{code}\n\nImproved code with comments:\n"
    )


def _drop_repeated_lines(text):
    """Collapse runs of consecutive identical lines in ``text``."""
    kept = []
    for line in text.splitlines():
        # Leading indentation is part of the comparison: adjacent lines that
        # differ only in indentation (nested closing braces, nested returns)
        # are distinct code and must both survive.
        if not kept or line.rstrip() != kept[-1].rstrip():
            kept.append(line)
    return "\n".join(kept)


def add_comments_to_code(code):
    """Ask the local model to rewrite ``code`` with explanatory comments.

    Args:
        code: Source text pasted by the user. ``None``, empty and
            whitespace-only input are all treated as "nothing to do".

    Returns:
        The model's commented version of the code with the echoed prompt and
        consecutive repeated lines removed, or a short user-facing message
        when the input is empty or its language is not recognised.
    """
    # Nothing to annotate.
    if not code or not code.strip():
        return "Please paste some code to add comments."

    language = detect_language(code)
    if language == "unknown":
        # NOTE(review): the message says output "may be limited", yet nothing
        # is generated on this path. Kept as-is; confirm the intended behavior.
        return "Could not detect programming language. Output may be limited."

    prompt = _build_comment_prompt(code, language)

    # Run the model; the pipeline returns a list with one dict per sequence.
    result = local_model(
        prompt,
        max_new_tokens=400,
        do_sample=True,
        temperature=0.3
    )[0]["generated_text"]

    # The generated text includes the prompt itself, so strip the echo.
    cleaned = result.replace(prompt, "").strip()

    return _drop_repeated_lines(cleaned)


In [None]:
# Gradio UI: an input box for the snippet, an Analyze button, and a code
# viewer that shows what add_comments_to_code returns.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("## Code Commenter & Interpreter")
    gr.Markdown("Paste your code below, click **Analyze**, and view the rewritten code with human-like comments.")

    # Text area where the user pastes the snippet to annotate.
    code_input = gr.Textbox(
        elem_id="code_box",
        placeholder="Write or paste any code snippet...",
        lines=12,
        label="Paste your code here",
    )

    # Primary action button.
    analyze_btn = gr.Button("Analyze Code", variant="primary")

    # Viewer for the annotated result (syntax highlighting is fixed to Python).
    rewritten_out = gr.Code(
        lines=14,
        language="python",
        label="Rewritten Code with Human-Like Comments",
    )

    # Clicking the button sends the textbox content through the commenter
    # and writes the returned text into the viewer.
    analyze_btn.click(
        fn=add_comments_to_code,
        inputs=code_input,
        outputs=rewritten_out,
    )

# Start the app; share=True requests a shareable link in addition to the local one.
demo.launch(share=True)
