In [28]:
# imports
import os
import io
import sys
from dotenv import load_dotenv
from openai import OpenAI
import google.generativeai as genai
import anthropic
from huggingface_hub import login, InferenceClient
from transformers import AutoTokenizer
from IPython.display import Markdown, display, update_display
import gradio as gr
import subprocess
import re
import requests
import json

In [29]:
# Print the name and supported generation methods of every model returned by
# genai.list_models().
# NOTE(review): this cell sits above the one that calls load_dotenv() and
# genai.configure(); presumably it needs the Google API key to be available
# already — confirm it runs on a fresh kernel.
models=genai.list_models()

for model in models:
    print(model.name, model.supported_generation_methods)

models/embedding-gecko-001 ['embedText', 'countTextTokens']
models/gemini-2.5-flash ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.5-pro ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.0-flash-exp ['generateContent', 'countTokens', 'bidiGenerateContent']
models/gemini-2.0-flash ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.0-flash-001 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.0-flash-lite-001 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.0-flash-lite ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.0-flash-lite-preview-02-05 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.0-flash-lite-preview ['generateContent', 'countTokens', 'crea

In [30]:
# environment
# Load variables from a .env file (if present) into the process environment.
load_dotenv()
# Re-export each key into os.environ; when a key is absent the placeholder string
# is stored instead. OpenAI() and anthropic.Anthropic() below are built without an
# explicit key, so presumably they read these variables — confirm against the SDKs.
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
os.environ['ANTHROPIC_API_KEY'] = os.getenv('ANTHROPIC_API_KEY', 'your-key-if-not-using-env')
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY', 'your-key-if-not-using-env')

# initialize models
openai = OpenAI()
claude = anthropic.Anthropic()
genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
# Model identifiers passed to each provider by the streaming functions.
GEMINI_MODEL="gemini-2.5-flash"
OPENAI_MODEL = "gpt-4o"
# CLAUDE_MODEL = "claude-3-5-sonnet-20240620"
CLAUDE_MODEL = "claude-sonnet-4-20250514"

# Unlike the keys above, HF_TOKEN has no fallback: a missing variable raises
# KeyError on this line.
hf_token = os.environ['HF_TOKEN']
login(hf_token, add_to_git_credential=True)

# Hugging Face model id; used to load the tokenizer that formats the Gemma prompt.
code_gemma = "google/codegemma-7b-it"
# Endpoint URL
# Base URL of the inference endpoint; requests are posted to f"{GEMMA_URL}/v1/completions".
GEMMA_URL = "https://yllsha6or5pa77qh.us-east-1.aws.endpoints.huggingface.cloud"


Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
# Sample Python program used as the default text of the UI input boxes. It sums an
# alternating series over 100,000,000 iterations, multiplies the result by 4, and
# prints the value together with the elapsed time.
pi = """
import time

def calculate(iterations, param1, param2):
    result = 1.0
    for i in range(1, iterations+1):
        j = i * param1 - param2
        result -= (1/j)
        j = i * param1 + param2
        result += (1/j)
    return result

start_time = time.time()
result = calculate(100_000_000, 4, 1) * 4
end_time = time.time()

print(f"Result: {result:.12f}")
print(f"Execution Time: {(end_time - start_time):.6f} seconds")
"""

"""Con la función exec, ejecutamos el código de Python generado anteriormente."""


# Second sample program: generates pseudo-random numbers with a linear congruential
# generator and adds up the maximum subarray sum (computed with nested loops) of
# 20 arrays of 10,000 numbers each, then prints the total and the elapsed time.
# Its functions call one another, unlike the single-function `pi` sample.
python_hard = """
def lcg(seed, a=1664525, c=1013904223, m=2**32):
    value = seed
    while True:
        value = (a * value + c) % m
        yield value

def max_subarray_sum(n, seed, min_val, max_val):
    lcg_gen = lcg(seed)
    random_numbers = [next(lcg_gen) % (max_val - min_val + 1) + min_val for _ in range(n)]
    max_sum = float('-inf')
    for i in range(n):
        current_sum = 0
        for j in range(i, n):
            current_sum += random_numbers[j]
            if current_sum > max_sum:
                max_sum = current_sum
    return max_sum

def total_max_subarray_sum(n, initial_seed, min_val, max_val):
    total_sum = 0
    lcg_gen = lcg(initial_seed)
    for _ in range(20):
        seed = next(lcg_gen)
        total_sum += max_subarray_sum(n, seed, min_val, max_val)
    return total_sum

# Parameters
n = 10000         # Number of random numbers
initial_seed = 42 # Initial seed for the LCG
min_val = -10     # Minimum value of random numbers
max_val = 10      # Maximum value of random numbers

# Timing the function
import time
start_time = time.time()
result = total_max_subarray_sum(n, initial_seed, min_val, max_val)
end_time = time.time()

print("Total Maximum Subarray Sum (20 runs):", result)
print("Execution Time: {:.6f} seconds".format(end_time - start_time))
"""


In [4]:
# System messages
# System prompt for the Python -> C++ conversion task (adjacent literals are
# concatenated by the parser into a single string).
system_message = (
    "Eres un asistente que reimplementa código Python en C++ de alto rendimiento para una PC Windows 10 64 bits. "
    "Responde solo con código C++; usa los comentarios con moderación y no proporciones ninguna explicación más allá de comentarios ocasionales. "
    "La respuesta C++ debe producir una salida idéntica en el menor tiempo posible."
)

# System prompt for the documentation task.
documentation_system = (
    "Eres un experto en documentación de código. Tu tarea es agregar docstrings detallados y comentarios explicativos al código proporcionado. "
    "Mantén el código funcional intacto y solo agrega documentación clara y útil."
)

def user_prompt_for(python):
    """Build the user prompt that asks for a C++ rewrite of ``python``.

    Args:
        python: Python source code to convert.

    Returns:
        The Spanish-language instructions, a blank line, and then ``python``.
    """
    # Every sentence ends with a trailing space so consecutive sentences do not run
    # together ("posible.Responde") once the literals are concatenated.
    instructions = (
        "Reescribe este código Python en C++ con la implementación más rápida posible que produzca una salida idéntica en el menor tiempo posible. "
        "Responde solo con código C++; no expliques tu trabajo más allá de algunos comentarios. "
        "Manten la implementación de la generación de números aleatorios idénticos para que los resultados de la coincidencia sean exactos. "
        "Responde solo con código C++; no añadas nada más que código; usa los comentarios con moderación y no proporciones ninguna explicación más allá de comentarios ocasionales. "
        "Presta atención a los tipos de números para asegurar que no haya desbordamientos de int (overflow). Recuerda incluir todos los paquetes de C++ necesarios, como iomanip.\n\n"
    )
    return instructions + python

def documentation_prompt_for(code, language="Python"):
    """Build the user prompt that asks a model to document ``code``.

    Args:
        code: Source code to be documented.
        language: Name of the language, interpolated into the first sentence.

    Returns:
        The Spanish-language instructions, a blank line, and then ``code``.
    """
    instructions = (
        f"Agrega docstrings detallados y comentarios explicativos al siguiente código {language}. "
        "Para Python, usa docstrings estilo Google. Para C++, usa comentarios Doxygen. "
        "Explica qué hace cada función, sus parámetros, valores de retorno y cualquier lógica compleja. "
        "Mantén el código funcional exactamente igual, solo agrega documentación.\n\n"
    )
    return instructions + code

def messages_for(python):
    """Return the chat message list (system turn, then user turn) for converting ``python``."""
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": user_prompt_for(python)}
    return [system_turn, user_turn]

def write_output(cpp):
    """Strip Markdown code fences from ``cpp`` and write the result to ``optimized.cpp``.

    Args:
        cpp: Model reply containing C++ code, possibly wrapped in ```cpp fences.
    """
    code = cpp.replace("```cpp","").replace("```","")
    # Explicit UTF-8: without it open() uses the locale encoding, which can raise
    # UnicodeEncodeError when the generated code contains characters outside that
    # code page (e.g. accented comments or arrows).
    with open("optimized.cpp", "w", encoding="utf-8") as f:
        f.write(code)

In [22]:
# Streaming functions for each model
def stream_gpt(python):
    """Stream a C++ translation of ``python`` from the OpenAI chat completions API.

    Yields:
        The reply accumulated so far, with Markdown code fences removed.
    """
    completion = openai.chat.completions.create(
        model=OPENAI_MODEL,
        messages=messages_for(python),
        stream=True,
    )
    accumulated = ""
    for event in completion:
        delta_text = event.choices[0].delta.content
        # A chunk without content adds nothing but still triggers a yield.
        if delta_text:
            accumulated += delta_text
        yield accumulated.replace('```cpp\n','').replace('```','')

def stream_claude(python):
    """Stream a C++ translation of ``python`` from the Anthropic messages API.

    Yields:
        The reply accumulated so far, with Markdown code fences removed.
    """
    user_turn = {"role": "user", "content": user_prompt_for(python)}
    accumulated = ""
    with claude.messages.stream(
        model=CLAUDE_MODEL,
        max_tokens=2000,
        system=system_message,
        messages=[user_turn],
    ) as events:
        for piece in events.text_stream:
            accumulated += piece
            yield accumulated.replace('```cpp\n','').replace('```','')

def stream_gemini(python):
    """Stream a C++ translation of ``python`` from the Gemini API.

    Yields:
        The reply accumulated so far, with Markdown code fences removed. Chunks
        whose text is empty are skipped without yielding.
    """
    gemini = genai.GenerativeModel(GEMINI_MODEL, system_instruction=system_message)
    accumulated = ""
    for part in gemini.generate_content(user_prompt_for(python), stream=True):
        if not part.text:
            continue
        accumulated += part.text
        yield accumulated.replace('```cpp\n','').replace('```','')

def stream_gemma(python):
    """Stream a C++ translation of ``python`` from the CodeGemma inference endpoint.

    The system message and user prompt are merged into a single user turn, formatted
    with the CodeGemma chat template, and posted to ``{GEMMA_URL}/v1/completions``
    with streaming enabled.

    Args:
        python: Python source code to convert.

    Yields:
        The reply accumulated so far, with Markdown code fences removed. When a reply
        longer than 100 characters is more than 10% non-ASCII, a single warning
        message is yielded and the stream stops. Request failures are yielded as
        error messages instead of being raised.
    """
    tokenizer = AutoTokenizer.from_pretrained(code_gemma)

    combined_prompt = f"{system_message}\n\n{user_prompt_for(python)}"
    messages = [{"role": "user", "content": combined_prompt}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    headers = {
        "Authorization": f"Bearer {hf_token}",
        "Content-Type": "application/json"
    }

    payload = {
        "prompt": text,
        "max_tokens": 2000,
        "temperature": 0.5,  # Lower temperature for more coherent output
        "top_p": 0.9,  # Lower top_p as well
        "repetition_penalty": 1.2,  # Stronger penalty against repetition
        "stream": True
    }

    response = None
    try:
        response = requests.post(
            f"{GEMMA_URL}/v1/completions",
            headers=headers,
            json=payload,
            stream=True,
            timeout=120
        )
        response.raise_for_status()

        reply = ""
        for line in response.iter_lines():
            if not line:
                continue

            line_text = line.decode('utf-8')
            # Server-sent events prefix each payload with "data: ".
            if line_text.startswith('data: '):
                line_text = line_text[6:]

            if line_text.strip() == '[DONE]':
                break

            try:
                data = json.loads(line_text)
            except json.JSONDecodeError:
                # Skip lines that are not JSON payloads.
                continue

            if 'choices' in data and len(data['choices']) > 0:
                choice = data['choices'][0]
                # Accept both completion-style ("text") and chat-style ("delta") chunks.
                if 'text' in choice:
                    reply += choice['text']
                elif 'delta' in choice and 'content' in choice['delta']:
                    reply += choice['delta']['content']

            # Detect corrupted output: too many non-ASCII characters in the reply.
            if len(reply) > 100:
                non_ascii = sum(1 for c in reply if ord(c) > 127)
                if non_ascii > len(reply) * 0.1:  # More than 10% unusual characters
                    yield "⚠️ El modelo Gemma generó código corrupto. Intenta con GPT, Claude o Gemini."
                    return

            yield reply.replace('```cpp\n','').replace('```','')

    except requests.exceptions.HTTPError as e:
        yield f"Error HTTP {e.response.status_code}: {e.response.text[:500]}"
    except Exception as e:
        yield f"Error con Gemma: {str(e)}"
    finally:
        # A streamed response keeps its connection open until it is fully read or
        # closed; close it on every exit path ([DONE], early return, errors, or the
        # consumer abandoning the generator). Done in `finally` rather than a `with`
        # block so the HTTPError handler above can still read the error body.
        if response is not None:
            response.close()

In [16]:
def _strip_doc_fences(text):
    """Remove Markdown code-fence markers (```python, ```cpp, ```) from ``text``."""
    return text.replace('```python','').replace('```cpp','').replace('```','')


def add_documentation(code, language, model_choice):
    """Stream a documented version of ``code`` from the selected model.

    Args:
        code: Source code to document.
        language: Language name passed on to ``documentation_prompt_for``.
        model_choice: One of "GPT", "Claude", "Gemini" or "Gemma".

    Yields:
        The reply accumulated so far, with Markdown code fences removed. For Gemma,
        request failures are yielded as error messages instead of being raised.

    Raises:
        ValueError: If ``model_choice`` is not a known model (raised on first
            iteration, matching ``optimize``).
    """
    prompt = documentation_prompt_for(code, language)

    if model_choice == "GPT":
        messages = [
            {"role": "system", "content": documentation_system},
            {"role": "user", "content": prompt}
        ]
        stream = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, stream=True)
        reply = ""
        for chunk in stream:
            fragment = chunk.choices[0].delta.content or ""
            reply += fragment
            yield _strip_doc_fences(reply)

    elif model_choice == "Claude":
        result = claude.messages.stream(
            model=CLAUDE_MODEL,
            max_tokens=2000,
            system=documentation_system,
            messages=[{"role": "user", "content": prompt}],
        )
        reply = ""
        with result as stream:
            for text in stream.text_stream:
                reply += text
                yield _strip_doc_fences(reply)

    elif model_choice == "Gemini":
        model = genai.GenerativeModel(GEMINI_MODEL, system_instruction=documentation_system)
        response = model.generate_content(prompt, stream=True)
        reply = ""
        for chunk in response:
            if chunk.text:
                reply += chunk.text
                yield _strip_doc_fences(reply)

    elif model_choice == "Gemma":
        tokenizer = AutoTokenizer.from_pretrained(code_gemma)

        # The system text is merged into the single user turn sent to the endpoint.
        combined_prompt = f"{documentation_system}\n\n{prompt}"
        messages = [{"role": "user", "content": combined_prompt}]
        text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        headers = {
            "Authorization": f"Bearer {hf_token}",
            "Content-Type": "application/json"
        }

        payload = {
            "prompt": text,
            "max_tokens": 2000,
            "temperature": 0.7,
            "top_p": 0.95,
            "repetition_penalty": 1.1,
            "stream": True
        }

        response = None
        try:
            response = requests.post(
                f"{GEMMA_URL}/v1/completions",
                headers=headers,
                json=payload,
                stream=True,
                timeout=120
            )
            response.raise_for_status()

            reply = ""
            for line in response.iter_lines():
                if not line:
                    continue

                line_text = line.decode('utf-8')
                # Server-sent events prefix each payload with "data: ".
                if line_text.startswith('data: '):
                    line_text = line_text[6:]

                if line_text.strip() == '[DONE]':
                    break

                try:
                    data = json.loads(line_text)
                except json.JSONDecodeError:
                    # Skip lines that are not JSON payloads.
                    continue

                if 'choices' in data and len(data['choices']) > 0:
                    choice = data['choices'][0]
                    # Accept both completion-style ("text") and chat-style ("delta") chunks.
                    if 'text' in choice:
                        reply += choice['text']
                    elif 'delta' in choice and 'content' in choice['delta']:
                        reply += choice['delta']['content']

                yield _strip_doc_fences(reply)

        except requests.exceptions.HTTPError as e:
            yield f"Error HTTP {e.response.status_code}: {e.response.text[:500]}"
        except Exception as e:
            yield f"Error con Gemma: {str(e)}"
        finally:
            # A streamed response keeps its connection open until it is fully read or
            # closed; close it on every exit path. Done in `finally` rather than a
            # `with` block so the HTTPError handler can still read the error body.
            if response is not None:
                response.close()

    else:
        # Fail loudly instead of silently producing no output.
        raise ValueError("Unknown model")

def execute_python(code):
    """Execute ``code`` and return everything it printed to standard output.

    Args:
        code: Python source code to run.

    Returns:
        The text written to ``sys.stdout`` while ``code`` ran.

    Raises:
        Exception: Any exception raised by ``code`` propagates to the caller after
            ``sys.stdout`` has been restored.
    """
    output = io.StringIO()
    # One fresh dict serves as both globals and locals, so functions defined by the
    # code can call each other; with a bare exec(code) inside a function they land
    # in function locals and cross-calls fail with NameError. "__name__" is set so
    # `if __name__ == "__main__":` guards still run.
    namespace = {"__name__": "__main__"}
    # Remember the active stream: in a notebook it is not sys.__stdout__, so
    # restoring to sys.__stdout__ would detach later prints from the notebook.
    previous_stdout = sys.stdout
    sys.stdout = output
    try:
        # NOTE(security): exec runs arbitrary code with this process's privileges;
        # only expose this function to trusted users.
        exec(code, namespace)
    finally:
        sys.stdout = previous_stdout
    return output.getvalue()

def execute_cpp(code):
    """Compile ``code`` with g++ and return the output of the resulting program.

    The code is first written to ``optimized.cpp`` via ``write_output``, then
    compiled to ``optimized.exe`` in the current directory and executed.

    Args:
        code: C++ source code, possibly wrapped in Markdown fences.

    Returns:
        The program's standard output, or an error message when compilation or
        execution exits with a non-zero status, or when an executable is not found.
    """
    write_output(code)
    try:
        compile_cmd = [
            "g++", "-Ofast", "-std=c++17",
            "-march=native",
            "-o", "optimized.exe", "optimized.cpp"
        ]
        subprocess.run(compile_cmd, check=True, text=True, capture_output=True)
        # Absolute path: a bare file name is only resolved from the current
        # directory on Windows; elsewhere it would be searched on PATH and not found.
        run_cmd = [os.path.abspath("optimized.exe")]
        run_result = subprocess.run(run_cmd, check=True, text=True, capture_output=True)
        return run_result.stdout
    except subprocess.CalledProcessError as e:
        return f"Ha ocurrido un error:\n{e.stderr}"
    except FileNotFoundError:
        return "Error: No se encontró 'g++'. Asegúrate de que esté en tu PATH de Windows."

def optimize(python, model):
    """Stream the C++ translation of ``python`` produced by the chosen model.

    Args:
        python: Python source code to convert.
        model: One of "GPT", "Claude", "Gemini" or "Gemma".

    Yields:
        Each partial result produced by the selected streaming function.

    Raises:
        ValueError: If ``model`` is not one of the known names.
    """
    streamers = {
        "GPT": stream_gpt,
        "Claude": stream_claude,
        "Gemini": stream_gemini,
        "Gemma": stream_gemma,
    }
    streamer = streamers.get(model)
    if streamer is None:
        raise ValueError("Unknown model")

    yield from streamer(python)

In [7]:
# Custom CSS passed to gr.Blocks: one background colour per class name, applied to
# the components created with elem_classes "python", "cpp" and "doc".
css = """
.python {background-color: #306998;}
.cpp {background-color: #050;}
.doc {background-color: #4a148c;}
"""

In [31]:
# Gradio UI
# Two tabs: the first converts Python to C++ and runs both versions, the second adds
# documentation to pasted code. All handlers are functions defined above.
with gr.Blocks(css=css) as ui:
    gr.Markdown("# Conversor Python → C++ con Documentación Automática")

    with gr.Tab("Conversión de Código"):
        gr.Markdown("## Convierte código Python a C++ optimizado")
        with gr.Row():
            # The input box is pre-filled with the `pi` sample program.
            python_input = gr.Textbox(label="Python code:", value=pi, lines=15)
            cpp_output = gr.Textbox(label="C++ code:", lines=15)

        with gr.Row():
            model_select = gr.Dropdown(
                ["GPT", "Claude", "Gemini", "Gemma"],
                label="Selecciona el modelo",
                value="GPT"
            )

        with gr.Row():
            convert_btn = gr.Button("Convertir a C++", variant="primary")

        with gr.Row():
            python_run = gr.Button("Ejecutar Python")
            cpp_run = gr.Button("Ejecutar C++")

        with gr.Row():
            # elem_classes ties these result areas to the background colours in `css`.
            python_result = gr.TextArea(label="Resultado Python:", elem_classes=["python"])
            cpp_result = gr.TextArea(label="Resultado C++:", elem_classes=["cpp"])

    with gr.Tab("Documentación Automática"):
        gr.Markdown("## Agrega docstrings y comentarios automáticamente")

        with gr.Row():
            code_input = gr.Textbox(label="Código sin documentar:", lines=15, value=pi)
            documented_output = gr.Textbox(label="Código documentado:", lines=15, elem_classes=["doc"])

        with gr.Row():
            lang_select = gr.Radio(["Python", "C++"], label="Lenguaje", value="Python")
            doc_model = gr.Dropdown(["GPT", "Claude", "Gemini", "Gemma"], label="Modelo", value="Claude")

        document_btn = gr.Button("Agregar Documentación", variant="primary")

        # add_documentation is a generator that yields the accumulated text;
        # presumably Gradio refreshes the output box on each yield — confirm.
        document_btn.click(
            add_documentation,
            inputs=[code_input, lang_select, doc_model],
            outputs=[documented_output]
        )

    # Event handlers
    # optimize is also a generator; execute_python and execute_cpp return one string.
    convert_btn.click(optimize, inputs=[python_input, model_select], outputs=[cpp_output])
    python_run.click(execute_python, inputs=[python_input], outputs=[python_result])
    cpp_run.click(execute_cpp, inputs=[cpp_output], outputs=[cpp_result])

# Start the app; inbrowser=True presumably opens it in a browser tab.
ui.launch(inbrowser=True)

  with gr.Blocks(css=css) as ui:


* Running on local URL:  http://127.0.0.1:7867
* To create a public link, set `share=True` in `launch()`.


