In [1]:
import ast
import re
import json
import subprocess
import tempfile
import os
from typing import Dict, List, Set, Optional, Any, Tuple
from dataclasses import dataclass
from enum import Enum
import requests
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

In [2]:
class DataType(Enum):
    INT = "int"
    FLOAT = "double"
    STRING = "std::string"
    BOOL = "bool"
    LIST = "std::vector"
    DICT = "std::map"
    TUPLE = "std::tuple"
    NONE = "void"
    AUTO = "auto"

In [3]:
@dataclass
class Variable:
    name: str
    type: DataType
    is_const: bool = False
    template_params: List[str] = None

In [4]:
@dataclass
class Function:
    name: str
    params: List[Variable]
    return_type: DataType
    body: str

In [5]:
class PythonAnalyzer(ast.NodeVisitor):
    """Analyzes Python AST to extract semantic information"""
    
    def __init__(self):
        self.variables: Dict[str, Variable] = {}
        self.functions: List[Function] = []
        self.imports: Set[str] = set()
        self.type_hints: Dict[str, str] = {}
        self.literals: Dict[str, DataType] = {}
        
    def visit_Import(self, node):
        for alias in node.names:
            self.imports.add(alias.name)
        self.generic_visit(node)
    
    def visit_ImportFrom(self, node):
        if node.module:
            for alias in node.names:
                self.imports.add(f"{node.module}.{alias.name}")
        self.generic_visit(node)
    
    def visit_FunctionDef(self, node):
        params = []
        for arg in node.args.args:
            param_type = self._infer_type(arg.annotation) if arg.annotation else DataType.AUTO
            params.append(Variable(arg.arg, param_type))
        
        return_type = self._infer_type(node.returns) if node.returns else DataType.AUTO
        
        func = Function(
            name=node.name,
            params=params,
            return_type=return_type,
            body=""  # Will be filled by converter
        )
        self.functions.append(func)
        self.generic_visit(node)
    
    def visit_Assign(self, node):
        for target in node.targets:
            if isinstance(target, ast.Name):
                inferred_type = self._infer_value_type(node.value)
                self.variables[target.id] = Variable(target.id, inferred_type)
        self.generic_visit(node)
    
    def visit_AnnAssign(self, node):
        if isinstance(node.target, ast.Name):
            var_type = self._infer_type(node.annotation)
            self.variables[node.target.id] = Variable(node.target.id, var_type)
        self.generic_visit(node)
    
    def _infer_type(self, annotation) -> DataType:
        """Infer C++ type from Python type annotation"""
        if annotation is None:
            return DataType.AUTO
        
        if isinstance(annotation, ast.Name):
            type_map = {
                'int': DataType.INT,
                'float': DataType.FLOAT,
                'str': DataType.STRING,
                'bool': DataType.BOOL,
                'list': DataType.LIST,
                'dict': DataType.DICT,
                'tuple': DataType.TUPLE
            }
            return type_map.get(annotation.id, DataType.AUTO)
        
        return DataType.AUTO
    
    def _infer_value_type(self, value) -> DataType:
        """Infer type from value node"""
        if isinstance(value, ast.Constant):
            if isinstance(value.value, int):
                return DataType.INT
            elif isinstance(value.value, float):
                return DataType.FLOAT
            elif isinstance(value.value, str):
                return DataType.STRING
            elif isinstance(value.value, bool):
                return DataType.BOOL
        elif isinstance(value, ast.List):
            return DataType.LIST
        elif isinstance(value, ast.Dict):
            return DataType.DICT
        elif isinstance(value, ast.Tuple):
            return DataType.TUPLE
        
        return DataType.AUTO


In [6]:
class CppGenerator:
    """Generates C++ code structures"""
    
    def __init__(self):
        self.includes = {
            "#include <iostream>",
            "#include <string>",
            "#include <vector>",
            "#include <map>",
            "#include <tuple>",
            "#include <algorithm>",
            "#include <cmath>"
        }
        
    def generate_headers(self) -> str:
        return "\n".join(sorted(self.includes)) + "\n\n"
    
    def generate_variable_declaration(self, var: Variable) -> str:
        if var.template_params:
            type_str = f"{var.type.value}<{', '.join(var.template_params)}>"
        else:
            type_str = var.type.value
        
        const_str = "const " if var.is_const else ""
        return f"{const_str}{type_str} {var.name}"
    
    def generate_function_signature(self, func: Function) -> str:
        params_str = ", ".join([
            self.generate_variable_declaration(param) for param in func.params
        ])
        return f"{func.return_type.value} {func.name}({params_str})"
    
    def python_to_cpp_operators(self, code: str) -> str:
        """Convert Python operators to C++ equivalents"""
        # Python's ** to C++ pow()
        code = re.sub(r'(\w+)\s*\*\*\s*(\w+)', r'pow(\1, \2)', code)
        
        # Python's // to C++ integer division
        code = re.sub(r'(\w+)\s*//\s*(\w+)', r'(\1 / \2)', code)
        
        # Python's and/or to C++ &&/||
        code = re.sub(r'\band\b', '&&', code)
        code = re.sub(r'\bor\b', '||', code)
        code = re.sub(r'\bnot\b', '!', code)
        
        return code

In [7]:
class QwenModelInterface:
    """Interface to Qwen/Qwen2.5-Coder-32B-Instruct model"""
    
    def __init__(self, model_path: str = "Qwen/Qwen2.5-Coder-32B-Instruct"):
        self.model_path = model_path
        self.tokenizer = None
        self.model = None
        self._load_model()
    
    def _load_model(self):
        """Load the Qwen model and tokenizer"""
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
            self.model = AutoModelForCausalLM.from_pretrained(
                self.model_path,
                torch_dtype=torch.float16,
                device_map="auto"
            )
        except Exception as e:
            print(f"Warning: Could not load model {self.model_path}: {e}")
            print("Falling back to rule-based conversion...")
    
    def convert_code(self, python_code: str, context: Dict[str, Any] = None) -> str:
        """Convert Python code to C++ using the Qwen model"""
        if not self.model or not self.tokenizer:
            return self._fallback_conversion(python_code, context)
        
        prompt = self._create_conversion_prompt(python_code, context)
        
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt")
            
            with torch.no_grad():
                outputs = self.model.generate(
                    inputs.input_ids,
                    max_new_tokens=1000,
                    temperature=0.1,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )
            
            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            cpp_code = self._extract_cpp_code(response)
            return cpp_code
            
        except Exception as e:
            print(f"Model inference failed: {e}")
            return self._fallback_conversion(python_code, context)
    
    def _create_conversion_prompt(self, python_code: str, context: Dict[str, Any] = None) -> str:
        """Create a structured prompt for the model"""
        context_info = ""
        if context:
            if context.get('variables'):
                var_info = [f"{v.name}: {v.type.value}" for v in context['variables'].values()]
                context_info += f"Variables: {', '.join(var_info)}\n"
            
            if context.get('functions'):
                func_info = [f.name for f in context['functions']]
                context_info += f"Functions: {', '.join(func_info)}\n"
        
        prompt = f"""Convert the following Python code to semantically equivalent C++ code that produces identical output.

{context_info}
Python code:
```python
{python_code}
```

Requirements:
- Use appropriate C++ data types
- Handle memory management properly
- Maintain exact same logic and output
- Include necessary headers
- Use modern C++ standards (C++17 or later)

C++ code:
```cpp"""
        
        return prompt
    
    def _extract_cpp_code(self, response: str) -> str:
        """Extract C++ code from model response"""
        # Look for code blocks
        cpp_match = re.search(r'```cpp\n(.*?)```', response, re.DOTALL)
        if cpp_match:
            return cpp_match.group(1).strip()
        
        # Fallback: look for code after "C++ code:" or similar
        lines = response.split('\n')
        cpp_started = False
        cpp_lines = []
        
        for line in lines:
            if 'C++ code:' in line or cpp_started:
                cpp_started = True
                if not ('C++ code:' in line):
                    cpp_lines.append(line)
        
        return '\n'.join(cpp_lines).strip()
    
    def _fallback_conversion(self, python_code: str, context: Dict[str, Any] = None) -> str:
        """Fallback rule-based conversion when model is unavailable"""
        converter = CppGenerator()
        
        # Basic conversions
        cpp_code = converter.python_to_cpp_operators(python_code)
        
        # Replace print statements
        cpp_code = re.sub(r'print\((.*?)\)', r'std::cout << \1 << std::endl', cpp_code)
        
        # Replace len() with .size()
        cpp_code = re.sub(r'len\((\w+)\)', r'\1.size()', cpp_code)
        
        # Add basic structure
        headers = converter.generate_headers()
        
        return f"""{headers}
int main() {{
    {cpp_code}
    return 0;
}}"""

In [8]:
class PythonToCppConverter:
    """Main converter class that orchestrates the conversion process"""
    
    def __init__(self, model_path: str = "Qwen/Qwen2.5-Coder-32B-Instruct"):
        self.analyzer = PythonAnalyzer()
        self.cpp_generator = CppGenerator()
        self.model_interface = QwenModelInterface(model_path)
        
    def convert(self, python_code: str) -> str:
        """Convert Python code to C++ code"""
        try:
            # Parse and analyze Python code
            tree = ast.parse(python_code)
            self.analyzer.visit(tree)
            
            # Create context for model
            context = {
                'variables': self.analyzer.variables,
                'functions': self.analyzer.functions,
                'imports': self.analyzer.imports
            }
            
            # Use model for conversion
            cpp_code = self.model_interface.convert_code(python_code, context)
            
            # Post-process and validate
            cpp_code = self._post_process(cpp_code)
            
            return cpp_code
            
        except SyntaxError as e:
            raise ValueError(f"Invalid Python code: {e}")
        except Exception as e:
            raise RuntimeError(f"Conversion failed: {e}")
    
    def _post_process(self, cpp_code: str) -> str:
        """Post-process generated C++ code"""
        # Ensure proper headers
        if not cpp_code.startswith('#include'):
            headers = self.cpp_generator.generate_headers()
            cpp_code = headers + cpp_code
        
        # Ensure main function exists
        if 'int main(' not in cpp_code:
            # Wrap in main function if needed
            lines = cpp_code.split('\n')
            code_lines = [line for line in lines if not line.startswith('#include') and line.strip()]
            headers = [line for line in lines if line.startswith('#include')]
            
            cpp_code = '\n'.join(headers) + '\n\nint main() {\n'
            cpp_code += '\n'.join(f'    {line}' for line in code_lines)
            cpp_code += '\n    return 0;\n}'
        
        return cpp_code
    
    def convert_file(self, input_file: str, output_file: str):
        """Convert Python file to C++ file"""
        with open(input_file, 'r') as f:
            python_code = f.read()
        
        cpp_code = self.convert(python_code)
        
        with open(output_file, 'w') as f:
            f.write(cpp_code)
    
    def test_conversion(self, python_code: str) -> Tuple[bool, str, str]:
        """Test if converted C++ produces same output as Python"""
        try:
            # Run Python code
            with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as py_file:
                py_file.write(python_code)
                py_file_path = py_file.name
            
            python_output = subprocess.run(
                ['python', py_file_path],
                capture_output=True,
                text=True,
                timeout=10
            )
            
            # Convert to C++
            cpp_code = self.convert(python_code)
            
            # Compile and run C++
            with tempfile.NamedTemporaryFile(mode='w', suffix='.cpp', delete=False) as cpp_file:
                cpp_file.write(cpp_code)
                cpp_file_path = cpp_file.name
            
            exe_path = cpp_file_path.replace('.cpp', '.exe')
            
            # Compile
            compile_result = subprocess.run(
                ['g++', '-std=c++17', cpp_file_path, '-o', exe_path],
                capture_output=True,
                text=True,
                timeout=10
            )
            
            if compile_result.returncode != 0:
                return False, python_output.stdout, f"Compilation error: {compile_result.stderr}"
            
            # Run
            cpp_output = subprocess.run(
                [exe_path],
                capture_output=True,
                text=True,
                timeout=10
            )
            
            # Compare outputs
            success = python_output.stdout.strip() == cpp_output.stdout.strip()
            
            # Cleanup
            os.unlink(py_file_path)
            os.unlink(cpp_file_path)
            if os.path.exists(exe_path):
                os.unlink(exe_path)
            
            return success, python_output.stdout, cpp_output.stdout
            
        except Exception as e:
            return False, "", f"Test failed: {e}"


In [9]:
def main():
    """Example usage of the converter"""
    converter = PythonToCppConverter()
    
    # Example Python codes to test
    test_codes = [
        # Simple arithmetic
        """
def add_numbers(a, b):
    return a + b

result = add_numbers(5, 3)
print(result)
""",
        
        # List operations
        """
numbers = [1, 2, 3, 4, 5]
squared = [x ** 2 for x in numbers]
print(squared)
""",
        
        # String manipulation
        """
name = "World"
message = f"Hello, {name}!"
print(message)
print(len(message))
""",
        
        # Control flow
        """
for i in range(5):
    if i % 2 == 0:
        print(f"{i} is even")
    else:
        print(f"{i} is odd")
"""
    ]
    
    for i, code in enumerate(test_codes, 1):
        print(f"\n=== Test Case {i} ===")
        print("Python code:")
        print(code)
        
        try:
            cpp_code = converter.convert(code)
            print("\nGenerated C++ code:")
            print(cpp_code)
            
            # Test if outputs match
            success, py_out, cpp_out = converter.test_conversion(code)
            print(f"\nOutput comparison: {'✓ PASS' if success else '✗ FAIL'}")
            if not success:
                print(f"Python output: {py_out}")
                print(f"C++ output: {cpp_out}")
                
        except Exception as e:
            print(f"Conversion failed: {e}")

In [10]:
main()

tokenizer_config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/663 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 14 files:   0%|          | 0/14 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not in

model-00001-of-00014.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

model-00002-of-00014.safetensors:   0%|          | 0.00/4.88G [00:00<?, ?B/s]

model-00003-of-00014.safetensors:   0%|          | 0.00/4.88G [00:00<?, ?B/s]

model-00007-of-00014.safetensors:   0%|          | 0.00/4.88G [00:00<?, ?B/s]

model-00005-of-00014.safetensors:   0%|          | 0.00/4.88G [00:00<?, ?B/s]

model-00004-of-00014.safetensors:   0%|          | 0.00/4.88G [00:00<?, ?B/s]

model-00008-of-00014.safetensors:   0%|          | 0.00/4.88G [00:00<?, ?B/s]

model-00006-of-00014.safetensors:   0%|          | 0.00/4.88G [00:00<?, ?B/s]

Falling back to rule-based conversion...

=== Test Case 1 ===
Python code:

def add_numbers(a, b):
    return a + b

result = add_numbers(5, 3)
print(result)


Generated C++ code:
#include <algorithm>
#include <cmath>
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>


int main() {
    
def add_numbers(a, b):
    return a + b

result = add_numbers(5, 3)
std::cout << result << std::endl

    return 0;
}

Output comparison: ✗ FAIL
Python output: 
C++ output: Test failed: [WinError 2] The system cannot find the file specified

=== Test Case 2 ===
Python code:

numbers = [1, 2, 3, 4, 5]
squared = [x ** 2 for x in numbers]
print(squared)


Generated C++ code:
#include <algorithm>
#include <cmath>
#include <iostream>
#include <map>
#include <string>
#include <tuple>
#include <vector>


int main() {
    
numbers = [1, 2, 3, 4, 5]
squared = [pow(x, 2) for x in numbers]
std::cout << squared << std::endl

    return 0;
}

Output comparison: ✗ FAIL
Python out