In [2]:
import os
import json
import re
from typing import List, Dict, Optional
from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr
from IPython.display import Markdown, display


In [None]:
load_dotenv(override=True)


def _connect_optional_provider(label, env_var, base_url, prefix_len):
    """Build a client for a provider whose API key is optional.

    Reads ``env_var`` from the environment. When it holds a non-empty value a
    client pointed at ``base_url`` is created and the first ``prefix_len``
    characters of the key are printed; otherwise a "not set" notice is printed.

    Returns:
        ``(api_key, client)`` where ``client`` is ``None`` if the key is unset.
    """
    api_key = os.getenv(env_var)
    if not api_key:
        print(f"{label} API Key not set (optional)")
        return api_key, None

    client = OpenAI(api_key=api_key, base_url=base_url)
    print(f"{label} API Key loaded (begins with {api_key[:prefix_len]})")
    return api_key, client


# Ollama: local server, placeholder API key
ollama_url = "http://localhost:11434/v1"
ollama_client = OpenAI(api_key="ollama", base_url=ollama_url)

# Groq: only available when GROQ_API_KEY is set
groq_url = "https://api.groq.com/openai/v1"
groq_api_key, groq_client = _connect_optional_provider(
    "Groq", "GROQ_API_KEY", groq_url, 4
)

# OpenRouter: only available when OPENROUTER_API_KEY is set
openrouter_url = "https://openrouter.ai/api/v1"
openrouter_api_key, openrouter_client = _connect_optional_provider(
    "OpenRouter", "OPENROUTER_API_KEY", openrouter_url, 6
)


Groq API Key not set (optional)
OpenRouter API Key loaded (begins with sk-or-)


In [24]:
# Open-source code models configuration.
# MODELS maps a unique registry key to a dict with:
#   "name"        - display label used in result headers
#   "client"      - the provider client the request is sent through
#   "model"       - the provider-side model identifier
#   "description" - short free-text note
# Keys must be unique across providers: dict.update() silently overwrites an
# existing key, which would drop the earlier provider's entry.
MODELS = {}

# Groq models
if groq_client:
    MODELS.update({
        "gpt-oss-20b-groq": {
            "name": "GPT-OSS-20B (Groq)",
            "client": groq_client,
            # Same "openai/..." id form as the 120B entry below; the previous
            # "gpt-oss:20b" value is an Ollama-style tag, not a Groq model id.
            "model": "openai/gpt-oss-20b",
            "description": "Cloud"
        },
        "gpt-oss-120b-groq": {
            "name": "GPT-OSS-120B (Groq)",
            "client": groq_client,
            "model": "openai/gpt-oss-120b",
            "description": "Cloud - Larger GPT-OSS"
        },
        "qwen2.5-coder-32b-groq": {
            "name": "Qwen2.5-Coder 32B (Groq)",
            "client": groq_client,
            # NOTE(review): confirm this id against Groq's current model list.
            "model": "qwen/qwen2.5-coder-32b-instruct",
            "description": "Cloud"
        },
    })

# OpenRouter models
if openrouter_client:
    MODELS.update({
        "qwen-2.5-coder-32b-openrouter": {
            "name": "Qwen2.5-Coder 32B (OpenRouter)",
            "client": openrouter_client,
            "model": "qwen/qwen-2.5-coder-32b-instruct",
            "description": "Cloud - Perfect for demos, 50 req/day free"
        },
        # Was keyed "gpt-oss-20b-groq", which replaced the Groq entry of the
        # same key whenever both providers were configured.
        "gpt-oss-20b-openrouter": {
            "name": "GPT-OSS-20B",
            "client": openrouter_client,
            "model": "openai/gpt-oss-20b",
            "description": "Cloud - OpenAI's open model, excellent for code!"
        },
    })

print(f"Configured {len(MODELS)} models")
if openrouter_client:
    print("OpenRouter models available (perfect for limited storage demos!)")
if groq_client:
    print("Groq models available (fast cloud inference!)")
# Detect Ollama entries by the client they use rather than by one hard-coded
# key; no Ollama entry is registered in this cell, so this stays silent today.
if any(config["client"] is ollama_client for config in MODELS.values()):
    print("Ollama models available (unlimited local usage!)")


Configured 2 models
OpenRouter models available (perfect for limited storage demos!)


In [22]:
# System prompt for the bug-detection request. It asks for a bare JSON array
# whose objects carry "severity", "line", "issue" and "suggestion"; these are
# the keys format_bugs_output() reads.
BUG_DETECTION_SYSTEM_PROMPT = """You are an expert code reviewer specializing in finding bugs, security vulnerabilities, and logic errors.

Your task is to analyze Python code and identify issues. Return ONLY a valid JSON array with this exact format:
[{
  "severity": "critical|high|medium|low",
  "line": number,
  "issue": "brief description of the problem",
  "suggestion": "specific fix recommendation"
}]

Be thorough but concise. Focus on real bugs and security issues."""

# System prompt for the improvement-suggestion request. It asks for a JSON
# array whose objects carry "category", "line", "current", "improved" and
# "explanation"; these are the keys format_improvements_output() reads.
IMPROVEMENTS_SYSTEM_PROMPT = """You are a senior software engineer specializing in code quality and best practices.

Analyze the Python code and suggest improvements for:
- Code readability and maintainability
- Performance optimizations
- Pythonic idioms and conventions
- Better error handling

Return ONLY a JSON array:
[{
  "category": "readability|performance|style|error_handling",
  "line": number,
  "current": "current code snippet",
  "improved": "improved code snippet",
  "explanation": "why this is better"
}]

Only suggest meaningful improvements."""

# System prompt for the test-generation request. It asks for plain pytest
# source with no markdown; generate_tests() still strips code fences in case
# the model adds them anyway.
TEST_GENERATION_SYSTEM_PROMPT = """You are an expert in writing comprehensive unit tests.

Generate pytest unit tests for the given Python code. Include:
- Test cases for normal operation
- Edge cases and boundary conditions
- Error handling tests
- Tests for any bugs that were identified

Return ONLY Python code with pytest tests. Include the original code at the top if needed.
Put the imports at the top of the file first.
Do not include explanations or markdown formatting."""


In [6]:
def extract_json_from_response(text: str) -> List[Dict]:
    """Extract a list of JSON objects from a model response.

    Markdown code fences are stripped first. The function then tries, in
    order, the first ``[{...}]``-shaped span found in the text and the whole
    stripped text, and uses the first candidate that parses as JSON.

    The result is always a list of dicts, so callers can safely call
    ``.get`` on every item:
      * a JSON array keeps only its object items;
      * a single JSON object is wrapped in a one-element list;
      * any other JSON value, unparseable text, or a non-string ``text``
        (e.g. ``None``) yields ``[]``.

    Args:
        text: Raw response text from the model.

    Returns:
        The parsed objects, or an empty list when nothing usable was found.
    """
    if not isinstance(text, str):
        return []

    # Remove markdown code blocks
    cleaned = re.sub(r'```json\n?', '', text)
    cleaned = re.sub(r'```\n?', '', cleaned)

    # Candidate payloads, most specific first.
    candidates = []
    array_match = re.search(r'\[\s*\{.*\}\s*\]', cleaned, re.DOTALL)
    if array_match:
        candidates.append(array_match.group())
    candidates.append(cleaned.strip())

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue

        if isinstance(parsed, dict):
            # A lone object instead of the requested array.
            return [parsed]
        if isinstance(parsed, list):
            return [item for item in parsed if isinstance(item, dict)]
        # Valid JSON but not a shape the callers can use (string, number, ...).
        return []

    return []

def detect_bugs(code: str, model_key: str) -> Dict:
    """Detect bugs and security issues in code.

    Sends ``code`` to the model registered under ``model_key`` together with
    the bug-detection system prompt and parses the reply into issue dicts.

    Args:
        code: Python source to analyze.
        model_key: Key into ``MODELS``.

    Returns:
        A dict that always has ``"model"``, ``"issues"`` and ``"success"``.
        On success it also has ``"raw_response"``; on failure it has
        ``"error"`` and ``"issues"`` is empty. This function does not raise
        for an unknown key, a failed request, or an empty reply; each of
        those is reported through the failure shape instead.
    """
    model_config = MODELS.get(model_key)
    if model_config is None:
        # Report an unknown key in the same shape as any other failure rather
        # than letting a KeyError escape to the caller.
        return {
            "model": str(model_key),
            "issues": [],
            "error": f"Unknown model key: {model_key!r}",
            "success": False
        }

    user_prompt = f"Analyze this Python code for bugs and security issues:\n\n```python\n{code}\n```"

    try:
        response = model_config["client"].chat.completions.create(
            model=model_config["model"],
            messages=[
                {"role": "system", "content": BUG_DETECTION_SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.1
        )

        content = response.choices[0].message.content
        if not content:
            # Empty/None content used to surface as an opaque TypeError message.
            return {
                "model": model_config["name"],
                "issues": [],
                "error": "Model returned an empty response",
                "success": False
            }

        return {
            "model": model_config["name"],
            "issues": extract_json_from_response(content),
            "raw_response": content,
            "success": True
        }
    except Exception as e:
        # Any request or parsing failure becomes an error result for the UI.
        return {
            "model": model_config["name"],
            "issues": [],
            "error": str(e),
            "success": False
        }

def suggest_improvements(code: str, model_key: str) -> Dict:
    """Suggest code improvements and best practices.

    Sends ``code`` to the model registered under ``model_key`` together with
    the improvements system prompt and parses the reply into suggestion dicts.

    Args:
        code: Python source to analyze.
        model_key: Key into ``MODELS``.

    Returns:
        A dict that always has ``"model"``, ``"improvements"`` and
        ``"success"``. On success it also has ``"raw_response"``; on failure
        it has ``"error"`` and ``"improvements"`` is empty. This function
        does not raise for an unknown key, a failed request, or an empty
        reply; each of those is reported through the failure shape instead.
    """
    model_config = MODELS.get(model_key)
    if model_config is None:
        # Report an unknown key in the same shape as any other failure rather
        # than letting a KeyError escape to the caller.
        return {
            "model": str(model_key),
            "improvements": [],
            "error": f"Unknown model key: {model_key!r}",
            "success": False
        }

    user_prompt = f"Suggest improvements for this Python code:\n\n```python\n{code}\n```"

    try:
        response = model_config["client"].chat.completions.create(
            model=model_config["model"],
            messages=[
                {"role": "system", "content": IMPROVEMENTS_SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.2
        )

        content = response.choices[0].message.content
        if not content:
            # Empty/None content used to surface as an opaque TypeError message.
            return {
                "model": model_config["name"],
                "improvements": [],
                "error": "Model returned an empty response",
                "success": False
            }

        return {
            "model": model_config["name"],
            "improvements": extract_json_from_response(content),
            "raw_response": content,
            "success": True
        }
    except Exception as e:
        # Any request or parsing failure becomes an error result for the UI.
        return {
            "model": model_config["name"],
            "improvements": [],
            "error": str(e),
            "success": False
        }

def generate_tests(code: str, bugs: List[Dict], model_key: str) -> Dict:
    """Generate unit tests for the code.

    Sends ``code`` (plus a summary of previously identified ``bugs``, if any)
    to the model registered under ``model_key`` and returns the reply with
    markdown code fences removed.

    Args:
        code: Python source to generate tests for.
        bugs: Issue dicts with optional ``"line"`` and ``"issue"`` keys.
            ``None``, an empty list, and non-dict entries are tolerated and
            contribute nothing to the prompt.
        model_key: Key into ``MODELS``.

    Returns:
        A dict that always has ``"model"``, ``"test_code"`` and ``"success"``.
        On success it also has ``"raw_response"``; on failure it has
        ``"error"`` and ``"test_code"`` is ``""``. This function does not
        raise for an unknown key, a failed request, or an empty reply; each
        of those is reported through the failure shape instead.
    """
    model_config = MODELS.get(model_key)
    if model_config is None:
        # Report an unknown key in the same shape as any other failure rather
        # than letting a KeyError escape to the caller.
        return {
            "model": str(model_key),
            "test_code": "",
            "error": f"Unknown model key: {model_key!r}",
            "success": False
        }

    # Only dict entries can be summarized; anything else is skipped instead of
    # crashing on .get() before the request is even attempted.
    bug_lines = [
        f"- Line {bug.get('line', '?')}: {bug.get('issue', '')}"
        for bug in (bugs or [])
        if isinstance(bug, dict)
    ]
    bugs_context = ""
    if bug_lines:
        bugs_context = "\n\nNote: The following bugs were identified:\n" + "\n".join(bug_lines)

    user_prompt = f"Generate pytest unit tests for this Python code:{bugs_context}\n\n```python\n{code}\n```"

    try:
        response = model_config["client"].chat.completions.create(
            model=model_config["model"],
            messages=[
                {"role": "system", "content": TEST_GENERATION_SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.3
        )

        content = response.choices[0].message.content
        if not content:
            # Empty/None content used to surface as an opaque TypeError message.
            return {
                "model": model_config["name"],
                "test_code": "",
                "error": "Model returned an empty response",
                "success": False
            }

        # Remove markdown code blocks if present
        test_code = re.sub(r'```python\n?', '', content)
        test_code = re.sub(r'```\n?', '', test_code)

        return {
            "model": model_config["name"],
            "test_code": test_code.strip(),
            "raw_response": content,
            "success": True
        }
    except Exception as e:
        # Any request failure becomes an error result for the UI.
        return {
            "model": model_config["name"],
            "test_code": "",
            "error": str(e),
            "success": False
        }


In [7]:
def format_bugs_output(result: Dict) -> str:
    """Format bug detection results for display.

    Args:
        result: A bug-detection result dict with ``"model"``, ``"success"``
            and either ``"issues"`` or ``"error"``.

    Returns:
        Markdown text: an error line when the result is a failure, a
        "no issues" line when there is nothing to report, otherwise one
        entry per issue ordered critical, high, medium, low. Issues with a
        missing or unrecognised severity sort with "low".

    Severity is matched case-insensitively for both ordering and display, a
    missing/``None``/non-string severity is shown as ``UNKNOWN``, and
    non-dict entries in ``"issues"`` are ignored.
    """
    if not result.get("success"):
        return f"**Error with {result['model']}:** {result.get('error', 'Unknown error')}"

    issues = [issue for issue in (result.get("issues") or []) if isinstance(issue, dict)]
    if not issues:
        return f"✅ **{result['model']}**: No issues found. Code looks good!"

    severity_rank = {"CRITICAL": 0, "HIGH": 1, "MEDIUM": 2, "LOW": 3}
    severity_emojis = {
        "CRITICAL": "🔴",
        "HIGH": "🟠",
        "MEDIUM": "🟡",
        "LOW": "🔵"
    }

    def severity_label(issue: Dict) -> str:
        # One normalisation shared by sorting and display so they cannot disagree.
        return str(issue.get("severity") or "unknown").strip().upper()

    output = [f"**{result['model']}** - Found {len(issues)} issue(s):\n"]

    # sorted() is stable, so issues of equal rank keep the model's order.
    for issue in sorted(issues, key=lambda item: severity_rank.get(severity_label(item), 3)):
        severity = severity_label(issue)
        line = issue.get("line", "?")
        issue_desc = issue.get("issue") or ""
        suggestion = issue.get("suggestion") or ""

        output.append(f"{severity_emojis.get(severity, '⚪')} **{severity}** (Line {line}): {issue_desc}")
        if suggestion:
            output.append(f"   💡 *Fix:* {suggestion}")
        output.append("")

    return "\n".join(output)

def format_improvements_output(result: Dict) -> str:
    """Format improvement suggestions for display.

    Args:
        result: An improvements result dict with ``"model"``, ``"success"``
            and either ``"improvements"`` or ``"error"``.

    Returns:
        Markdown text: an error line when the result is a failure, a
        "no improvements" line when there is nothing to report, otherwise one
        entry per suggestion. Before/after snippets are cut to 60 characters
        with a trailing ``...`` and only shown when both are present.

    Field values are coerced to text, so a ``None`` category or a non-string
    snippet does not raise; non-dict entries in ``"improvements"`` are ignored.
    """
    if not result.get("success"):
        return f"**Error with {result['model']}:** {result.get('error', 'Unknown error')}"

    improvements = [imp for imp in (result.get("improvements") or []) if isinstance(imp, dict)]
    if not improvements:
        return f"✅ **{result['model']}**: Code follows best practices. No major improvements needed!"

    def as_text(value) -> str:
        # Models occasionally emit null or numbers where a string was requested.
        return "" if value is None else str(value)

    def preview(snippet: str, limit: int = 60) -> str:
        return snippet[:limit] + ("..." if len(snippet) > limit else "")

    output = [f"**{result['model']}** - {len(improvements)} suggestion(s):\n"]

    for imp in improvements:
        category = (as_text(imp.get("category")) or "general").replace("_", " ").title()
        line = imp.get("line", "?")
        current = as_text(imp.get("current"))
        improved = as_text(imp.get("improved"))
        explanation = as_text(imp.get("explanation"))

        output.append(f"\n📝 **{category}** (Line {line}):")
        if current and improved:
            output.append(f"   Before: `{preview(current)}`")
            output.append(f"   After:  `{preview(improved)}`")
        if explanation:
            output.append(f"   💡 {explanation}")

    return "\n".join(output)

def format_tests_output(result: Dict) -> str:
    """Format test generation results for display."""
    if not result.get("success"):
        return f"**Error with {result['model']}:** {result.get('error', 'Unknown error')}"
    
    test_code = result.get("test_code", "")
    if not test_code:
        return f"‚ö†Ô∏è **{result['model']}**: No tests generated."
    
    return test_code


In [8]:
def review_code(code: str, model_key: str, include_tests: bool = True) -> tuple:
    """Main function to perform complete code review.

    Runs bug detection, improvement suggestions and (optionally) test
    generation against one model and formats each result for display.

    Args:
        code: Python source to review. ``None`` or whitespace-only input is
            rejected with a prompt message instead of being sent to a model.
        model_key: Key of the model to use. A missing selection (``None`` or
            ``""``) is rejected with a prompt message.
        include_tests: When false, test generation is skipped and the third
            element of the result is ``""``.

    Returns:
        ``(bugs_output, improvements_output, tests_output)`` as strings.
    """
    # The code editor may hand over None when it is empty.
    if not code or not code.strip():
        return "Please provide code to review.", "", ""

    # The dropdown has no value when no model is available.
    if not model_key:
        return "Please select a model.", "", ""

    # Detect bugs
    bugs_result = detect_bugs(code, model_key)
    bugs_output = format_bugs_output(bugs_result)
    # Passed on so generated tests can target the reported problems.
    bugs_issues = bugs_result.get("issues") or []

    # Suggest improvements
    improvements_result = suggest_improvements(code, model_key)
    improvements_output = format_improvements_output(improvements_result)

    # Generate tests
    tests_output = ""
    if include_tests:
        tests_result = generate_tests(code, bugs_issues, model_key)
        tests_output = format_tests_output(tests_result)

    return bugs_output, improvements_output, tests_output


In [20]:
def compare_models(code: str, model_keys: List[str]) -> str:
    """Compare multiple models on the same code.

    Runs bug detection once per key in ``model_keys`` and builds a markdown
    summary: per model, the number of issues and a per-severity breakdown (or
    the error), followed by a consensus line when at least one issue was
    reported by two or more different models.

    Two issues count as the same when they share a line number and the first
    50 characters of their description. A model repeating the same issue does
    not create consensus on its own.

    Args:
        code: Python source to review; ``None`` or whitespace-only input is
            rejected with a prompt message.
        model_keys: Keys of the models to compare (``None`` is treated as
            an empty list).

    Returns:
        The markdown summary.
    """
    if not code or not code.strip():
        return "Please provide code to review."

    results = [detect_bugs(code, model_key) for model_key in (model_keys or [])]

    # Build comparison output
    output = ["# Model Comparison Results\n"]
    # signature -> set of model names that reported it (a set, so duplicates
    # from a single model are counted once)
    models_by_signature = {}

    for result in results:
        model_name = result["model"]

        if not result.get("success", False):
            output.append(f"\n**{model_name}**: Error - {result.get('error', 'Unknown')}")
            continue

        issues = [issue for issue in (result.get("issues") or []) if isinstance(issue, dict)]
        output.append(f"\n**{model_name}**: Found {len(issues)} issue(s)")

        if issues:
            severity_counts = {}
            for issue in issues:
                sev = str(issue.get("severity") or "low")
                severity_counts[sev] = severity_counts.get(sev, 0) + 1
            output.append(f"   Breakdown: {dict(severity_counts)}")

        for issue in issues:
            # Create signature from line and issue description
            sig = f"{issue.get('line')}-{str(issue.get('issue') or '')[:50]}"
            models_by_signature.setdefault(sig, set()).add(model_name)

    # Find consensus issues (found by multiple models)
    if len(results) > 1:
        consensus = [sig for sig, models in models_by_signature.items() if len(models) > 1]
        if consensus:
            output.append(f"\n\n **Consensus Issues**: {len(consensus)} issue(s) identified by multiple models")

    return "\n".join(output)


## Gradio UI


In [23]:
# Example buggy code for testing.
# This is only ever used as text: it pre-fills the code editor and is the first
# gr.Examples entry. The snippet is deliberately flawed (unchecked division,
# eval on user input, string-built SQL, average of an empty list) so a review
# has something to find. Do not "fix" it.
EXAMPLE_CODE = '''def divide_numbers(a, b):
    return a / b

def process_user_data(user_input):
    # Missing input validation
    result = eval(user_input)
    return result

def get_user_by_id(user_id):
    # SQL injection vulnerability
    query = f"SELECT * FROM users WHERE id = {user_id}"
    return query

def calculate_average(numbers):
    total = sum(numbers)
    return total / len(numbers)  # Potential division by zero
'''

def create_ui():
    """Build the Gradio Blocks app for the code review assistant.

    Layout: a code editor, model dropdown, "Generate Tests" checkbox and two
    buttons on the left; tabs for bug detection, improvements, generated unit
    tests and the model comparison on the right.

    "Review Code" calls ``review_code`` with the selected model; "Compare
    Models" runs ``compare_models`` over the first three registered models.

    When ``MODELS`` is empty (no provider configured) the UI is still built:
    the dropdown has no selection, both buttons are disabled and a notice is
    shown, instead of failing with ``IndexError`` while choosing a default.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    model_choices = list(MODELS.keys())
    has_models = bool(model_choices)
    default_model = model_choices[0] if has_models else None

    with gr.Blocks(title="AI Code Review Assistant", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # 🔍 AI-Powered Code Review Assistant
        
        Review your Python code using open-source AI models. Detect bugs, get improvement suggestions, and generate unit tests.
        """)

        if not has_models:
            gr.Markdown(
                "**No models are configured.** Set `GROQ_API_KEY` or "
                "`OPENROUTER_API_KEY` and re-run the setup cells."
            )

        with gr.Row():
            with gr.Column(scale=2):
                code_input = gr.Code(
                    label="Python Code to Review",
                    value=EXAMPLE_CODE,
                    language="python",
                    lines=20
                )

                with gr.Row():
                    model_selector = gr.Dropdown(
                        choices=model_choices,
                        value=default_model,
                        label="Select Model",
                        info="Choose an open-source code model"
                    )

                    include_tests = gr.Checkbox(
                        label="Generate Tests",
                        value=True
                    )

                with gr.Row():
                    # Disabled when there is nothing to send a request to.
                    review_btn = gr.Button("🔍 Review Code", variant="primary", scale=2, interactive=has_models)
                    compare_btn = gr.Button("📊 Compare Models", variant="secondary", scale=1, interactive=has_models)

            with gr.Column(scale=3):
                with gr.Tabs():
                    with gr.Tab("🐛 Bug Detection"):
                        bugs_output = gr.Markdown(value="Select a model and click 'Review Code' to analyze your code.")

                    with gr.Tab("✨ Improvements"):
                        improvements_output = gr.Markdown(value="Get suggestions for code improvements and best practices.")

                    with gr.Tab("🧪 Unit Tests"):
                        tests_output = gr.Code(
                            label="Generated Test Code",
                            language="python",
                            lines=25
                        )

                    with gr.Tab("📊 Comparison"):
                        comparison_output = gr.Markdown(value="Compare multiple models side-by-side.")

        # Event handlers
        review_btn.click(
            fn=review_code,
            inputs=[code_input, model_selector, include_tests],
            outputs=[bugs_output, improvements_output, tests_output]
        )

        def compare_selected_models(code):
            # Compare first 3 models by default; the registry is read at click
            # time so models added after the UI was built are included.
            model_keys = list(MODELS.keys())[:3]
            return compare_models(code, model_keys)

        compare_btn.click(
            fn=compare_selected_models,
            inputs=[code_input],
            outputs=[comparison_output]
        )

        gr.Examples(
            examples=[
                [EXAMPLE_CODE],
                ["""def fibonacci(n):
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)
"""],
                ["""def parse_config(file_path):
    with open(file_path) as f:
        return eval(f.read())
"""]
            ],
            inputs=[code_input]
        )

    return demo

# Build the app and start the local Gradio server. inbrowser=True asks Gradio
# to open the app in a browser tab; share=False means no public link is
# created (the server output notes that share=True would create one).
demo = create_ui()
demo.launch(inbrowser=True, share=False)


* Running on local URL:  http://127.0.0.1:7884
* To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "c:\Users\Philo Baba\llm_engineering\.venv\Lib\site-packages\gradio\queueing.py", line 745, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Philo Baba\llm_engineering\.venv\Lib\site-packages\gradio\route_utils.py", line 354, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Philo Baba\llm_engineering\.venv\Lib\site-packages\gradio\blocks.py", line 2116, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Philo Baba\llm_engineering\.venv\Lib\site-packages\gradio\blocks.py", line 1623, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Philo Baba\llm_engineering\.venv\Lib\site-packages\anyio\to_thread.py", l

In [12]:
# Smoke test: a two-line function with an obvious unchecked division
test_code = """def divide(a, b):
    return a / b
"""

# Run bug detection against the first registered model and show the report
first_model_key = list(MODELS)[0]
result = detect_bugs(test_code, first_model_key)
print(format_bugs_output(result))


**Qwen2.5-Coder 32B (OpenRouter)** - Found 2 issue(s):

🔴 **CRITICAL** (Line 2): No division by zero protection
   💡 *Fix:* Add a check for b == 0 and raise ValueError or handle ZeroDivisionError

🟡 **MEDIUM** (Line 2): No input validation for numeric types
   💡 *Fix:* Add type checking to ensure a and b are numbers (int/float)

