<a href="https://colab.research.google.com/github/arunpiyush25/Darwix-AI-Hackathon/blob/main/DarwixAI_Hackathon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
# === Selecting Mission 1: The Empathetic Code Reviewer ===
# Cell 1: Imports and the sample input.

import re
import json
from textwrap import dedent
from typing import Dict, Tuple
from IPython.display import Markdown, display

# Sample input, as provided in the challenge PDF: the code under review
# ("code_snippet") plus the raw reviewer remarks ("review_comments").
input_json = {
    "code_snippet": """def get_active_users(users):
    results = []
    for u in users:
        if u.is_active == True and u.profile_complete == True:
            results.append(u)
    return results""",
    "review_comments": [
        "This is inefficient. Don't loop twice conceptually.",
        "Variable 'u' is a bad name.",
        "Boolean comparison '== True' is redundant."
    ]
}


In [23]:
# Cell 2: Severity detection, principle inference, and resource library

def detect_severity(comment: str) -> str:
    """
    Classify a review comment as "high", "medium" or "low" severity.

    Simple keyword heuristic, checked in priority order: high-severity
    keywords first, then medium, then low, then a tone-based fallback
    (harsh wording is treated as "medium"). Anything else is "low".

    Keywords are matched case-insensitively at the *start of a word*, so
    inflected forms still count ("leak" matches "leaks", "format" matches
    "formatting") while unrelated words that merely contain a keyword do
    not ("debug" is not a "bug", "unit" is not a "nit").
    """
    c = comment.lower()
    high_words = ["bug", "crash", "security", "data loss", "sql injection", "overflow", "race condition"]
    med_words = ["inefficient", "wrong", "incorrect", "leak", "complex", "duplicate", "anti-pattern"]
    low_words = ["nit", "minor", "style", "readability", "naming", "format", "redundant", "typo", "pep8", "convention"]
    tone_words = ["bad", "terrible", "awful", "never"]

    def mentions(words) -> bool:
        # \b anchors each keyword to a word start; re.escape keeps multi-word
        # and hyphenated keywords ("data loss", "anti-pattern") literal.
        return any(re.search(r"\b" + re.escape(w), c) for w in words)

    if mentions(high_words):
        return "high"
    if mentions(med_words):
        return "medium"
    if mentions(low_words):
        return "low"
    # fallback by tone
    if mentions(tone_words):
        return "medium"
    return "low"

# Reusable "after" snippets referenced by the knowledge base below.
_COMPREHENSION_SNIPPET = (
    "def get_active_users(users):\n"
    "    return [user for user in users if user.is_active and user.profile_complete]"
)

_PLAIN_LOOP_SNIPPET = (
    "def get_active_users(users):\n"
    "    results = []\n"
    "    for user in users:\n"
    "        if user.is_active and user.profile_complete:\n"
    "            results.append(user)\n"
    "    return results"
)

_DEFENSIVE_LOOP_SNIPPET = (
    "def get_active_users(users):\n"
    "    if users is None:\n"
    "        return []\n"
    "    results = []\n"
    "    for user in users:\n"
    '        if getattr(user, "is_active", False) and getattr(user, "profile_complete", False):\n'
    "            results.append(user)\n"
    "    return results"
)

# Knowledge base keyed by engineering principle. Each entry carries:
#   why                 - the rationale shown to the author
#   resource            - a link for further reading
#   fallback_suggestion - a canned code example
PRINCIPLE_KB: Dict[str, Dict[str, str]] = {
    "performance": dict(
        why="Efficient iteration and vectorized or comprehension-based constructs reduce per-item overhead and scale better with large inputs.",
        resource="https://docs.python.org/3/tutorial/datastructures.html#list-comprehensions",
        fallback_suggestion=_COMPREHENSION_SNIPPET,
    ),
    "readability": dict(
        why="Descriptive names and clear structure make code self-documenting, easier to maintain, and friendlier to future contributors.",
        resource="https://peps.python.org/pep-0008/#naming-conventions",
        fallback_suggestion=_PLAIN_LOOP_SNIPPET,
    ),
    "convention": dict(
        why="Following language idioms (e.g., truthy checks) improves consistency and reduces visual noise.",
        resource="https://docs.python.org/3/library/stdtypes.html#truth-value-testing",
        fallback_suggestion=_PLAIN_LOOP_SNIPPET,
    ),
    "robustness": dict(
        why="Clear error handling and validation prevent edge-case failures and improve reliability in production.",
        resource="https://docs.python.org/3/tutorial/errors.html",
        fallback_suggestion=_DEFENSIVE_LOOP_SNIPPET,
    ),
    "security": dict(
        why="Security issues can compromise data integrity and privacy; follow best practices to minimize attack surfaces.",
        resource="https://cheatsheetseries.owasp.org/",
        fallback_suggestion="# Security-specific suggestion depends on context; consider input validation, least privilege, and avoiding dangerous eval/exec.",
    ),
}

def infer_principle(comment: str) -> str:
    """
    Map a review comment to the engineering principle it most likely targets.

    The lower-cased comment is scanned for substring keywords; principles
    are tried in the order listed below and the first hit wins. Comments
    with no recognised keyword default to "readability".
    """
    lowered = comment.lower()

    # Order matters: earlier principles take precedence over later ones.
    keyword_table = (
        ("performance", ("inefficient", "performance", "optimize", "complexity", "big-o", "slow")),
        ("readability", ("name", "naming", "readable", "readability", "clarity", "clean")),
        ("convention", ("pep8", "convention", "idiomatic", "boolean", "== true", "== false", "style")),
        ("robustness", ("error", "exception", "handle", "robust", "validate")),
        ("security", ("security", "injection", "xss", "csrf")),
    )

    for principle, keywords in keyword_table:
        for keyword in keywords:
            if keyword in lowered:
                return principle

    return "readability"

def tone_by_severity(sev: str) -> Tuple[str, str]:
    """
    Pick the (positive_prefix, coaching_suffix) pair for a severity level.

    "high" and "medium" have dedicated wording; every other value gets the
    gentle low-severity wording.
    """
    dedicated_tones = (
        ("high", ("You’ve made solid progress on the core idea! ",
                  " Let’s address this critical issue so the solution stays reliable and safe.")),
        ("medium", ("Nice work outlining the approach! ",
                    " With a small refinement, we can make it more efficient and maintainable.")),
    )
    for level, tone in dedicated_tones:
        if sev == level:
            return tone

    # Low severity and anything unrecognised.
    return ("Great start and clear intent! ",
            " A tiny tweak here will make it even cleaner and more Pythonic.")


In [25]:
# Cell 3: Heuristic code transformations for concrete suggestions

def transform_boolean_comparisons(code: str) -> str:
    """
    Replace '== True' and '== False' with idiomatic truth checks.

    Conservative: only a plain name or dotted attribute immediately to the
    left of the comparison is rewritten.

    - ``X == True``  becomes ``X``
    - ``X == False`` becomes ``not X``, but only where ``not`` can legally
      be inserted. When X is the tail of a larger expression (after ``.``,
      ``)``, ``]``, an arithmetic/bitwise/comparison operator, or
      ``in``/``is``), prefixing it with ``not`` would produce invalid code
      such as ``get(x).not ok`` or ``a + not b``; those comparisons are
      left untouched.

    Returns the transformed source text.
    """
    # X == True  -> X
    code = re.sub(r'(\b[A-Za-z_][\w\.]*\b)\s*==\s*True\b', r'\1', code)

    # Text that, when it directly precedes the operand, means the operand is
    # only part of a larger expression. A lone '=' (assignment) is absent on
    # purpose: `flag = x == False` -> `flag = not x` is valid.
    unsafe_prefix = re.compile(r'(?:[.)\]+\-*/%@&|^~<>]|[=!<>]=|\b(?:in|is))$')

    def negate(match):
        preceding = match.string[:match.start()].rstrip()
        if unsafe_prefix.search(preceding):
            return match.group(0)  # leave the comparison as written
        return 'not ' + match.group(1)

    # X == False -> not X
    code = re.sub(r'(\b[A-Za-z_][\w\.]*\b)\s*==\s*False\b', negate, code)
    return code

def transform_variable_name_u_to_user(code: str) -> str:
    """
    Rename the variable 'u' to 'user' throughout a code snippet.

    Every standalone identifier ``u`` is renamed, so the loop header and all
    uses in the body (``u.attr``, ``print(u)``, ``u[0]``, ...) stay
    consistent. Left alone on purpose:

    - attributes that happen to be called ``u`` (``obj.u``), which are a
      different name from the variable;
    - the string-literal prefix in ``u"..."`` / ``u'...'``.

    This is a textual rename, not a scope-aware one: occurrences inside
    comments or strings, and keyword arguments spelled ``u=``, are renamed
    too.
    """
    # Keep the canonical loop/append forms normalised to single spaces.
    code = re.sub(r'\bfor\s+u\s+in\b', 'for user in', code)
    code = re.sub(r'\bappend\(\s*u\s*\)', 'append(user)', code)
    # Remaining uses: a whole-word `u` that is neither an attribute (no
    # preceding '.') nor a string prefix (no following quote).
    code = re.sub(r'(?<![\w.])u\b(?![\'"])', 'user', code)
    return code

def transform_list_comprehension(code: str) -> str:
    """
    Try to convert

        results = []
        for <v> in <it>:
            if <cond>:
                results.append(<v>)
        return results

    into ``return [<v> for <v> in <it> if <cond>]``.

    Best-effort: the first occurrence that matches is rewritten and the rest
    of the source is preserved verbatim. The code is returned unchanged when
    no occurrence qualifies. A candidate only qualifies when the rewrite
    cannot drop or break anything:

    - the four statements are consecutive (blank lines aside), so no other
      statement such as ``results.sort()`` would be silently discarded;
    - ``results = []``, ``for`` and ``return`` share one indentation level;
    - ``results`` is the whole assigned name (not ``all_results`` or
      ``self.results``) and the whole returned name (not ``results_x``);
    - neither <it> nor <cond> refers to ``results``, which would not exist
      yet inside the comprehension.
    """
    eol = r'[ \t]*\r?\n'
    blank_lines = r'(?:[ \t]*\r?\n)*'
    pattern = re.compile(
        r'^(?P<indent>[ \t]*)results[ \t]*=[ \t]*\[\]' + eol
        + blank_lines
        + r'(?P=indent)for[ \t]+(?P<var>\w+)[ \t]+in[ \t]+(?P<iter>[^\n:]+):' + eol
        + r'[ \t]+if[ \t]+(?P<cond>[^\n:]+):' + eol
        + r'[ \t]+results\.append\(\s*(?P=var)\s*\)' + eol
        + blank_lines
        + r'(?P=indent)return[ \t]+results\b',
        re.MULTILINE,
    )
    uses_accumulator = re.compile(r'\bresults\b')

    for m in pattern.finditer(code):
        var = m.group('var')
        it_expr = m.group('iter').strip()
        cond = m.group('cond').strip()

        # e.g. `if x not in results:` depends on the partially built list.
        if uses_accumulator.search(it_expr) or uses_accumulator.search(cond):
            continue

        comp = f"return [{var} for {var} in {it_expr} if {cond}]"
        # Replace exactly the matched span; the match starts at the beginning
        # of the line, so the original indentation is re-applied.
        return code[:m.start()] + m.group('indent') + comp + code[m.end():]

    return code

def apply_suggestions_for_comment(comment: str, code: str) -> str:
    """
    Pick and apply the code transformation(s) a review comment calls for.

    The lower-cased comment is checked against each keyword group in turn
    (boolean cleanup, then renaming, then the list-comprehension rewrite);
    every group that matches has its transformation applied on top of the
    previous result. If the code is still unchanged afterwards, the rename
    and boolean cleanup are applied as a minimal style pass.

    Returns the suggested code, which may equal the input when no
    transformation has any effect.
    """
    lowered = comment.lower()

    # (trigger keywords, transformation); applied in this order.
    routes = (
        (("boolean", "== true", "== false", "redundant"),
         transform_boolean_comparisons),
        (("bad name", "poor name", "rename", "variable 'u'", "name"),
         transform_variable_name_u_to_user),
        (("inefficient", "loop twice", "optimize", "performance"),
         transform_list_comprehension),
    )

    result = code
    for keywords, transform in routes:
        if any(keyword in lowered for keyword in keywords):
            result = transform(result)

    if result != code:
        return result

    # Nothing changed: still try to improve style minimally.
    return transform_boolean_comparisons(transform_variable_name_u_to_user(code))


In [8]:
# Cell 4: Generate empathetic, principle-backed feedback sections

def generate_feedback_section(comment: str, code_snippet: str) -> str:
    severity = detect_severity(comment)
    principle = infer_principle(comment)
    pos_prefix, coach_suffix = tone_by_severity(severity)

    kb = PRINCIPLE_KB.get(principle, PRINCIPLE_KB["readability"])
    why = kb["why"]
    resource = kb["resource"]

    # Here, I am trying to produce a concrete code example tailored to the comment
    tailored = apply_suggestions_for_comment(comment, code_snippet).strip()
    # If nothing changed and we have a principle fallback, use that instead
    if tailored.strip() == code_snippet.strip() and "fallback_suggestion" in kb:
        tailored = kb["fallback_suggestion"]

    positive = (pos_prefix + {
        "high": "There’s a crucial improvement we can make.",
        "medium": "There’s a meaningful optimization within reach.",
        "low": "There’s a small polish opportunity."
    }[severity] + coach_suffix)

    section = f"""
### Analysis of Comment: *"{comment}"*
- **Positive Rephrasing:** {positive}
- **The 'Why':** {why}
- **Suggested Improvement:**

In [29]:
# Cell 5: Build the full Markdown report and display it in Colab.
def build_markdown_report(data: Dict) -> str:
    """
    Assemble the complete Markdown review report.

    `data` must provide "code_snippet" (the code under review) and
    "review_comments" (an iterable of comment strings). The report is a
    title, the original code in a fenced block, then one feedback section
    per comment, each followed by a horizontal rule.
    """
    snippet = data["code_snippet"]
    review_comments = data["review_comments"]

    parts = [
        "# 🚀 Empathetic AI-Powered Code Review Report\n\n",
        # Original code, shown verbatim.
        "## Original Code Snippet\n",
        f"```python\n{snippet}\n```\n\n",
        # Per-comment breakdown follows.
        "## AI-Powered Feedback Breakdown\n",
    ]
    for remark in review_comments:
        parts.append(generate_feedback_section(remark, snippet))
        parts.append("\n---\n")

    return "".join(parts)


# Build the report for the sample input and render it through IPython's
# Markdown display; `markdown_report` is kept for the later cell that saves it.
markdown_report = build_markdown_report(input_json)
display(Markdown(markdown_report))


# 🚀 Empathetic AI-Powered Code Review Report

## Original Code Snippet
```python
def get_active_users(users):
    results = []
    for u in users:
        if u.is_active == True and u.profile_complete == True:
            results.append(u)
    return results
```

## AI-Powered Feedback Breakdown
### Analysis of Comment: *"This is inefficient. Don't loop twice conceptually."*
- **Positive Rephrasing:** Nice work outlining the approach! There’s a meaningful optimization within reach. With a small refinement, we can make it more efficient and maintainable.
- **The 'Why':** Efficient iteration and vectorized or comprehension-based constructs reduce per-item overhead and scale better with large inputs.
- **Suggested Improvement:**
```python
def get_active_users(users):
    return [u for u in users if u.is_active == True and u.profile_complete == True]
```
📖 *Learn more: [https://docs.python.org/3/tutorial/datastructures.html#list-comprehensions](https://docs.python.org/3/tutorial/datastructures.html#list-comprehensions)*
---
### Analysis of Comment: *"Variable 'u' is a bad name."*
- **Positive Rephrasing:** Nice work outlining the approach! There’s a meaningful optimization within reach. With a small refinement, we can make it more efficient and maintainable.
- **The 'Why':** Descriptive names and clear structure make code self-documenting, easier to maintain, and friendlier to future contributors.
- **Suggested Improvement:**
```python
def get_active_users(users):
    results = []
    for user in users:
        if user.is_active == True and user.profile_complete == True:
            results.append(user)
    return results
```
📖 *Learn more: [https://peps.python.org/pep-0008/#naming-conventions](https://peps.python.org/pep-0008/#naming-conventions)*
---
### Analysis of Comment: *"Boolean comparison '== True' is redundant."*
- **Positive Rephrasing:** Great start and clear intent! There’s a small polish opportunity. A tiny tweak here will make it even cleaner and more Pythonic.
- **The 'Why':** Following language idioms (e.g., truthy checks) improves consistency and reduces visual noise.
- **Suggested Improvement:**
```python
def get_active_users(users):
    results = []
    for u in users:
        if u.is_active and u.profile_complete:
            results.append(u)
    return results
```
📖 *Learn more: [https://docs.python.org/3/library/stdtypes.html#truth-value-testing](https://docs.python.org/3/library/stdtypes.html#truth-value-testing)*
---


In [28]:
# Save the report to disk. The encoding is pinned to UTF-8 because the report
# contains non-ASCII characters (an emoji and typographic apostrophes) that a
# platform-default encoding such as cp1252 cannot encode.
with open("empathetic_code_review.txt", "w", encoding="utf-8") as f:
    f.write(markdown_report)

print("✅ Report saved as empathetic_code_review.txt")


✅ Report saved as empathetic_code_review.txt
