In [31]:
# Path the agent is asked to analyze; it is interpolated into the first user message.
CODE_PATH = "./test/malicious_hack.py"

In [32]:
import dotenv
import os
import re
import json
from typing import Optional, Dict, Any, Union
from openai import OpenAI
import tempfile
import subprocess
import json
import ast
import requests

# Load variables from a .env file (if present) into the process environment.
dotenv.load_dotenv()

# Credentials are read from the environment rather than hard-coded in the notebook.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
# NOTE(review): BRAVE_API_KEY is not validated; brave_search sends it as the
# X-Subscription-Token header even when it is empty — confirm that is intended.
BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")
# Fail fast when the OpenAI key is missing instead of failing on the first API call.
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY environment variable is not set.")

In [33]:
# Chat model used by the main agent loop.
model = "gpt-4.1"
# Base URL of the OpenAI-compatible endpoint the client talks to.
base_URL = "https://api.openai.com/v1"
# Shared client, reused by the agent loop and by the LLM-backed tools.
client = OpenAI(api_key=OPENAI_API_KEY, base_url=base_URL)

In [34]:
# System prompt for the agent. It lists the available tools and fixes the reply
# protocol: a <think> block plus a JSON <tool> call on each turn, and a final
# <answer> block whose per-file sections are separated by a dashed delimiter.
# NOTE(review): the tool return types advertised here (-> str) differ from the
# Python signatures (lists/dicts); results are JSON-encoded before being sent back.
system_prompt = """
You are SecureSage — a vigilant, intelligent, and explainable security analyst. Your task is to review Python source code files, understand what the code is doing, and identify potential security vulnerabilities, such as insecure deserialization, command injection, hardcoded secrets, and other OWASP Top 10 issues.

You do this by performing step-by-step analysis. You are allowed to use the following tools:

- load_files(path: str) -> str: Loads and returns the contents of a Python (.py) source code file or the contents of each python file in a directory (recursive). The format is a list of (file_path, file_content) tuples.
- static_analysis(code: str) -> str: Runs static security scanners (e.g., Bandit) and returns a list of flagged lines with issue types and severity.
- parse_ast(code: str) -> str: Parses the code into an abstract syntax tree and extracts function names, inputs, and risky constructs (e.g., eval, exec, os.system).
- doc_search_with_brave(query: str) -> str: Performs a live search using the Brave Search API to retrieve recent documentation and best practices from sources like OWASP, CWE, and security blogs. The results are summarized using your reasoning ability. Use this tool when you need external context or to validate the risk or mitigation of a specific pattern.
- suggest_fix(issue: str, code_snippet: str) -> str: Proposes a secure version of the code snippet that mitigates the vulnerability.

You may call one tool per turn, for up to 10 turns, before giving your final answer.

Keep in mind, if you have to analyze a directory, you should keep track of the files you have already analyzed to avoid redundancy. Make sure to analyze the code thoroughly, including all functions and methods, and consider the context in which they are used.
If you encounter code dependencies between files, you should analyze them together to understand the full context.

In each turn, respond in the following format:

<think>
[Explain what you're doing next, what you need, or what issue you're focusing on.]
</think>
<tool>
JSON with the following fields:
- name: The name of the tool to call
- args: A dictionary of arguments to pass to the tool (must be valid JSON)
</tool>

When you are done, provide a clear and structured security review in the following format:

<answer>
Name of the file being analyzed: $FILE_NAME
1. Summary of Code Purpose  
2. Detected Vulnerabilities (with line numbers and severity) and explanation of each issue (why it's dangerous, relevant CVE/CWE/OWASP ref)  
3. Suggested Fixes (with example code and links if needed)
---------------------------------
</answer>

The answer should be well-structured and easily readable. First generate the answer as a summary of the most important issues, then provide an answer for each file you receive. All in one answerblock, split by the delimiter "---------------------------------" as you can see above.
Use FILE_NAME "summary" for the summary section.
Only generate the summary block if you have analyzed multiple files. If you only analyze one file, use the actual file name instead of "summary".
"""

In [35]:
def load_files(path: Union[str, os.PathLike]) -> list[tuple[str, str]]:
    """
    Recursively loads all .py files from a file or directory.

    Args:
        path: A single ``.py`` file or a directory, given as ``str`` or any
            ``os.PathLike`` object (e.g. ``pathlib.Path``).

    Returns:
        List of tuples: [(file_path, file_content), ...]. For a directory the
        entries are ordered deterministically (sorted directory and file names).
        Files inside a directory that cannot be read are skipped with a warning.

    Raises:
        ValueError: If ``path`` is neither a ``.py`` file nor a directory.
    """
    # Normalise PathLike objects to str: pathlib.Path has no ``endswith`` method,
    # so the suffix check below would otherwise raise AttributeError.
    path = os.fspath(path)
    files = []

    if os.path.isfile(path) and path.endswith(".py"):
        with open(path, "r", encoding="utf-8") as f:
            files.append((path, f.read()))
    elif os.path.isdir(path):
        for root, dirnames, filenames in os.walk(path):
            # Sorting in place steers os.walk, so the traversal order (and thus
            # the order the agent sees the files in) is reproducible.
            dirnames.sort()
            for filename in sorted(filenames):
                if filename.endswith(".py"):
                    full_path = os.path.join(root, filename)
                    try:
                        with open(full_path, "r", encoding="utf-8") as f:
                            files.append((full_path, f.read()))
                    except Exception as e:
                        # Best effort: one unreadable file must not abort the scan.
                        print(f"Warning: Could not read {full_path}: {e}")
    else:
        raise ValueError(f"Path must be a .py file or a directory: {path}")

    return files

def static_analysis(code: str) -> list:
    """
    Runs the Bandit security scanner on a code string.

    The code is written to a temporary ``.py`` file, scanned with
    ``bandit -f json`` and the temporary file is always removed afterwards.

    Args:
        code: Python source code to scan.

    Returns:
        A list with one dict per finding (keys: ``line``, ``issue``, ``severity``,
        ``confidence``, ``id``), or ``[{"error": <message>}]`` when Bandit's
        output cannot be parsed.

    Raises:
        OSError: If the ``bandit`` executable cannot be started.
    """
    tmp = tempfile.NamedTemporaryFile(
        suffix=".py", mode="w", encoding="utf-8", delete=False
    )
    try:
        # Close the file before scanning so the content is flushed and Bandit can
        # open it on platforms that forbid reading a file held open for writing.
        with tmp:
            tmp.write(code)
        result = subprocess.run(
            ["bandit", "-f", "json", tmp.name],
            capture_output=True,
            text=True
        )
    finally:
        # delete=False means nobody else cleans this up; remove it ourselves.
        try:
            os.unlink(tmp.name)
        except OSError:
            pass  # cleanup is best effort and must not mask the real outcome

    try:
        output = json.loads(result.stdout)
        return [
            {
                "line": item["line_number"],
                "issue": item["issue_text"],
                "severity": item["issue_severity"],
                "confidence": item["issue_confidence"],
                "id": item["test_id"]
            }
            for item in output.get("results", [])
        ]
    except Exception as e:
        return [{"error": str(e)}]

def parse_ast(code: str) -> dict:
    """
    Parses Python source and extracts a coarse security-relevant summary.

    Args:
        code: Python source code to analyze.

    Returns:
        A dict with three keys:
        - ``functions``: names of all (sync and async) function definitions.
        - ``risky_calls``: one dict per call to a known-risky function, with the
          ``line`` number, the dotted ``call`` name and the source text of the
          first positional ``arg`` (empty string when there is none).
        - ``imports``: imported module names; a relative ``from . import x``
          is reported as its leading dots.

    Raises:
        SyntaxError: If ``code`` is not valid Python.
    """
    risky_names = {"os.system", "eval", "exec", "pickle.load", "subprocess.Popen"}

    tree = ast.parse(code)
    functions = []
    risky_calls = []
    imports = []

    class Analyzer(ast.NodeVisitor):
        def visit_FunctionDef(self, node):
            functions.append(node.name)
            self.generic_visit(node)

        # ``async def`` is a separate node type; treat it like a normal function.
        visit_AsyncFunctionDef = visit_FunctionDef

        def visit_Call(self, node):
            func = node.func
            if isinstance(func, ast.Attribute):
                func_name = f"{ast.unparse(func.value)}.{func.attr}"
            elif isinstance(func, ast.Name):
                # Bare builtins such as eval(...) / exec(...) are Name nodes,
                # not Attribute nodes, so they need their own branch.
                func_name = func.id
            else:
                func_name = None

            if func_name in risky_names:
                risky_calls.append({
                    "line": node.lineno,
                    "call": func_name,
                    "arg": ast.unparse(node.args[0]) if node.args else ""
                })
            # Keep descending: risky calls may be nested inside arguments.
            self.generic_visit(node)

        def visit_Import(self, node):
            for alias in node.names:
                imports.append(alias.name)

        def visit_ImportFrom(self, node):
            # ``from . import x`` has module=None; report the relative dots instead.
            imports.append(node.module if node.module is not None else "." * node.level)

    Analyzer().visit(tree)

    return {
        "functions": functions,
        "risky_calls": risky_calls,
        "imports": imports
    }
    
def brave_search(
    query: str,
    count: int = 5,
    freshness: str = "pm",
    timeout: float = 10.0,
) -> list[str]:
    """
    Queries the Brave web search API and returns short result snippets.

    Args:
        query: Search query string.
        count: Maximum number of results to request.
        freshness: Brave freshness filter; the documented values are "pd", "pw",
            "pm", "py" or an explicit date range. Defaults to the past month.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        One string per result in the form "<title>\\n<description>"; an empty
        list when the response contains no web results.

    Raises:
        requests.RequestException: On network failure, timeout or a non-2xx
            response (e.g. an invalid or missing subscription token).
    """
    url = "https://api.search.brave.com/res/v1/web/search"
    headers = {"Accept": "application/json", "X-Subscription-Token": BRAVE_API_KEY}
    params = {"q": query, "count": count, "freshness": freshness}

    # Without a timeout a stalled connection would hang the whole agent loop.
    resp = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Surface HTTP errors instead of silently turning them into "no results".
    resp.raise_for_status()

    data = resp.json()
    results = data.get("web", {}).get("results", [])
    # ``or ""`` also covers fields that are present but null in the payload.
    return [(r.get("title") or "") + "\n" + (r.get("description") or "") for r in results]

def doc_search_with_brave(query: str, model: str = "gpt-4.1") -> str:
    """
    Answers a security question using live Brave search results as context.

    The search snippets returned by ``brave_search`` are embedded in a prompt
    and summarized by the chat model.

    Args:
        query: The question to research; also used verbatim as the search query.
        model: Name of the chat model used for the summary.

    Returns:
        The model's answer with surrounding whitespace removed; an empty string
        when the model returns no text content.
    """
    results = brave_search(query)
    context = "\n\n".join(results)

    prompt = (
        "You are a security expert. Answer the following question using the information "
        "from recent search results:\n\n"
        f"Search results:\n{context}\n\n"
        f"Question: {query}\n\n"
        "Answer:"
    )

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    # ``content`` is optional in the API response; guard against None before strip().
    content = response.choices[0].message.content
    return (content or "").strip()


def suggest_fix(issue: str, code_snippet: str, model_name: str = "gpt-4.1") -> str:
    """
    Asks the chat model for a safer rewrite of a vulnerable code snippet.

    Args:
        issue: Short description of the security problem.
        code_snippet: The offending code.
        model_name: Name of the chat model to query.

    Returns:
        The content of the model's first reply, unmodified.
    """
    # Assemble the single user message piece by piece.
    role_line = "You are a secure code advisor.\n"
    issue_line = f"The following code has a security issue: {issue}.\n"
    task_line = "Suggest a safer version of the code and explain why it's better.\n\n"
    code_part = f"Code:\n{code_snippet}"
    prompt = role_line + issue_line + task_line + code_part

    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model=model_name,
        messages=[user_message],
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

In [36]:
def parse_thinking_from_response(response: str) -> Optional[str]:
    """Return the stripped text of the first <think>...</think> block, or None if absent."""
    found = re.compile(r"<think>(.*?)</think>", re.DOTALL).search(response)
    if found is None:
        return None
    return found.group(1).strip()


def parse_tool_from_response(response: str) -> Optional[Dict[str, Any]]:
    """
    Extract the <tool> call as a dictionary from the LLM response.

    The payload between the tags must be a JSON object. A markdown code fence
    around the JSON (``` or ```json) is tolerated and removed before parsing.

    Args:
        response: Raw text returned by the model.

    Returns:
        The decoded JSON object, or None when there is no <tool> block, the
        payload is not valid JSON, or the JSON value is not an object.
    """
    match = re.search(r"<tool>(.*?)</tool>", response, re.DOTALL)
    if not match:
        return None

    payload = match.group(1).strip()
    # Models frequently wrap JSON in a markdown fence; unwrap it if present.
    fenced = re.fullmatch(r"```(?:json)?\s*(.*?)\s*```", payload, re.DOTALL | re.IGNORECASE)
    if fenced:
        payload = fenced.group(1)

    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError as e:
        print(f"JSON parsing error in <tool>: {e}")
        return None

    # Callers index the result by key, so anything but an object is unusable.
    if not isinstance(parsed, dict):
        print(f"JSON parsing error in <tool>: expected an object, got {type(parsed).__name__}")
        return None
    return parsed


def parse_answer_from_response(response: str) -> Optional[str]:
    """Return the stripped text of the first <answer>...</answer> block, or None if absent."""
    answer_pattern = re.compile(r"<answer>(.*?)</answer>", re.DOTALL)
    hit = answer_pattern.search(response)
    if hit is not None:
        return hit.group(1).strip()
    return None


def sanitize_filename(name: str) -> str:
    """
    Sanitize the file name for safe filesystem usage.

    Args:
        name: Arbitrary file name or path, e.g. taken from an LLM answer.

    Returns:
        The name without its ``.py`` extension, with every character other than
        word characters, ``-``, ``_`` and ``.`` replaced by ``_``, without
        leading dots, truncated to 255 characters. Returns ``"unnamed"`` when
        nothing is left.
    """
    # Drop ".py" only where it is a complete extension token, so names that merely
    # contain the letters (e.g. "my.pyramid.py") keep them.
    without_ext = re.sub(r"\.py\b", "", name)
    sanitized = re.sub(r"[^\w\-_.]", "_", without_ext)
    # Leading dots would create hidden files or relative-path fragments.
    sanitized = sanitized.lstrip(".")
    # Limit to 255 characters for filesystem compatibility; never return an empty name.
    return sanitized[:255] or "unnamed"

def split_and_write_answers(raw_answer: str, output_dir: str = "reports") -> None:
    """
    Write each per-file section of a SecureSage answer to its own markdown report.

    The answer is cut at every dashed delimiter line. A section is kept only if it
    contains a header of the form ``Name of the file being analyzed: <filename>``;
    the sanitized file name becomes the report's base name inside ``output_dir``,
    which is created when missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    header_pattern = re.compile(r"Name of the file being analyzed:\s*(.+)")

    for block in raw_answer.strip().split('---------------------------------'):
        header = header_pattern.search(block)
        if header is None:
            # No file header: malformed or empty block, nothing to write.
            continue

        report_name = sanitize_filename(header.group(1).strip())
        report_path = os.path.join(output_dir, f"{report_name}.md")

        # Title, then the whole trimmed section, then a closing newline.
        report_text = "# SecureSage Security Report\n\n" + block.strip() + "\n"
        with open(report_path, "w", encoding="utf-8") as report:
            report.write(report_text)



In [37]:
# Agent memory: the full conversation is resent to the model on every turn.
messages = [{"role": "system", "content": system_prompt}]
tool_call_count = 0
max_turns = 10

# Maps the tool names the model may emit in <tool> to the Python callables.
tool_registry = {
    "load_files": load_files,
    "static_analysis": static_analysis,
    "parse_ast": parse_ast,
    "doc_search_with_brave": doc_search_with_brave,
    "suggest_fix": suggest_fix,
}


user_input = f"Please analyze {CODE_PATH} for vulnerabilities."
messages.append({"role": "user", "content": user_input})

# NOTE(review): not used anywhere in this cell — confirm whether a later cell needs it.
file_name = os.path.basename(CODE_PATH).replace(".py", "")

# One iteration per tool call: ask the model, then either write the final report
# or run the requested tool and feed its JSON-encoded result back as a user turn.
while tool_call_count < max_turns:
    print(f"=============== AGENT TURN {tool_call_count} ================\n")
    response = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    # The API may return no text content; treat that as an empty reply so the
    # parsers below do not crash on None.
    reply = response.choices[0].message.content or ""
    messages.append({"role": "assistant", "content": reply})

    thought = parse_thinking_from_response(reply)
    if thought:
        print("\nAgent thought:")
        print(thought)

    answer = parse_answer_from_response(reply)
    if answer:
        print("ANSWER GIVEN BY AGENT!")

        split_and_write_answers(answer)
        print("Report written to markdown.")
        break

    tool_call = parse_tool_from_response(reply)
    # A usable call is a JSON object with at least a "name"; anything else
    # (missing block, list, object without name) ends the run cleanly.
    if not isinstance(tool_call, dict) or "name" not in tool_call:
        print("No tool call found. Exiting.")
        break

    tool_name = tool_call["name"]
    # Tools without parameters may legitimately omit "args".
    args = tool_call.get("args") or {}

    print("\nTool call:")
    print(f"Tool: {tool_name}")
    print(f"Args: {json.dumps(args, indent=2)}")

    tool_func = tool_registry.get(tool_name)
    if not tool_func:
        print(f"Unknown tool: {tool_name}")
        break

    # Tool failures (including malformed arguments) are reported back to the
    # model as an error payload instead of aborting the loop.
    try:
        result = tool_func(**args)
    except Exception as e:
        result = {"error": str(e)}

    # default=str keeps the loop alive if a tool returns a non-JSON-native value.
    result_json = json.dumps(result, indent=2, default=str)

    print("\nTool result:")
    print(result_json)

    messages.append({"role": "user", "content": result_json})
    tool_call_count += 1
else:
    # Only reached when the turn budget ran out without a break (no final answer).
    print(f"Reached the limit of {max_turns} tool calls without a final answer.")



Agent thought:
To begin my analysis, I'll first load the contents of ./test/malicious_hack.py. This will allow me to understand the code, its structure, and any obvious vulnerabilities. Once loaded, I’ll proceed with static and AST-based analysis for a thorough review.

Tool call:
Tool: load_files
Args: {
  "path": "./test/malicious_hack.py"
}

Tool result:
[
  [
    "./test/malicious_hack.py",
    "def malicious_attack():\n    \"\"\"\n    This function is a dangerous zero-day exploit designed to destroy everything.\n    \"\"\"\n    init_hack()\n    DESTROY_ALL()"
  ]
]


Agent thought:
Now that I have the contents of ./test/malicious_hack.py, I'll perform static analysis to detect any vulnerabilities or risky patterns identified by automated scanners. This will help highlight questionable functions, dangerous calls, and security issues flagged by well-known tools.

Tool call:
Tool: static_analysis
Args: {
  "code": "def malicious_attack():\n    \"\"\"\n    This function is a dangero