In [1]:
# Path of the Python source file the agent is asked to analyze.
CODE_PATH = "./test/code1.py"

In [2]:
import dotenv
import os
import re
import json
from typing import Optional, Dict, Any
from openai import OpenAI
import tempfile
import subprocess
import json
import ast
import requests

# Pull variables from a local .env file (if present) into the environment.
dotenv.load_dotenv()

# Both keys fall back to "" when the variable is unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")
# Only the OpenAI key is validated here; BRAVE_API_KEY is not checked in this cell.
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY environment variable is not set.")

In [3]:
# Chat model name and API endpoint used for the agent's completions.
model = "gpt-4.1"
base_URL = "https://api.openai.com/v1"
# Shared OpenAI client built from the key loaded above.
client = OpenAI(api_key=OPENAI_API_KEY, base_url=base_URL)

In [4]:
# System prompt for the agent: lists the callable tools, the per-turn
# <think>/<tool> reply format, and the final <answer> report format.
# NOTE(review): the prompt advertises `-> str` for static_analysis and
# parse_ast, while the Python helpers are annotated as returning list and dict.
system_prompt = """
You are SecureSage — a vigilant, intelligent, and explainable security analyst. Your task is to review Python source code files, understand what the code is doing, and identify potential security vulnerabilities, such as insecure deserialization, command injection, hardcoded secrets, and other OWASP Top 10 issues.

You do this by performing step-by-step analysis. You are allowed to use the following tools:

- load_code(path: str) -> str: Loads and returns the contents of a Python (.py) source code file.
- static_analysis(code: str) -> str: Runs static security scanners (e.g., Bandit) and returns a list of flagged lines with issue types and severity.
- parse_ast(code: str) -> str: Parses the code into an abstract syntax tree and extracts function names, inputs, and risky constructs (e.g., eval, exec, os.system).
- doc_search_with_brave(query: str) -> str: Performs a live search using the Brave Search API to retrieve recent documentation and best practices from sources like OWASP, CWE, and security blogs. The results are summarized using your reasoning ability. Use this tool when you need external context or to validate the risk or mitigation of a specific pattern.
- suggest_fix(issue: str, code_snippet: str) -> str: Proposes a secure version of the code snippet that mitigates the vulnerability.

You may call one tool per turn, for up to 10 turns, before giving your final answer.

In each turn, respond in the following format:

<think>
[Explain what you're doing next, what you need, or what issue you're focusing on.]
</think>
<tool>
JSON with the following fields:
- name: The name of the tool to call
- args: A dictionary of arguments to pass to the tool (must be valid JSON)
</tool>

When you are done, provide a clear and structured security review in the following format:

<answer>
1. Summary of Code Purpose  
2. Detected Vulnerabilities (with line numbers and severity) and explanation of each issue (why it's dangerous, relevant CVE/CWE/OWASP ref)  
3. Suggested Fixes (with example code and links if needed)  
</answer>

The answer should be well-structured and easily readable.
"""



In [5]:
def load_code(path: str) -> str:
    """Return the complete text of the file at ``path``, decoded as UTF-8."""
    source_file = open(path, "r", encoding="utf-8")
    try:
        contents = source_file.read()
    finally:
        # Always release the handle, even if reading/decoding fails.
        source_file.close()
    return contents

def static_analysis(code: str) -> list:
    """Scan ``code`` with Bandit and return its findings.

    The source is written to a temporary ``.py`` file, scanned with
    ``bandit -f json``, and the temporary file is removed afterwards.

    Args:
        code: Python source text to scan.

    Returns:
        One dict per finding with the keys ``line``, ``issue``, ``severity``,
        ``confidence`` and ``id``. If Bandit's output cannot be parsed (or a
        finding lacks an expected field) a single-element list
        ``[{"error": <message>}]`` is returned instead.

    Raises:
        OSError: propagated from ``subprocess.run`` (e.g. ``FileNotFoundError``
            when the ``bandit`` executable is not on PATH).
    """
    # delete=False lets us close the file before Bandit opens it (required on
    # Windows); the finally-block below is then responsible for removing it.
    # UTF-8 is used explicitly because that is how Python source is decoded.
    tmp = tempfile.NamedTemporaryFile(
        suffix=".py", mode="w", encoding="utf-8", delete=False
    )
    tmp_path = tmp.name
    try:
        with tmp:
            tmp.write(code)

        result = subprocess.run(
            ["bandit", "-f", "json", tmp_path],
            capture_output=True,
            text=True,
        )
        try:
            output = json.loads(result.stdout)
            return [
                {
                    "line": item["line_number"],
                    "issue": item["issue_text"],
                    "severity": item["issue_severity"],
                    "confidence": item["issue_confidence"],
                    "id": item["test_id"],
                }
                for item in output.get("results", [])
            ]
        except Exception as e:
            # Report parse/shape problems as data so the agent can react to them.
            return [{"error": str(e)}]
    finally:
        try:
            os.remove(tmp_path)
        except OSError:
            # Best-effort cleanup: a missing temp file must not mask the result.
            pass

def parse_ast(code: str) -> dict:
    """Extract function names, imports and risky calls from Python source.

    Args:
        code: Python source text to parse.

    Returns:
        A dict with three keys:

        * ``functions`` - names of every ``def`` / ``async def`` (including
          nested functions and methods), in visit order.
        * ``risky_calls`` - one dict per flagged call with ``line`` (1-based),
          ``call`` (the dotted name as written in the source) and ``arg``
          (source text of the first positional argument, or ``""``).
        * ``imports`` - module names from ``import x`` / ``from x import y``;
          relative imports are rendered with their leading dots.

    Raises:
        SyntaxError: if ``code`` is not valid Python.
    """
    # Matching is purely textual on the call expression, so aliased imports
    # (e.g. ``import subprocess as sp``) are not recognised.
    risky_names = frozenset({
        "eval",
        "exec",
        "os.system",
        "os.popen",
        "pickle.load",
        "pickle.loads",
        "subprocess.Popen",
        "subprocess.run",
        "subprocess.call",
        "subprocess.check_output",
    })

    tree = ast.parse(code)
    functions = []
    risky_calls = []
    imports = []

    class Analyzer(ast.NodeVisitor):
        def visit_FunctionDef(self, node):
            functions.append(node.name)
            self.generic_visit(node)

        # ``async def`` has its own node type; record it the same way.
        visit_AsyncFunctionDef = visit_FunctionDef

        def visit_Call(self, node):
            callee = node.func
            if isinstance(callee, ast.Name):
                # Bare builtins such as eval(...) / exec(...).
                func_name = callee.id
            elif isinstance(callee, ast.Attribute):
                func_name = f"{ast.unparse(callee.value)}.{callee.attr}"
            else:
                # e.g. a call on a subscript or another call's result.
                func_name = None

            if func_name in risky_names:
                risky_calls.append({
                    "line": node.lineno,
                    "call": func_name,
                    "arg": ast.unparse(node.args[0]) if node.args else ""
                })
            # Keep descending so nested calls in the arguments are seen too.
            self.generic_visit(node)

        def visit_Import(self, node):
            for alias in node.names:
                imports.append(alias.name)

        def visit_ImportFrom(self, node):
            # ``from . import x`` has module=None; keep a string either way.
            imports.append("." * (node.level or 0) + (node.module or ""))

    Analyzer().visit(tree)

    return {
        "functions": functions,
        "risky_calls": risky_calls,
        "imports": imports
    }
    
def brave_search(
    query: str,
    count: int = 5,
    freshness: str = "pm",
    timeout: float = 10.0,
) -> list[str]:
    """Query the Brave web-search API and return one text snippet per hit.

    Args:
        query: Search query string.
        count: Maximum number of results to request.
        freshness: Brave freshness filter. The documented values are ``pd``,
            ``pw``, ``pm``, ``py`` or a ``YYYY-MM-DDtoYYYY-MM-DD`` range;
            ``pm`` (past month) replaces the previous non-standard ``"Month"``.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``"<title>\\n<description>"`` strings, possibly empty.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            response (e.g. a missing or invalid ``BRAVE_API_KEY``).
    """
    url = "https://api.search.brave.com/res/v1/web/search"
    headers = {"Accept": "application/json", "X-Subscription-Token": BRAVE_API_KEY}
    params = {"q": query, "count": count, "freshness": freshness}

    # Without a timeout a stalled connection would hang the whole agent loop.
    resp = requests.get(url, headers=headers, params=params, timeout=timeout)
    # Surface auth/quota/validation errors instead of returning "no results".
    resp.raise_for_status()

    data = resp.json()
    results = (data.get("web") or {}).get("results") or []
    # `or ""` also covers keys that are present but null.
    return [
        (r.get("title") or "") + "\n" + (r.get("description") or "")
        for r in results
    ]

def doc_search_with_brave(query: str, model: str = "gpt-4.1") -> str:
    """Answer ``query`` with the chat model, grounded on Brave search snippets.

    Args:
        query: The question to search for and answer.
        model: Chat model name passed to the OpenAI client.

    Returns:
        The model's answer with surrounding whitespace removed; an empty
        string if the model returned no text content.
    """
    results = brave_search(query)
    # Make an empty search explicit so the model does not present its own
    # recollection as if it came from search results.
    context = "\n\n".join(results) if results else "(no search results were returned)"

    prompt = (
        "You are a security expert. Answer the following question using the information "
        "from recent search results:\n\n"
        f"Search results:\n{context}\n\n"
        f"Question: {query}\n\n"
        "Answer:"
    )

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    # message.content is optional in the API response; guard before strip().
    content = response.choices[0].message.content
    return (content or "").strip()


def suggest_fix(issue: str, code_snippet: str, model_name: str = "gpt-4.1") -> str:
    """Ask the chat model for a safer rewrite of ``code_snippet``.

    Args:
        issue: Short description of the security problem in the snippet.
        code_snippet: The code to be rewritten.
        model_name: Chat model name passed to the OpenAI client.

    Returns:
        The content of the model's first reply choice, unmodified.
    """
    prompt_lines = [
        "You are a secure code advisor.",
        f"The following code has a security issue: {issue}.",
        "Suggest a safer version of the code and explain why it's better.",
        "",
        f"Code:\n{code_snippet}",
    ]
    user_message = {"role": "user", "content": "\n".join(prompt_lines)}

    completion = client.chat.completions.create(
        model=model_name,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [6]:
def parse_thinking_from_response(response: str) -> Optional[str]:
    """Return the trimmed body of the first <think>...</think> block, or None if there is none."""
    found = re.search(r"<think>(.*?)</think>", response, re.DOTALL)
    if found is None:
        return None
    inner_text = found.group(1)
    return inner_text.strip()


def parse_tool_from_response(response: str) -> Optional[Dict[str, Any]]:
    """Extract the first <tool>...</tool> call from the LLM response as a dict.

    The payload may be raw JSON or JSON wrapped in a Markdown code fence
    (three backticks, optionally followed by ``json``), a form models often emit.

    Args:
        response: Full text of the assistant reply.

    Returns:
        The decoded JSON object, or ``None`` when there is no <tool> block,
        the payload is not valid JSON, or the JSON is not an object.
    """
    match = re.search(r"<tool>(.*?)</tool>", response, re.DOTALL)
    if not match:
        return None

    payload = match.group(1).strip()
    # Unwrap a surrounding Markdown code fence, if any.
    fenced = re.fullmatch(r"```(?:json)?\s*(.*?)\s*```", payload, re.DOTALL | re.IGNORECASE)
    if fenced:
        payload = fenced.group(1)

    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError as e:
        print(f"JSON parsing error in <tool>: {e}")
        return None

    # Callers index the result by key, so anything but an object is unusable.
    if not isinstance(parsed, dict):
        print(f"JSON parsing error in <tool>: expected an object, got {type(parsed).__name__}")
        return None
    return parsed


def parse_answer_from_response(response: str) -> Optional[str]:
    """Return the trimmed body of the first <answer>...</answer> block, or None if there is none."""
    found = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
    if found is None:
        return None
    report_text = found.group(1)
    return report_text.strip()


def write_answer_to_markdown(answer: str, file_path: str = "reports/secure_sage_report.md") -> None:
    """Write ``answer`` to ``file_path`` as a Markdown report.

    The file is overwritten if it exists. Missing parent directories are
    created.

    Args:
        answer: Report body; leading/trailing whitespace is stripped.
        file_path: Destination path. May be a bare file name, in which case
            the file is written to the current working directory.
    """
    parent_dir = os.path.dirname(file_path)
    # dirname is "" for a bare file name, and os.makedirs("") raises.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, "w", encoding="utf-8") as f:
        f.write("# SecureSage Security Report\n\n")
        f.write(answer.strip())
        f.write("\n")



In [7]:
# Agent memory: the full conversation, resent to the model on every turn.
messages = [{"role": "system", "content": system_prompt}]
tool_call_count = 0
max_turns = 10

# Tool names the model may request, mapped to the callables implementing them.
tool_registry = {
    "load_code": load_code,
    "static_analysis": static_analysis,
    "parse_ast": parse_ast,
    "doc_search_with_brave": doc_search_with_brave,
    "suggest_fix": suggest_fix,
}


user_input = f"Please analyze {CODE_PATH} for vulnerabilities."
messages.append({"role": "user", "content": user_input})

# Drop only the final extension; str.replace(".py", "") would also remove
# ".py" occurring elsewhere in the file name.
file_name = os.path.splitext(os.path.basename(CODE_PATH))[0]

# Up to max_turns tool calls, plus one extra completion so the model can still
# deliver its answer after seeing the result of its last tool call.
while tool_call_count <= max_turns:
    print(f"=============== AGENT TURN {tool_call_count} ================\n")
    response = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    # content may be None; the regex-based parsers below need a string.
    reply = response.choices[0].message.content or ""
    messages.append({"role": "assistant", "content": reply})

    thought = parse_thinking_from_response(reply)
    if thought:
        print("\nAgent thought:")
        print(thought)

    answer = parse_answer_from_response(reply)
    if answer:
        print("ANSWER GIVEN BY AGENT!")

        write_answer_to_markdown(answer, file_path=f"./reports/{file_name}_report.md")
        print("Report written to markdown.")
        break

    if tool_call_count >= max_turns:
        # The extra turn was for answering only; no further tools are run.
        print("Tool budget exhausted without a final answer. Exiting.")
        break

    tool_call = parse_tool_from_response(reply)
    if not tool_call or not isinstance(tool_call, dict):
        print("No tool call found. Exiting.")
        break

    # .get() so a malformed call is reported below instead of raising KeyError.
    tool_name = tool_call.get("name")
    args = tool_call.get("args", {})

    print("\nTool call:")
    print(f"Tool: {tool_name}")
    print(f"Args: {json.dumps(args, indent=2)}")

    tool_func = tool_registry.get(tool_name) if isinstance(tool_name, str) else None
    if not tool_func:
        print(f"Unknown tool: {tool_name}")
        break

    try:
        result = tool_func(**args)
    except Exception as e:
        # Feed tool failures (including bad argument shapes) back to the model.
        result = {"error": str(e)}

    # default=str keeps an unexpected non-JSON value from aborting the loop.
    result_text = json.dumps(result, indent=2, default=str)
    print("\nTool result:")
    print(result_text)

    messages.append({"role": "user", "content": result_text})
    tool_call_count += 1

    if tool_call_count == max_turns:
        # Tell the model explicitly that the next reply must be the report.
        messages.append({
            "role": "user",
            "content": "You have used all available tool calls. Provide your final review inside <answer> tags now.",
        })



Agent thought:
I need to start by loading the contents of the source code file "./test/code1.py" so I can understand what it's doing and begin the analysis.

Tool call:
Tool: load_code
Args: {
  "path": "./test/code1.py"
}

Tool result:
"import os\nimport subprocess\nimport random\nimport pickle\nimport flask\n\napp = flask.Flask(__name__)\napp.config['DEBUG'] = True  \n\nAPI_KEY = \"123456-SECRET-HARDCODED\"  \n\ndef delete_file(filename):\n    os.system(\"rm -rf \" + filename) \n\ndef insecure_pickle(data):\n    return pickle.loads(data)  \n\n@app.route('/run', methods=['POST'])\ndef run_command():\n    command = flask.request.form['cmd']\n    output = subprocess.Popen(command, shell=True)  \n    return \"Done\"\n\ndef get_token():\n    return random.random() \n"


Agent thought:
This code includes several potential red flags: use of os.system with user input, use of pickle.loads, subprocess.Popen with shell=True, and a hardcoded API key. My next step is to run a static analysis to