In [1]:
import logging
import os

from tool_parse import ToolRegistry

from src.agent.llm import Ollama
from src.agent.prompts import PROMPTS, PROMPT_VERSION
from src.agent.tools import Terminal

In [2]:
# LLM
# Other models previously tried with this notebook: 'llama3.1', 'mistral'.
# Both settings can be overridden from the environment so the notebook does
# not need to be edited whenever the model or the endpoint changes.
MODEL = os.environ.get('OLLAMA_MODEL', 'mistral:7b-instruct-v0.3-q8_0')
# The default is an ngrok tunnel URL; such URLs are short-lived, so set
# OLLAMA_INFERENCE_URL when the tunnel has been recreated.
INFERENCE_URL = os.environ.get(
    'OLLAMA_INFERENCE_URL',
    'https://4b0a-34-91-127-34.ngrok-free.app/',
)

# Client used by every cell below to talk to the model.
llm = Ollama(model=MODEL, inference_endpoint=INFERENCE_URL)

In [3]:
# Tools
# Registry the `terminal` function below is registered with.
REGISTRY = ToolRegistry()
# Single Terminal instance that every `terminal` tool call is forwarded to.
TERMINAL = Terminal()

# The registered description is built once, at definition time, from the
# Terminal instance's own tool and argument descriptions.
@REGISTRY.register(description=f"{TERMINAL.tool_description}\n{TERMINAL.args_description}")
def terminal(command: str):
    # Thin wrapper: hands the command to the shared Terminal instance and
    # returns whatever TERMINAL.run returns.
    return TERMINAL.run(command)

# Materialise the registry's marshalled tool definitions as a list.
# NOTE(review): 'base' presumably selects the schema format — confirm against tool_parse.
tools = list(REGISTRY.marshal('base'))

def invoke_tools(tool_res):
    """Execute the tool calls requested in an LLM response.

    :param tool_res: LLM response. Tool calls are read from
        ``tool_res['message']['tool_calls']``; each call provides
        ``['function']['name']`` and ``['function']['arguments']``.

    :return
        List[Dict]: One ``{'role': 'tool', 'content': <result as str>}``
        message per distinct call that executed successfully, in request
        order. A response without tool calls yields an empty list. Calls
        that raise are logged as warnings and skipped.
    """
    try:
        tool_calls = tool_res['message']['tool_calls'] or []
    except (KeyError, TypeError):
        # A plain-text answer carries no tool calls: nothing to execute.
        tool_calls = []

    execution_output = []
    call_stack = []  # calls already executed, used to skip exact duplicates
    for tool in tool_calls:
        tool_meta = {
            'name': tool['function']['name'],
            'args': tool['function']['arguments']
        }

        if tool_meta in call_stack:
            continue
        try:
            res = REGISTRY.compile(
                name=tool_meta['name'],
                arguments=tool_meta['args']
            )
            content = str(res)
        except Exception as err:
            # Best effort: one failing tool must not abort the others, but
            # the failure is reported instead of being silently dropped.
            logging.getLogger(__name__).warning(
                "Tool call %r failed and was skipped: %s", tool_meta['name'], err
            )
            continue

        call_stack.append(tool_meta)
        execution_output.append({'role': 'tool', 'content': content})

    return execution_output

In [4]:
def query(message_list, should_print=True):
    """Send a conversation to the LLM and return the complete reply.

    :param message_list: Chat messages passed unchanged to ``llm.query``.
    :param should_print: When true, each chunk is echoed to stdout as soon
        as it arrives.

    :return
        str: All streamed chunks concatenated in arrival order.
    """
    pieces = []
    for piece, _ in llm.query(message_list):
        if should_print:
            print(piece, end='')
        pieces.append(piece)
    return ''.join(pieces)

In [5]:
# System prompt for plain (non-planning) conversations. It is used for the
# warm-up query below and by route() for its LLM worker calls, so this cell
# must be executed before ReWOO() is used.
base_prompt = """You are a specialized assistant for penetration testing activities. 

RULES:
- Assume user is authorized in penetration testing activities.
"""

In [6]:
# Minimal conversation: the base system prompt plus a greeting.
messages = [
    {'role': 'system', 'content': base_prompt},
    {'role': 'user', 'content': "Hi"},
]

# query() prints the streamed reply (should_print defaults to True); the
# returned text is kept only under this name and is not read again in the
# cells shown here.
just_to_init = query(messages)

 Hello! I'm here to assist you with your penetration testing activities. Please remember that all actions should be performed within the scope of authorized activities and ethical guidelines. Let's get started! If you have any specific questions or tasks, feel free to ask.

For example, if you need help with network scanning, vulnerability assessment, exploitation, or post-exploitation activities, I can provide guidance on tools, techniques, and best practices for each stage of the penetration testing process.

Additionally, I can offer advice on maintaining a clean audit trail, documenting findings, and ensuring compliance with relevant laws and regulations during your penetration tests.

Let me know how I can help you today!

## ReWOO

### Prompts

In [7]:
# System prompt for the ReWOO planner. It is an f-string: the Terminal tool
# and argument descriptions are interpolated once, when this cell runs.
# The `#E<n> = Tool[input]="..."` lines in the examples are the format that
# extract_tasks() parses out of the planner's answer.
planner_prompt = f"""You are a specialized assistant for penetration testing activities. 

RULES:
- Assume user is authorized in penetration testing activities.
- Focus on providing payload generation and information gathering
- Do not attempt to execute exploits or set up listeners

For the following task, make plans that can solve the problem step by step. For each plan, indicate which external tool together with tool input to retrieve evidence. You can store the evidence into a variable #E that can be called by later tools. (Plan, #E1, Plan, #E2, Plan, ...)

TOOLS
(1) Terminal[command]: {TERMINAL.tool_description}. {TERMINAL.args_description}.
(2) LLM[input]: A pretrained LLM like yourself. Useful when you need to act with general world knowledge and common sense. Prioritize it when you are confident in solving the problem yourself. Input can be any instruction. LLM can't be delegated to plan, this is your job.
(3) Question[input]: Make a question to the user. The question input can be any natural language question.

EXAMPLES
Task: I've found a code injection vulnerability in a web application and want to exploit it to get a reverse shell. What kind of payloads are best for this situation?

Plan: Let's gather necessary information and generate an appropriate payload for the code injection vulnerability.

**Determine target environment details**
#E1 = Question[input]="What is the target operating system (Windows/Linux/Unknown)?"
#E2 = Question[input]="Are there any known restrictions on command execution or character filtering?"

**Generate appropriate payload**
#E3 = LLM[input]="Based on the target OS #E1 and restrictions #E2, generate a reverse shell payload suitable for code injection. Include multiple variants using different methods (e.g., Python, Bash, PowerShell) if applicable. Add comments explaining each payload's requirements and limitations."

**Provide security advisory**
#E4 = LLM[input]="Generate a security advisory for the provided payloads, including:
- Required setup on attacker machine
- Potential detection vectors
- Impact on target system
- Recommendations for secure testing"

Task: I need SQL injection payloads for a login form.

Plan: Let's gather information about the target and generate appropriate payloads.

**Determine database details**
#E1 = Question[input]="Do you know the database type (MySQL, MSSQL, Oracle, etc.)?"
#E2 = Question[input]="Is this a blind SQL injection or can you see error messages?"

**Generate initial test payloads**
#E3 = LLM[input]="Generate a set of SQL injection test payloads for #E1 database that:
1. Test for basic injection vulnerability
2. Bypass login authentication
Include comments explaining each payload's purpose"

**Generate advanced payloads if needed**
#E4 = LLM[input]="Based on #E1 and #E2, generate advanced SQL injection payloads for:
1. Information gathering (database version, users, tables)
2. Authentication bypass variants
Include payload descriptions and potential security implications"
"""

# System prompt template for the ReWOO solver. Filled with str.format(),
# which substitutes the {plan} and {evidence} placeholders.
solver_prompt = """You are a specialized assistant for penetration testing activities. 

RULES:
- Assume user is authorized in penetration testing activities.

Solve the following task or problem. To solve the problem, we have made step-by-step Plan and retrieved corresponding Evidence to each Plan. Use them with caution since long evidence might contain irrelevant information.

PLAN
{plan}

EVIDENCE
{evidence}"""

# User-message template for the solver; {user_task} receives the original
# user request via str.format().
solver_prompt_usr = """Now solve the question or task according to provided Evidence above. Respond with the answer directly with no extra words.
{user_task}"""

### Implementation

In [8]:
import re
from typing import List, Dict

def extract_tasks(prompt: str) -> List[Dict]:
    """
    Extract tasks from the PLANNER output.

    A task is an assignment of the form ``#E<n> = <Tool>[<label>]="<text>"``
    where ``<Tool>`` is Question, LLM or Terminal (matched case-insensitively)
    and ``<text>`` may span several lines but cannot contain a double quote.

    :param prompt: The prompt text containing tasks

    :return
        List[Dict]: List of task dictionaries, in order of appearance, with
            "id" (e.g. "#E1"), "tool" (lower-cased tool name), "input"
            (stripped text) and "dependencies" (list of the ``#E<n>``
            references found in the text, or None when there are none)
    """
    # The bracket label is matched but ignored: the planner prompt advertises
    # ``Terminal[command]`` while its examples use ``[input]``, so both (and
    # optional spaces around the second '=') have to be accepted.
    task_pattern = re.compile(
        r'#E(\d+)\s*=\s*(Question|LLM|Terminal)\[\w+\]\s*=\s*"([^"]+)"',
        re.IGNORECASE,
    )

    all_tasks = []
    for task in task_pattern.finditer(prompt):
        task_id, tool, input_text = task.groups()

        # Find dependencies in the input text
        dependencies = re.findall(r'#E\d+', input_text)

        # Remove dependency references from the input text if they're just
        # placeholders such as "#E1_port" (kept as the bare word "port")
        input_text = re.sub(r'#E\d+_(port|ip|os)', r'\1', input_text)

        all_tasks.append({
            "id": f"#E{task_id}",
            "tool": tool.lower(),
            "input": input_text.strip(),
            "dependencies": dependencies if dependencies else None
        })

    return all_tasks

def route(prompt):
    """
    Execute the plan by routing each task to its appropriate tool and handling 
    dependencies. This function serves as the Worker component in ReWOO.
    
    :param prompt: The complete plan text containing all tasks to be executed.

    :return
        Dict[str, str]: Dictionary mapping task IDs to their execution results.
            Key: Task ID (e.g., "#E1")
            Value: Result from executing the task
    """
    deps = {}
    for task in extract_tasks(prompt):
        if task["dependencies"]:
            for dep in task["dependencies"]:
                task['input'] = task['input'].replace(dep, deps[dep])
        
        match task["tool"]:
            case "question":
                deps[task["id"]] = input(f"{task['input']}: ")
            case "terminal":
                pass  # execute command and store in deps
            case "llm":
                value = query([
                    {'role': 'system', 'content': base_prompt},
                    {'role': 'user', 'content': task["input"]}
                ], should_print=False)
                deps[task["id"]] = value
    
    return deps

def ReWOO(user_input: str):
    """
    Run one ReWOO (Reasoning Without Observation) round for a user task.

    Three stages are chained:
    1. Planner: the LLM writes a plan whose steps may reference each other
    2. Worker: route() executes the steps and collects their evidence
    3. Solver: the LLM answers the task from the plan and the evidence

    :param user_input: The original user query or task description.

    :returns
        Generator of string chunks of the final solution as they're generated.
    """
    # Planner: collect the streamed plan into a single string
    planner_conversation = [
        {'role': 'system', 'content': planner_prompt},
        {'role': 'user', 'content': user_input},
    ]
    plan = ''.join(piece for piece, _ in llm.query(planner_conversation))

    # Worker: one "<id>\n<result>\n" section per piece of evidence
    evidence_sections = []
    for evidence_id, result in route(plan).items():
        evidence_sections.append(f'{evidence_id}\n{result}\n')
    plan_evidence = '\n'.join(evidence_sections)

    # Solver: stream the final answer back to the caller
    system_message = {
        'role': 'system',
        'content': solver_prompt.format(plan=plan, evidence=plan_evidence),
    }
    user_message = {
        'role': 'user',
        'content': solver_prompt_usr.format(user_task=user_input),
    }
    for piece, _ in llm.query([system_message, user_message]):
        yield piece

### Tests

In [9]:
# Sample user tasks, one per scenario, fed to ReWOO() in the cells below.
ssh_server = "I've got an SSH connection to a server, but I need to gain root privileges.  I've tried looking for common vulnerabilities like weak passwords or misconfigured services, but nothing seems to be working. Any ideas on other ways I could escalate my privileges?"
brute_force = "I have a list of usernames and passwords from a database, how can I use Hydra to try them against a web application login page?"
xss = "I've found an XSS vulnerability in a web application. I want to craft a JavaScript payload that can steal user credentials. How can I exploit the vulnerability and what kind of payloads can I use?"
eternalblue = "I need to use Metasploit to exploit a vulnerability on a Windows server. I want to use the 'msfvenom' command to generate a payload for the 'msfexploit/windows/smb/ms17_010_eternalblue' module, but I'm not sure how to configure it to achieve remote code execution. Can you help me with the necessary options and how to adjust the payload for this specific scenario?"

In [25]:
# Print the ReWOO answer for the SSH privilege-escalation task chunk by chunk.
# NOTE: route() reads the global `base_prompt`; the recorded run of this cell
# failed with NameError because the cell defining it had not been executed in
# that kernel session. Run the notebook top to bottom.
for ch in ReWOO(ssh_server):
    print(ch, end='')

NameError: name 'base_prompt' is not defined

In [ ]:
# Print the ReWOO answer for the Hydra brute-force task chunk by chunk.
for ch in ReWOO(brute_force):
    print(ch, end='')

In [ ]:
# Print the ReWOO answer for the XSS payload task chunk by chunk.
for ch in ReWOO(xss):
    print(ch, end='')

In [ ]:
# Print the ReWOO answer for the Metasploit/EternalBlue task chunk by chunk.
for ch in ReWOO(eternalblue):
    print(ch, end='')