In [1]:
#!/usr/bin/env python3
"""
CleanScanner - lightweight educational web vulnerability scanner
Save as scanner.py and run: python scanner.py
Default target: http://testphp.vulnweb.com/
"""

# Optional: in Colab or fresh env install:
# !pip install requests beautifulsoup4

import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse, parse_qs
from collections import deque
import hashlib, ssl, socket, time
from datetime import datetime
from random import shuffle

# colorama is optional; fallback to blank colors if missing
try:
    from colorama import Fore, init
except ImportError:
    # colorama is not installed: provide a stand-in whose color codes are
    # empty strings so `Fore.X + "text"` still works and prints plain text.
    class Fore:
        """No-op replacement for ``colorama.Fore`` (all colors are empty)."""

        CYAN = ""
        YELLOW = ""
        RED = ""
        GREEN = ""
        BLUE = ""
else:
    # Only initialise colorama when the import actually succeeded.
    init(autoreset=True)

# ------------------------------
# CONFIGURATION (tweak these)
# ------------------------------
# Default scan target (a safe test site) and crawl/request limits.
START_URL = "http://testphp.vulnweb.com/"
MAX_CRAWL_DEPTH = 1
MAX_PARAM_URLS = 12
REQUEST_TIMEOUT = 6  # passed as `timeout=` to every HTTP/socket call
CRAWL_USER_AGENT = "CleanScanner/1.0 (edu)"

# Payloads injected into query parameters by the SQLi / XSS checks.
SQLI_PAYLOADS = [
    "'",
    "' OR '1'='1",
    "' UNION SELECT NULL --",
]
XSS_PAYLOADS = [
    "<script>alert(1)</script>",
    "'><img src=x onerror=alert(1)>",
    "<svg/onload=alert(1)>",
]

# Finding "type" -> OWASP Top 10 (2021) label shown in the report.
OWASP_MAPPING = {
    "BrokenAccess": "A01:2021 - Broken Access Control",
    "SQLi": "A03:2021 - Injection",
    "XSS": "A03:2021 - Injection (Cross-Site Scripting)",
    "Headers": "A05:2021 - Security Misconfiguration",
    "Crypto": "A02:2021 - Cryptographic Failures",
    "InsecureDesign": "A04:2021 - Insecure Design",
}

# Collected findings plus the (type, url, param) keys used to de-duplicate them.
findings_list = []
_findings_keys = set()

# When True, logv() prints debug lines.
VERBOSE = False

# A01 settings: paths probed by forced browsing, directory-listing markers,
# and HTTP methods reported as dangerous when advertised in an Allow header.
COMMON_ADMIN_PATHS = [
    "admin/",
    "administrator/",
    "admin/login.php",
    "admin.php",
    "login.php",
    "user/login",
    "wp-admin/",
    "config.php",
    ".git/",
    ".env",
    "phpinfo.php",
    "backup/",
    "server-status",
    "console/",
    "manage/",
]
DIR_LISTING_MARKERS = [
    "Index of /",
    "Parent Directory",
    "<title>Index of",
    "Directory listing for",
]
DANGEROUS_HTTP_METHODS = {"PUT", "DELETE", "TRACE", "TRACK", "PATCH"}

# IDOR probing: offsets added to a numeric parameter value, and how many
# differing responses to record per parameter.
IDOR_PROBE_OFFSETS = (-2, -1, 1, 2)
IDOR_MAX_EXAMPLES = 2

# Role-based test: disabled by default (set True only for your lab).
ENABLE_ROLE_TEST = False
ROLE_TEST_CREDENTIALS = [
    ("admin", "admin"),
    ("admin", "password"),
    ("admin", "123456"),
]

# ------------------------------
# helpers
# ------------------------------
def logv(msg):
    """Print ``msg`` as a blue ``[DEBUG]`` line; do nothing unless VERBOSE is set."""
    if not VERBOSE:
        return
    print(Fore.BLUE + "[DEBUG] " + msg)

def normalize_url_without_fragment(url):
    """Return ``url`` re-serialised by ``urlparse`` with its ``#fragment`` removed."""
    parts = urlparse(url)
    fragmentless = parts._replace(fragment="")
    return fragmentless.geturl()

def short_hash(text):
    """Return the first 12 hex characters of the SHA-256 digest of ``text``.

    The text is encoded as UTF-8; characters that cannot be encoded are
    dropped (``errors="ignore"``).
    """
    encoded = text.encode("utf-8", errors="ignore")
    hex_digest = hashlib.sha256(encoded).hexdigest()
    return hex_digest[:12]

# ------------------------------
# Crawler - BFS for parameterized URLs
# ------------------------------
def crawl_for_param_urls(start_url, max_depth=1, max_urls=12, same_domain_only=True):
    """Breadth-first crawl collecting links that carry a query string.

    Args:
        start_url: Page the crawl starts from (depth 0).
        max_depth: Pages deeper than this are not fetched.
        max_urls: Stop once this many parameterized URLs have been collected.
        same_domain_only: When true, ignore links whose netloc differs from
            the netloc of ``start_url``.

    Returns:
        The parameterized URLs (fragments stripped) in discovery order,
        without duplicates.
    """
    home_netloc = urlparse(start_url).netloc
    request_headers = {"User-Agent": CRAWL_USER_AGENT}
    seen_pages = set()
    param_urls = []
    pending = deque([(start_url, 0)])

    while pending and len(param_urls) < max_urls:
        raw_url, level = pending.popleft()
        page_url = normalize_url_without_fragment(raw_url)
        if level > max_depth or page_url in seen_pages:
            continue
        seen_pages.add(page_url)
        logv(f"Crawling: {page_url} (depth {level})")
        try:
            response = requests.get(page_url, timeout=REQUEST_TIMEOUT, headers=request_headers)
            if response.status_code != 200:
                logv(f"Skipping non-200: {response.status_code} for {page_url}")
                continue
            document = BeautifulSoup(response.text, "html.parser")
            for anchor in document.find_all("a", href=True):
                link = normalize_url_without_fragment(urljoin(page_url, anchor["href"].strip()))
                link_parts = urlparse(link)
                if same_domain_only and link_parts.netloc != home_netloc:
                    continue
                if link_parts.query and link not in param_urls:
                    param_urls.append(link)
                    logv(f"Found parameterized URL: {link}")
                    if len(param_urls) >= max_urls:
                        break
                # Only enqueue links that would still be within the depth limit.
                if level + 1 <= max_depth:
                    pending.append((link, level + 1))
            # Small pause between page fetches.
            time.sleep(0.05)
        except Exception as exc:
            logv(f"Crawl error for {page_url}: {exc}")
            continue

    # Order-preserving de-duplication.
    return list(dict.fromkeys(param_urls))

# ------------------------------
# Header check (A05)
# ------------------------------
def header_check(url):
    """Record a "Headers" finding listing the security headers ``url`` lacks.

    Checks for Content-Security-Policy, X-Frame-Options,
    Strict-Transport-Security and X-Content-Type-Options. At most one finding
    per URL is stored; request errors are only logged.
    """
    expected = [
        "Content-Security-Policy",
        "X-Frame-Options",
        "Strict-Transport-Security",
        "X-Content-Type-Options",
    ]
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT, headers={"User-Agent": CRAWL_USER_AGENT})
        absent = [name for name in expected if name not in response.headers]
        finding_key = ("Headers", url, None)
        if not absent or finding_key in _findings_keys:
            return
        _findings_keys.add(finding_key)
        findings_list.append({
            "name": "Security Misconfiguration - Missing HTTP Headers",
            "type": "Headers",
            "url": url,
            "param": None,
            "payloads": absent
        })
        logv(f"Header issues for {url}: {absent}")
    except Exception as exc:
        logv(f"Header check error for {url}: {exc}")

# ------------------------------
# SQLi check (A03)
# ------------------------------
def test_sqli(url):
    """Probe each query parameter of ``url`` for error-based SQL injection.

    For every parameter, each payload in SQLI_PAYLOADS is substituted for that
    parameter's value while the other parameters keep their original values.
    A payload counts as successful when the response contains a database error
    signature that the unmodified (baseline) page does not contain; this keeps
    pages that merely mention e.g. "mysql" from being reported. Probing of a
    parameter stops after two successful payloads. One "SQLi" finding per
    (base URL, parameter) is appended to ``findings_list``.

    Args:
        url: A URL with a query string; URLs without parameters are ignored.
    """
    query = parse_qs(urlparse(url).query)
    if not query:
        return
    base = url.split("?")[0]
    headers = {"User-Agent": CRAWL_USER_AGENT}
    # First value of every parameter, used as the untouched context for probes.
    original_params = {name: values[0] for name, values in query.items()}
    error_signs = ("sql syntax", "mysql", "ora-", "syntax error", "pdoexception")

    try:
        baseline_body = requests.get(
            base, params=original_params, timeout=REQUEST_TIMEOUT, headers=headers
        ).text.lower()
    except Exception as e:
        # Without a baseline nothing can be subtracted; fall back to matching
        # every signature.
        logv(f"SQLi baseline error: {e}")
        baseline_body = ""
    # Signatures already present on the normal page prove nothing.
    new_signs = [sign for sign in error_signs if sign not in baseline_body]

    for p in original_params:
        successful = []
        for payload in SQLI_PAYLOADS:
            try:
                r = requests.get(
                    base,
                    params={**original_params, p: payload},
                    timeout=REQUEST_TIMEOUT,
                    headers=headers,
                )
            except Exception as e:
                logv(f"SQLi request error: {e}")
                continue
            body = r.text.lower()
            if any(sign in body for sign in new_signs):
                if payload not in successful:
                    successful.append(payload)
                if len(successful) >= 2:
                    break
        if successful:
            key = ("SQLi", base, p)
            if key not in _findings_keys:
                _findings_keys.add(key)
                findings_list.append({
                    "name": "SQL Injection",
                    "type": "SQLi",
                    "url": base + "?" + p + "=<value>",
                    "param": p,
                    "payloads": successful
                })
                logv(f"Saved SQLi finding: {base} param={p} payloads={successful}")

# ------------------------------
# XSS check (A03)
# ------------------------------
def test_xss(url):
    """Probe each query parameter of ``url`` for reflected XSS.

    For every parameter, each payload in XSS_PAYLOADS is substituted for that
    parameter's value while the other parameters keep their original values.
    A payload counts as successful only when it appears verbatim in the
    response body. Probing of a parameter stops after two successful payloads.
    One "XSS" finding per (base URL, parameter) is appended to
    ``findings_list``.

    Args:
        url: A URL with a query string; URLs without parameters are ignored.
    """
    query = parse_qs(urlparse(url).query)
    if not query:
        return
    base = url.split("?")[0]
    headers = {"User-Agent": CRAWL_USER_AGENT}
    # First value of every parameter, used as the untouched context for probes.
    original_params = {name: values[0] for name, values in query.items()}

    for p in original_params:
        successful = []
        for payload in XSS_PAYLOADS:
            try:
                r = requests.get(
                    base,
                    params={**original_params, p: payload},
                    timeout=REQUEST_TIMEOUT,
                    headers=headers,
                )
            except Exception as e:
                logv(f"XSS request error: {e}")
                continue
            # Only an unencoded echo is evidence of reflection: a
            # percent-encoded copy (e.g. the request URL repeated in a link)
            # is inert markup and was a source of false positives.
            if payload in r.text:
                if payload not in successful:
                    successful.append(payload)
                if len(successful) >= 2:
                    break
        if successful:
            key = ("XSS", base, p)
            if key not in _findings_keys:
                _findings_keys.add(key)
                findings_list.append({
                    "name": "Cross-Site Scripting (Reflected)",
                    "type": "XSS",
                    "url": base + "?" + p + "=<value>",
                    "param": p,
                    "payloads": successful
                })
                logv(f"Saved XSS finding: {base} param={p} payloads={successful}")

# ------------------------------
# IDOR detection (A01 basic)
# ------------------------------
def detect_idor(url, probe_offsets=IDOR_PROBE_OFFSETS, max_examples=IDOR_MAX_EXAMPLES):
    """Heuristically flag numeric query parameters whose neighbours return other content.

    For each parameter whose first value parses as an integer, the page is
    fetched with that value (baseline) and with value + offset for every
    offset in ``probe_offsets`` (negative results are skipped). A probe that
    answers HTTP 200 with a body hash different from the baseline is recorded
    as an example; collection stops at ``max_examples``. One "BrokenAccess"
    finding per (base URL, parameter) is appended to ``findings_list``.
    """
    parts = urlparse(url)
    query = parse_qs(parts.query)
    if not query:
        return
    endpoint = parts._replace(query="").geturl()
    request_headers = {"User-Agent": CRAWL_USER_AGENT}

    for param, values in query.items():
        try:
            current_id = int(values[0])
        except ValueError:
            # Non-numeric parameter: not an ID candidate.
            continue

        try:
            baseline = requests.get(endpoint, params={param: current_id}, timeout=REQUEST_TIMEOUT, headers=request_headers)
            baseline_digest = short_hash(baseline.text)
        except Exception as exc:
            logv(f"IDOR baseline error: {exc}")
            continue

        differing_ids = []
        for candidate in (current_id + delta for delta in probe_offsets):
            if candidate < 0:
                continue
            try:
                probe = requests.get(endpoint, params={param: candidate}, timeout=REQUEST_TIMEOUT, headers=request_headers)
                differs = probe.status_code == 200 and short_hash(probe.text) != baseline_digest
            except Exception as exc:
                logv(f"IDOR probe error: {exc}")
                continue
            if not differs:
                continue
            differing_ids.append(candidate)
            if len(differing_ids) >= max_examples:
                break

        if not differing_ids:
            continue
        finding_key = ("BrokenAccess", endpoint, param)
        if finding_key in _findings_keys:
            continue
        _findings_keys.add(finding_key)
        payload_examples = [str(value) for value in differing_ids]
        findings_list.append({
            "name": "Broken Access Control - Possible IDOR",
            "type": "BrokenAccess",
            "url": endpoint + "?" + param + "=<id>",
            "param": param,
            "payloads": payload_examples
        })
        logv(f"Saved IDOR finding: {endpoint} param={param} examples={payload_examples}")

# ------------------------------
# A01 extended checks (forced browsing, methods, dir listing)
# ------------------------------
def forced_browsing_check(start_url, paths=COMMON_ADMIN_PATHS, max_checks=10):
    """Request well-known admin/config paths on the site root and record hits.

    A path answering HTTP 200 with more than 50 characters of body is stored
    as an "Exposed Page"; a 401/403 whose body mentions login, admin or
    unauthorized is stored as an "Auth Gate". At most ``max_checks`` paths are
    requested (failed requests count towards the limit).
    """
    origin = urlparse(start_url)
    root = f"{origin.scheme}://{origin.netloc}/"
    request_headers = {"User-Agent": CRAWL_USER_AGENT}

    def record(title, target, evidence):
        # Store one BrokenAccess finding per target URL.
        finding_key = ("BrokenAccess", target, None)
        if finding_key in _findings_keys:
            return
        _findings_keys.add(finding_key)
        findings_list.append({
            "name": title,
            "type": "BrokenAccess",
            "url": target,
            "param": None,
            "payloads": [evidence]
        })

    for attempted, path in enumerate(paths):
        if attempted >= max_checks:
            break
        target = urljoin(root, path)
        try:
            resp = requests.get(target, timeout=REQUEST_TIMEOUT, headers=request_headers, allow_redirects=True)
            status = resp.status_code
            body = resp.text or ""
            if status == 200 and len(body) > 50:
                record("Broken Access Control - Forced Browsing (Exposed Page)", target, f"HTTP {status}")
            elif status in (401, 403):
                lowered = body.lower()
                if any(marker in lowered for marker in ("login", "admin", "unauthorized")):
                    record(
                        "Broken Access Control - Forced Browsing (Auth Gate)",
                        target,
                        f"HTTP {status} - login/admin markers",
                    )
        except Exception as exc:
            logv(f"forced_browsing_check error for {target}: {exc}")

def check_insecure_http_methods(start_url, paths_to_check=None, max_checks=6):
    """Send OPTIONS requests and record targets advertising dangerous methods.

    Targets are the first ``max_checks`` entries of ``paths_to_check`` joined
    onto the site root, or "/", "/api/" and "/admin/" when no paths are given.
    A "BrokenAccess" finding is stored when the ``Allow`` header lists any
    method from DANGEROUS_HTTP_METHODS.
    """
    request_headers = {"User-Agent": CRAWL_USER_AGENT}
    origin_parts = urlparse(start_url)
    origin = f"{origin_parts.scheme}://{origin_parts.netloc}"

    if paths_to_check:
        targets = [urljoin(origin + "/", path) for path in paths_to_check[:max_checks]]
    else:
        targets = [origin + suffix for suffix in ("/", "/api/", "/admin/")]

    for target in targets:
        try:
            reply = requests.options(target, timeout=REQUEST_TIMEOUT, headers=request_headers)
            reply_headers = reply.headers
            allow = reply_headers.get("Allow", "") or reply_headers.get("allow", "")
            if not allow:
                continue
            advertised = {token.strip().upper() for token in allow.split(",") if token.strip()}
            risky = advertised & DANGEROUS_HTTP_METHODS
            if not risky:
                continue
            finding_key = ("BrokenAccess", target, None)
            if finding_key in _findings_keys:
                continue
            _findings_keys.add(finding_key)
            findings_list.append({
                "name": "Broken Access Control - Insecure HTTP Methods Allowed",
                "type": "BrokenAccess",
                "url": target,
                "param": None,
                "payloads": [", ".join(sorted(risky))]
            })
        except Exception as exc:
            logv(f"check_insecure_http_methods error for {target}: {exc}")
            continue

def check_directory_listing(start_url, sample_paths=None, max_checks=8):
    """Fetch candidate directories and record those that look like an index page.

    Candidates are the parent directories of ``sample_paths`` (resolved
    against the site root, duplicates removed), or just the site root when no
    samples are given. A "BrokenAccess" finding is stored when the body
    contains any DIR_LISTING_MARKERS entry (case-insensitive). At most
    ``max_checks`` directories are requested.
    """
    request_headers = {"User-Agent": CRAWL_USER_AGENT}
    origin = urlparse(start_url)
    root = f"{origin.scheme}://{origin.netloc}/"

    if sample_paths:
        candidates = []
        for sample in sample_paths:
            path = urlparse(sample).path
            # Keep directory paths as-is; for files, drop the last segment.
            directory_path = path if path.endswith("/") else path.rpartition("/")[0] + "/"
            directory_url = urljoin(root, directory_path)
            if directory_url not in candidates:
                candidates.append(directory_url)
    else:
        candidates = [root]

    for attempted, directory_url in enumerate(candidates):
        if attempted >= max_checks:
            break
        try:
            reply = requests.get(directory_url, timeout=REQUEST_TIMEOUT, headers=request_headers)
            lowered = (reply.text or "").lower()
            looks_like_listing = any(marker.lower() in lowered for marker in DIR_LISTING_MARKERS)
            finding_key = ("BrokenAccess", directory_url, None)
            if looks_like_listing and finding_key not in _findings_keys:
                _findings_keys.add(finding_key)
                findings_list.append({
                    "name": "Broken Access Control - Directory Listing Enabled",
                    "type": "BrokenAccess",
                    "url": directory_url,
                    "param": None,
                    "payloads": ["Directory listing detected"]
                })
        except Exception as exc:
            logv(f"check_directory_listing error for {directory_url}: {exc}")

# ------------------------------
# Optional: role-based access test (disabled by default)
# ------------------------------
def role_based_access_test(start_url, paths=None, credentials=None, max_checks=6):
    """Try HTTP auth with default credentials against likely protected pages.

    Does nothing unless ENABLE_ROLE_TEST is true. Each target is fetched
    anonymously, then once per credential pair; a "BrokenAccess" finding is
    stored when an authenticated request returns 200 and either the anonymous
    request was 401/403 or the body hash differs from the anonymous one.

    Args:
        start_url: Used only for its scheme and netloc.
        paths: Extra target URLs appended to the built-in ones; when given,
            the combined list is truncated to ``max_checks``.
        credentials: (user, password) pairs; defaults to ROLE_TEST_CREDENTIALS.
        max_checks: Cap applied to the target list when ``paths`` is given.
    """
    if not ENABLE_ROLE_TEST:
        logv("Role-based tests disabled (ENABLE_ROLE_TEST=False)")
        return

    credential_pairs = credentials if credentials else ROLE_TEST_CREDENTIALS
    request_headers = {"User-Agent": CRAWL_USER_AGENT}
    origin_parts = urlparse(start_url)
    origin = f"{origin_parts.scheme}://{origin_parts.netloc}"
    targets = [origin + suffix for suffix in ("/admin/", "/login.php", "/dashboard/")]
    if paths:
        targets = (targets + paths)[:max_checks]

    def record(target, user, password, status):
        # Store one BrokenAccess finding per target URL.
        finding_key = ("BrokenAccess", target, None)
        if finding_key in _findings_keys:
            return
        _findings_keys.add(finding_key)
        findings_list.append({
            "name": "Broken Access Control - Role-based/Auth test succeeded",
            "type": "BrokenAccess",
            "url": target,
            "param": None,
            "payloads": [f"Creds: {user}/{password} -> HTTP {status}"]
        })

    for target in targets:
        try:
            anonymous = requests.get(target, timeout=REQUEST_TIMEOUT, headers=request_headers, allow_redirects=True)
            public_status = anonymous.status_code
            public_hash = short_hash(anonymous.text)
        except Exception:
            continue

        for user, password in credential_pairs:
            try:
                authed = requests.get(
                    target,
                    auth=(user, password),
                    timeout=REQUEST_TIMEOUT,
                    headers=request_headers,
                    allow_redirects=True,
                )
                if authed.status_code == 200:
                    was_blocked = public_status in (401, 403)
                    if was_blocked or short_hash(authed.text) != public_hash:
                        record(target, user, password, authed.status_code)
                # Pause between credential attempts.
                time.sleep(0.2)
            except Exception as exc:
                logv(f"role_based_access_test error for {target} with {user}: {exc}")
                continue

# ------------------------------
# Crypto checks (A02)
# ------------------------------
def check_crypto_tls(start_url):
    """Check HTTPS availability, certificate health and HSTS for ``start_url``.

    The URL is fetched following redirects. If the final URL is HTTPS, a TLS
    connection is opened to the host and port of that final URL (port 443 when
    none is given) and the certificate is inspected for expiry (expired, or
    fewer than 90 days left) and for issuer == subject. Otherwise a "no HTTPS"
    issue is recorded. A missing Strict-Transport-Security header on the
    fetched response is also recorded. All issues are stored as a single
    "Crypto" finding in ``findings_list``.
    """
    cert_issues = []
    request_headers = {"User-Agent": CRAWL_USER_AGENT}

    resp = None
    try:
        resp = requests.get(start_url, timeout=REQUEST_TIMEOUT, allow_redirects=True, headers=request_headers)
    except Exception as e:
        logv(f"HTTPS availability check error: {e}")

    final = urlparse(resp.url) if resp is not None else None
    if final is not None and final.scheme == "https":
        try:
            # Probe where the site actually serves TLS: redirects may land on
            # another host, and the URL may name a non-default port.
            host = final.hostname
            port = final.port or 443
            ctx = ssl.create_default_context()
            with socket.create_connection((host, port), timeout=REQUEST_TIMEOUT) as sock:
                with ctx.wrap_socket(sock, server_hostname=host) as ssock:
                    cert = ssock.getpeercert()

            not_after = cert.get('notAfter')
            if not_after:
                try:
                    # Parses the certificate's GMT timestamp to epoch seconds.
                    expires_at = ssl.cert_time_to_seconds(not_after)
                except ValueError:
                    logv(f"Could not parse cert expiry: {not_after}")
                else:
                    days_left = int((expires_at - time.time()) // 86400)
                    if days_left < 0:
                        cert_issues.append(f"Certificate expired {abs(days_left)} days ago")
                    elif days_left < 90:
                        cert_issues.append(f"Certificate expires in {days_left} days (consider renewal)")

            if cert.get('issuer') == cert.get('subject'):
                cert_issues.append("Certificate appears self-signed (issuer == subject)")
        except Exception as e:
            cert_issues.append(f"TLS handshake failed or certificate not retrievable: {e}")
            logv(f"TLS handshake error: {e}")
    else:
        cert_issues.append("Site does not redirect to HTTPS or HTTPS not available")

    # Reuse the response fetched above rather than requesting the page again.
    if resp is not None and 'Strict-Transport-Security' not in resp.headers:
        cert_issues.append("Missing Strict-Transport-Security header (HSTS)")

    if cert_issues:
        key = ("Crypto", start_url, None)
        if key not in _findings_keys:
            _findings_keys.add(key)
            findings_list.append({
                "name": "Cryptographic Failures / TLS Issues",
                "type": "Crypto",
                "url": start_url,
                "param": None,
                "payloads": cert_issues
            })

# ------------------------------
# Insecure design heuristics (A04)
# ------------------------------
def check_insecure_design(start_url, max_pages=3):
    """Collect heuristic "insecure design" hints from a few same-host pages.

    Checks performed:
      * ``Server`` / ``X-Powered-By`` response headers on ``start_url``.
      * Per page: sensitive keywords in the body, POST forms without a hidden
        field that looks like a CSRF token, and password inputs whose
        ``autocomplete`` attribute is not "off".

    Pages are discovered by following same-netloc links from ``start_url``;
    analysis stops after ``max_pages`` pages have been analysed (only HTTP 200
    pages count). All hints are stored as one "InsecureDesign" finding in
    ``findings_list``.
    """
    sensitive_terms = ["password", "secret", "api_key", "apikey", "token", "access_token", "private_key", "aws_secret"]
    csrf_like_names = ["csrf", "token", "_csrf", "authenticity_token", "csrf_token"]
    request_headers = {"User-Agent": CRAWL_USER_AGENT}
    headers_issues = []
    page_issues = []

    try:
        r = requests.get(start_url, timeout=REQUEST_TIMEOUT, headers=request_headers)
        for h in ("Server", "X-Powered-By"):
            if h in r.headers:
                headers_issues.append(f"Header leaks info: {h}: {r.headers.get(h)}")
    except Exception as e:
        # Best effort: the page checks below still run, but leave a trace.
        logv(f"Insecure design header fetch error: {e}")

    start_netloc = urlparse(start_url).netloc
    to_visit = deque([start_url])
    visited = set()
    count = 0
    while to_visit and count < max_pages:
        # Drop fragments so "page#a" and "page#b" are not analysed twice.
        url = normalize_url_without_fragment(to_visit.popleft())
        if url in visited:
            continue
        visited.add(url)
        try:
            r = requests.get(url, timeout=REQUEST_TIMEOUT, headers=request_headers)
            if r.status_code != 200:
                continue
            soup = BeautifulSoup(r.text, "html.parser")
            text = r.text.lower()
            found_terms = [t for t in sensitive_terms if t in text]
            if found_terms:
                page_issues.append(f"Sensitive terms present on {url}: {', '.join(found_terms)}")
            for form in soup.find_all("form"):
                method = (form.get("method") or "").lower()
                hidden_names = [inp.get("name", "").lower() for inp in form.find_all("input", type="hidden")]
                has_csrf = any(cs in hn for hn in hidden_names for cs in csrf_like_names)
                if method == "post" and not has_csrf:
                    page_issues.append(f"POST form without apparent CSRF token on {url}")
                for pwd in form.find_all("input", {"type": "password"}):
                    ac = pwd.get("autocomplete")
                    if ac is None or ac.lower() != "off":
                        page_issues.append(f"Password input on {url} without autocomplete='off'")
            for a in soup.find_all("a", href=True):
                next_url = normalize_url_without_fragment(urljoin(url, a["href"].strip()))
                if urlparse(next_url).netloc == start_netloc and next_url not in visited:
                    to_visit.append(next_url)
            count += 1
            time.sleep(0.05)
        except Exception as e:
            logv(f"Insecure design page fetch error: {e}")
            continue

    all_issues = headers_issues + page_issues
    if all_issues:
        key = ("InsecureDesign", start_url, None)
        if key not in _findings_keys:
            _findings_keys.add(key)
            findings_list.append({
                "name": "Insecure Design - Heuristic Findings",
                "type": "InsecureDesign",
                "url": start_url,
                "param": None,
                "payloads": all_issues
            })

# ------------------------------
# Report printer (grouped)
# ------------------------------
def print_grouped_report(start_url):
    """Print all collected findings grouped by OWASP category, then a summary.

    Reads ``findings_list``; categories with no findings are omitted, but the
    summary counts are always printed.
    """
    divider = "\n" + "-"*40 + "\n"

    def of_type(kind):
        # Findings of one type, in the order they were recorded.
        return [item for item in findings_list if item["type"] == kind]

    def print_common(item):
        # The three lines every finding starts with.
        print(f"NAME OF VULN: {item['name']}")
        print(f"TYPE OF VULN BASED ON WHICH VULN OF OWASP TOP 10: {OWASP_MAPPING.get(item['type'])}")
        print(f"AFFECTED URL: {item['url']}")

    def print_injection_group(heading, items):
        print(heading)
        for item in items:
            print_common(item)
            print(f"AFFECTED PARAMETER: {item['param']}")
            print("PAYLOADS USED: " + ", ".join(item['payloads']))
            print(divider)

    print("\n" + "-"*60)
    print(f"Vulnerability Report for: {start_url}")
    print("-"*60 + "\n")

    broken = of_type("BrokenAccess")
    sqli = of_type("SQLi")
    xss = of_type("XSS")
    crypto = of_type("Crypto")
    insecure = of_type("InsecureDesign")
    headers = of_type("Headers")

    if broken:
        print("========== CATEGORY: Broken Access Control (A01) ==========\n")
        for item in broken:
            print_common(item)
            if item['param']:
                print(f"AFFECTED PARAMETER: {item['param']}")
            print("PAYLOADS USED: " + ", ".join(item['payloads']))
            print(divider)

    if sqli or xss:
        print("========== CATEGORY: Injection (A03) ==========\n")
        if sqli:
            print_injection_group("---- Subgroup: SQL Injection ----\n", sqli)
        if xss:
            print_injection_group("---- Subgroup: Cross-Site Scripting (XSS) ----\n", xss)

    if crypto:
        print("========== CATEGORY: Cryptographic Failures (A02) ==========\n")
        for item in crypto:
            print_common(item)
            print("DETAILS: " + "; ".join(item['payloads']))
            print(divider)

    if insecure:
        print("========== CATEGORY: Insecure Design (A04) ==========\n")
        for item in insecure:
            print_common(item)
            print("DETAILS:")
            for detail in item['payloads']:
                print("  - " + detail)
            print(divider)

    if headers:
        print("========== CATEGORY: Security Misconfiguration (A05) ==========\n")
        for item in headers:
            print_common(item)
            print("MISSING HEADERS: " + ", ".join(item['payloads']))
            print(divider)

    print("Summary:")
    print(f"  Broken Access findings : {len(broken)}")
    print(f"  SQLi findings          : {len(sqli)}")
    print(f"  XSS findings           : {len(xss)}")
    print(f"  Crypto issues          : {len(crypto)}")
    print(f"  Insecure design issues : {len(insecure)}")
    print(f"  Header issues          : {len(headers)}")
    print("-"*60 + "\n")

# ------------------------------
# Orchestration
# ------------------------------
def run_full_scanner(start_url=START_URL, max_depth=MAX_CRAWL_DEPTH, max_param_urls=MAX_PARAM_URLS, verbose=False):
    """Run every check against ``start_url`` and print the grouped report.

    Previous findings are cleared first. Site-level checks (headers, TLS,
    insecure design, forced browsing, HTTP methods, directory listing) always
    run; the per-URL SQLi / XSS / IDOR tests run for each parameterized URL
    the crawler finds.

    Args:
        start_url: Target to scan.
        max_depth: Crawl depth passed to ``crawl_for_param_urls``.
        max_param_urls: Maximum number of parameterized URLs to collect.
        verbose: When true, enables ``[DEBUG]`` output via the VERBOSE flag.
    """
    global VERBOSE
    VERBOSE = verbose
    findings_list.clear()
    _findings_keys.clear()

    print(Fore.CYAN + f"[*] Scanning target: {start_url}\n")

    # header & crypto & insecure-design quick checks
    header_check(start_url)
    check_crypto_tls(start_url)
    check_insecure_design(start_url, max_pages=3)

    # crawl for parameterized URLs, then run the per-URL tests
    param_urls = crawl_for_param_urls(start_url, max_depth=max_depth, max_urls=max_param_urls)
    if param_urls:
        for u in param_urls:
            test_sqli(u)
            test_xss(u)
            detect_idor(u)
    else:
        print(Fore.YELLOW + "[!] No parameterized URLs found by crawler.")

    # Extended A01 checks only need the start URL, so they run even when the
    # crawler found nothing (directory listing then falls back to the root).
    forced_browsing_check(start_url, max_checks=8)
    check_insecure_http_methods(start_url, paths_to_check=["/", "/admin/", "/api/"], max_checks=5)
    check_directory_listing(start_url, sample_paths=param_urls, max_checks=6)

    # optional role-based tests (disabled by default)
    role_based_access_test(start_url, paths=param_urls[:6], credentials=ROLE_TEST_CREDENTIALS, max_checks=6)

    print_grouped_report(start_url)

# ------------------------------
# Run when executed directly
# ------------------------------
if __name__ == "__main__":
    # Pass verbose=True to see the [DEBUG] lines.
    run_full_scanner(START_URL, MAX_CRAWL_DEPTH, MAX_PARAM_URLS, verbose=False)


[*] Scanning target: http://testphp.vulnweb.com/


------------------------------------------------------------
Vulnerability Report for: http://testphp.vulnweb.com/
------------------------------------------------------------


NAME OF VULN: Broken Access Control - Possible IDOR
TYPE OF VULN BASED ON WHICH VULN OF OWASP TOP 10: A01:2021 - Broken Access Control
AFFECTED URL: http://testphp.vulnweb.com/listproducts.php?cat=<id>
AFFECTED PARAMETER: cat
PAYLOADS USED: 0, 2

----------------------------------------

NAME OF VULN: Broken Access Control - Possible IDOR
TYPE OF VULN BASED ON WHICH VULN OF OWASP TOP 10: A01:2021 - Broken Access Control
AFFECTED URL: http://testphp.vulnweb.com/artists.php?artist=<id>
AFFECTED PARAMETER: artist
PAYLOADS USED: 0, 2

----------------------------------------

NAME OF VULN: Broken Access Control - Possible IDOR
TYPE OF VULN BASED ON WHICH VULN OF OWASP TOP 10: A01:2021 - Broken Access Control
AFFECTED URL: http://testphp.vulnweb.com/hpp/?pp=<id>
AFF