Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a remote wordlist updater (rebase) #1005

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions .bin/checkers/check-if-auto-updated.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env python3

import os,sys,json

# Exit quietly when no file list was passed on the command line.
# BUGFIX: the original read sys.argv[1] unconditionally, which raises
# IndexError when the script is invoked with no arguments at all.
if len(sys.argv) < 2 or not sys.argv[1]:
    exit(0)

# When run under the caller/wrapper script (env var set to "1"), output
# switches to the machine-readable "E,<file>,<line>" record format.
IS_WRAPPED = False

if "IS_RUNNING_UNDER_CALLER_SCRIPT" in os.environ:
    IS_WRAPPED = os.environ['IS_RUNNING_UNDER_CALLER_SCRIPT'] == "1"

def print_normal(msg):
    """Print *msg* in normal (standalone) mode; wrapped mode stays silent."""
    if not IS_WRAPPED:
        print(msg)

def print_err(file, line_number):
    """In wrapped mode, emit a machine-readable error record for *file*."""
    if not IS_WRAPPED:
        return
    print("E,%s,%s" % (file, line_number))

def print_warn(file, line_number):
    """In wrapped mode, emit a machine-readable warning record for *file*."""
    if not IS_WRAPPED:
        return
    print("W,%s,%s" % (file, line_number))

# Banner: human-readable line in standalone mode, plain header in wrapped mode.
print_normal("[+] Remote wordlist overwrite check")
if IS_WRAPPED:
    print("Remote wordlist overwrite check")
    print("Files that the script catches will be overwritten next update.")

# Changed files arrive as a single space-separated string in argv[1].
# NOTE(review): a file name containing a space would be split apart here.
files=sys.argv[1].split(" ")

# Abort immediately (exit 2) if any listed file does not exist on disk.
for i in files:
    if not os.path.isfile(i):
        print_err(i,0)
        print_normal("[!] %s does not exist!"%(i))
        exit(2)

overall_pass_status=True

# Collect every path the remote updater will overwrite: each source's
# "output" plus any "additional_paths" (see .bin/wordlist-updaters/README.md).
sources = json.load(open(".bin/wordlist-updaters/sources.json"))
overwritten_paths = {
    "dirs": [],
    "files": []
}

for source in sources:
    found_paths = []

    if "output" in source.keys():
        found_paths.append(source["output"])

    if "additional_paths" in source.keys():
        found_paths += source["additional_paths"]

    # Classify each path by what currently exists on disk; paths that are
    # neither an existing file nor an existing directory are ignored.
    for path in found_paths:

        if os.path.isdir(path):
            overwritten_paths["dirs"].append(path)

        elif os.path.isfile(path):
            overwritten_paths["files"].append(path)

# Flag any changed file that lives under an auto-updated directory or that
# exactly matches an auto-updated file.
for i in files:

    # Prefix match; dir entries in sources.json end with "/" so this should
    # not match sibling paths — TODO confirm that stays true for new sources.
    for dir_path in overwritten_paths["dirs"]:
        if i.startswith(dir_path):
            print_normal(f"[!] Warning: file {i} is in a directory that will get overwritten!")
            print_err(i, 0)
            overall_pass_status=False
            break

    for file_path in overwritten_paths["files"]:
        if i == file_path:
            print_normal(f"[!] Warning: file {i} will get overwritten!")
            print_err(i, 0)
            overall_pass_status=False
            break

if overall_pass_status:
    print_normal("[+] All files passed overwrite checks")
    exit(0)

print_normal("[!] Warning: One or more files failed to pass the overwrite checks")

# Wrapped mode reports findings through the E,... records and exits 0 so the
# wrapper decides overall status; standalone mode fails the check with exit 2.
if IS_WRAPPED:
    exit(0)
else:
    exit(2)
4 changes: 1 addition & 3 deletions .bin/trickest-patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
shutil.copytree(path,OUTPUT_ROBOTS,dirs_exist_ok=True)

print("[+] Copied all the files")

for i in [OUTPUT_ROBOTS,OUTPUT_TECHNOLOGIES]:
for root,_,file_list in os.walk(i):
for file in file_list:
Expand All @@ -64,6 +65,3 @@

if len(contents)!=len(patch_content):
open(path,"wb").write(b"\n".join(patch_content))



12 changes: 0 additions & 12 deletions .bin/trickest-updater.sh

This file was deleted.

56 changes: 56 additions & 0 deletions .bin/wordlist-updaters/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Wordlist updaters

## Overview
The purpose of these scripts is to update wordlists from the remote sources defined in `sources.json`.

A GitHub Action checks every hour whether each task's update conditions are met, and updates the corresponding wordlists accordingly.

`status.json` is maintained automatically and is not meant to be edited in a PR.

## Format

Example sources.json

```json
[
{
"name": "Jwt secrets update",
"type": "file",
"source": "https://raw.githubusercontent.com/wallarm/jwt-secrets/master/jwt.secrets.list",
"output": "Passwords/scraped-JWT-secrets.txt",
"post_run_script": "",
"frequency": "3h"
}
]
```

All fields are required unless otherwise stated.

`name` is the name of the task.

`type` can be one of the following: `file, git_dir`.

`source` specify the remote location. If type is `git_dir`, the folder at that location will be cloned using git.

`frequency` is the update frequency. The script uses the `status.json` file to know when to update. Accepted units of time are `h`/`H` for hours and `d`/`D` for days. A frequency may be given as days only, hours only, or both — but days must come before hours (e.g. `1d6h` is valid, `6h1d` is not).

`update_time` specifies the daily frequency in utc 24 hour syntax (0300). Only one update frequency field can be set at a time. (`frequency` or `update_time`)

`output` is the output file/dir the script will put the output in.

`post_run_script` is the script to be run after pulling the list successfully. This field is optional.

`additional_paths` is the additional paths that the workflow script should alert if there is a pull request for the file. This field is optional and won't be used for the updater, but rather the checker.

- - -

Example status.json

```json
{
"Jwt secrets update": {
"last_update" : 0
}
}
```

22 changes: 22 additions & 0 deletions .bin/wordlist-updaters/sources.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[
{
"name": "Jwt secrets update",
"type": "file",
"source": "https://raw.githubusercontent.com/wallarm/jwt-secrets/master/jwt.secrets.list",
"output": "Passwords/scraped-JWT-secrets.txt",
"post_run_script": "",
"frequency": "6h"
},
{
"name": "Trickest wordlist update",
"type": "git_dir",
"source": "https://github.com/trickest/wordlists.git",
"output": ".working_space",
"post_run_script": ".bin/trickest-patcher.py",
"update_time": "1030",
"additional_paths": [
"Discovery/Web-Content/trickest-robots-disallowed-wordlists/",
"Discovery/Web-Content/CMS/trickest-cms-wordlist/"
]
}
]
8 changes: 8 additions & 0 deletions .bin/wordlist-updaters/status.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"Jwt secrets update": {
"last_update": 1712376971
},
"Trickest wordlist update": {
"last_update": 1712310048
}
}
178 changes: 178 additions & 0 deletions .bin/wordlist-updaters/updater.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
#!/usr/bin/env python3

import os
import re
import json
import requests
import subprocess
from datetime import datetime, timedelta

# TODO Summary file
# TODO Advanced crontab syntax

# All paths are relative to the repository root (the workflow's CWD).
BASE_PATH = ".bin/wordlist-updaters"
SOURCE_PATH = os.path.join(BASE_PATH, "sources.json")
STATUS_PATH = os.path.join(BASE_PATH, "status.json")
# Matches "<N>d", "<N>h" or "<N>d<N>h" — days must come before hours.
# Group 1 captures the day count, group 3 the hour count (either may be empty).
FREQUENCY_REGEX = r"^(?:([0-9]+)d|())(?:([0-9]+)h|())(?!.*?d)$"
# Accepted values for a source's "type" field.
VALID_TYPES = ["file", "git_dir"]
# Captured once so every scheduling comparison below uses the same instant.
TIME_NOW = datetime.now()

def request_wrapper(url):
    """GET *url* with up to 3 attempts and return the response body text.

    Exits the process with status 2 when all attempts fail.
    BUGFIX: network-level errors (connection reset, DNS failure, timeout)
    raised by requests.get previously crashed the updater; they now count
    as a failed attempt and are retried like a non-200 response.
    """
    for i in range(1, 4):
        try:
            # Timeout keeps a stalled remote from hanging the CI job forever.
            r = requests.get(url, timeout=60)
            if r.status_code == 200:
                # print("[+] Got %s successfully!"%(url))
                return r.text
        except requests.RequestException:
            pass  # treated the same as a bad status code: retry below
        if i == 3:
            print("[!] Failed to get %s."%(url))
            exit(2)
        print("[!] Getting %s failed(%i/3)"%(url,i))

# Both configuration files must exist before anything else happens; a
# missing one is a hard failure for the whole run.
for required_path, display_name in ((SOURCE_PATH, "Sources.json"),
                                    (STATUS_PATH, "Status.json")):
    if not os.path.isfile(required_path):
        print("[!] %s is missing!" % display_name)
        exit(2)

SOURCES = json.load(open(SOURCE_PATH, "r"))
STATUS = json.load(open(STATUS_PATH, "r"))

# Tasks whose schedule says they are due; filled by the validation loop.
to_check = []

# Validate every source and queue the ones whose schedule says they are due.
for source in SOURCES:
    task_name = source["name"]
    source_keys = source.keys()

    # A task with no status entry has never run: queue it unconditionally.
    if not task_name in STATUS.keys():
        print(f"[+] Queuing task {task_name} as task was never checked before")
        to_check.append(source)
        continue

    # --- structural validation of the source entry ---
    if not "output" in source_keys or not isinstance(source["output"], str):
        print(f"[!] Skipping task {task_name} as output field is missing/invalid")
        continue

    if not "type" in source_keys or not isinstance(source["type"], str):
        print(f"[!] Skipping task {task_name} as type field is missing/invalid")
        continue

    if not source["type"] in VALID_TYPES:
        print(f"[!] Skipping task {task_name} as type is invalid")
        continue

    if source["output"].startswith("/"):
        print(f"[!] Skipping task {task_name} as output path is not relative.")
        continue

    if source["type"].startswith("git_") and not source["source"].endswith(".git"):
        print(f"[!] Skipping task {task_name} as a git task was defined with a non git url.")
        continue

    if not "last_update" in STATUS[task_name].keys() or not isinstance(STATUS[task_name]["last_update"], int):
        print(f"[!] Queuing task {task_name} as last_update field is missing/invalid")
        to_check.append(source)
        continue

    # Exactly one of the two scheduling fields may be present.
    if not ("frequency" in source_keys) ^ ("update_time" in source_keys):
        print(f"[!] Skipping task {task_name} as only frequency or update_time can be specified")
        continue

    if "frequency" in source_keys and isinstance(source["frequency"], str):
        regex_match = re.search(FREQUENCY_REGEX, source["frequency"])

        if not regex_match:
            print(f"[!] Skipping task {task_name} as frequency field contains invalid formatting of days and hours")
            continue

        days, _, hours, _ = regex_match.groups()

        # BUGFIX: the original used `bool(x) | 0`, which collapsed every
        # count to 0 or 1 — e.g. "12h" became a 1-hour frequency.
        days = int(days) if days else 0
        hours = int(hours) if hours else 0

        next_update_time = datetime.fromtimestamp(STATUS[task_name]["last_update"]) + timedelta(days=days, hours=hours)
        time_from_update = TIME_NOW - next_update_time
        time_to_update = next_update_time - TIME_NOW

        if TIME_NOW < next_update_time:
            # BUGFIX: use total_seconds() — timedelta.seconds ignores the
            # days component, so a multi-day wait could look like < 5 min.
            seconds_to_update = int(time_to_update.total_seconds())
            if seconds_to_update <= 300:
                print(f"[+] Queuing task {task_name} as it is less than 5 minutes to update. ({seconds_to_update} seconds to update)")
                to_check.append(source)
                continue

            print(f"[!] Skipping task {task_name} as it is more than 5 minutes to update ({seconds_to_update} seconds to update)")
            continue

        # Past due. BUGFIX: report the elapsed time (time_from_update);
        # the original printed the negative delta's .seconds field.
        print(f"[+] Queuing task {task_name} as it is {int(time_from_update.total_seconds())} seconds after scheduled update time.")
        to_check.append(source)

    elif "update_time" in source_keys and isinstance(source["update_time"], str):
        update_time = source["update_time"]

        # BUGFIX: the original `len(...) != 4 and update_time.isnumeric()`
        # let non-numeric 4-char values through to int() (crash) while
        # rejecting only short numeric ones. Require exactly 4 digits.
        if len(update_time) != 4 or not update_time.isnumeric():
            print(f"[!] Skipping task {task_name} as it is in a incorrect format")
            continue

        hours = int(update_time[:2])
        minutes = int(update_time[2:])

        # BUGFIX: 24-hour clock is 00-23 / 00-59; the original ranges
        # rejected midnight ("00xx") and accepted the invalid 24/60.
        if not hours in range(0, 24):
            print(f"[!] Skipping task {task_name} as hours is not in range 0-23.")
            continue

        if not minutes in range(0, 60):
            print(f"[!] Skipping task {task_name} as minutes is not in range 0-59.")
            continue

        # Queue when the scheduled wall-clock time falls within the next
        # hour of this (hourly) run.
        scheduled_update_time = TIME_NOW.replace(hour=hours, minute=minutes)
        if TIME_NOW <= scheduled_update_time and TIME_NOW + timedelta(hours=1) >= scheduled_update_time:
            print(f"[+] Queuing task {task_name} as update time is within the next hour")
            to_check.append(source)
            continue

    else:
        print(f"[!] Skipping task {task_name} as update_time field is invalid")
        continue

# Nothing due this run — bail out successfully.
if not to_check:
    print(f"[!] No task were queued. Exiting.")
    exit()

print(f"[+] Queued a total of {len(to_check)} tasks to run.")

# Run every queued task, recording a fresh last_update timestamp on success.
for task in to_check:
    print(f"[+] Starting task {task['name']}")

    if not task["name"] in STATUS.keys():
        STATUS[task["name"]] = {}

    task_type = task["type"]

    if task_type == "file":
        content = request_wrapper(task["source"])
        # BUGFIX: use a context manager so the handle is flushed and closed
        # instead of relying on interpreter cleanup.
        with open(task["output"], "w") as out_file:
            out_file.write(content)
        print(f"[+] Saved file to output location")

        STATUS[task["name"]]["last_update"] = int(datetime.now().timestamp())

    elif task_type == "git_dir":
        if not os.path.exists(task['output']):
            print(f"[+] Making directory {task['output']}")
            os.makedirs(task["output"])

        # Shallow clone into the working dir; the post-run script is then
        # responsible for copying/patching files out of it.
        subprocess.run(["git", "clone", "-q", "--depth=1", task["source"]], cwd=task["output"])
        STATUS[task["name"]]["last_update"] = int(datetime.now().timestamp())

    # BUGFIX: post_run_script is optional per the README; .get() avoids a
    # KeyError when the field is omitted from sources.json.
    if task.get("post_run_script"):
        print("[+] Running post run script")
        subprocess.run(task["post_run_script"])
        print("[+] Finished running post run script")

    print(f"[+] Finished task {task['name']}")

# Persist the updated timestamps so the next run knows when tasks last ran.
with open(STATUS_PATH, "w") as status_file:
    json.dump(STATUS, status_file, indent=4)
Loading
Loading