From 35c115a8b3a8010cba8183fdae7fe8a2b04bdeef Mon Sep 17 00:00:00 2001 From: Dreg Date: Sat, 2 Jul 2022 20:37:38 +0200 Subject: [PATCH 1/2] --regex support for search-pattern command (#860) --- docs/commands/search-pattern.md | 9 +++++ gef.py | 58 +++++++++++++++++++++++++++++--- tests/commands/search_pattern.py | 15 ++++++++- 3 files changed, 77 insertions(+), 5 deletions(-) diff --git a/docs/commands/search-pattern.md b/docs/commands/search-pattern.md index c06b19ca7..49b15c5f9 100644 --- a/docs/commands/search-pattern.md +++ b/docs/commands/search-pattern.md @@ -35,3 +35,12 @@ Sometimes, you may need to search for a very common pattern. To limit the search gef➤ search-pattern 0x4005f6 little libc gef➤ search-pattern 0x4005f6 little 0x603100-0x603200 ``` +### Searching in a specific range using regex ### +Sometimes, you may need an advanced search using regex. Just use --regex arg. + +Example: how to find null-end-printable(from x20-x7e) C strings (min size >=2 bytes) with a regex: + +``` +gef➤ search-pattern --regex 0x401000 0x401500 ([\\x20-\\x7E]{2,})(?=\\x00) + +``` diff --git a/gef.py b/gef.py index 6846c2b8d..7e3c3664c 100644 --- a/gef.py +++ b/gef.py @@ -51,6 +51,7 @@ import abc import argparse +import ast import binascii import codecs import collections @@ -5686,10 +5687,16 @@ class SearchPatternCommand(GenericCommand): _cmdline_ = "search-pattern" _syntax_ = f"{_cmdline_} PATTERN [little|big] [section]" _aliases_ = ["grep", "xref"] - _example_ = (f"\n{_cmdline_} AAAAAAAA" - f"\n{_cmdline_} 0x555555554000 little stack" - f"\n{_cmdline_} AAAA 0x600000-0x601000") - + _example_ = [f"{_cmdline_} AAAAAAAA", + f"{_cmdline_} 0x555555554000 little stack", + f"{_cmdline_} AAAA 0x600000-0x601000", + f"{_cmdline_} --regex 0x401000 0x401500 ([\\\\x20-\\\\x7E]{{2,}})(?=\\\\x00) <-- It matchs null-end-printable(from x20-x7e) C strings (min size 2 bytes)"] + + def __init__(self) -> None: + super().__init__() + self["max_size_preview"] = (10, "max size preview of bytes") + self["nr_pages_chunk"] = (0x400, "number of pages readed for each memory read chunk") + def print_section(self, section: Section) -> None: title = "In " if section.path: @@ -5746,6 +5753,37 @@ def search_pattern_by_address(self, pattern: str, start_address: int, end_addres del mem return locations + + def search_binpattern_by_address(self, binpattern: bytes, start_address: int, end_address: int) -> List[Tuple[int, int, Optional[str]]]: + """Search a binary pattern within a range defined by arguments.""" + + step = self["nr_pages_chunk"] * gef.session.pagesize + locations = [] + + for chunk_addr in range(start_address, end_address, step): + if chunk_addr + step > end_address: + chunk_size = end_address - chunk_addr + else: + chunk_size = step + + try: + mem = gef.memory.read(chunk_addr, chunk_size) + except gdb.MemoryError as e: + return [] + preview_size = self["max_size_preview"] + for match in re.finditer(binpattern, mem): + start = chunk_addr + match.start() + preview = str(mem[slice(*match.span())][0:preview_size]) + "..." + size_match = match.span()[1] - match.span()[0] + if size_match > 0: + size_match -= 1 + end = start + size_match + + locations.append((start, end, preview)) + + del mem + + return locations def search_pattern(self, pattern: str, section_name: str) -> None: """Search a pattern within the whole userland memory.""" @@ -5774,6 +5812,18 @@ def do_invoke(self, argv: List[str]) -> None: if argc < 1: self.usage() return + + if argc > 3 and argv[0].startswith("--regex"): + pattern = ' '.join(argv[3:]) + pattern = ast.literal_eval("b'" + pattern + "'") + + addr_start = parse_address(argv[1]) + addr_end = parse_address(argv[2]) + + for loc in self.search_binpattern_by_address(pattern, addr_start, addr_end): + self.print_loc(loc) + + return pattern = argv[0] endian = gef.arch.endianness diff --git a/tests/commands/search_pattern.py b/tests/commands/search_pattern.py index e5e78a8a0..deb29d17c 100644 --- a/tests/commands/search_pattern.py +++ b/tests/commands/search_pattern.py @@ -3,7 +3,7 @@ """ -from tests.utils import BIN_SH, GefUnitTestGeneric, gdb_run_cmd, gdb_start_silent_cmd +from tests.utils import BIN_SH, GefUnitTestGeneric, gdb_run_cmd, gdb_start_silent_cmd, gdb_start_silent_cmd_last_line class SearchPatternCommand(GefUnitTestGeneric): @@ -15,3 +15,16 @@ def test_cmd_search_pattern(self): res = gdb_start_silent_cmd(f"grep {BIN_SH}") self.assertNoException(res) self.assertIn("0x", res) + + def test_cmd_search_pattern_regex(self): + res = gdb_start_silent_cmd_last_line("set {char[6]} $sp = { 0x41, 0x42, 0x43, 0x44, 0x45, 0x00 }", +after=[r"search-pattern --regex $sp $sp+7 ([\\x20-\\x7E]{2,})(?=\\x00)",]) + self.assertNoException(res) + self.assertTrue(r"b'ABCDE'" in res) + # this should not match because binary string is not null ended: + res = gdb_start_silent_cmd_last_line("set {char[6]} $sp = { 0x41, 0x42, 0x43, 0x44, 0x45, 0x03 }", +after=[r"search-pattern --regex $sp $sp+7 ([\\x20-\\x7E]{2,})(?=\\x00)",]) + self.assertNoException(res) + self.assertTrue(r"b'ABCDE'" not in res) + + From 366237ce78786be49d686d371537f05a822503fa Mon Sep 17 00:00:00 2001 From: Dreg Date: Sat, 2 Jul 2022 21:11:27 +0200 Subject: [PATCH 2/2] Slightly improve the code of `search-pattern` (#862) Make `search-pattern` rely on native `gdb` exception to catch invalid memory access instead of relying on the string of the error. --- gef.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/gef.py b/gef.py index 7e3c3664c..b34a66386 100644 --- a/gef.py +++ b/gef.py @@ -5714,7 +5714,7 @@ def print_loc(self, loc: Tuple[int, int, str]) -> None: def search_pattern_by_address(self, pattern: str, start_address: int, end_address: int) -> List[Tuple[int, int, Optional[str]]]: """Search a pattern within a range defined by arguments.""" _pattern = gef_pybytes(pattern) - step = 0x400 * 0x1000 + step = self["nr_pages_chunk"] * gef.session.pagesize locations = [] for chunk_addr in range(start_address, end_address, step): @@ -5725,20 +5725,8 @@ def search_pattern_by_address(self, pattern: str, start_address: int, end_addres try: mem = gef.memory.read(chunk_addr, chunk_size) - except gdb.error as e: - estr = str(e) - if estr.startswith("Cannot access memory "): - # - # This is a special case where /proc/$pid/maps - # shows virtual memory address with a read bit, - # but it cannot be read directly from userspace. - # - # See: https://github.com/hugsy/gef/issues/674 - # - err(estr) - return [] - else: - raise e + except gdb.MemoryError as e: + return [] for match in re.finditer(_pattern, mem): start = chunk_addr + match.start()