Skip to content

Commit

Permalink
Fix parsing NULL-terminated UTF-16 REG_SZ type registry values (#19)
Browse files Browse the repository at this point in the history
(DIS-2317)
  • Loading branch information
pyrco committed Oct 4, 2023
1 parent 14cfb86 commit a10fabc
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 1 deletion.
12 changes: 11 additions & 1 deletion dissect/regf/regf.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,17 @@ def try_decode_sz(data):

# This will return the string utf-16-le decoded up until the first
# double NULL byte.
return data.split(b"\x00\x00")[0].decode("utf-16-le")
# A naive split on two NULL bytes does not work: the first NULL byte
# may be the high byte of one character and the second NULL byte the
# low byte of the next character. The terminating double NULL must
# therefore start at an even index in the data.
idx = -1
while (idx := data.find(b"\x00\x00", idx + 1)) & 1:
if idx == -1:
idx = len(data)
break
return data[:idx].decode("utf-16-le")

except UnicodeDecodeError:
# Last ditch effort, decode the whole bytestring as if it were utf-16,
Expand Down
55 changes: 55 additions & 0 deletions tests/test_regf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from dissect.regf import regf


Expand All @@ -22,3 +24,56 @@ def test_regf(system_hive):
assert lsa.subkey("Data").class_name == "a282942c"

assert hive.open("ControlSet001\\Services\\Tcpip\\Parameters\\DNSRegisteredAdapters").class_name == "DynDRootClass"


@pytest.mark.parametrize(
    ("data", "expected"),
    [
        # Empty input decodes to an empty string.
        (b"", ""),
        # Single-byte data with an embedded NULL: cut at the first NULL.
        (b"The Quick Brown Fox\x00Jumped Over The Lazy Dog", "The Quick Brown Fox"),
        # Same, with an additional trailing NULL.
        (b"The Quick Brown Fox\x00Jumped Over The Lazy Dog\x00", "The Quick Brown Fox"),
        # Single-byte data without any NULL.
        (b"The Quick Brown Fox", "The Quick Brown Fox"),
        # UTF-16-LE data with an embedded NULL character.
        (
            "The Quick Brown Fox\x00Jumped Over The Lazy Dog".encode("utf-16-le"),
            "The Quick Brown Fox",
        ),
        # UTF-16-LE data with an embedded and a trailing NULL character.
        (
            "The Quick Brown Fox\x00Jumped Over The Lazy Dog\x00".encode("utf-16-le"),
            "The Quick Brown Fox",
        ),
        # UTF-16-LE data followed by one stray NULL byte (odd total length).
        (
            "The Quick Brown Fox\x00Jumped Over The Lazy Dog".encode("utf-16-le") + b"\x00",
            "The Quick Brown Fox",
        ),
        # UTF-16-LE data without any terminator.
        ("The Quick Brown Fox".encode("utf-16-le"), "The Quick Brown Fox"),
        # Interpreted as latin1.
        (b"\xe4bcd\x00", "äbcd"),
        # Interpreted as utf-16-le.
        (b"\xe4bcd", "拤摣"),
        # Two adjacent NULL bytes that straddle two code units ("A" then
        # U+0100) must not be mistaken for the terminator.
        (b"\x41\x00\x00\x01\x42\x00", "AĀB"),
    ],
)
def test_try_decode_sz(data, expected):
    """Check that ``regf.try_decode_sz`` decodes each raw value as expected."""
    decoded = regf.try_decode_sz(data)
    assert decoded == expected

0 comments on commit a10fabc

Please sign in to comment.