Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent duplicate or overlapping blocks #7

Merged
merged 13 commits into from
Nov 14, 2022
38 changes: 19 additions & 19 deletions dissect/evidence/asdf/asdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@
import uuid
from bisect import bisect_right
from collections import defaultdict
from typing import BinaryIO, Callable, Optional, Tuple
from typing import BinaryIO, Callable, Optional

from dissect import cstruct
from dissect.util import ts
from dissect.util.stream import AlignedStream, RangeStream
from dissect.util.stream import AlignedStream

from dissect.evidence.asdf.streams import CompressedStream, Crc32Stream, HashedStream
from dissect.evidence.exceptions import (
Expand All @@ -22,6 +22,8 @@
UnsupportedVersion,
)

SnapshotTableEntry = tuple[int, int, int, int]

VERSION = 1
DEFAULT_BLOCK_SIZE = 4096
MAX_BLOCK_TABLE_SIZE = 2**32
Expand Down Expand Up @@ -213,7 +215,7 @@ def copy_block(
def copy_runlist(
self,
source: BinaryIO,
runlist: list[Tuple[Optional[int], int]],
runlist: list[tuple[Optional[int], int]],
runlist_block_size: int,
idx: int = 0,
base: int = 0,
Expand Down Expand Up @@ -309,8 +311,8 @@ def _write_block(self, source: BinaryIO, offset: int, size: int, idx: int = 0, b
block.write(self.fh)
data_offset = self.fh.tell() # Block data location

source_stream = RangeStream(source, offset, size)
shutil.copyfileobj(source_stream, outfh)
source.seek(offset)
shutil.copyfileobj(source, outfh, size)
# This writes any remaining data or footer for each block writer
outfh.finalize()

Expand Down Expand Up @@ -371,30 +373,28 @@ def __init__(self, fh: BinaryIO):
self.timestamp = ts.from_unix(self.header.timestamp)
self.guid = uuid.UUID(bytes_le=self.header.guid)

self.table: dict[list[Tuple[int, int, int, int]]] = defaultdict(list)
self.table: dict[list[SnapshotTableEntry]] = defaultdict(list)
self._table_lookup: dict[list[int]] = defaultdict(list)

self.fh.seek(-len(c_asdf.footer), io.SEEK_END)
self.footer_offset = self.fh.tell()
footer_offset = self.fh.seek(-len(c_asdf.footer), io.SEEK_END)

self.footer = c_asdf.footer(self.fh)
if self.footer.magic != FOOTER_MAGIC:
raise InvalidSnapshot("invalid footer magic")

self._parse_block_table()
self._parse_block_table(
self.footer.table_offset,
(footer_offset - self.footer.table_offset) // len(c_asdf.table_entry),
)

self.metadata = Metadata(self)

def _parse_block_table(self) -> None:
def _parse_block_table(self, offset: int, count: int) -> None:
"""Parse the block table, getting rid of overlapping blocks."""
table_offset = self.footer.table_offset
table_size = self.footer_offset - table_offset
table_count = table_size // len(c_asdf.table_entry)

self.fh.seek(table_offset)
table_data = io.BytesIO(self.fh.read(table_size))
self.fh.seek(offset)
table_data = io.BytesIO(self.fh.read(count * len(c_asdf.table_entry)))

for _ in range(table_count):
for _ in range(count):
entry = c_asdf.table_entry(table_data)
self._table_insert(entry.idx, entry.offset, entry.size, entry.file_offset)

Expand Down Expand Up @@ -512,7 +512,7 @@ def _read(self, offset: int, length: int) -> bytes:
run_idx = bisect_right(self._table_lookup, offset) - 1
runlist_len = len(self.table)

while length > 0 and run_idx < len(self.table):
while length > 0 and run_idx < runlist_len:
run_start, run_size, run_file_offset, run_data_offset = self.table[run_idx]
run_end = run_start + run_size

Expand Down Expand Up @@ -566,7 +566,7 @@ def _read(self, offset: int, length: int) -> bytes:
raise InvalidBlock("invalid block magic")

# Skip over block header
self.fh.seek(run_data_offset)
self.fh.seek(run_data_offset + run_pos)
r.append(self.fh.read(read_count))

# Proceed to next run
Expand Down
19 changes: 19 additions & 0 deletions tests/test_asdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,25 @@ def test_asdf_overlap_seek():
assert stream.read() == (b"\x00" * 100) + bytes(range(50, 150)) + (b"\x00" * 100)


def test_asdf_mid_run():
    """Read 512 bytes starting at an offset that is not a multiple of the stream alignment.

    The written data is a repeating 0x00-0xFF pattern, so the value expected at
    any offset is simply ``offset & 0xFF``. The read must return the bytes that
    live at offset 1100, not the bytes from the start of the stored data.
    """
    fh = BytesIO()
    fh.close = noop  # Prevent clearing the buffer, we need it

    writer = AsdfWriter(fh)

    # 4096 bytes of predictable content written at base 0 of the default stream.
    writer.add_bytes(bytes([v & 0xFF for v in range(4096)]), base=0)

    writer.close()
    fh.seek(0)

    reader = AsdfSnapshot(fh)
    stream = reader.open(0)
    stream.align = 512

    # 1100 is deliberately not a multiple of 512.
    # NOTE(review): presumably this makes the underlying read start part-way
    # through a stored run - confirm against the stream's _read implementation.
    stream.seek(1100)
    assert stream.read(512) == bytes([v & 0xFF for v in range(1100, 1100 + 512)])


def test_asdf_metadata():
fh = BytesIO()
fh.close = noop # Prevent clearing the buffer, we need it
Expand Down