class PVS:
    """Parallels VM settings file.

    The file content is parsed as XML once, on construction.

    Args:
        fh: The file-like object to a PVS file.
    """

    def __init__(self, fh: IO):
        self._xml: Element = ElementTree.fromstring(fh.read())

    def disks(self) -> Iterator[str]:
        """Yield the disk file names.

        Every ``Hdd`` element anywhere in the document is inspected; elements
        without a ``SystemName`` child are skipped.
        """
        system_names = (hdd_elem.find("SystemName") for hdd_elem in self._xml.iterfind(".//Hdd"))
        yield from (name_elem.text for name_elem in system_names if name_elem is not None)
type */ + uint32 m_Heads; /* heads count */ + uint32 m_Cylinders; /* tracks count */ + uint32 m_Sectors; /* Sectors per track count */ + uint32 m_Size; /* Size of disk in tracks */ + union { /* Size of disk in 512-byte sectors */ + struct { + uint32 m_SizeInSectors_v1; + uint32 Unused; + }; + uint64 m_SizeInSectors_v2; + }; + uint32 m_DiskInUse; /* Disk in use */ + uint32 m_FirstBlockOffset; /* First data block offset (in sectors) */ + uint32 m_Flags; /* Misc flags */ + uint64 m_FormatExtensionOffset; /* Optional header offset in bytes */ +}; + +struct pvd_ext_block_check { + // Format Extension magic = 0xAB234CEF23DCEA87 + uint64 m_Magic; + // Md5 checksum of the whole (without top 24 bytes of block check) + // Format Extension Block. + uint8 m_Md5[16]; +}; + +struct pvd_ext_block_element_header { + uint64 magic; + uint64 flags; + uint32 size; + uint32 unused32; +}; + +struct pvd_dirty_bitmap_raw { + uint64 m_Size; + uint8 m_Id[16]; + uint32 m_Granularity; + uint32 m_L1Size; + uint64 m_L1[m_L1Size]; +}; +""" + +c_hdd = cstruct.cstruct() +c_hdd.load(hdd_def) + +SECTOR_SIZE = c_hdd.SECTOR_SIZE diff --git a/dissect/hypervisor/disk/hdd.py b/dissect/hypervisor/disk/hdd.py new file mode 100644 index 0000000..b309959 --- /dev/null +++ b/dissect/hypervisor/disk/hdd.py @@ -0,0 +1,410 @@ +from __future__ import annotations + +from bisect import bisect_right +from dataclasses import dataclass +from functools import cached_property +from pathlib import Path +from typing import BinaryIO, Iterator, Optional, Tuple, Union +from uuid import UUID +from xml.etree.ElementTree import Element + +try: + from defusedxml import ElementTree +except ImportError: + from xml.etree import ElementTree + +from dissect.util.stream import AlignedStream + +from dissect.hypervisor.disk.c_hdd import SECTOR_SIZE, c_hdd +from dissect.hypervisor.exceptions import InvalidHeaderError + +DEFAULT_TOP_GUID = UUID("{5fbaabe3-6958-40ff-92a7-860e329aab41}") +NULL_GUID = 
class HDD:
    """Parallels HDD virtual disk implementation.

    Args:
        path: The path to the .hdd directory or .hdd file in a .hdd directory.
    """

    def __init__(self, path: Path):
        # When handed a file that lives inside a ``.hdd`` directory, work from that directory
        points_into_hdd_dir = path.is_file() and path.parent.suffix.lower() == ".hdd"
        path = path.parent if points_into_hdd_dir else path
        self.path = path

        descriptor_path = path / "DiskDescriptor.xml"
        if not descriptor_path.exists():
            raise ValueError(f"Invalid Parallels HDD path: {path} (missing DiskDescriptor.xml)")

        self.descriptor = Descriptor(descriptor_path)

    def _open_image(self, path: Path) -> BinaryIO:
        """Helper method for opening image files relative to this HDD.

        Relative paths are resolved against the HDD directory. Absolute paths
        that no longer exist (e.g. the HDD was copied or moved) are looked up in
        a few likely locations around the HDD directory instead.

        Args:
            path: The path to the image file to open.
        """
        root = self.path

        # If the path is relative, it's always relative to the HDD root
        if not path.is_absolute():
            return (root / path).open("rb")

        if path.exists():
            return path.open("rb")

        # Example variables:
        #   root = /some/path/example.pvm/example.hdd/
        #   path = /other/path/absolute.pvm/absolute.hdd/absolute.ext
        filename = path.name
        hdd_dir_name = path.parent.name
        pvm_dir_name = path.parent.parent.name

        candidates = (
            # Same HDD directory: /some/path/example.pvm/example.hdd/absolute.ext
            root / filename,
            # Separate HDD directory in the VM directory: /some/path/example.pvm/absolute.hdd/absolute.ext
            root.parent / hdd_dir_name / filename,
            # Other .pvm directory next to ours (linked clones): /some/path/absolute.pvm/absolute.hdd/absolute.ext
            root.parent.parent / pvm_dir_name / hdd_dir_name / filename,
        )

        for candidate in candidates:
            if candidate.exists():
                break

        # If nothing matched, ``candidate`` is the last pattern and opening it raises
        return candidate.open("rb")

    def open(self, guid: Optional[Union[str, UUID]] = None) -> BinaryIO:
        """Open a stream for this HDD, optionally for a specific snapshot.

        If no snapshot GUID is provided, the "top" snapshot will be used.

        Args:
            guid: The snapshot GUID to open.
        """
        if guid and not isinstance(guid, UUID):
            guid = UUID(guid)

        if guid is None:
            guid = self.descriptor.snapshots.top_guid or DEFAULT_TOP_GUID

        # The chain starts at the requested snapshot and walks up its parents;
        # images are stacked in the opposite order so each layer can wrap its parent
        oldest_first = self.descriptor.get_snapshot_chain(guid)[::-1]

        streams = []
        for storage in self.descriptor.storage_data.storages:
            layer = None
            for shot_guid in oldest_first:
                image = storage.find_image(shot_guid)
                fh = self._open_image(Path(image.file))

                if image.type == "Compressed":
                    layer = HDS(fh, parent=layer)
                elif image.type == "Plain":
                    layer = fh
                else:
                    raise ValueError(f"Unsupported image type: {image.type}")

            streams.append((storage, layer))

        return StorageStream(streams)
@dataclass
class XMLEntry:
    """Base class for dataclasses that are built from an XML element.

    A subclass must be named exactly like the XML tag it represents and must
    implement :meth:`_from_xml`.
    """

    @classmethod
    def from_xml(cls, element: Optional[Element]) -> XMLEntry:
        """Validate ``element`` and create an instance of this class from it.

        Args:
            element: The XML element to parse. Its tag must equal the class name.

        Raises:
            ValueError: If ``element`` is ``None`` or its tag does not match the class name.
        """
        # ``Element.find()`` returns None for a missing child. Report that the same way as a
        # wrong tag instead of failing with an AttributeError on ``None.tag``.
        if element is None or element.tag != cls.__name__:
            raise ValueError(f"Invalid {cls.__name__} XML element")
        return cls._from_xml(element)

    @classmethod
    def _from_xml(cls, element: Element) -> XMLEntry:
        """Create an instance from an already validated element. Implemented by subclasses."""
        raise NotImplementedError()
@dataclass
class Snapshots(XMLEntry):
    """The ``Snapshots`` element: an optional top snapshot GUID and a list of shots."""

    # GUID from the ``TopGUID`` child, or None when that child is absent or empty
    top_guid: Optional[UUID]
    # One entry per ``Shot`` child element, in document order
    shots: list[Shot]

    @classmethod
    def _from_xml(cls, element: Element) -> Snapshots:
        """Create a :class:`Snapshots` from a ``Snapshots`` XML element.

        Args:
            element: The ``Snapshots`` XML element.
        """
        top_guid = None
        top_guid_elem = element.find("TopGUID")
        # Compare against None explicitly: an Element without children is falsy, so a plain
        # truth test would skip a perfectly valid ``<TopGUID>...</TopGUID>`` leaf element
        if top_guid_elem is not None:
            text = (top_guid_elem.text or "").strip()
            if text:
                top_guid = UUID(text)

        shots = [Shot.from_xml(shot_elem) for shot_elem in element.iterfind("Shot")]

        return cls(top_guid, shots)

    def find_shot(self, guid: UUID) -> Shot:
        """Find a specific snapshot GUID.

        Args:
            guid: The snapshot GUID to find.

        Raises:
            KeyError: If the GUID could not be found.
        """
        for shot in self.shots:
            if shot.guid == guid:
                return shot

        raise KeyError(f"Shot GUID not found: {guid}")
class HDS(AlignedStream):
    """Parallels HDS implementation.

    HDS is the format for Parallels sparse disk files. Data is stored in clusters that are
    located through a block allocation table (BAT). A BAT entry of 0 means the cluster has no
    data in this file; such clusters are read from the parent if there is one, and as zeroes
    otherwise.

    Args:
        fh: The file-like object to the HDS file.
        parent: Optional file-like object for the parent HDS file.

    Raises:
        InvalidHeaderError: If the header signature is not a known HDS signature.
    """

    def __init__(self, fh: BinaryIO, parent: Optional[BinaryIO] = None):
        self.fh = fh
        self.parent = parent

        self.header = c_hdd.pvd_header(fh)
        if self.header.m_Sig not in (c_hdd.SIGNATURE_STRUCTURED_DISK_V1, c_hdd.SIGNATURE_STRUCTURED_DISK_V2):
            raise InvalidHeaderError(f"Invalid HDS header signature: {self.header.m_Sig}")

        if self.header.m_Sig == c_hdd.SIGNATURE_STRUCTURED_DISK_V1:
            # v1: 32-bit disk size, BAT entries are used as sector numbers directly
            size = self.header.m_SizeInSectors_v1
            self._bat_step = self.header.m_Sectors
            self._bat_multiplier = 1
        else:
            # v2: 64-bit disk size, BAT entries are scaled by the cluster size in sectors
            size = self.header.m_SizeInSectors_v2
            self._bat_step = 1
            self._bat_multiplier = self.header.m_Sectors

        self.cluster_size = self.header.m_Sectors * SECTOR_SIZE

        self.data_offset = self.header.m_FirstBlockOffset
        self.in_use = self.header.m_DiskInUse == c_hdd.SIGNATURE_DISK_IN_USE

        super().__init__(size * SECTOR_SIZE)

    @cached_property
    def bat(self) -> list[int]:
        """Return the block allocation table (BAT).

        The table is read as ``m_Size`` 32-bit entries located directly after the header.
        """
        self.fh.seek(len(c_hdd.pvd_header))
        return c_hdd.uint32[self.header.m_Size](self.fh)

    def _read(self, offset: int, length: int) -> bytes:
        """Read ``length`` bytes at ``offset`` of the virtual disk.

        Args:
            offset: The offset in bytes in the virtual disk.
            length: The amount of bytes to read.
        """
        result = []

        for read_offset, read_size in self._iter_runs(offset, length):
            # Sentinel value for sparse runs
            if read_offset is None:
                if self.parent:
                    # The parent is addressed with virtual disk offsets, not file offsets
                    self.parent.seek(offset)
                    result.append(self.parent.read(read_size))
                else:
                    result.append(b"\x00" * read_size)
            else:
                self.fh.seek(read_offset)
                result.append(self.fh.read(read_size))

            # Keep track of the virtual offset for parent reads of later runs
            offset += read_size
            length -= read_size

        return b"".join(result)

    def _iter_runs(self, offset: int, length: int) -> Iterator[Tuple[Optional[int], int]]:
        """Iterate optimized read runs for a given offset and read length.

        Consecutive sparse clusters are merged into one run, as are allocated clusters that are
        stored back to back in the file.

        Args:
            offset: The offset in bytes to generate runs for.
            length: The length in bytes to generate runs for.

        Yields:
            Tuples of ``(file_offset, size)``. ``file_offset`` is ``None`` for a sparse run,
            i.e. a run that has no data in this file.
        """
        bat = self.bat

        # A run size of 0 means no run has been started yet
        run_offset = None
        run_size = 0

        while offset < self.size and length > 0:
            cluster_idx, offset_in_cluster = divmod(offset, self.cluster_size)
            read_size = min(self.cluster_size - offset_in_cluster, length)

            bat_entry = bat[cluster_idx]
            if bat_entry == 0:
                # BAT entry of 0 means either a sparse or a parent read
                read_offset = None
            else:
                read_offset = (bat_entry * self._bat_multiplier * SECTOR_SIZE) + offset_in_cluster

            if run_size == 0:
                # First iteration
                run_offset = read_offset
                run_size = read_size
            elif run_offset is None and read_offset is None:
                # Consecutive sparse clusters
                run_size += read_size
            elif run_offset is not None and read_offset is not None and read_offset == run_offset + run_size:
                # Consecutive allocated clusters
                # Sparse runs are deliberately excluded from this arithmetic: a sparse run of N bytes
                # followed by data stored at file offset N must not be merged into one sparse run
                run_size += read_size
            else:
                # New run
                yield (run_offset, run_size)

                # Reset run
                run_offset = read_offset
                run_size = read_size

            offset += read_size
            length -= read_size

        if run_size:
            # Flush remaining run
            yield (run_offset, run_size)
+ + {0610bb35-447e-4aae-aa79-f1571d969081} + expanding + + level2 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + + 0 + 204800 + 2048 + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + Compressed + expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds + + + + + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + {00000000-0000-0000-0000-000000000000} + + + diff --git a/tests/data/expanding.hdd/expanding.hdd b/tests/data/expanding.hdd/expanding.hdd new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/expanding.hdd/expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/expanding.hdd/expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz new file mode 100644 index 0000000..90789fd Binary files /dev/null and b/tests/data/expanding.hdd/expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ diff --git a/tests/data/plain.hdd/DiskDescriptor.xml b/tests/data/plain.hdd/DiskDescriptor.xml new file mode 100644 index 0000000..c9653f3 --- /dev/null +++ b/tests/data/plain.hdd/DiskDescriptor.xml @@ -0,0 +1,52 @@ + + + + 204800 + 400 + 4096 + 512 + 16 + 32 + 0 + + {00000000-0000-0000-0000-000000000000} + + + + {4be4afe0-ff6f-4544-b16c-d98d170a029c} + plain + + level2 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + + 0 + 204800 + 2048 + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + Plain + plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds + + + + + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + {00000000-0000-0000-0000-000000000000} + + + diff --git a/tests/data/plain.hdd/plain.hdd b/tests/data/plain.hdd/plain.hdd new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/plain.hdd/plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/plain.hdd/plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz new file mode 100644 index 0000000..6a366d9 Binary files /dev/null and b/tests/data/plain.hdd/plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ diff --git 
a/tests/data/split.hdd/DiskDescriptor.xml b/tests/data/split.hdd/DiskDescriptor.xml new file mode 100644 index 0000000..bdeaf83 --- /dev/null +++ b/tests/data/split.hdd/DiskDescriptor.xml @@ -0,0 +1,102 @@ + + + + 20971520 + 40960 + 4096 + 512 + 16 + 32 + 0 + + {00000000-0000-0000-0000-000000000000} + + + + {d6e2bfb7-109e-4f6b-954c-6e2e7ae60d5a} + split + + level2 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 1 + 0 + + + + + 0 + 3989504 + 2048 + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + Compressed + split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds + + + + 3989504 + 7979008 + 2048 + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + Compressed + split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds + + + + 7979008 + 11968512 + 2048 + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + Compressed + split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds + + + + 11968512 + 15958016 + 2048 + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + Compressed + split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds + + + + 15958016 + 19947520 + 2048 + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + Compressed + split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds + + + + 19947520 + 20971520 + 2048 + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + Compressed + split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds + + + + + + {5fbaabe3-6958-40ff-92a7-860e329aab41} + {00000000-0000-0000-0000-000000000000} + + + diff --git a/tests/data/split.hdd/split.hdd b/tests/data/split.hdd/split.hdd new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/split.hdd/split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz new file mode 100644 index 0000000..a1163e7 Binary files /dev/null and b/tests/data/split.hdd/split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ diff --git a/tests/data/split.hdd/split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz 
b/tests/data/split.hdd/split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz new file mode 100644 index 0000000..25f67c6 Binary files /dev/null and b/tests/data/split.hdd/split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ diff --git a/tests/data/split.hdd/split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz new file mode 100644 index 0000000..db4722f Binary files /dev/null and b/tests/data/split.hdd/split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ diff --git a/tests/data/split.hdd/split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz new file mode 100644 index 0000000..e92ddcb Binary files /dev/null and b/tests/data/split.hdd/split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ diff --git a/tests/data/split.hdd/split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz new file mode 100644 index 0000000..b0451d0 Binary files /dev/null and b/tests/data/split.hdd/split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ diff --git a/tests/data/split.hdd/split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz new file mode 100644 index 0000000..0bdc3c8 Binary files /dev/null and b/tests/data/split.hdd/split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ diff --git a/tests/test_hdd.py b/tests/test_hdd.py new file mode 100644 index 0000000..8294fb5 --- /dev/null +++ b/tests/test_hdd.py @@ -0,0 +1,83 @@ +import gzip +from pathlib import Path +from unittest.mock import patch + +from dissect.hypervisor.disk.hdd import HDD + +Path_open = Path.open + + +def mock_open_gz(self, *args, **kwargs): + if self.suffix.lower() != ".hds": + return Path_open(self, *args, **kwargs) + + return 
def test_plain_hdd(plain_hdd):
    """Open the plain (non-sparse) test disk and verify its descriptor and content.

    Each 1 MiB chunk ``i`` of the test disk is expected to consist solely of byte value ``i``.
    """
    chunk_size = 1024 * 1024

    hdd = HDD(Path(plain_hdd))
    storages = hdd.descriptor.storage_data.storages

    assert len(storages) == 1
    assert storages[0].start == 0
    assert storages[0].end == 204800
    assert len(storages[0].images) == 1
    assert storages[0].images[0].type == "Plain"

    # The image files are stored gzip compressed in the test data directory
    with patch.object(Path, "open", mock_open_gz):
        stream = hdd.open()

        for i in range(100):
            chunk = stream.read(chunk_size)
            # Check the length as well: stripping an empty or short read also yields b""
            assert len(chunk) == chunk_size
            assert chunk.strip(bytes([i])) == b""
import StringIO + +from dissect.hypervisor.descriptor.pvs import PVS + + +def test_pvs(): + xml = """ + + + + + Fedora-0.hdd + + + + """ # noqa: E501 + + with StringIO(xml.strip()) as fh: + pvs = PVS(fh) + assert next(pvs.disks()) == "Fedora-0.hdd"