diff --git a/dissect/hypervisor/__init__.py b/dissect/hypervisor/__init__.py
index c872697..1ad7286 100644
--- a/dissect/hypervisor/__init__.py
+++ b/dissect/hypervisor/__init__.py
@@ -1,10 +1,11 @@
from dissect.hypervisor.backup import vma, wim, xva
from dissect.hypervisor.descriptor import hyperv, ovf, vbox, vmx
-from dissect.hypervisor.disk import qcow2, vdi, vhd, vhdx, vmdk
+from dissect.hypervisor.disk import hdd, qcow2, vdi, vhd, vhdx, vmdk
from dissect.hypervisor.util import envelope, vmtar
__all__ = [
"envelope",
+ "hdd",
"hyperv",
"ovf",
"qcow2",
diff --git a/dissect/hypervisor/disk/c_hdd.py b/dissect/hypervisor/disk/c_hdd.py
new file mode 100644
index 0000000..5c04158
--- /dev/null
+++ b/dissect/hypervisor/disk/c_hdd.py
@@ -0,0 +1,82 @@
+from dissect import cstruct
+
+hdd_def = """
+/* Compressed disk (version 1) */
+#define PRL_IMAGE_COMPRESSED 2
+
+/* Compressed disk v1 signature */
+#define SIGNATURE_STRUCTURED_DISK_V1 b"WithoutFreeSpace"
+
+/* Compressed disk v2 signature */
+#define SIGNATURE_STRUCTURED_DISK_V2 b"WithouFreSpacExt"
+
+/* Signature indicating the disk is currently in use */
+#define SIGNATURE_DISK_IN_USE 0x746F6E59
+
+/**
+ * Compressed disk image flags
+ */
+#define CIF_NoFlags 0x00000000 /* No flags set */
+#define CIF_Empty 0x00000001 /* No data has been written */
+#define CIF_FmtVersionConvert 0x00000002 /* Format version conversion in progress */
+#define CIF_FlagsMask (CIF_Empty | CIF_FmtVersionConvert)
+#define CIF_Invalid 0xFFFFFFFF /* Invalid flag */
+
+#define SECTOR_LOG 9
+#define DEF_CLUSTER_LOG 11 /* 1M cluster-block */
+#define DEF_CLUSTER (1 << (DEF_CLUSTER_LOG + SECTOR_LOG))
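+/* i.e. DEF_CLUSTER = 1 << (11 + 9) = 1048576 bytes (1 MiB) */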
+
+/* Helpers to generate PVD-header based on requested bdsize */
+
+#define DEFAULT_HEADS_COUNT 16
+#define DEFAULT_SECTORS_COUNT 63
+#define SECTOR_SIZE (1 << SECTOR_LOG)
+
+struct pvd_header {
+ char m_Sig[16]; /* Signature */
+ uint32 m_Type; /* Disk type */
+ uint32 m_Heads; /* heads count */
+ uint32 m_Cylinders; /* tracks count */
+ uint32 m_Sectors; /* Sectors per track count */
+ uint32 m_Size; /* Size of disk in tracks */
+ union { /* Size of disk in 512-byte sectors */
+ struct {
+ uint32 m_SizeInSectors_v1;
+ uint32 Unused;
+ };
+ uint64 m_SizeInSectors_v2;
+ };
+ uint32 m_DiskInUse; /* Disk in use */
+ uint32 m_FirstBlockOffset; /* First data block offset (in sectors) */
+ uint32 m_Flags; /* Misc flags */
+ uint64 m_FormatExtensionOffset; /* Optional header offset in bytes */
+};
+
+struct pvd_ext_block_check {
+ // Format Extension magic = 0xAB234CEF23DCEA87
+ uint64 m_Magic;
+    // MD5 checksum of the whole Format Extension Block
+    // (excluding the top 24 bytes of the block check)
+ uint8 m_Md5[16];
+};
+
+struct pvd_ext_block_element_header {
+ uint64 magic;
+ uint64 flags;
+ uint32 size;
+ uint32 unused32;
+};
+
+struct pvd_dirty_bitmap_raw {
+ uint64 m_Size;
+ uint8 m_Id[16];
+ uint32 m_Granularity;
+ uint32 m_L1Size;
+ uint64 m_L1[m_L1Size];
+};
+"""
+
+c_hdd = cstruct.cstruct()
+c_hdd.load(hdd_def)
+
+SECTOR_SIZE = c_hdd.SECTOR_SIZE
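+
+# A minimal sketch of using these definitions directly (hypothetical file name):
+#
+#     with open("disk.hds", "rb") as fh:
+#         header = c_hdd.pvd_header(fh)
+#         if header.m_Sig not in (
+#             c_hdd.SIGNATURE_STRUCTURED_DISK_V1,
+#             c_hdd.SIGNATURE_STRUCTURED_DISK_V2,
+#         ):
+#             raise ValueError("Not a Parallels sparse disk")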
diff --git a/dissect/hypervisor/disk/hdd.py b/dissect/hypervisor/disk/hdd.py
new file mode 100644
index 0000000..4d7d83e
--- /dev/null
+++ b/dissect/hypervisor/disk/hdd.py
@@ -0,0 +1,392 @@
+from __future__ import annotations
+
+from bisect import bisect_right
+from dataclasses import dataclass
+from functools import cached_property
+from pathlib import Path
+from typing import BinaryIO, Iterator, Optional, Union
+from uuid import UUID
+from xml.etree.ElementTree import Element
+
+try:
+ from defusedxml import ElementTree
+except ImportError:
+ from xml.etree import ElementTree
+
+from dissect.util.stream import AlignedStream
+
+from dissect.hypervisor.disk.c_hdd import SECTOR_SIZE, c_hdd
+from dissect.hypervisor.exceptions import InvalidHeaderError
+
+DEFAULT_TOP_GUID = UUID("{5fbaabe3-6958-40ff-92a7-860e329aab41}")
+
+
+class HDD:
+ """Parallels HDD virtual disk implementation.
+
+ Args:
+        path: The path to the .hdd directory, or to a file inside a .hdd directory.
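+
+    Example::
+
+        # A minimal usage sketch (hypothetical path):
+        hdd = HDD(Path("/vms/example.pvm/example.hdd"))
+        stream = hdd.open()  # opens the top snapshot
+        mbr = stream.read(512)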
+ """
+
+ def __init__(self, path: Path):
+ if path.is_file() and path.parent.suffix.lower() == ".hdd":
+ path = path.parent
+ self.path = path
+
+ descriptor_path = path.joinpath("DiskDescriptor.xml")
+ if not descriptor_path.exists():
+ raise ValueError("Invalid Parallels HDD path (missing DiskDescriptor.xml)")
+
+ self.descriptor = Descriptor(descriptor_path)
+
+ def _open_image(self, path: Path) -> BinaryIO:
+ """Helper method for opening image files relative to this HDD.
+
+ Args:
+ path: The path to the image file to open.
+ """
+ root = self.path
+ filename = path.name
+
+ if path.is_absolute():
+ if not path.exists():
+ # File is in same directory
+ candidate_path = root / filename
+ if not candidate_path.exists():
+ # File is in .hdd directory in parent (VM) directory
+ candidate_path = root.parent / path.parent.name / filename
+
+ if not candidate_path.exists():
+ # File is in .pvm directory in parent of parent directory
+ candidate_path = root.parent.parent / path.parent.parent.name / path.parent.name / filename
+
+ path = candidate_path
+
+ return path.open("rb")
+
+ return (root / path).open("rb")
+
+ def open(self, guid: Optional[Union[str, UUID]] = None) -> BinaryIO:
+ """Open a stream for this HDD, optionally for a specific snapshot.
+
+ If no snapshot GUID is provided, the "top" snapshot will be used.
+
+ Args:
+ guid: The snapshot GUID to open.
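+
+        Example::
+
+            # A sketch with a hypothetical snapshot GUID:
+            stream = hdd.open("{12345678-1234-1234-1234-123456789abc}")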
+ """
+ if guid and not isinstance(guid, UUID):
+ guid = UUID(guid)
+
+ if guid is None:
+ guid = self.descriptor.snapshots.top_guid or DEFAULT_TOP_GUID
+
+ chain = self.descriptor.get_chain(guid)
+
+ streams = []
+ for storage in self.descriptor.storage_data.storages:
+ stream = None
+            for image_guid in chain[::-1]:
+                image = storage.find_image(image_guid)
+ fh = self._open_image(Path(image.file))
+
+ if image.type == "Compressed":
+ fh = HDS(fh, parent=stream)
+ elif image.type != "Plain":
+ raise ValueError(f"Unsupported image type: {image.type}")
+
+ stream = fh
+
+ streams.append((storage, stream))
+
+ return StorageStream(streams)
+
+
+class Descriptor:
+ """Helper class for working with ``DiskDescriptor.xml``.
+
+ Args:
+ path: The path to ``DiskDescriptor.xml``.
+ """
+
+ def __init__(self, path: Path):
+ self.path = path
+
+ self.xml: Element = ElementTree.fromstring(path.read_text())
+ self.storage_data = StorageData.from_xml(self.xml.find("StorageData"))
+ self.snapshots = Snapshots.from_xml(self.xml.find("Snapshots"))
+
+ def get_chain(self, guid: UUID) -> list[UUID]:
+ """Return the snapshot chain for a given snapshot GUID.
+
+ Args:
+ guid: The snapshot GUID to return a chain for.
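+
+        Returns:
+            The chain of snapshot GUIDs, ordered from the requested snapshot up
+            to the base image: ``[requested, parent, ..., base]``.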
+ """
+ shot = self.snapshots.find_shot(guid)
+
+ chain = [shot.guid]
+ while shot.parent != UUID("00000000-0000-0000-0000-000000000000"):
+ shot = self.snapshots.find_shot(shot.parent)
+ chain.append(shot.guid)
+
+ return chain
+
+
+@dataclass
+class StorageData:
+ storages: list[Storage]
+
+ @classmethod
+ def from_xml(cls, element: Element) -> StorageData:
+ if element.tag != "StorageData":
+ raise ValueError("Invalid StorageData XML element")
+
+ return cls(list(map(Storage.from_xml, element.iterfind("Storage"))))
+
+
+@dataclass
+class Storage:
+ start: int
+ end: int
+ images: list[Image]
+
+ @classmethod
+ def from_xml(cls, element: Element) -> Storage:
+ if element.tag != "Storage":
+ raise ValueError("Invalid Storage XML element")
+
+ start = int(element.find("Start").text)
+ end = int(element.find("End").text)
+ images = list(map(Image.from_xml, element.iterfind("Image")))
+
+ return cls(start, end, images)
+
+ def find_image(self, guid: UUID) -> Image:
+ """Find a specific image GUID.
+
+ Args:
+ guid: The image GUID to find.
+
+ Raises:
+ KeyError: If the GUID could not be found.
+ """
+ for image in self.images:
+ if image.guid == guid:
+ return image
+
+ raise KeyError(f"Image GUID not found: {guid}")
+
+
+@dataclass
+class Image:
+ guid: UUID
+ type: str
+ file: str
+
+ @classmethod
+ def from_xml(cls, element: Element) -> Image:
+ if element.tag != "Image":
+ raise ValueError("Invalid Image XML element")
+
+ return cls(
+ UUID(element.find("GUID").text),
+ element.find("Type").text,
+ element.find("File").text,
+ )
+
+
+@dataclass
+class Snapshots:
+ top_guid: Optional[UUID]
+ shots: list[Shot]
+
+ @classmethod
+ def from_xml(cls, element: Element) -> Snapshots:
+ if element.tag != "Snapshots":
+ raise ValueError("Invalid Snapshots XML element")
+
+ top_guid = element.find("TopGUID")
+        if top_guid is not None:
+ top_guid = UUID(top_guid.text)
+ shots = list(map(Shot.from_xml, element.iterfind("Shot")))
+
+ return cls(top_guid, shots)
+
+ def find_shot(self, guid: UUID) -> Shot:
+ """Find a specific snapshot GUID.
+
+ Args:
+ guid: The snapshot GUID to find.
+
+ Raises:
+ KeyError: If the GUID could not be found.
+ """
+ for shot in self.shots:
+ if shot.guid == guid:
+ return shot
+
+ raise KeyError(f"Shot GUID not found: {guid}")
+
+
+@dataclass
+class Shot:
+ guid: UUID
+ parent: UUID
+
+ @classmethod
+ def from_xml(cls, element: Element) -> Shot:
+ if element.tag != "Shot":
+ raise ValueError("Invalid Shot XML element")
+
+ return cls(
+ UUID(element.find("GUID").text),
+ UUID(element.find("ParentGUID").text),
+ )
+
+
+class StorageStream(AlignedStream):
+ """Stream implementation for HDD streams.
+
+    An HDD disk can consist of one or more image streams covering consecutive
+    sector ranges. This class stitches them together into a single stream.
+
+ Args:
+ streams: A list of :class:`Storage` and file-like object tuples.
+ """
+
+ def __init__(self, streams: list[tuple[Storage, BinaryIO]]):
+ self.streams = streams
+ self._lookup = []
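+        # _lookup holds the start sector of each stream except the first, so
+        # bisect_right maps an absolute sector to the index of its stream.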
+
+ size = 0
+ for storage, _ in streams:
+ if size != 0:
+ self._lookup.append(storage.start)
+ size = storage.end
+
+ super().__init__(size * SECTOR_SIZE)
+
+ def _read(self, offset: int, length: int) -> bytes:
+ sector = offset // SECTOR_SIZE
+ count = (length + SECTOR_SIZE - 1) // SECTOR_SIZE
+
+ result = []
+ stream_idx = bisect_right(self._lookup, sector)
+ while count > 0:
+ storage, stream = self.streams[stream_idx]
+ sectors_remaining = storage.end - sector
+ read_sectors = min(sectors_remaining, count)
+
+ stream.seek((sector - storage.start) * SECTOR_SIZE)
+ result.append(stream.read(read_sectors * SECTOR_SIZE))
+
+ sector += read_sectors
+ count -= read_sectors
+ stream_idx += 1
+
+ return b"".join(result)
+
+
+class HDS(AlignedStream):
+ """Parallels HDS implementation.
+
+ HDS is the format for Parallels sparse disk files.
+
+ Args:
+ fh: The file-like object to the HDS file.
+ parent: Optional file-like object for the parent HDS file.
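+
+    Example::
+
+        # A minimal sketch (hypothetical file name):
+        with open("harddisk.hds", "rb") as fh:
+            hds = HDS(fh)
+            boot_sector = hds.read(512)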
+ """
+
+ def __init__(self, fh: BinaryIO, parent: Optional[BinaryIO] = None):
+ self.fh = fh
+ self.parent = parent
+
+ self.header = c_hdd.pvd_header(fh)
+ if self.header.m_Sig not in (c_hdd.SIGNATURE_STRUCTURED_DISK_V1, c_hdd.SIGNATURE_STRUCTURED_DISK_V2):
+ raise InvalidHeaderError(f"Invalid HDS header signature: {self.header.m_Sig}")
+
+        if self.header.m_Sig == c_hdd.SIGNATURE_STRUCTURED_DISK_V1:
+            size = self.header.m_SizeInSectors_v1
+            # v1 BAT entries are absolute sector offsets
+            self._bat_step = self.header.m_Sectors
+            self._bat_multiplier = 1
+        else:
+            size = self.header.m_SizeInSectors_v2
+            # v2 BAT entries are cluster indices, scaled by sectors per cluster
+            self._bat_step = 1
+            self._bat_multiplier = self.header.m_Sectors
+
+ self.cluster_size = self.header.m_Sectors * SECTOR_SIZE
+
+ self.data_offset = self.header.m_FirstBlockOffset
+ self.in_use = self.header.m_DiskInUse == c_hdd.SIGNATURE_DISK_IN_USE
+
+ super().__init__(size * SECTOR_SIZE)
+
+ @cached_property
+ def bat(self) -> list[int]:
+ """Return the block allocation table (BAT)."""
+ self.fh.seek(len(c_hdd.pvd_header))
+ return c_hdd.uint32[self.header.m_Size](self.fh)
+
+ def _read(self, offset: int, length: int) -> bytes:
+ result = []
+
+ for read_offset, read_size in self._iter_runs(offset, length):
+ # Sentinel value for sparse runs
+ if read_offset is None:
+ if self.parent:
+ self.parent.seek(offset)
+ result.append(self.parent.read(read_size))
+ else:
+ result.append(b"\x00" * read_size)
+ else:
+ self.fh.seek(read_offset)
+ result.append(self.fh.read(read_size))
+
+ offset += read_size
+ length -= read_size
+
+ return b"".join(result)
+
+    def _iter_runs(self, offset: int, length: int) -> Iterator[tuple[Optional[int], int]]:
+ """Iterate optimized read runs for a given offset and read length.
+
+ Args:
+ offset: The offset in bytes to generate runs for.
+ length: The length in bytes to generate runs for.
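+
+        Yields:
+            ``(offset, size)`` tuples; ``offset`` is ``None`` for sparse runs,
+            which are served from the parent stream or zero-filled.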
+ """
+ bat = self.bat
+
+ run_offset = None
+ run_size = 0
+
+ while offset < self.size and length > 0:
+ cluster_idx, offset_in_cluster = divmod(offset, self.cluster_size)
+ read_size = min(self.cluster_size - offset_in_cluster, length)
+
+ bat_entry = bat[cluster_idx]
+ if bat_entry == 0:
+ read_offset = 0
+ else:
+ read_offset = (bat_entry * self._bat_multiplier * SECTOR_SIZE) + offset_in_cluster
+
+ if run_offset is None:
+ # First iteration
+ run_offset = read_offset
+ run_size = read_size
+ elif read_offset == run_offset + run_size or (run_offset, read_offset) == (0, 0):
+ # Consecutive (sparse) clusters
+ run_size += read_size
+ else:
+ # New run
+ # Replace 0 with None as sparse sentinel
+ yield (run_offset or None, run_size)
+
+ # Reset run
+ run_offset = read_offset
+ run_size = read_size
+
+ offset += read_size
+ length -= read_size
+
+ if run_offset is not None:
+ # Flush remaining run
+ yield (run_offset or None, run_size)
diff --git a/tests/conftest.py b/tests/conftest.py
index 982f427..fedce99 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -63,6 +63,21 @@ def sesparse_vmdk():
yield from open_file_gz("data/sesparse.vmdk.gz")
+@pytest.fixture
+def plain_hdd():
+ yield absolute_path("data/plain.hdd")
+
+
+@pytest.fixture
+def expanding_hdd():
+ yield absolute_path("data/expanding.hdd")
+
+
+@pytest.fixture
+def split_hdd():
+ yield absolute_path("data/split.hdd")
+
+
@pytest.fixture
def simple_vma():
yield from open_file_gz("data/test.vma.gz")
diff --git a/tests/data/expanding.hdd/DiskDescriptor.xml b/tests/data/expanding.hdd/DiskDescriptor.xml
new file mode 100644
index 0000000..cefe2ca
--- /dev/null
+++ b/tests/data/expanding.hdd/DiskDescriptor.xml
@@ -0,0 +1,52 @@
+
+
+
+ 204800
+ 400
+ 4096
+ 512
+ 16
+ 32
+ 0
+
+ {00000000-0000-0000-0000-000000000000}
+
+
+
+ {0610bb35-447e-4aae-aa79-f1571d969081}
+ expanding
+
+ level2
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+
+
+
+
+ 0
+ 204800
+ 2048
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ Compressed
+ expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds
+
+
+
+
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ {00000000-0000-0000-0000-000000000000}
+
+
+
diff --git a/tests/data/expanding.hdd/expanding.hdd b/tests/data/expanding.hdd/expanding.hdd
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/expanding.hdd/expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/expanding.hdd/expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz
new file mode 100644
index 0000000..90789fd
Binary files /dev/null and b/tests/data/expanding.hdd/expanding.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ
diff --git a/tests/data/plain.hdd/DiskDescriptor.xml b/tests/data/plain.hdd/DiskDescriptor.xml
new file mode 100644
index 0000000..c9653f3
--- /dev/null
+++ b/tests/data/plain.hdd/DiskDescriptor.xml
@@ -0,0 +1,52 @@
+
+
+
+ 204800
+ 400
+ 4096
+ 512
+ 16
+ 32
+ 0
+
+ {00000000-0000-0000-0000-000000000000}
+
+
+
+ {4be4afe0-ff6f-4544-b16c-d98d170a029c}
+ plain
+
+ level2
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+
+
+
+
+ 0
+ 204800
+ 2048
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ Plain
+ plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds
+
+
+
+
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ {00000000-0000-0000-0000-000000000000}
+
+
+
diff --git a/tests/data/plain.hdd/plain.hdd b/tests/data/plain.hdd/plain.hdd
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/plain.hdd/plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/plain.hdd/plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz
new file mode 100644
index 0000000..6a366d9
Binary files /dev/null and b/tests/data/plain.hdd/plain.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ
diff --git a/tests/data/split.hdd/DiskDescriptor.xml b/tests/data/split.hdd/DiskDescriptor.xml
new file mode 100644
index 0000000..bdeaf83
--- /dev/null
+++ b/tests/data/split.hdd/DiskDescriptor.xml
@@ -0,0 +1,102 @@
+
+
+
+ 20971520
+ 40960
+ 4096
+ 512
+ 16
+ 32
+ 0
+
+ {00000000-0000-0000-0000-000000000000}
+
+
+
+ {d6e2bfb7-109e-4f6b-954c-6e2e7ae60d5a}
+ split
+
+ level2
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 0
+ 1
+ 0
+
+
+
+
+ 0
+ 3989504
+ 2048
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ Compressed
+ split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds
+
+
+
+ 3989504
+ 7979008
+ 2048
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ Compressed
+ split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds
+
+
+
+ 7979008
+ 11968512
+ 2048
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ Compressed
+ split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds
+
+
+
+ 11968512
+ 15958016
+ 2048
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ Compressed
+ split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds
+
+
+
+ 15958016
+ 19947520
+ 2048
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ Compressed
+ split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds
+
+
+
+ 19947520
+ 20971520
+ 2048
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ Compressed
+ split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds
+
+
+
+
+
+ {5fbaabe3-6958-40ff-92a7-860e329aab41}
+ {00000000-0000-0000-0000-000000000000}
+
+
+
diff --git a/tests/data/split.hdd/split.hdd b/tests/data/split.hdd/split.hdd
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/split.hdd/split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz
new file mode 100644
index 0000000..a1163e7
Binary files /dev/null and b/tests/data/split.hdd/split.hdd.0.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ
diff --git a/tests/data/split.hdd/split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz
new file mode 100644
index 0000000..25f67c6
Binary files /dev/null and b/tests/data/split.hdd/split.hdd.1.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ
diff --git a/tests/data/split.hdd/split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz
new file mode 100644
index 0000000..db4722f
Binary files /dev/null and b/tests/data/split.hdd/split.hdd.2.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ
diff --git a/tests/data/split.hdd/split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz
new file mode 100644
index 0000000..e92ddcb
Binary files /dev/null and b/tests/data/split.hdd/split.hdd.3.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ
diff --git a/tests/data/split.hdd/split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz
new file mode 100644
index 0000000..b0451d0
Binary files /dev/null and b/tests/data/split.hdd/split.hdd.4.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ
diff --git a/tests/data/split.hdd/split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz b/tests/data/split.hdd/split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz
new file mode 100644
index 0000000..0bdc3c8
Binary files /dev/null and b/tests/data/split.hdd/split.hdd.5.{5fbaabe3-6958-40ff-92a7-860e329aab41}.hds.gz differ
diff --git a/tests/test_hdd.py b/tests/test_hdd.py
new file mode 100644
index 0000000..8294fb5
--- /dev/null
+++ b/tests/test_hdd.py
@@ -0,0 +1,83 @@
+import gzip
+from pathlib import Path
+from unittest.mock import patch
+
+from dissect.hypervisor.disk.hdd import HDD
+
+Path_open = Path.open
+
+
+def mock_open_gz(self, *args, **kwargs):
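+    """Redirect ``.hds`` opens to their gzip-compressed test fixtures."""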
+ if self.suffix.lower() != ".hds":
+ return Path_open(self, *args, **kwargs)
+
+ return gzip.open(self.with_suffix(self.suffix + ".gz"))
+
+
+def test_plain_hdd(plain_hdd):
+ hdd = HDD(Path(plain_hdd))
+ storages = hdd.descriptor.storage_data.storages
+
+ assert len(storages) == 1
+ assert storages[0].start == 0
+ assert storages[0].end == 204800
+ assert len(storages[0].images) == 1
+ assert storages[0].images[0].type == "Plain"
+
+ with patch.object(Path, "open", mock_open_gz):
+ stream = hdd.open()
+
+ for i in range(100):
+ assert stream.read(1024 * 1024).strip(bytes([i])) == b""
+
+
+def test_expanding_hdd(expanding_hdd):
+ hdd = HDD(Path(expanding_hdd))
+ storages = hdd.descriptor.storage_data.storages
+
+ assert len(storages) == 1
+ assert storages[0].start == 0
+ assert storages[0].end == 204800
+ assert len(storages[0].images) == 1
+ assert storages[0].images[0].type == "Compressed"
+
+ with patch.object(Path, "open", mock_open_gz):
+ stream = hdd.open()
+
+ for i in range(100):
+ assert stream.read(1024 * 1024).strip(bytes([i])) == b""
+
+
+def test_split_hdd(split_hdd):
+ hdd = HDD(Path(split_hdd))
+ storages = hdd.descriptor.storage_data.storages
+
+ assert len(storages) == 6
+
+ split_sizes = [3989504, 3989504, 3989504, 3989504, 3989504, 1024000]
+
+ start = 0
+
+ for storage, split_size in zip(storages, split_sizes):
+ assert storage.start == start
+ assert storage.end == start + split_size
+ assert len(storage.images) == 1
+ assert storage.images[0].type == "Compressed"
+
+ start = storage.end
+
+ with patch.object(Path, "open", mock_open_gz):
+ stream = hdd.open()
+
+ assert stream.read(1024 * 1024).strip(b"\x01") == b""
+
+ offset = 0
+ for i, split_size in enumerate(split_sizes):
+ offset += split_size * 512
+ stream.seek(offset - 512)
+
+ buf = stream.read(1024)
+ if i < 5:
+ assert buf == bytes([i + 1] * 512) + bytes([i + 2] * 512)
+ else:
+ assert buf == bytes([i + 1] * 512)