Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/launchpad/models/range_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,13 @@ class BinaryTag(Enum):
CODE_SIGNATURE = "code_signature"

# DYLD info categories
DYLD = "dyld" # Parent category for all DYLD-related ranges
DYLD_REBASE = "dyld_rebase"
DYLD_BIND = "dyld_bind"
DYLD_LAZY_BIND = "dyld_lazy_bind"
DYLD_EXPORTS = "dyld_exports"
DYLD_FIXUPS = "dyld_fixups"
DYLD_STRING_TABLE = "dyld_string_table"

# Binary modules/classes
OBJC_CLASSES = "objc_classes"
Expand Down
245 changes: 152 additions & 93 deletions src/launchpad/parsers/apple/range_mapping_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,6 @@ class RangeMappingBuilder:
"""Builds range mappings for Mach-O binaries."""

def __init__(self, parser: MachOParser, file_size: int) -> None:
    """Store the inputs the builder needs for later range mapping.

    Args:
        parser: MachO parser instance, kept on ``self.parser``.
        file_size: Total file size, kept on ``self.file_size``.
    """
    # The two attributes are independent; nothing else happens at construction time.
    self.file_size = file_size
    self.parser = parser

Expand Down Expand Up @@ -83,123 +77,168 @@ def _map_load_commands(self, range_map: RangeMap) -> None:

try:
if cmd_type == lief.MachO.LoadCommand.TYPE.SYMTAB:
self._map_symtab_command(range_map, command)
if cast_command := self._cast_command(command, lief.MachO.SymbolCommand):
self._map_symtab_command(range_map, cast_command)
elif cmd_type in [
lief.MachO.LoadCommand.TYPE.DYLD_INFO,
lief.MachO.LoadCommand.TYPE.DYLD_INFO_ONLY,
]:
self._map_dyld_info_command(range_map, command)
if cast_command := self._cast_command(command, lief.MachO.DyldInfo):
self._map_dyld_info_command(range_map, cast_command)
elif cmd_type == lief.MachO.LoadCommand.TYPE.FUNCTION_STARTS:
self._map_function_starts_command(range_map, command)
if cast_command := self._cast_command(command, lief.MachO.FunctionStarts):
self._map_function_starts_command(range_map, cast_command)
elif cmd_type == lief.MachO.LoadCommand.TYPE.CODE_SIGNATURE:
self._map_code_signature_command(range_map, command)
if cast_command := self._cast_command(command, lief.MachO.CodeSignature):
self._map_code_signature_command(range_map, cast_command)
elif cmd_type == lief.MachO.LoadCommand.TYPE.DATA_IN_CODE:
self._map_data_in_code_command(range_map, command)
if cast_command := self._cast_command(command, lief.MachO.DataInCode):
self._map_data_in_code_command(range_map, cast_command)
elif cmd_type == lief.MachO.LoadCommand.TYPE.DYLIB_CODE_SIGN_DRS:
self._map_dylib_code_sign_drs_command(range_map, command)
elif cmd_type == lief.MachO.LoadCommand.TYPE.LINKER_OPTIMIZATION_HINT:
self._map_linker_optimization_hint_command(range_map, command)
elif cmd_type == lief.MachO.LoadCommand.TYPE.DYLD_EXPORTS_TRIE:
self._map_dyld_exports_trie_command(range_map, command)
elif cmd_type == lief.MachO.LoadCommand.TYPE.DYLD_CHAINED_FIXUPS:
self._map_dyld_chained_fixups_command(range_map, command)
if cast_command := self._cast_command(command, lief.MachO.DyldChainedFixups):
self._map_dyld_chained_fixups_command(range_map, cast_command)
elif cmd_type == lief.MachO.LoadCommand.TYPE.RPATH:
if cast_command := self._cast_command(command, lief.MachO.RPathCommand):
self._map_rpath_command(range_map, cast_command)
elif cmd_type in [
lief.MachO.LoadCommand.TYPE.LOAD_DYLIB,
lief.MachO.LoadCommand.TYPE.LOAD_WEAK_DYLIB,
lief.MachO.LoadCommand.TYPE.REEXPORT_DYLIB,
]:
if cast_command := self._cast_command(command, lief.MachO.DylibCommand):
self._map_dylib_command(range_map, cast_command)
except Exception as e:
logger.debug(f"Failed to process command {i} {command.command.name}: {e}")

current_offset += cmd_size

def _map_symtab_command(self, range_map: RangeMap, command: Any) -> None:
def _cast_command(self, command: lief.MachO.LoadCommand, expected_type: type) -> Any | None:
    """Return ``command`` if it is an instance of ``expected_type``, else ``None``.

    Args:
        command: Load command object to check.
        expected_type: Class the caller needs the command to be.

    Returns:
        The same ``command`` object when the ``isinstance`` check passes;
        otherwise ``None``, after a warning naming both types is logged.
    """
    # Guard clause: report the mismatch and hand back None so the caller can skip the command.
    if not isinstance(command, expected_type):
        logger.warning(f"Expected {expected_type.__name__}, got {type(command).__name__}")
        return None
    return command

def _map_symtab_command(self, range_map: RangeMap, command: lief.MachO.SymbolCommand) -> None:
"""Map symbol table and string table from LC_SYMTAB command."""
try:
if command.symbol_offset > 0 and command.nb_symbols > 0:
# Each symbol entry is typically 16 bytes (64-bit)
symbol_size = command.nb_symbols * 16
range_map.add_range(
command.symbol_offset, command.symbol_offset + symbol_size, BinaryTag.DEBUG_INFO, "symbol_table"
)

if command.string_offset > 0 and command.string_size > 0:
range_map.add_range(
command.string_offset,
command.string_offset + command.string_size,
BinaryTag.C_STRINGS,
"string_table",
)
except Exception as e:
logger.error(f"Failed to map symtab command: {e}")
# Map symbol table
if command.symbol_offset > 0 and command.numberof_symbols > 0:
# Each symbol entry is typically 16 bytes (64-bit)
symbol_size = command.numberof_symbols * 16
range_map.add_range(
command.symbol_offset, command.symbol_offset + symbol_size, BinaryTag.DEBUG_INFO, "symbol_table"
)

# Map string table
if command.strings_offset > 0 and command.strings_size > 0:
range_map.add_range(
command.strings_offset,
command.strings_offset + command.strings_size,
BinaryTag.DYLD_STRING_TABLE,
"string_table",
)

def _map_dyld_info_command(self, range_map: RangeMap, command: Any) -> None:
def _map_dyld_info_command(self, range_map: RangeMap, command: lief.MachO.DyldInfo) -> None:
"""Map DYLD info sections from LC_DYLD_INFO command."""
try:
if hasattr(command, "rebase_off") and command.rebase_off > 0 and command.rebase_size > 0:
range_map.add_range(
command.rebase_off,
command.rebase_off + command.rebase_size,
BinaryTag.DYLD_REBASE,
"dyld_rebase_info",
)
# Rebase information
rebase_offset, rebase_size = command.rebase
if rebase_offset > 0 and rebase_size > 0:
range_map.add_range(
rebase_offset,
rebase_offset + rebase_size,
BinaryTag.DYLD_REBASE,
"dyld_rebase_info",
)

if hasattr(command, "bind_off") and command.bind_off > 0 and command.bind_size > 0:
range_map.add_range(
command.bind_off, command.bind_off + command.bind_size, BinaryTag.DYLD_BIND, "dyld_bind_info"
)
# Bind information
bind_offset, bind_size = command.bind
if bind_offset > 0 and bind_size > 0:
range_map.add_range(bind_offset, bind_offset + bind_size, BinaryTag.DYLD_BIND, "dyld_bind_info")

if hasattr(command, "lazy_bind_off") and command.lazy_bind_off > 0 and command.lazy_bind_size > 0:
range_map.add_range(
command.lazy_bind_off,
command.lazy_bind_off + command.lazy_bind_size,
BinaryTag.DYLD_LAZY_BIND,
"dyld_lazy_bind_info",
)
# Weak bind information
weak_bind_offset, weak_bind_size = command.weak_bind
if weak_bind_offset > 0 and weak_bind_size > 0:
range_map.add_range(
weak_bind_offset,
weak_bind_offset + weak_bind_size,
BinaryTag.DYLD_BIND,
"dyld_weak_bind_info",
)

if hasattr(command, "export_off") and command.export_off > 0 and command.export_size > 0:
range_map.add_range(
command.export_off,
command.export_off + command.export_size,
BinaryTag.DYLD_EXPORTS,
"dyld_export_info",
)
except Exception as e:
logger.debug(f"Failed to map DYLD info command: {e}")
# Lazy bind information
lazy_bind_offset, lazy_bind_size = command.lazy_bind
if lazy_bind_offset > 0 and lazy_bind_size > 0:
range_map.add_range(
lazy_bind_offset,
lazy_bind_offset + lazy_bind_size,
BinaryTag.DYLD_LAZY_BIND,
"dyld_lazy_bind_info",
)

def _map_function_starts_command(self, range_map: RangeMap, command: Any) -> None:
# Export information
export_offset, export_size = command.export_info
if export_offset > 0 and export_size > 0:
range_map.add_range(
export_offset,
export_offset + export_size,
BinaryTag.DYLD_EXPORTS,
"dyld_export_info",
)

def _map_function_starts_command(self, range_map: RangeMap, command: lief.MachO.FunctionStarts) -> None:
"""Map function starts information from LC_FUNCTION_STARTS command."""
try:
if hasattr(command, "data_offset") and command.data_offset > 0 and command.data_size > 0:
range_map.add_range(
command.data_offset,
command.data_offset + command.data_size,
BinaryTag.FUNCTION_STARTS,
"function_starts",
)
except Exception as e:
logger.debug(f"Failed to map function starts command: {e}")
range_map.add_range(
command.data_offset,
command.data_offset + command.data_size,
BinaryTag.FUNCTION_STARTS,
"function_starts",
)

def _map_code_signature_command(self, range_map: RangeMap, command: Any) -> None:
def _map_code_signature_command(self, range_map: RangeMap, command: lief.MachO.CodeSignature) -> None:
"""Map code signature from LC_CODE_SIGNATURE command."""
try:
if hasattr(command, "data_offset") and command.data_offset > 0 and command.data_size > 0:
range_map.add_range(
command.data_offset,
command.data_offset + command.data_size,
BinaryTag.CODE_SIGNATURE,
"code_signature",
)
except Exception as e:
logger.debug(f"Failed to map code signature command: {e}")
range_map.add_range(
command.data_offset,
command.data_offset + command.data_size,
BinaryTag.CODE_SIGNATURE,
"code_signature",
)

def _map_data_in_code_command(self, range_map: RangeMap, command: Any) -> None:
def _map_data_in_code_command(self, range_map: RangeMap, command: lief.MachO.DataInCode) -> None:
"""Map data-in-code information from LC_DATA_IN_CODE command."""
try:
if hasattr(command, "data_offset") and command.data_offset > 0 and command.data_size > 0:
range_map.add_range(
command.data_offset,
command.data_offset + command.data_size,
BinaryTag.DEBUG_INFO,
"data_in_code",
)
except Exception as e:
logger.debug(f"Failed to map data-in-code command: {e}")
range_map.add_range(
command.data_offset,
command.data_offset + command.data_size,
BinaryTag.DEBUG_INFO,
"data_in_code",
)

# Parse individual data-in-code entries
self._map_data_in_code_entries(range_map, command)

def _map_data_in_code_entries(self, range_map: RangeMap, command: lief.MachO.DataInCode) -> None:
    """Tag the whole-entry portion of the data-in-code table as ``data_in_code_entries``.

    The table is treated as an array of fixed 8-byte records; only the bytes
    covered by complete records are mapped. Nothing is added when
    ``command.data_size`` is smaller than one record.

    Args:
        range_map: Range map that receives the new range.
        command: LC_DATA_IN_CODE command supplying ``data_offset`` and ``data_size``.
    """
    # Layout of struct data_in_code_entry in <mach-o/loader.h> (8 bytes):
    #   offset: uint32 (from the mach_header to the start of the data range)
    #   length: uint16 (number of bytes in the data range)
    #   kind:   uint16 (a DICE_KIND_* value)
    record_bytes = 8
    record_count = command.data_size // record_bytes
    if record_count <= 0:
        return

    # NOTE(review): this span starts at the same offset as the "data_in_code"
    # range the calling mapper adds; confirm how RangeMap.add_range treats
    # overlapping ranges.
    table_start = command.data_offset
    table_end = table_start + record_count * record_bytes
    range_map.add_range(table_start, table_end, BinaryTag.DEBUG_INFO, "data_in_code_entries")

def _map_dylib_code_sign_drs_command(self, range_map: RangeMap, command: Any) -> None:
"""Map code signature DRs from LC_DYLIB_CODE_SIGN_DRS command."""
Expand Down Expand Up @@ -240,12 +279,12 @@ def _map_dyld_exports_trie_command(self, range_map: RangeMap, command: Any) -> N
except Exception as e:
logger.debug(f"Failed to map exports trie command: {e}")

def _map_dyld_chained_fixups_command(self, range_map: RangeMap, command: Any) -> None:
def _map_dyld_chained_fixups_command(self, range_map: RangeMap, command: lief.MachO.DyldChainedFixups) -> None:
"""Map chained fixups from LC_DYLD_CHAINED_FIXUPS command."""
range_map.add_range(
command.data_offset,
command.data_offset + command.data_size,
BinaryTag.DYLD_BIND,
BinaryTag.DYLD_FIXUPS,
"dyld_chained_fixups",
)

Expand All @@ -269,6 +308,26 @@ def _map_segments_and_sections(self, range_map: RangeMap) -> None:
except Exception as e:
logger.debug(f"Failed to map section {section_name}: {e}")

def _map_rpath_command(self, range_map: RangeMap, command: lief.MachO.RPathCommand) -> None:
    """Tag an LC_RPATH load command as ``rpath_string`` under ``C_STRINGS``.

    The mapped span is the entire command (``command_offset`` through
    ``command_offset + size``), not only the bytes of the path string.
    Commands whose ``path`` is empty are skipped.

    Args:
        range_map: Range map that receives the new range.
        command: RPATH load command to map.
    """
    if not command.path:
        return

    start = command.command_offset
    end = start + command.size
    range_map.add_range(start, end, BinaryTag.C_STRINGS, "rpath_string")

def _map_dylib_command(self, range_map: RangeMap, command: lief.MachO.DylibCommand) -> None:
    """Tag a dylib load command as ``dylib_name`` under ``C_STRINGS``.

    Used for LC_LOAD_DYLIB, LC_LOAD_WEAK_DYLIB and LC_REEXPORT_DYLIB. The
    mapped span is the entire command (``command_offset`` through
    ``command_offset + size``), not only the bytes of the library name.
    Commands whose ``name`` is empty are skipped.

    Args:
        range_map: Range map that receives the new range.
        command: Dylib load command to map.
    """
    if not command.name:
        return

    start = command.command_offset
    end = start + command.size
    range_map.add_range(start, end, BinaryTag.C_STRINGS, "dylib_name")

def _categorize_section(self, section_name: str) -> BinaryTag:
"""Categorize a section based on its name."""
name_lower = section_name.lower()
Expand Down
61 changes: 50 additions & 11 deletions src/launchpad/utils/treemap/macho_element_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,20 +47,59 @@ def _build_binary_treemap(self, range_map: RangeMap, name: str, binary_path: str

# Create child elements for each tag
children: list[TreemapElement] = []
dyld_children: list[TreemapElement] = []

logger.debug(f"Processing tags: {list(ranges_by_tag.keys())}")

for tag, ranges in ranges_by_tag.items():
total_size = sum(r.size for r in ranges)
children.append(
TreemapElement(
name=tag,
install_size=total_size,
download_size=total_size, # Binary sections don't compress
element_type=TreemapType.EXECUTABLES,
path=None,
is_directory=False,
children=[],
details={"tag": tag},
)

# Determine element type based on tag
element_type = TreemapType.EXECUTABLES # Default
if tag.startswith("dyld_"):
element_type = TreemapType.DYLD
elif tag == "unmapped":
element_type = TreemapType.UNMAPPED
elif tag == "code_signature":
element_type = TreemapType.CODE_SIGNATURE
elif tag == "function_starts":
element_type = TreemapType.FUNCTION_STARTS
elif tag == "external_methods":
element_type = TreemapType.EXTERNAL_METHODS

element = TreemapElement(
name=tag,
install_size=total_size,
download_size=total_size, # TODO: add download size
element_type=element_type,
path=None,
is_directory=False,
children=[],
details={"tag": tag},
)

# Group DYLD-related tags under a parent DYLD element
if tag.startswith("dyld_"):
logger.debug(f"Adding {tag} to DYLD group")
dyld_children.append(element)
else:
logger.debug(f"Adding {tag} to regular children")
children.append(element)

# Create parent DYLD element if we have DYLD children
if dyld_children:
dyld_total_size = sum(child.install_size for child in dyld_children)
dyld_element = TreemapElement(
name="DYLD",
install_size=dyld_total_size,
download_size=dyld_total_size,
element_type=TreemapType.DYLD,
path=None,
is_directory=True,
children=dyld_children,
details={"tag": "dyld"},
)
children.append(dyld_element)

# Add unmapped regions if any
if range_map.unmapped_size > 0:
Expand Down
Loading