Skip to content

Commit

Permalink
refactored mach-o a bit to make it slightly more pull-apartable in fu…
Browse files Browse the repository at this point in the history
…ture
  • Loading branch information
copumpkin committed Apr 24, 2008
1 parent 35baffa commit f0f38d1
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 93 deletions.
194 changes: 106 additions & 88 deletions lib/mach-o.rb
Original file line number Original file line Diff line number Diff line change
@@ -1,7 +1,6 @@
class MachO require 'stringio'
HOST_BYTE_ORDER = ("\x01\x02\x03\x04".unpack("N")[0] == 0x01020304) ? :little : :big
NOT_HOST_BYTE_ORDER = ([:big, :little] - [HOST_BYTE_ORDER])[0] module MachOConstants

CPU_TYPES = { CPU_TYPES = {
1 => :vax, 1 => :vax,
6 => :mc680x0, 6 => :mc680x0,
Expand Down Expand Up @@ -162,84 +161,112 @@ class MachO
12 => :prebound, 12 => :prebound,
10 => :indirect 10 => :indirect
} }
end

class MachO
include MachOConstants

# TODO: move me elsewhere
# NOTE(review): unpack("N") always decodes in big-endian ("network") order,
# so this comparison is true on every machine and HOST_BYTE_ORDER always ends
# up as :little; it does not actually probe the host. A native-order directive
# would be needed to detect the real host byte order -- confirm the intent
# before changing it, since the magic-number checks below depend on this value.
HOST_BYTE_ORDER = ("\x01\x02\x03\x04".unpack("N")[0] == 0x01020304) ? :little : :big
# Whichever of :big / :little HOST_BYTE_ORDER is not.
NOT_HOST_BYTE_ORDER = ([:big, :little] - [HOST_BYTE_ORDER])[0]


attr_reader :source
attr_reader :images attr_reader :images


def initialize(content) def initialize(source)
file_magic = content.read(4).unpack("N")[0] @source = source.kind_of?(String) ? StringIO.new(source) : source

@source = content
architectures = {}


@images = {} @images = {}


@symbols = [] load_images
decode_images
end

private

# Builds an Image via Image.allocate (so Image#initialize is not run) and
# seeds it with the given state.
#
# variables - Hash (or any enumerable of [name, value] pairs) whose names are
#             instance-variable names without the leading "@".
#
# Returns the newly allocated Image.
def create_image(variables)
  # Same assignment loop as update_image; delegate instead of duplicating it.
  update_image(Image.allocate, variables)
end

# Sets each name/value pair from +variables+ on +image+ as an instance
# variable ("@name"), overwriting any value already stored under that name.
#
# image     - the object to mutate.
# variables - Hash (or any enumerable of [name, value] pairs) whose names are
#             instance-variable names without the leading "@".
#
# Returns the same +image+ object that was passed in.
def update_image(image, variables)
  variables.each do |name, value|
    image.instance_variable_set("@#{name}", value)
  end

  image
end

# NOTE(review): this span comes from a side-by-side diff view, so most lines
# show the pre-commit text (using `content` / `architectures`) followed by the
# post-commit text (using `@source` / `@images`).
#
# Reads the leading 4-byte magic from @source and stores one entry in @images
# per architecture, keyed by [cpu_type, cpu_subtype]:
#   * fat magic (0xcafebabe / 0xbebafeca): reads arch_count, then one 20-byte
#     descriptor per architecture, peeking at each slice's own magic to derive
#     :image64 and :byte_order before seeking back to the descriptor table;
#   * otherwise: treats the whole source as one image starting at offset 0.
# The final else is written to raise "Not a Mach-O binary" (see the review
# note on the elsif below for why it cannot currently be reached).
def load_images
file_magic = @source.read(4).unpack("N")[0]


# Check to see if we have a fat binary # Check to see if we have a fat binary
if file_magic == 0xcafebabe || file_magic == 0xbebafeca if file_magic == 0xcafebabe || file_magic == 0xbebafeca
# We have a fat binary, next int is the number of architectures # We have a fat binary, next int is the number of architectures
arch_count = content.read(4).unpack("N")[0] arch_count = @source.read(4).unpack("N")[0]


# Followed by arch_count contiguous architecture descriptors # Followed by arch_count contiguous architecture descriptors
arch_count.times do arch_count.times do
cpu_type, cpu_subtype, offset, size, align = content.read(20).unpack("N*") cpu_type, cpu_subtype, offset, size, align = @source.read(20).unpack("N*")


cpu_type = CPU_TYPES[cpu_type] cpu_type = CPU_TYPES[cpu_type]
cpu_subtype = CPU_SUBTYPES[cpu_type][cpu_subtype] cpu_subtype = CPU_SUBTYPES[cpu_type][cpu_subtype]
align = 2 ** align # unused align = 2 ** align # unused


old_offset = content.tell # (TODO: implement IO.push/pop, maybe)
old_offset = @source.tell


# Find out the byte ordering on this arch # Find out the byte ordering on this arch
content.seek(offset, IO::SEEK_SET) @source.seek(offset)
arch_magic = content.read(4).unpack("V")[0] arch_magic = @source.read(4).unpack("V")[0]


big_header = (arch_magic == 0xfeedfacf || arch_magic == 0xcffaedfe) image64 = (arch_magic == 0xfeedfacf || arch_magic == 0xcffaedfe)
byte_order = (arch_magic == 0xfeedface || arch_magic == 0xfeedfacf) ? HOST_BYTE_ORDER : NOT_HOST_BYTE_ORDER byte_order = (arch_magic == 0xfeedface || arch_magic == 0xfeedfacf) ? HOST_BYTE_ORDER : NOT_HOST_BYTE_ORDER


# Go back to where we were # Go back to where we were
content.seek(old_offset, IO::SEEK_SET) @source.seek(old_offset)


architectures[[cpu_type, cpu_subtype]] = {:offset => offset, :size => size, :byte_order => byte_order, :image64 => big_header} @images[[cpu_type, cpu_subtype]] = create_image(:offset => offset, :size => size, :byte_order => byte_order, :image64 => image64, :source => @source)
end end
# NOTE(review): `file_magic = 0xfeedface || ...` is an assignment, not a
# comparison. It parses as file_magic = (0xfeedface || ...), which is always
# truthy, so every non-fat input takes this branch with file_magic forced to
# 0xfeedface and the else/raise at the bottom can never run.
# NOTE(review): 0xcefadefe looks like a typo for the byte-swapped magic
# 0xcefaedfe -- confirm against the Mach-O headers before fixing.
elsif file_magic = 0xfeedface || file_magic == 0xcefadefe || file_magic == 0xfeedfacf || file_magic == 0xcffaedfe elsif file_magic = 0xfeedface || file_magic == 0xcefadefe || file_magic == 0xfeedfacf || file_magic == 0xcffaedfe
# We have a normal binary, check the size of its header # We have a normal binary, check the size of its header
big_header = (file_magic == 0xfeedfacf || file_magic == 0xcffaedfe) image64 = (file_magic == 0xfeedfacf || file_magic == 0xcffaedfe)
# NOTE(review): file_magic was decoded with "N" (big-endian) while arch_magic
# in the fat branch is decoded with "V" (little-endian), yet both map the same
# constants to HOST_BYTE_ORDER -- confirm the intended endianness rule.
byte_order = (file_magic == 0xfeedface || file_magic == 0xfeedfacf) ? HOST_BYTE_ORDER : NOT_HOST_BYTE_ORDER byte_order = (file_magic == 0xfeedface || file_magic == 0xfeedfacf) ? HOST_BYTE_ORDER : NOT_HOST_BYTE_ORDER


cpu_type, cpu_subtype = content.read(8).unpack("#{byte_order == :big ? 'N' : 'V'}*") cpu_type, cpu_subtype = @source.read(8).unpack("#{byte_order == :big ? 'N' : 'V'}*")


cpu_type = CPU_TYPES[cpu_type] cpu_type = CPU_TYPES[cpu_type]
cpu_subtype = CPU_SUBTYPES[cpu_type][cpu_subtype] cpu_subtype = CPU_SUBTYPES[cpu_type][cpu_subtype]


# NOTE(review): the constructor may wrap a String source in StringIO, and
# StringIO does not provide #stat -- presumably this call fails for such
# sources; verify and consider a size lookup that works for both.
architectures[[cpu_type, cpu_subtype]] = {:offset => 0, :size => content.stat.size, :byte_order => byte_order, :image64 => big_header} @images[[cpu_type, cpu_subtype]] = create_image(:offset => 0, :size => @source.stat.size, :byte_order => byte_order, :image64 => image64, :source => @source)
else else
raise "Not a Mach-O binary" raise "Not a Mach-O binary"
end end
end


architectures.each do |architecture, info| def decode_images
# p architecture @images.each do |architecture, image|
content.seek(info[:offset], IO::SEEK_SET) # Get an IO object that points at our image
content = image.physical


short_format = info[:byte_order] == :big ? "n" : "v" short_format = image.byte_order == :big ? "n" : "v"
int_format = info[:byte_order] == :big ? "N" : "V" int_format = image.byte_order == :big ? "N" : "V"


# Ruby doesn't provide endianness control for 64-bit ints, so we'll need to swap them later if necessary # Ruby doesn't provide endianness control for 64-bit ints, so we'll need to swap them later if necessary
pointer_format = info[:image64] ? "Q" : int_format pointer_format = image.image64 ? "Q" : int_format


header_size = info[:image64] ? 32 : 28 header_size = image.image64 ? 32 : 28


file_type, command_count, commands_size, macho_flags = content.read(header_size).unpack("x12#{int_format}*") file_type, command_count, commands_size, macho_flags = content.read(header_size).unpack("x12#{int_format}*")


info[:segments] = {} update_image(image, {:segments => {}, :sections => [], :symbols => {}, :libraries => []})
info[:sections] = []

# Local variable to maintain symbol ordering
info[:symbols] = {} symbols = []

info[:libraries] = []

info[:base_addr] = nil


command_count.times do command_count.times do
command_header = content.read(8) command_header = content.read(8)
Expand All @@ -252,12 +279,12 @@ def initialize(content)
uuid = command[8, 16].unpack("C*") uuid = command[8, 16].unpack("C*")
when :load_dylib, :load_weak_dylib, :id_dylib when :load_dylib, :load_weak_dylib, :id_dylib
name_offset, timestamp, current_version, compatibility_version = command[8, 16].unpack("#{int_format}*") name_offset, timestamp, current_version, compatibility_version = command[8, 16].unpack("#{int_format}*")
info[:libraries] << command[name_offset..-1].unpack("A*")[0] image.libraries << command[name_offset..-1].unpack("A*")[0]
when :segment, :segment_64 when :segment, :segment_64
segment_name, vm_addr, vm_size, file_offset, file_size, max_prot, init_prot, section_count, segment_flags = command[8, 48].unpack("A16#{pointer_format * 4}#{int_format}*") segment_name, vm_addr, vm_size, file_offset, file_size, max_prot, init_prot, section_count, segment_flags = command[8, 48].unpack("A16#{pointer_format * 4}#{int_format}*")


# Swap our pointers if necessary # Swap our pointers if necessary
if info[:byte_order] != HOST_BYTE_ORDER && info[:image64] if image.byte_order != HOST_BYTE_ORDER && image.image64
vm_addr = [vm_addr].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0] vm_addr = [vm_addr].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0]
vm_size = [vm_size].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0] vm_size = [vm_size].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0]
file_offset = [file_offset].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0] file_offset = [file_offset].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0]
Expand All @@ -266,12 +293,12 @@ def initialize(content)


# MH_SPLIT_SEGS (TODO deal with x86_64) # MH_SPLIT_SEGS (TODO deal with x86_64)
if (!((macho_flags & 0x20) != 0) || (init_prot & 3 == 3)) if (!((macho_flags & 0x20) != 0) || (init_prot & 3 == 3))
info[:base_addr] ||= vm_addr update_image(image, {:base_address => vm_addr}) unless image.base_address
end end


info[:segments][segment_name] = segment = {:vm_addr => vm_addr, :vm_size => vm_size, :file_offset => file_offset, :file_size => file_size, :max_prot => max_prot, :init_prot => init_prot, :flags => segment_flags} image.segments[segment_name] = segment = {:vm_addr => vm_addr, :vm_size => vm_size, :file_offset => file_offset, :file_size => file_size, :max_prot => max_prot, :init_prot => init_prot, :flags => segment_flags}


section_size = info[:image64] ? 76 : 68 section_size = image.image64 ? 76 : 68


segment[:sections] = {} segment[:sections] = {}


Expand All @@ -281,88 +308,88 @@ def initialize(content)
section_name, section_segment_name, addr, size, offset, align, relocation_offset, relocation_count, section_flags = section_body.unpack("A16A16#{pointer_format * 4}#{int_format}*") section_name, section_segment_name, addr, size, offset, align, relocation_offset, relocation_count, section_flags = section_body.unpack("A16A16#{pointer_format * 4}#{int_format}*")


# Fix our pointer info # Fix our pointer info
if info[:byte_order] != HOST_BYTE_ORDER && info[:image64] if image.byte_order != HOST_BYTE_ORDER && image.image64
addr = [addr].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0] addr = [addr].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0]
size = [size].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0] size = [size].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0]
end end


segment[:sections][section_name] = section = {:addr => addr, :size => size, :offset => offset, :align => align, :relocation_offset => relocation_offset, :relocation_count => relocation_count, :flags => section_flags} segment[:sections][section_name] = section = {:addr => addr, :size => size, :offset => offset, :align => align, :relocation_offset => relocation_offset, :relocation_count => relocation_count, :flags => section_flags}


# Maintain an ordered list of the sections # Maintain an ordered list of the sections
info[:sections] << section image.sections << section
end end
when :code_signature when :code_signature
data_offset, data_size = command[8, 8].unpack("#{int_format}*") data_offset, data_size = command[8, 8].unpack("#{int_format}*")


old_offset = content.tell old_offset = content.tell
content.seek(info[:offset] + data_offset, IO::SEEK_SET) content.seek(data_offset)


#info[:signature_data] = content.read(data_size) #info[:signature_data] = content.read(data_size)


content.seek(old_offset, IO::SEEK_SET) content.seek(old_offset)
when :symtab, :symtab_64 when :symtab, :symtab_64
symbols_offset, symbol_count, strings_offset, strings_size = command[8, 32].unpack("#{int_format}*") symbols_offset, symbol_count, strings_offset, strings_size = command[8, 32].unpack("#{int_format}*")


old_offset = content.tell old_offset = content.tell


symbol_size = info[:image64] ? 16 : 12 symbol_size = image.image64 ? 16 : 12


symbol_count.times do |i| symbol_count.times do |i|
content.seek(info[:offset] + symbols_offset + symbol_size * i, IO::SEEK_SET) content.seek(symbols_offset + symbol_size * i)


un, type, section, desc, value = content.read(symbol_size).unpack("#{int_format}C2#{short_format}#{pointer_format}") un, type, section, desc, value = content.read(symbol_size).unpack("#{int_format}C2#{short_format}#{pointer_format}")


if info[:byte_order] != HOST_BYTE_ORDER && info[:image64] if image.byte_order != HOST_BYTE_ORDER && image.image64
value = [value].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0] value = [value].pack("Q").unpack("C*").inject([0, 56]){|accum, x| p accum; [accum[0] + x * (2 ** accum[1]), accum[1] - 8]}[0]
end end


symbol_type = type & 0x0e symbol_type = type & 0x0e
external = (type & 1) != 0 external = (type & 1) != 0


content.seek(info[:offset] + strings_offset + un) content.seek(strings_offset + un)


symbol = content.gets("\x00")[0..-2] symbol = content.gets("\x00")[0..-2]


info[:symbols][symbol] = {:section => section, :desc => desc, :value => value, :external => external} image.symbols[symbol] = {:section => section, :desc => desc, :value => value, :external => external}


@symbols << symbol symbols << symbol


if symbol_type == 0 if symbol_type == 0
info[:symbols][symbol][:lazy] = (desc & 1) != 0 image.symbols[symbol][:lazy] = (desc & 1) != 0
else else
if (info[:symbols][symbol][:defined] = (desc & 2) != 0) if (image.symbols[symbol][:defined] = (desc & 2) != 0)
info[:symbols][symbol][:private] = (desc & 1) != 0 image.symbols[symbol][:private] = (desc & 1) != 0
end end
end end


if (desc & 4) != 0 if (desc & 4) != 0
info[:symbols][symbol][:defined] = false image.symbols[symbol][:defined] = false
info[:symbols][symbol][:lazy] = false image.symbols[symbol][:lazy] = false
end end


if (desc & 0x10) != 0 if (desc & 0x10) != 0
info[:symbols][symbol][:referenced_dynamically] = true image.symbols[symbol][:referenced_dynamically] = true
end end


if (desc & 0x40) != 0 if (desc & 0x40) != 0
info[:symbols][symbol][:weak_reference] = true image.symbols[symbol][:weak_reference] = true
end end


if (desc & 0x80) != 0 if (desc & 0x80) != 0
info[:symbols][symbol][:weak_definition] = true image.symbols[symbol][:weak_definition] = true
end end


# Check if MH_TWOLEVEL is set # Check if MH_TWOLEVEL is set
if (macho_flags & 0x80) != 0 if (macho_flags & 0x80) != 0
library_index = ((desc & 0xFF00) >> 8) - 1 library_index = ((desc & 0xFF00) >> 8) - 1


if library_index != -1 if library_index != -1
info[:symbols][symbol][:library_index] = library_index image.symbols[symbol][:library_index] = library_index
end end
end end
end end


content.seek(old_offset, IO::SEEK_SET) content.seek(old_offset)


when :dysymtab when :dysymtab
first_local_symbol_index, first_local_symbol_index,
Expand All @@ -384,28 +411,28 @@ def initialize(content)
local_relocation_table_offset, local_relocation_table_offset,
local_relocation_table_count = command[8, 88].unpack("#{int_format}*") local_relocation_table_count = command[8, 88].unpack("#{int_format}*")


info[:relocations] = {} update_image(image, {:relocations => {}})


old_offset = content.tell old_offset = content.tell


content.seek(info[:offset] + local_relocation_table_offset) content.seek(local_relocation_table_offset)


local_relocation_table_count.times do local_relocation_table_count.times do
relocation_address, relocation_info = content.read(8).unpack("#{int_format}*") relocation_address, relocation_info = content.read(8).unpack("#{int_format}*")


info[:relocations][info[:base_addr] + relocation_address] = @symbols[relocation_info & 0x00FFFFFF] image.relocations[image.base_address + relocation_address] = symbols[relocation_info & 0x00FFFFFF]
#p (relocation_info & 0x01000000) >> 24 #p (relocation_info & 0x01000000) >> 24
#p (relocation_info & 0x06000000) >> 25 #p (relocation_info & 0x06000000) >> 25
#p (relocation_info & 0x08000000) >> 27 #p (relocation_info & 0x08000000) >> 27
#p (relocation_info & 0xF0000000) >> 28 #p (relocation_info & 0xF0000000) >> 28
end end


content.seek(info[:offset] + external_relocation_table_offset) content.seek(external_relocation_table_offset)


external_relocation_table_count.times do external_relocation_table_count.times do
relocation_address, relocation_info = content.read(8).unpack("#{int_format}*") relocation_address, relocation_info = content.read(8).unpack("#{int_format}*")


info[:relocations][info[:base_addr] + relocation_address] = @symbols[relocation_info & 0x00FFFFFF] image.relocations[image.base_address + relocation_address] = symbols[relocation_info & 0x00FFFFFF]
#p (relocation_info & 0x01000000) >> 24 #p (relocation_info & 0x01000000) >> 24
#p (relocation_info & 0x06000000) >> 25 #p (relocation_info & 0x06000000) >> 25
#p (relocation_info & 0x08000000) >> 27 #p (relocation_info & 0x08000000) >> 27
Expand All @@ -417,10 +444,6 @@ def initialize(content)
end end
end end
end end

architectures.each_pair do |architecture, info|
@images[architecture] = Image.new(architecture, info, self)
end
end end


# TODO: This class should eventually be moved out of the MachO class, so we can support other object formats # TODO: This class should eventually be moved out of the MachO class, so we can support other object formats
Expand Down Expand Up @@ -463,6 +486,8 @@ def gets(separator)
class PhysicalIO class PhysicalIO
# Stores a dup of +source+ together with the given +offset+ and +size+.
# The seek below appears only on the post-commit side of this diff: it moves
# the duplicated handle to +offset+ as soon as the object is built.
def initialize(source, offset, size) def initialize(source, offset, size)
@source, @offset, @size = source.dup, offset, size @source, @offset, @size = source.dup, offset, size

@source.seek(@offset)
end end


def tell def tell
Expand All @@ -487,28 +512,21 @@ def gets(separator)
end end
end end


attr_reader :segments, :sections, :symbols, :size, :relocations attr_reader :segments, :sections, :symbols, :size, :relocations, :base_address, :byte_order, :image64, :libraries

def initialize(architecture, info, parent)
@parent = parent
@sections = info[:sections]
@symbols = info[:symbols]
@image64 = info[:image64]
@byte_order = info[:byte_order]
@offset = info[:offset]
@size = info[:size]
@segments = info[:segments]
@relocations = info[:relocations]
end


# Returns an IO object that operates on the loaded image's memory # Returns an IO object that operates on the loaded image's memory
def virtual def virtual
VirtualIO.new(@parent.source, @segments) VirtualIO.new(@source, @segments)
end end


# Returns an IO object that operates on the file (or subset of it, if it's a fat binary) this image came from # Returns an IO object that operates on the file (or subset of it, if it's a fat binary) this image came from
def physical def physical
PhysicalIO.new(@parent.source, @offset, @size) PhysicalIO.new(@source, @offset, @size)
end

private

def initialize
end end
end end
end end
2 changes: 1 addition & 1 deletion lib/objc.rb
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def initialize(object)
end end


virtual.seek(info_ptr) virtual.seek(info_ptr)
unk0, unk1, unk2, unk3, name_ptr, methodlist_ptr, protocollist_ptr, ivarlist_ptr, unk5, propertylist_ptr = virtual.read(40).unpack("V*") version, info, instance_size, unk3, name_ptr, methodlist_ptr, protocollist_ptr, ivarlist_ptr, unk5, propertylist_ptr = virtual.read(40).unpack("V*")


virtual.seek(name_ptr) virtual.seek(name_ptr)
name = virtual.gets("\x00").chop name = virtual.gets("\x00").chop
Expand Down
Loading

0 comments on commit f0f38d1

Please sign in to comment.