Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

marshal-structure dumps a tree based on the Marshal format

  • Loading branch information...
commit 7c82f07a32fd5dd40dee09cb830a1d5750870e43 0 parents
@drbrain authored
9 .autotest
@@ -0,0 +1,9 @@
+# -*- ruby -*-
+
+require 'autotest/restart'
+require 'autotest/isolate'
+
# Point autotest at minitest's autorun entry point when it initializes.
Autotest.add_hook(:initialize) { |autotest| autotest.testlib = 'minitest/autorun' }
+
6 .gitignore
@@ -0,0 +1,6 @@
+*.rbc
+*.swp
+/TAGS
+/doc
+/pkg
+/tmp
5 History.txt
@@ -0,0 +1,5 @@
+=== 1.0 / 2011-08-05
+
+* Major enhancements
+ * Birthday!
+
7 Manifest.txt
@@ -0,0 +1,7 @@
+.autotest
+History.txt
+Manifest.txt
+README.rdoc
+Rakefile
+lib/marshal/structure.rb
+test/test_marshal_structure.rb
73 README.rdoc
@@ -0,0 +1,73 @@
+= marshal-structure
+
+* https://github.com/drbrain/marshal-structure
+
+== DESCRIPTION:
+
+Dumps a tree based on the Marshal format. Supports the Marshal 4.8 format.
+
+== FEATURES/PROBLEMS:
+
+* Works like Marshal.load
+
+== SYNOPSIS:
+
+From the command line:
+
+ ruby -rpp -rmarshal/structure \
+ -e 'pp Marshal::Structure.load Marshal.dump "hello"'
+
+Fancier usage:
+
+ require 'pp'
+ require 'marshal/structure'
+
+ ms = Marshal::Structure.new Marshal.dump %w[hello world]
+
+ # print the Marshal stream structure
+ pp ms.construct
+
+ # print ruby objects in Marshal stream
+ pp ms.objects
+
+== REQUIREMENTS:
+
+* Ruby 1.8.7+
+
+== INSTALL:
+
+ gem install marshal-structure
+
+== DEVELOPERS:
+
+After checking out the source, run:
+
+ $ rake newb
+
+This task will install any missing dependencies, run the tests/specs,
+and generate the RDoc.
+
+== LICENSE:
+
+(The MIT License)
+
+Copyright (c) 2011 Eric Hodel
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+'Software'), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 Rakefile
@@ -0,0 +1,27 @@
+# -*- ruby -*-
+
+require 'rubygems'
+require 'hoe'
+
# Hoe plugins used by this project, activated in the original order.
[:git, :isolate, :minitest].each { |plugin| Hoe.plugin plugin }

# Gem specification for marshal-structure.
Hoe.spec 'marshal-structure' do
  developer 'Eric Hodel', 'drbrain@segment7.net'

  rdoc_locations << 'docs.seattlerb.org:/data/www/docs.seattlerb.org/marshal-structure/'

  self.readme_file = 'README.rdoc'
  extra_rdoc_files << 'README.rdoc' # HACK fix in Hoe

  # HACK Hoe issue #7: isolate_dir= only exists when the isolate plugin loaded
  isolate_available = respond_to? :isolate_dir=

  self.isolate_dir = 'tmp/isolate' if isolate_available
  warn 'please: gem install isolate' unless isolate_available

  extra_dev_deps << ['ben_string', '~> 1']
end
+
+# vim: syntax=ruby
619 lib/marshal/structure.rb
@@ -0,0 +1,619 @@
##
# Marshal::Structure dumps a nested Array describing the structure of a
# Marshal stream.
#
# Marshal format 4.8 is supported.

class Marshal::Structure

  ##
  # Version of Marshal::Structure you are using

  VERSION = '1.0'

  ##
  # Supported major Marshal version

  MAJOR_VERSION = 4

  ##
  # Supported minor Marshal version

  MINOR_VERSION = 8

  ##
  # nil type prefix

  TYPE_NIL = '0'

  ##
  # true type prefix

  TYPE_TRUE = 'T'

  ##
  # false type prefix

  TYPE_FALSE = 'F'

  ##
  # Fixnum type prefix

  TYPE_FIXNUM = 'i'

  ##
  # An object that has been extended with a module

  TYPE_EXTENDED = 'e'

  ##
  # A subclass of a built-in type

  TYPE_UCLASS = 'C'

  ##
  # A ruby Object

  TYPE_OBJECT = 'o'

  ##
  # A wrapped C pointer

  TYPE_DATA = 'd'

  ##
  # An object saved with _dump

  TYPE_USERDEF = 'u'

  ##
  # An object saved with marshal_dump

  TYPE_USRMARSHAL = 'U'

  ##
  # A Float

  TYPE_FLOAT = 'f'

  ##
  # A Bignum

  TYPE_BIGNUM = 'l'

  ##
  # A String

  TYPE_STRING = '"'

  ##
  # A Regexp

  TYPE_REGEXP = '/'

  ##
  # An Array

  TYPE_ARRAY = '['

  ##
  # A Hash

  TYPE_HASH = '{'

  ##
  # A Hash with a default value (not proc)

  TYPE_HASH_DEF = '}'

  ##
  # A Struct

  TYPE_STRUCT = 'S'

  ##
  # An old-style Module (reference, not content)
  #
  # I'm not sure what makes this old.  The byte stream is identical to
  # TYPE_MODULE

  TYPE_MODULE_OLD = 'M'

  ##
  # A class (reference, not content)

  TYPE_CLASS = 'c'

  ##
  # A module (reference, not content)

  TYPE_MODULE = 'm'

  ##
  # A Symbol

  TYPE_SYMBOL = ':'

  ##
  # A reference to a previously-stored Symbol

  TYPE_SYMLINK = ';'

  ##
  # Instance variables for a following object

  TYPE_IVAR = 'I'

  ##
  # A reference to a previously-stored Object

  TYPE_LINK = '@'

  ##
  # Objects found in the Marshal stream.  Since objects aren't constructed the
  # actual object won't be present in this list, only a placeholder per
  # reference slot.

  attr_reader :objects

  ##
  # Symbols found in the Marshal stream

  attr_reader :symbols

  ##
  # Returns the structure of the Marshaled object +obj+ as nested Arrays.
  #
  # +obj+ may be a String-like object (responds to +to_str+) or an IO-like
  # object (responds to +read+ or +getc+).
  #
  # For +true+, +false+ and +nil+ the symbol +:true+, +:false+, +:nil+ is
  # returned, respectively.
  #
  # For Fixnum the value is returned.
  #
  # For other objects the first item is the reference for future occurrences
  # of the object and the remaining items describe the object.
  #
  # Symbols have a separate reference table from all other objects.
  #
  # Raises EOFError when a readable +obj+ yields no data, TypeError when +obj+
  # is not a usable source or the stream version is unsupported, and
  # ArgumentError when the stream is shorter than its two-byte version header.

  def self.load obj
    if obj.respond_to? :to_str then
      data = obj.to_str
    elsif obj.respond_to? :read then
      data = obj.read

      raise EOFError, "end of file reached" if data.nil? or data.empty?
    elsif obj.respond_to? :getc then # FIXME - don't read all of it upfront
      data = String.new

      # getc returns nil at end of input, so test it before calling chr
      while c = obj.getc do
        data << c.chr
      end
    else
      raise TypeError, "instance of IO needed"
    end

    # the version header is two bytes; anything shorter can't be a stream
    raise ArgumentError, "marshal data too short" if data.size < 2

    major = data[0].ord
    minor = data[1].ord

    if major != MAJOR_VERSION or minor > MINOR_VERSION then
      raise TypeError, "incompatible marshal file format (can't be read)\n\tformat version #{MAJOR_VERSION}.#{MINOR_VERSION} required; #{major}.#{minor} given"
    end

    new(data).construct
  end

  ##
  # Prepares processing of +stream+.  The two-byte version header is skipped,
  # it is only validated by Marshal::Structure.load.

  def initialize stream
    @objects = []
    @symbols = []

    @stream = stream
    @byte_array = stream.bytes.to_a
    @consumed = 2
  end

  ##
  # Adds +obj+ to the objects list and returns its index.  Immediate values
  # (nil, true, false, Symbols and Integers) never get a reference slot, for
  # those nil is returned.

  def add_object obj
    # Integer replaces Fixnum, which no longer exists on current rubies
    case obj
    when NilClass, TrueClass, FalseClass, Symbol, Integer then
      return
    end

    index = @objects.size
    @objects << obj
    index
  end

  ##
  # Adds +symbol+ to the symbols list and returns its index

  def add_symlink symbol
    index = @symbols.size
    @symbols << symbol
    index
  end

  ##
  # Creates the structure for the remaining stream.
  #
  # Raises ArgumentError for an unknown type byte or a truncated stream.

  def construct
    type = consume_character

    case type
    when TYPE_NIL then
      :nil
    when TYPE_TRUE then
      :true
    when TYPE_FALSE then
      :false

    when TYPE_ARRAY then
      [:array, *construct_array]
    when TYPE_BIGNUM then
      [:bignum, *construct_bignum]
    when TYPE_CLASS then
      ref = store_unique_object Object.allocate

      [:class, ref, get_byte_sequence]
    when TYPE_DATA then
      [:data, *construct_data]
    when TYPE_EXTENDED then
      [:extended, get_symbol, construct]
    when TYPE_FIXNUM then
      [:fixnum, construct_integer]
    when TYPE_FLOAT then
      [:float, *construct_float]
    when TYPE_HASH then
      [:hash, *construct_hash]
    when TYPE_HASH_DEF then
      [:hash_default, *construct_hash_def]
    when TYPE_IVAR then
      [:instance_variables, construct, *construct_instance_variables]
    when TYPE_LINK then
      [:link, construct_integer]
    when TYPE_MODULE, TYPE_MODULE_OLD then
      ref = store_unique_object Object.allocate

      [:module, ref, get_byte_sequence]
    when TYPE_OBJECT then
      [:object, *construct_object]
    when TYPE_REGEXP then
      [:regexp, *construct_regexp]
    when TYPE_STRING then
      [:string, *construct_string]
    when TYPE_STRUCT then
      [:struct, *construct_struct]
    when TYPE_SYMBOL then
      [:symbol, *construct_symbol]
    when TYPE_SYMLINK then
      [:symbol_link, construct_integer]
    when TYPE_USERDEF then
      [:user_defined, *construct_user_defined]
    when TYPE_USRMARSHAL then
      [:user_marshal, *construct_user_marshal]
    when TYPE_UCLASS then
      name = get_symbol

      [:user_class, name, construct]
    else
      raise ArgumentError, "load error, unknown type #{type}"
    end
  end

  ##
  # Creates the body of an +:array+ object: the reference, the item count and
  # then each item

  def construct_array
    ref = store_unique_object Object.allocate

    obj = [ref]

    items = construct_integer

    obj << items

    items.times do
      obj << construct
    end

    obj
  end

  ##
  # Creates the body of a +:bignum+ object: the reference, the sign (1 or
  # -1), the size in bytes and the magnitude

  def construct_bignum
    # consume_byte returns an Integer, so compare against the byte value of
    # '-' (?- is a String on ruby 1.9+ and would never match)
    sign = consume_byte == ?-.ord ? -1 : 1

    # the stream stores the length in 16-bit words
    size = construct_integer * 2

    result = 0

    # the magnitude is stored little-endian
    consume_bytes(size).each_with_index do |byte, exp|
      result += byte << (exp * 8)
    end

    ref = store_unique_object Object.allocate

    [ref, sign, size, result]
  end

  ##
  # Creates the body of a wrapped C pointer object

  def construct_data
    ref = store_unique_object Object.allocate

    [ref, get_symbol, construct]
  end

  ##
  # Creates the body of a +:float+ object.  The float is left in its stream
  # representation (a byte sequence).

  def construct_float
    float = get_byte_sequence

    ref = store_unique_object Object.allocate

    [ref, float]
  end

  ##
  # Creates the body of a +:hash+ object: the reference, the pair count and
  # then alternating keys and values

  def construct_hash
    ref = store_unique_object Object.allocate

    obj = [ref]

    pairs = construct_integer
    obj << pairs

    pairs.times do
      obj << construct
      obj << construct
    end

    obj
  end

  ##
  # Creates the body of a +:hash_default+ object.  This is the body of a
  # +:hash+ (reference, pair count, keys and values) followed by the default
  # value.

  def construct_hash_def
    # keep every key/value pair, the default value trails the hash body
    construct_hash << construct
  end

  ##
  # Instance variables contain an object followed by a count of instance
  # variables and their contents.  Returns the count followed by alternating
  # names and values.

  def construct_instance_variables
    instance_variables = []

    pairs = construct_integer
    instance_variables << pairs

    pairs.times do
      instance_variables << get_symbol
      instance_variables << construct
    end

    instance_variables
  end

  ##
  # Decodes a stored Fixnum
  #
  # Raises RuntimeError if the size byte is not valid.

  def construct_integer
    c = consume_byte

    # The format is a simple integer compression format.
    #
    # Small values use a single byte.  We've read c as an unsigned byte but
    # need to honor the sign bit, which is done by comparing against the
    # unsigned equivalents.
    return 0 if c == 0
    return c - 5 if 4 < c and c < 128

    # negative single-byte value stored in two's complement
    return c - 251 if 252 > c and c > 127

    # otherwise c (1 to 4, or -1 to -4 read as 255 to 252) indicates how many
    # little-endian bytes to read to construct the value.
    #
    # Because we're operating on a small number of possible values, it's
    # cleaner to just unroll the calculation of each

    case c
    when 1
      consume_byte
    when 2
      consume_byte | (consume_byte << 8)
    when 3
      consume_byte | (consume_byte << 8) | (consume_byte << 16)
    when 4
      consume_byte | (consume_byte << 8) | (consume_byte << 16) |
                     (consume_byte << 24)

    when 255 # -1
      consume_byte - 256
    when 254 # -2
      (consume_byte | (consume_byte << 8)) - 65536
    when 253 # -3
      (consume_byte |
       (consume_byte << 8) |
       (consume_byte << 16)) - 16777216 # 2 ** 24
    when 252 # -4
      (consume_byte |
       (consume_byte << 8) |
       (consume_byte << 16) |
       (consume_byte << 24)) - 4294967296
    else
      raise "Invalid integer size: #{c}"
    end
  end

  ##
  # Creates an Object

  def construct_object
    ref = store_unique_object Object.allocate

    [ref, get_symbol, construct_instance_variables]
  end

  ##
  # Creates a Regexp: the reference, the source and the options byte

  def construct_regexp
    ref = store_unique_object Object.allocate

    [ref, get_byte_sequence, consume_byte]
  end

  ##
  # Creates a String

  def construct_string
    ref = store_unique_object Object.allocate

    [ref, get_byte_sequence]
  end

  ##
  # Creates a Struct: the reference, the struct name, the member count and
  # then alternating member names and values

  def construct_struct
    obj_ref = store_unique_object Object.allocate

    obj = [obj_ref, get_symbol]

    members = construct_integer
    obj << members

    members.times do
      obj << get_symbol
      obj << construct
    end

    obj
  end

  ##
  # Creates a Symbol.  The reference is an index into the symbols list, not
  # the objects list.

  def construct_symbol
    sym = get_byte_sequence

    ref = store_unique_object sym.to_sym

    [ref, sym]
  end

  ##
  # Creates an object saved by _dump

  def construct_user_defined
    name = get_symbol

    data = get_byte_sequence

    ref = store_unique_object Object.allocate

    [ref, name, data]
  end

  ##
  # Creates an object saved by marshal_dump

  def construct_user_marshal
    name = get_symbol

    obj_ref = store_unique_object Object.allocate

    [obj_ref, name, construct]
  end

  ##
  # Consumes +bytes+ bytes from the marshal stream and returns them as a
  # String.
  #
  # Raises ArgumentError if +bytes+ is negative or the stream does not
  # contain that many more bytes.

  def consume bytes
    raise ArgumentError, "negative byte count #{bytes}" if bytes < 0
    raise ArgumentError, "marshal data too short" if
      @consumed + bytes > @byte_array.size

    # @consumed counts bytes, so slice by byte where the String supports it.
    # Plain indexing counts characters on ruby 1.9+.
    data = if @stream.respond_to? :byteslice then
             @stream.byteslice @consumed, bytes
           else
             @stream[@consumed, bytes]
           end

    @consumed += bytes
    data
  end

  ##
  # Consumes +count+ bytes from the marshal stream as an Array of bytes

  def consume_bytes count
    consume(count).bytes.to_a
  end

  ##
  # Consumes one byte from the marshal stream and returns it as an Integer.
  #
  # Raises ArgumentError if the stream is exhausted.

  def consume_byte
    raise ArgumentError, "marshal data too short" if
      @consumed >= @byte_array.size

    data = @byte_array[@consumed]
    @consumed += 1

    data
  end

  ##
  # Consumes one byte from the marshal stream and returns a character

  def consume_character
    consume_byte.chr
  end

  ##
  # Consumes a sequence of bytes from the marshal stream based on the next
  # integer

  def get_byte_sequence
    size = construct_integer
    consume size
  end

  ##
  # Constructs a Symbol from a TYPE_SYMBOL or TYPE_SYMLINK
  #
  # Raises ArgumentError if the next item is neither.

  def get_symbol
    type = consume_character

    case type
    when TYPE_SYMBOL then
      [:symbol, *construct_symbol]
    when TYPE_SYMLINK then
      num = construct_integer
      [:symbol_link, num]
    else
      raise ArgumentError, "expected TYPE_SYMBOL or TYPE_SYMLINK, got #{type.inspect}"
    end
  end

  ##
  # Stores a reference to +obj+ in the symbols list for a Symbol and in the
  # objects list for anything else.  Returns the index in that list.

  def store_unique_object obj
    if Symbol === obj then
      add_symlink obj
    else
      add_object obj
    end
  end

end
+
171 test/test_marshal_structure.rb
@@ -0,0 +1,171 @@
+require 'minitest/autorun'
+require 'marshal/structure'
+require 'ben_string'
+require 'openssl'
+require 'pp'
+
# Test-only hooks so OpenSSL::X509::Name (a wrapped C object) can be dumped
# with Marshal as a TYPE_DATA entry.
class OpenSSL::X509::Name
  # The dumped representation is the list of name entries.
  alias_method :_dump_data, :to_a

  # Rebuilds the name by re-adding every dumped entry.
  def _load_data ary
    ary.each { |entry| add_entry(*entry) }
  end
end
+
# Empty class dumped as a class reference.
class B
end

# Empty module dumped as a module reference.
module C
end

# Empty module used to extend an object before dumping.
module E
end
+
# Fixture whose instances are serialized through marshal_dump/marshal_load.
class M
  # The payload written to the stream.
  def marshal_dump
    'marshal_dump'
  end

  # Nothing to restore; the payload is ignored.
  def marshal_load o
    nil
  end
end
+
# Fixture whose instances are serialized through _dump/_load.
class U
  class << self
    # Ignores the dumped string and returns a fresh instance.
    def _load str
      new
    end
  end

  # Returns the dump payload, tagged with an instance variable so the
  # stream wraps it in an instance-variables entry.
  def _dump limit
    '_dump'.tap do |payload|
      payload.instance_variable_set :@ivar_on_dump_str,
                                    'value on ivar on dump str'
    end
  end
end
+
# Single-member Struct fixture.
S = Struct.new(:f)
+
# Tests for Marshal::Structure.
#
# Minitest 5 renamed the base class to Minitest::Test and later dropped
# MiniTest::Unit::TestCase, so use whichever this minitest provides.
class TestMarshalStructure < (defined?(Minitest::Test) ? Minitest::Test : MiniTest::Unit::TestCase)

  # Pretty-prints +obj+ for assertion failure messages.
  def mu_pp obj
    s = ''
    s = PP.pp obj, s
    s.chomp
  end

  def setup
    @MS = Marshal::Structure
  end

  # A hand-built stream containing one of every supported type.
  def test_construct
    str =
      "\004\b{\006:\006a[\031c\006Bm\006C\"\006d/\006e\000i\006" \
      "f\0322.2999999999999998\000ff" \
      "l+\n\000\000\000\000\000\000\000\000\001\0000TF}\000i\000" \
      "S:\006S\006:\006fi\000o:\vObject\000@\017" \
      "U:\006M\"\021marshal_dump" \
      "Iu:\006U\n_dump\006" \
      ":\026@ivar_on_dump_str\"\036value on ivar on dump str" \
      ";\000e:\006Eo;\b\000" \
      "I\"\025string with ivar\006:\v@value\"\017some value" \
      "C:\016BenString\"\000"

    structure = @MS.load str

    expected = [
      :hash,
      0,
      1,
      [:symbol, 0, "a"],
      [:array,
        1,
        20,
        [:class, 2, "B"],
        [:module, 3, "C"],
        [:string, 4, "d"],
        [:regexp, 5, "e", 0],
        [:fixnum, 1],
        [:float, 6, "2.2999999999999998\000ff"],
        [:bignum, 7, 1, 10, 18446744073709551616],
        :nil,
        :true,
        :false,
        [:hash_default, 8, 0, [:fixnum, 0]],
        [:struct, 9, [:symbol, 1, "S"], 1, [:symbol, 2, "f"], [:fixnum, 0]],
        [:object, 10, [:symbol, 3, "Object"], [0]],
        [:link, 10],
        [:user_marshal, 11, [:symbol, 4, "M"], [:string, 12, "marshal_dump"]],
        [:instance_variables,
          [:user_defined, 13, [:symbol, 5, "U"], "_dump"],
          1,
          [:symbol, 6, "@ivar_on_dump_str"],
          [:string, 14, "value on ivar on dump str"]],
        [:symbol_link, 0],
        [:extended, [:symbol, 7, "E"], [:object, 15, [:symbol_link, 3], [0]]],
        [:instance_variables,
          [:string, 16, "string with ivar"],
          1,
          [:symbol, 8, "@value"],
          [:string, 17, "some value"]],
        [:user_class, [:symbol, 9, "BenString"], [:string, 18, ""]]]]

    assert_equal expected, structure
  end

  # A wrapped C object dumped through _dump_data.
  def test_construct_data
    name = OpenSSL::X509::Name.parse 'CN=nobody/DC=example'
    str = Marshal.dump name

    expected = [
      :data,
      0,
      [:symbol, 0, "OpenSSL::X509::Name"],
      [:array,
        1,
        2,
        [:array, 2, 3,
          [:string, 3, "CN"],
          [:string, 4, "nobody"],
          [:fixnum, 12]],
        [:array, 5, 3,
          [:string, 6, "DC"],
          [:string, 7, "example"],
          [:fixnum, 22]]]]

    assert_equal expected, @MS.load(str)
  end

  def test_construct_module_old
    assert_equal [:module, 0, "M"], @MS.load("\x04\x08M\x06M")
  end

  def test_consume
    ms = @MS.new "\x04\x08\x06M"

    assert_equal "\x06M", ms.consume(2)
  end

  def test_consume_bytes
    ms = @MS.new "\x04\x08\x06M"

    assert_equal [6, 77], ms.consume_bytes(2)
  end

  def test_consume_byte
    ms = @MS.new "\x04\x08M"

    assert_equal 77, ms.consume_byte
  end

  def test_consume_character
    ms = @MS.new "\x04\x08M"

    assert_equal 'M', ms.consume_character
  end

  def test_get_byte_sequence
    ms = @MS.new "\x04\x08\x06M"

    assert_equal "M", ms.get_byte_sequence
  end

end
+
Please sign in to comment.
Something went wrong with that request. Please try again.