From 765cb316f15a542a8eb7393fc49a5e9d0801e55d Mon Sep 17 00:00:00 2001 From: Robert Haines Date: Tue, 16 Nov 2021 21:42:11 +0000 Subject: [PATCH] Fix reading unknown extra fields. When loading extra fields from both the central directory and local headers, unknown fields were not merged correctly. They were being appended, which means that we end up with the two versions stuck together - in some cases duplicating the field completely. This broke all kinds of things (like calculating the size of a local header) in subtle ways. This commit fixes this by implementing a new `Unknown` extra field type, and making sure that when reading local and central extra fields they are stored and preserved correctly. We cannot assume the unknown fields use the same data in the local and central headers. Fixes #505. --- .rubocop_todo.yml | 1 + lib/zip/central_directory.rb | 2 +- lib/zip/entry.rb | 8 ++--- lib/zip/extra_field.rb | 31 ++++++++----------- lib/zip/extra_field/unknown.rb | 33 +++++++++++++++++++++ test/extra_field_test.rb | 34 +++++++++++++++++---- test/extra_field_unknown_test.rb | 51 ++++++++++++++++++++++++++++++++ test/local_entry_test.rb | 12 ++++++-- 8 files changed, 139 insertions(+), 33 deletions(-) create mode 100644 lib/zip/extra_field/unknown.rb create mode 100644 test/extra_field_unknown_test.rb diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 1afa96b1..b5466f40 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -61,6 +61,7 @@ Style/ClassAndModuleChildren: - 'lib/zip/extra_field/old_unix.rb' - 'lib/zip/extra_field/universal_time.rb' - 'lib/zip/extra_field/unix.rb' + - 'lib/zip/extra_field/unknown.rb' - 'lib/zip/extra_field/zip64.rb' - 'lib/zip/extra_field/zip64_placeholder.rb' diff --git a/lib/zip/central_directory.rb b/lib/zip/central_directory.rb index 29fb2d0d..4979ea6d 100644 --- a/lib/zip/central_directory.rb +++ b/lib/zip/central_directory.rb @@ -174,7 +174,7 @@ def read_central_directory_entries(io) #:nodoc: unless offset.nil? io_save = io.tell io.seek(offset, IO::SEEK_SET) - entry.read_extra_field(read_local_extra_field(io)) + entry.read_extra_field(read_local_extra_field(io), local: true) io.seek(io_save, IO::SEEK_SET) end diff --git a/lib/zip/entry.rb b/lib/zip/entry.rb index e87f3362..d1241e56 100644 --- a/lib/zip/entry.rb +++ b/lib/zip/entry.rb @@ -313,7 +313,7 @@ def read_local_entry(io) #:nodoc:all raise ::Zip::Error, 'Truncated local zip entry header' end - read_extra_field(extra) + read_extra_field(extra, local: true) parse_zip64_extra(true) @local_header_size = calculate_local_header_size end @@ -417,11 +417,11 @@ def check_c_dir_entry_comment_size raise ::Zip::Error, 'Truncated cdir zip entry header' end - def read_extra_field(buf) + def read_extra_field(buf, local: false) if @extra.kind_of?(::Zip::ExtraField) - @extra.merge(buf) if buf + @extra.merge(buf, local: local) if buf else - @extra = ::Zip::ExtraField.new(buf) + @extra = ::Zip::ExtraField.new(buf, local: local) end end diff --git a/lib/zip/extra_field.rb b/lib/zip/extra_field.rb index 24352aae..658675b4 100644 --- a/lib/zip/extra_field.rb +++ b/lib/zip/extra_field.rb @@ -4,8 +4,8 @@ module Zip class ExtraField < Hash ID_MAP = {} - def initialize(binstr = nil) - merge(binstr) if binstr + def initialize(binstr = nil, local: false) + merge(binstr, local: local) if binstr end def extra_field_type_exist(binstr, id, len, index) @@ -18,25 +18,18 @@ def extra_field_type_exist(binstr, id, len, index) end end - def extra_field_type_unknown(binstr, len, index) - create_unknown_item unless self['Unknown'] + def extra_field_type_unknown(binstr, len, index, local) + self['Unknown'] ||= Unknown.new + if !len || len + 4 > binstr[index..-1].bytesize - self['Unknown'] << binstr[index..-1] + self['Unknown'].merge(binstr[index..-1], local: local) return end - self['Unknown'] << binstr[index, len + 4] - end - def create_unknown_item - s = +'' - class << s - alias_method :to_c_dir_bin, :to_s - alias_method :to_local_bin, :to_s - end - self['Unknown'] = s + self['Unknown'].merge(binstr[index, len + 4], local: local) end - def merge(binstr) + def merge(binstr, local: false) return if binstr.empty? i = 0 @@ -46,8 +39,7 @@ def merge(binstr) if id && ID_MAP.member?(id) extra_field_type_exist(binstr, id, len, i) elsif id - create_unknown_item unless self['Unknown'] - break unless extra_field_type_unknown(binstr, len, i) + break unless extra_field_type_unknown(binstr, len, i, local) end i += len + 4 end @@ -61,8 +53,8 @@ def create(name) self[name] = field_class.new end - # place Unknown last, so "extra" data that is missing the proper signature/size - # does not prevent known fields from being read back in + # Place Unknown last, so "extra" data that is missing the proper + # signature/size does not prevent known fields from being read back in. def ordered_values result = [] each { |k, v| k == 'Unknown' ? result.push(v) : result.unshift(v) } @@ -92,6 +84,7 @@ def local_size end end +require 'zip/extra_field/unknown' require 'zip/extra_field/generic' require 'zip/extra_field/universal_time' require 'zip/extra_field/old_unix' diff --git a/lib/zip/extra_field/unknown.rb b/lib/zip/extra_field/unknown.rb new file mode 100644 index 00000000..84e87f34 --- /dev/null +++ b/lib/zip/extra_field/unknown.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Zip + # A class to hold unknown extra fields so that they are preserved. + class ExtraField::Unknown + def initialize + @local_bin = +'' + @cdir_bin = +'' + end + + def merge(binstr, local: false) + return if binstr.empty? + + if local + @local_bin << binstr + else + @cdir_bin << binstr + end + end + + def to_local_bin + @local_bin + end + + def to_c_dir_bin + @cdir_bin + end + + def ==(other) + @local_bin == other.to_local_bin && @cdir_bin == other.to_c_dir_bin + end + end +end diff --git a/test/extra_field_test.rb b/test/extra_field_test.rb index 7aea91e5..c91a7bc6 100644 --- a/test/extra_field_test.rb +++ b/test/extra_field_test.rb @@ -6,17 +6,29 @@ class ZipExtraFieldTest < MiniTest::Test def test_new extra_pure = ::Zip::ExtraField.new('') extra_withstr = ::Zip::ExtraField.new('foo') + extra_withstr_local = ::Zip::ExtraField.new('foo', local: true) + assert_instance_of(::Zip::ExtraField, extra_pure) assert_instance_of(::Zip::ExtraField, extra_withstr) + assert_instance_of(::Zip::ExtraField, extra_withstr_local) + + assert_equal('foo', extra_withstr['Unknown'].to_c_dir_bin) + assert_equal('foo', extra_withstr_local['Unknown'].to_local_bin) end def test_unknownfield extra = ::Zip::ExtraField.new('foo') - assert_equal(extra['Unknown'], 'foo') + assert_equal('foo', extra['Unknown'].to_c_dir_bin) + extra.merge('a') - assert_equal(extra['Unknown'], 'fooa') + assert_equal('fooa', extra['Unknown'].to_c_dir_bin) + extra.merge('barbaz') - assert_equal(extra.to_s, 'fooabarbaz') + assert_equal('fooabarbaz', extra['Unknown'].to_c_dir_bin) + + extra.merge('bar', local: true) + assert_equal('bar', extra['Unknown'].to_local_bin) + assert_equal('fooabarbaz', extra['Unknown'].to_c_dir_bin) end def test_bad_header_id @@ -66,9 +78,9 @@ def test_to_s extra = ::Zip::ExtraField.new(str) assert_instance_of(String, extra.to_s) - s = extra.to_s - extra.merge('foo') - assert_equal(s.length + 3, extra.to_s.length) + extra_len = extra.to_s.length + extra.merge('foo', local: true) + assert_equal(extra_len + 3, extra.to_s.length) end def test_equality @@ -99,4 +111,14 @@ def test_read_local_extra_field end end end + + def test_load_unknown_extra_field + ::Zip::File.open('test/data/osx-archive.zip') do |zf| + zf.each do |entry| + # Check that there is only one occurance of the 'ux' extra field. + assert_equal(0, entry.extra['Unknown'].to_c_dir_bin.rindex('ux')) + assert_equal(0, entry.extra['Unknown'].to_local_bin.rindex('ux')) + end + end + end end diff --git a/test/extra_field_unknown_test.rb b/test/extra_field_unknown_test.rb new file mode 100644 index 00000000..4d24d928 --- /dev/null +++ b/test/extra_field_unknown_test.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +require 'test_helper' + +class ZipExtraFieldUnknownTest < MiniTest::Test + def test_new + extra = ::Zip::ExtraField::Unknown.new + assert_empty(extra.to_c_dir_bin) + assert_empty(extra.to_local_bin) + end + + def test_merge_cdir_then_local + extra = ::Zip::ExtraField::Unknown.new + field = "ux\v\x00\x01\x04\xF6\x01\x00\x00\x04\x14\x00\x00\x00" + + extra.merge(field) + assert_empty(extra.to_local_bin) + assert_equal(field, extra.to_c_dir_bin) + + extra.merge(field, local: true) + assert_equal(field, extra.to_local_bin) + assert_equal(field, extra.to_c_dir_bin) + end + + def test_merge_local_only + extra = ::Zip::ExtraField::Unknown.new + field = "ux\v\x00\x01\x04\xF6\x01\x00\x00\x04\x14\x00\x00\x00" + + extra.merge(field, local: true) + assert_equal(field, extra.to_local_bin) + assert_empty(extra.to_c_dir_bin) + end + + def test_equality + extra1 = ::Zip::ExtraField::Unknown.new + extra2 = ::Zip::ExtraField::Unknown.new + assert_equal(extra1, extra2) + + extra1.merge("ux\v\x00\x01\x04\xF6\x01\x00\x00\x04\x14\x00\x00\x00") + refute_equal(extra1, extra2) + + extra2.merge("ux\v\x00\x01\x04\xF6\x01\x00\x00\x04\x14\x00\x00\x00") + assert_equal(extra1, extra2) + + extra1.merge('foo', local: true) + refute_equal(extra1, extra2) + + extra2.merge('foo', local: true) + assert_equal(extra1, extra2) + end +end diff --git a/test/local_entry_test.rb b/test/local_entry_test.rb index 15c367a7..3cf21e62 100644 --- a/test/local_entry_test.rb +++ b/test/local_entry_test.rb @@ -83,6 +83,7 @@ def test_write_entry_with_zip64 'file.zip', 'entry_name', comment: 'my little comment', size: 400, extra: 'thisIsSomeExtraInformation', compressed_size: 100, crc: 987_654 ) + entry.extra.merge('thisIsSomeExtraInformation', local: true) write_to_file(LEH_FILE, CEH_FILE, entry) local_entry, central_entry = read_from_file(LEH_FILE, CEH_FILE) @@ -153,18 +154,23 @@ def test_read64_local_offset private - def compare_local_entry_headers(entry1, entry2) + def compare_common_entry_headers(entry1, entry2) assert_equal(entry1.compressed_size, entry2.compressed_size) assert_equal(entry1.crc, entry2.crc) - assert_equal(entry1.extra, entry2.extra) assert_equal(entry1.compression_method, entry2.compression_method) assert_equal(entry1.name, entry2.name) assert_equal(entry1.size, entry2.size) assert_equal(entry1.local_header_offset, entry2.local_header_offset) end + def compare_local_entry_headers(entry1, entry2) + compare_common_entry_headers(entry1, entry2) + assert_equal(entry1.extra.to_local_bin, entry2.extra.to_local_bin) + end + def compare_c_dir_entry_headers(entry1, entry2) - compare_local_entry_headers(entry1, entry2) + compare_common_entry_headers(entry1, entry2) + assert_equal(entry1.extra.to_c_dir_bin, entry2.extra.to_c_dir_bin) assert_equal(entry1.comment, entry2.comment) end