Skip to content

Commit

Permalink
Set entry names encoding appropriately.
Browse files Browse the repository at this point in the history
If we don't set this then we can't do things like file.exists?("Résumé.txt")
without specifically forcing it to ASCII-8BIT, which is unnecessarily clunky:

file.exists?("Résumé.txt".force_encoding("ASCII-8BIT"))

This is because entry names are always read as ASCII-8BIT even if the EFS bit is
set. This patch fixes that.

Tests added/updated to make sure that this works for both files and streams.
  • Loading branch information
hainesr committed Jul 17, 2014
1 parent ce37c20 commit bd67d51
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 11 deletions.
11 changes: 7 additions & 4 deletions lib/zip/entry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,9 @@ def read_local_entry(io) #:nodoc:all
end
set_time(@last_mod_date, @last_mod_time)

@name = io.read(@name_length)
@name = read_name(io)
extra = io.read(@extra_length)

@name.gsub!('\\', '/')

if extra && extra.bytesize != @extra_length
raise ::Zip::Error, "Truncated local zip entry header"
else
Expand Down Expand Up @@ -362,7 +360,7 @@ def read_c_dir_entry(io) #:nodoc:all
unpack_c_dir_entry(static_sized_fields_buf)
check_c_dir_entry_signature
set_time(@last_mod_date, @last_mod_time)
@name = io.read(@name_length).gsub('\\', '/')
@name = read_name(io)
read_c_dir_extra_field(io)
@comment = io.read(@comment_length)
check_c_dir_entry_comment_size
Expand Down Expand Up @@ -570,6 +568,11 @@ def unicode_name?

private

# Reads this entry's name from +io+ and normalises it.
#
# Requests @name_length bytes from +io+ and converts every backslash in
# the result to a forward slash. When unicode_name? is true the bytes are
# tagged as UTF-8 so the name compares equal to ordinary UTF-8 strings;
# otherwise the name is returned in whatever encoding +io+ produced.
#
# Raises ::Zip::Error when +io+ has nothing left to read (io.read returned
# nil), instead of failing with a NoMethodError on nil.
def read_name(io)
  raw_name = io.read(@name_length)
  raise ::Zip::Error, "Truncated zip entry name" if raw_name.nil?

  # A one-character swap needs no pattern matching, so tr is enough here.
  name = raw_name.tr('\\', '/')
  unicode_name? ? name.force_encoding("UTF-8") : name
end

def set_time(binary_dos_date, binary_dos_time)
@time = ::Zip::DOSTime.parse_binary_dos_format(binary_dos_date, binary_dos_time)
rescue ArgumentError
Expand Down
Binary file added test/data/unicode.zip
Binary file not shown.
25 changes: 18 additions & 7 deletions test/unicode_file_names_and_comments_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

class ZipUnicodeFileNamesAndComments < MiniTest::Test

FILENAME = File.join(File.dirname(__FILE__), "test1.zip")
STREAM_FILENAME = File.join(File.dirname(__FILE__), "test1.zip")
UNICODE_FILENAME = File.join(File.dirname(__FILE__), "data", "unicode.zip")

ENTRIES = [
"текстовыйфайл.txt", "Résumé.txt", "슬레이어스휘.txt",
Expand All @@ -14,25 +15,35 @@ class ZipUnicodeFileNamesAndComments < MiniTest::Test
# Round-trips every name in ENTRIES through the streaming API: with
# Zip.unicode_names enabled, each name is written as an entry (the entry
# body is the name itself) via ::Zip::OutputStream, then the entries are
# read back in order via ::Zip::InputStream and each is checked for
# unicode_name? and for a name equal to the one written. The archive file
# is unlinked at the end.
#
# NOTE(review): this listing appears to be a rendered diff without +/-
# markers, so the FILENAME and STREAM_FILENAME variants of the same
# statement (and both the manual force_encoding comparison and the
# assert_equal form) show up side by side -- confirm against the real file
# which line of each pair is current.
def test_unicode_with_streams
Zip.unicode_names = true

# Write phase: one entry per name.
stream = ::Zip::OutputStream.open(FILENAME) do |io|
stream = ::Zip::OutputStream.open(STREAM_FILENAME) do |io|
ENTRIES.each do |filename|
io.put_next_entry(filename)
io.write(filename)
end
end
assert(!stream.nil?)

# Read phase: entries are expected back in the order they were written.
::Zip::InputStream.open(FILENAME) do |io|
::Zip::InputStream.open(STREAM_FILENAME) do |io|
ENTRIES.each do |filename|
entry = io.get_next_entry
assert(entry.unicode_name?)
entry_name = entry.name
entry_name = entry_name.force_encoding("UTF-8") if RUBY_VERSION >= '1.9'
assert(filename == entry_name)
assert_equal(filename, entry.name)
end
end

# Remove the archive created above.
::File.unlink(FILENAME)
::File.unlink(STREAM_FILENAME)
end

# Opens the archive at UNICODE_FILENAME and checks two things: every name
# in ENTRIES can be looked up through the archive's file-system view
# without any manual re-encoding, and every entry in the archive reports
# unicode_name?.
def test_unicode_from_file
  ::Zip::File.open(UNICODE_FILENAME) do |zip|
    ENTRIES.each { |entry_name| assert(zip.file.exists?(entry_name)) }

    zip.each_entry { |zip_entry| assert(zip_entry.unicode_name?) }
  end
end

end

0 comments on commit bd67d51

Please sign in to comment.