Skip to content

Commit

Permalink
Using zero padding for low Unicode code points
Browse files Browse the repository at this point in the history
Java's native2ascii tool also uses zero padding for low code points.

Refs #5
  • Loading branch information
jnbt committed Oct 14, 2015
1 parent 22e8a78 commit 852bef0
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 23 deletions.
16 changes: 6 additions & 10 deletions lib/java-properties/encoding/unicode.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module Encoding
# Module to encode and decode unicode chars
# @see JavaProperties::Encoding
module Unicode

# Marker for encoded unicode chars
# @return [Regexp]
UNICODE_MARKER = /\\[uU]([0-9a-fA-F]{4,5}|10[0-9a-fA-F]{4})/
Expand Down Expand Up @@ -42,20 +42,16 @@ def self.encode!(text)
private

def self.unicode(code)
[code].pack("U")
code.chr(::Encoding::UTF_8)
end

def self.hex(codepoint)
hex = codepoint.to_s(16)
size = hex.size
# pad the hex value when it has an uneven number of digits
if (size % 2) == 1
"0#{hex}"
else
hex
end
size = [4, hex.size].max
target_size = size.even? ? size : size+1
hex.rjust(target_size, '0')
end

end
end
end
end
2 changes: 1 addition & 1 deletion spec/fixtures/test_out.properties
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ it\:em6=item6
item7=line 1 line 2 line 3
item8=line 1 line 2 line 3
item9=line 1 line 2 line 3
item10=test\n\ttest\u05d4 test\n\ttest test\n\ttest = test
item10=test\n\ttest\u05d4 test\n\ttest test\n\ttest\u00fc = test
2 changes: 1 addition & 1 deletion spec/fixtures/test_out_skip_separators.properties
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ it:em6=item6
item7=line 1 line 2 line 3
item8=line 1 line 2 line 3
item9=line 1 line 2 line 3
item10=test\n\ttest\u05d4 test\n\ttest test\n\ttest = test
item10=test\n\ttest\u05d4 test\n\ttest test\n\ttest\u00fc = test
2 changes: 1 addition & 1 deletion spec/fixtures/test_out_skip_special_chars.properties
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ item9=line 1 line 2 line 3
item10=test
test\u05d4 test
test test
test = test
test\u00fc = test
2 changes: 1 addition & 1 deletion spec/fixtures/test_out_skip_unicode.properties
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ it\:em6=item6
item7=line 1 line 2 line 3
item8=line 1 line 2 line 3
item9=line 1 line 2 line 3
item10=test\n\ttestה test\n\ttest test\n\ttest = test
item10=test\n\ttestה test\n\ttest test\n\ttestü = test
15 changes: 8 additions & 7 deletions spec/java-properties/encoding/unicode_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
describe JavaProperties::Encoding::Unicode do
subject{ JavaProperties::Encoding::Unicode }

let(:encoded) { 'this is some \u0024 text \U05D4 with unicode' }
let(:encoded_normalized) { 'this is some $ text \u05d4 with unicode' }
let(:decoded) { 'this is some $ text ה with unicode' }
let(:encoded) { 'this is some \u0024 text \U05D4 with unicode \u00fc' }
let(:encoded_normalized) { 'this is some $ text \u05d4 with unicode \u00fc' }
let(:decoded) { 'this is some $ text ה with unicode ü' }

it "decodes unicode chars" do
subject.decode!(encoded.dup).must_equal decoded
Expand All @@ -16,14 +16,15 @@
subject.encode!(decoded.dup).must_equal encoded_normalized
end

it "encodes unicode chars but has 2-based hex size" do
it "encodes unicode chars but has 2-based hex size, padded to at least 4" do
subject.encode!("ü").must_equal '\u00fc'
subject.encode!("ה").must_equal '\u05d4'
subject.encode!("ᘓ").must_equal '\u1613'
end

it "decodes and encodes" do
encoded = subject.encode!(decoded.dup)
deconded = subject.decode!(encoded.dup)
deconded.must_equal decoded
dec = subject.decode!(encoded.dup)
dec.must_equal decoded
end
end
end
4 changes: 2 additions & 2 deletions spec/java-properties/generating/generator_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
:item7 => "line 1 line 2 line 3",
:item8 => "line 1 line 2 line 3",
:item9 => "line 1 line 2 line 3",
:item10 => "test\n\ttestה test\n\ttest test\n\ttest = test"
:item10 => "test\n\ttestה test\n\ttest test\n\ttestü = test"
}
end

Expand Down Expand Up @@ -44,4 +44,4 @@
content.must_equal expected
end

end
end

0 comments on commit 852bef0

Please sign in to comment.