Skip to content
This repository has been archived by the owner on Oct 8, 2020. It is now read-only.

Commit

Permalink
Implement Xgit.Util.ParseCharlist.decode_ambiguous_charlist/1. (#196)
Browse files Browse the repository at this point in the history
* Implement Xgit.Util.ParseCharlist.decode_ambiguous_charlist/1.

* Replace uses of RawParseUtils.decode/1 with ParseCharlist.decode_ambiguous_charlist/1.

* A few small fixes.
  • Loading branch information
scouten committed Oct 12, 2019
1 parent 32ecea6 commit a0fbb88
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 3 deletions.
3 changes: 2 additions & 1 deletion lib/xgit/core/object.ex
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ defmodule Xgit.Core.Object do
alias Xgit.Core.FileMode
alias Xgit.Core.FilePath
alias Xgit.Core.ObjectId
alias Xgit.Util.ParseCharlist
alias Xgit.Util.ParseDecimal
alias Xgit.Util.RawParseUtils

Expand Down Expand Up @@ -424,7 +425,7 @@ defmodule Xgit.Core.Object do

defp normalize(name, true = _mac?) when is_list(name) do
name
|> RawParseUtils.decode()
|> ParseCharlist.decode_ambiguous_charlist()
|> String.downcase()
|> :unicode.characters_to_nfc_binary()
end
Expand Down
5 changes: 3 additions & 2 deletions lib/xgit/core/person_ident.ex
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ defmodule Xgit.Core.PersonIdent do
A combination of a person identity and time in git.
"""

alias Xgit.Util.ParseCharlist
alias Xgit.Util.ParseDecimal
alias Xgit.Util.RawParseUtils

Expand Down Expand Up @@ -104,8 +105,8 @@ defmodule Xgit.Core.PersonIdent do
name <- parse_name(b),
{time, tz} <- parse_tz(email_start) do
%__MODULE__{
name: RawParseUtils.decode(name),
email: RawParseUtils.decode(email),
name: ParseCharlist.decode_ambiguous_charlist(name),
email: ParseCharlist.decode_ambiguous_charlist(email),
when: time,
tz_offset: tz
}
Expand Down
20 changes: 20 additions & 0 deletions lib/xgit/util/parse_charlist.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
defmodule Xgit.Util.ParseCharlist do
  @moduledoc false
  # Internal helper for decoding byte lists whose text encoding is not known up front.

  import Xgit.Util.ForceCoverage

  @doc ~S"""
  Decodes a list of bytes into an Elixir (UTF-8) string when the source encoding
  is uncertain.

  The bytes are first interpreted as UTF-8. If that interpretation does not yield
  a binary, the same bytes are reinterpreted as ISO-8859-1 (Latin-1) instead.
  """
  @spec decode_ambiguous_charlist(b :: [byte]) :: String.t()
  def decode_ambiguous_charlist(b) when is_list(b) do
    bytes = :erlang.list_to_binary(b)
    utf8_or_latin1(bytes, :unicode.characters_to_binary(bytes))
  end

  # A binary result means the UTF-8 interpretation was accepted; return it as-is.
  defp utf8_or_latin1(_bytes, decoded) when is_binary(decoded), do: cover(decoded)

  # Any non-binary result means the bytes were not accepted as UTF-8, so the
  # original bytes are converted from Latin-1 instead.
  defp utf8_or_latin1(bytes, _not_utf8), do: :unicode.characters_to_binary(bytes, :latin1)
end
13 changes: 13 additions & 0 deletions test/xgit/util/parse_charlist_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
defmodule Xgit.Util.ParseCharlistTest do
  # Exercises ParseCharlist.decode_ambiguous_charlist/1: the UTF-8 path, the
  # ISO-8859-1 fallback path, and the input guard.
  use ExUnit.Case, async: true

  alias Xgit.Util.ParseCharlist

  describe "decode_ambiguous_charlist/1" do
    test "passes plain ASCII through unchanged" do
      assert ParseCharlist.decode_ambiguous_charlist([64, 65, 66]) == "@AB"
    end

    test "returns an empty string for an empty list" do
      assert ParseCharlist.decode_ambiguous_charlist([]) == ""
    end

    test "keeps bytes that already form valid UTF-8" do
      assert ParseCharlist.decode_ambiguous_charlist([195, 164, 105, 116, 105]) == "äiti"
      assert ParseCharlist.decode_ambiguous_charlist([66, 106, 195, 182, 114, 110]) == "Björn"
    end

    test "falls back to ISO-8859-1 when the bytes are not valid UTF-8" do
      # 228 (ä) and 246 (ö) are single-byte Latin-1 characters that are not
      # valid as standalone UTF-8 bytes.
      assert ParseCharlist.decode_ambiguous_charlist([228, 105, 116, 105]) == "äiti"
      assert ParseCharlist.decode_ambiguous_charlist([66, 106, 246, 114, 110]) == "Björn"
    end

    test "rejects input that is not a list" do
      # The function head is guarded with is_list/1, so a binary matches no clause.
      assert_raise FunctionClauseError, fn ->
        ParseCharlist.decode_ambiguous_charlist("abc")
      end
    end
  end
end

0 comments on commit a0fbb88

Please sign in to comment.