Skip to content
This repository has been archived by the owner on Oct 8, 2020. It is now read-only.

Commit

Permalink
Move code to parse the PersonIdent structure from a tag or commit obj…
Browse files Browse the repository at this point in the history
…ect into PersonIdent module. (#36)
  • Loading branch information
scouten committed Jul 20, 2019
1 parent f2adcf2 commit ee82363
Show file tree
Hide file tree
Showing 4 changed files with 229 additions and 215 deletions.
93 changes: 91 additions & 2 deletions lib/xgit/core/person_ident.ex
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ defmodule Xgit.Core.PersonIdent do
A combination of a person identity and time in git.
"""

alias Xgit.Util.RawParseUtils

@typedoc "Time zone offset in minutes +/- from GMT."
@type tz_offset :: -720..840

@typedoc ~S"""
The tuple of name, email, time, and time zone that specifies who wrote or
committed something.
Expand All @@ -66,12 +71,96 @@ defmodule Xgit.Core.PersonIdent do
name: String.t(),
email: String.t(),
when: integer,
tz_offset: Xgit.Lib.Constants.tz_offset()
tz_offset: tz_offset()
}

# All four fields are mandatory: building the struct without any one of them
# is rejected because every key is listed in @enforce_keys.
@enforce_keys [:name, :email, :when, :tz_offset]
defstruct [:name, :email, :when, :tz_offset]

@doc ~S"""
Build a `PersonIdent` struct from the identity portion of a name line
(e.g. author, committer, tagger).

## Parameters

`b` should be the list of bytes from an "author" or "committer" line, starting
at the character after the header name and space.

The functions `Xgit.Util.RawParseUtils.author/1` and
`Xgit.Util.RawParseUtils.committer/1` will return suitable lists.

## Return Value

Returns a `PersonIdent` struct, or `nil` if `b` did not point to a
properly-formatted identity.
"""
@spec from_byte_list(b :: [byte]) :: t() | nil
def from_byte_list(b) when is_list(b) do
  # Only the clauses written with `<-` can fail; a failed match falls through
  # to `else` and yields `nil`. The `=` clauses are plain bindings.
  with [?< | after_open_bracket] <- RawParseUtils.next_lf(b, ?<),
       true <- has_closing_angle_bracket?(after_open_bracket),
       raw_email = RawParseUtils.until_next_lf(after_open_bracket, ?>),
       raw_name = parse_name(b),
       {timestamp, offset} <- parse_tz(after_open_bracket) do
    %__MODULE__{
      name: RawParseUtils.decode(raw_name),
      email: RawParseUtils.decode(raw_email),
      when: timestamp,
      tz_offset: offset
    }
  else
    _no_match -> nil
  end
end

# True when a `>` occurs anywhere in the byte list `b`.
defp has_closing_angle_bracket?(b) do
  Enum.any?(b, fn byte -> byte == ?> end)
end

# Returns the bytes that precede the opening `<`, minus at most one trailing
# space (the separator between the name and the email).
defp parse_name(b) do
  raw_name = RawParseUtils.until_next_lf(b, ?<)

  # Reverse so that the trailing space becomes the head of the list, strip it,
  # then restore the original order.
  raw_name
  |> Enum.reverse()
  |> drop_first_if_space()
  |> Enum.reverse()
end

# Removes a single leading space when there is one; any other list is
# returned unchanged.
defp drop_first_if_space(b) do
  case b do
    [?\s | rest] -> rest
    other -> other
  end
end

# Extracts `{time, tz_offset}` from the text that follows the first email
# address.
#
# `first_email_start` is the byte list that begins just after the opening `<`.
#
# The two values are taken from the END of the line: after the first
# name-email pair another pair (or any other "junk") may occur, and all of it
# is ignored. A "word" here is a run of non-space bytes. If the last two words
# are not both present, `{0, 0}` is returned.
#
# Returns `nil` when the scan for `>` does not stop on a `>`. The caller only
# checked that a `>` exists somewhere in the list, so a hard match here could
# raise `MatchError` (presumably when a line feed precedes the bracket --
# confirm against `RawParseUtils.next_lf/2`). The caller's `with` turns this
# `nil` into its own `nil` result.
defp parse_tz(first_email_start) do
  case RawParseUtils.next_lf(first_email_start, ?>) do
    [?> | first_email_end] ->
      rev = Enum.reverse(first_email_end)

      # Working on the reversed list, the time zone is the first word and the
      # timestamp is the second.
      {tz, rev} = trim_word_and_rev(rev)
      {time, _rev} = trim_word_and_rev(rev)

      case {time, tz} do
        {[_ | _], [_ | _]} ->
          {time |> RawParseUtils.parse_base_10() |> elem(0),
           tz |> RawParseUtils.parse_timezone_offset() |> elem(0)}

        _ ->
          {0, 0}
      end

    _ ->
      nil
  end
end

# Pops one space-delimited word off the front of a REVERSED byte list.
#
# Leading spaces are skipped first. Returns `{word, rest}` where `word` is in
# forward (un-reversed) order and `rest` is the still-reversed remainder that
# begins at the space ending the word (or `[]`).
defp trim_word_and_rev(rev) do
  without_leading_spaces = Enum.drop_while(rev, fn byte -> byte == ?\s end)

  {reversed_word, remainder} =
    Enum.split_while(without_leading_spaces, fn byte -> byte != ?\s end)

  {Enum.reverse(reversed_word), remainder}
end

@doc ~S"""
Sanitize the given string for use in an identity and append to output.
Expand All @@ -88,7 +177,7 @@ defmodule Xgit.Core.PersonIdent do
@doc ~S"""
Formats a timezone offset.
"""
@spec format_timezone(offset :: Xgit.Lib.Constants.tz_offset()) :: String.t()
@spec format_timezone(offset :: tz_offset()) :: String.t()
def format_timezone(offset) when is_integer(offset) do
sign =
if offset < 0,
Expand Down
76 changes: 1 addition & 75 deletions lib/xgit/util/raw_parse_utils.ex
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ defmodule Xgit.Util.RawParseUtils do
Handy utility functions to parse raw object contents.
"""

alias Xgit.Core.PersonIdent

@doc ~S"""
Does the charlist `b` start with the same characters as `str`?
Expand Down Expand Up @@ -183,7 +181,7 @@ defmodule Xgit.Util.RawParseUtils do
that was found (or 0 if no number found there) and `new_buffer` is the charlist
following the number that was parsed.
"""
@spec parse_timezone_offset(b :: charlist) :: {Xgit.Lib.Constants.tz_offset(), charlist}
@spec parse_timezone_offset(b :: charlist) :: {Xgit.Core.PersonIdent.tz_offset(), charlist}
def parse_timezone_offset(b) when is_list(b) do
{v, b} = parse_base_10(b)

Expand Down Expand Up @@ -359,78 +357,6 @@ defmodule Xgit.Util.RawParseUtils do
# Trims surrounding whitespace from binaries; every other value is passed
# through untouched.
defp trim_if_string(s) do
  if is_binary(s) do
    String.trim(s)
  else
    s
  end
end

@doc ~S"""
Parse a name line (e.g. author, committer, tagger) into a `PersonIdent` struct.

When passing in a charlist for `b`, callers should use the return value of
`author/1` or `committer/1`, as these functions provide the proper subset of
the buffer.

Returns `%PersonIdent{}` or `nil` in case the identity could not be parsed.
"""
# The spec includes `nil`: the `else` branch below returns it whenever the
# line cannot be parsed, so callers must handle that case.
@spec parse_person_ident(b :: charlist) :: Xgit.Core.PersonIdent.t() | nil
def parse_person_ident(b) when is_list(b) do
  with [?< | email_start] <- next_lf(b, ?<),
       true <- has_closing_angle_bracket?(email_start),
       email <- until_next_lf(email_start, ?>),
       name <- parse_name(b),
       {time, tz} <- parse_tz(email_start) do
    %PersonIdent{name: decode(name), email: decode(email), when: time, tz_offset: tz}
  else
    # Could not parse the line as a PersonIdent.
    _ -> nil
  end
end

# True when a `>` occurs anywhere in the charlist `b`.
defp has_closing_angle_bracket?(b) do
  Enum.any?(b, fn char -> char == ?> end)
end

# Returns the characters that precede the opening `<`, minus at most one
# trailing space (the separator between the name and the email).
defp parse_name(b) do
  raw_name = until_next_lf(b, ?<)

  # Reverse so that the trailing space becomes the head of the list, strip it,
  # then restore the original order.
  raw_name
  |> Enum.reverse()
  |> drop_first_if_space()
  |> Enum.reverse()
end

# Removes a single leading space when there is one; any other list is
# returned unchanged.
defp drop_first_if_space(b) do
  case b do
    [?\s | tail] -> tail
    unchanged -> unchanged
  end
end

# Extracts `{time, tz_offset}` from the text that follows the first email
# address.
#
# `first_email_start` is the charlist that begins just after the opening `<`.
#
# The two values are taken from the END of the line: after the first
# name-email pair another pair (or any other "junk") may occur, and all of it
# is ignored. A "word" here is a run of non-space characters. If the last two
# words are not both present, `{0, 0}` is returned.
#
# Returns `nil` when the scan for `>` does not stop on a `>`. The caller only
# checked that a `>` exists somewhere in the list, so a hard match here could
# raise `MatchError` (presumably when a line feed precedes the bracket --
# confirm against `next_lf/2`). The caller's `with` turns this `nil` into its
# own `nil` result.
defp parse_tz(first_email_start) do
  case next_lf(first_email_start, ?>) do
    [?> | first_email_end] ->
      rev = Enum.reverse(first_email_end)

      # Working on the reversed list, the time zone is the first word and the
      # timestamp is the second.
      {tz, rev} = trim_word_and_rev(rev)
      {time, _rev} = trim_word_and_rev(rev)

      case {time, tz} do
        {[_ | _], [_ | _]} ->
          {time |> parse_base_10() |> elem(0), tz |> parse_timezone_offset() |> elem(0)}

        _ ->
          {0, 0}
      end

    _ ->
      nil
  end
end

# Pops one space-delimited word off the front of a REVERSED charlist.
#
# Leading spaces are skipped first. Returns `{word, rest}` where `word` is in
# forward (un-reversed) order and `rest` is the still-reversed remainder that
# begins at the space ending the word (or `[]`).
defp trim_word_and_rev(rev) do
  trimmed = Enum.drop_while(rev, fn char -> char == ?\s end)
  {reversed_word, rest} = Enum.split_while(trimmed, fn char -> char != ?\s end)

  {Enum.reverse(reversed_word), rest}
end

@doc ~S"""
Convert a list of bytes to an Elixir (UTF-8) string when the encoding is not
definitively know. Try parsing as a UTF-8 byte array first, then try ISO-8859-1.
Expand Down
137 changes: 137 additions & 0 deletions test/xgit/core/person_ident_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,143 @@ defmodule Xgit.Core.PersonIdentTest do

alias Xgit.Core.PersonIdent

describe "from_byte_list/1" do
# Parses `line` with `PersonIdent.from_byte_list/1` and asserts that the
# result equals `expected` (a `%PersonIdent{}` or `nil`).
defp assert_person_ident(line, expected) do
  assert expected == PersonIdent.from_byte_list(line)
end

# Well-formed identity lines. Every case expects the timestamp 1234567890 and
# an offset of -420 minutes (written "-0700" in the input).
test "legal cases" do
whxn = 1_234_567_890
tz = -420

assert_person_ident(
'Me <me@example.com> 1234567890 -0700',
%PersonIdent{name: "Me", email: "me@example.com", when: whxn, tz_offset: tz}
)

# Leading whitespace in the name is preserved.
assert_person_ident(
' Me <me@example.com> 1234567890 -0700',
%PersonIdent{name: " Me", email: "me@example.com", when: whxn, tz_offset: tz}
)

assert_person_ident(
'A U Thor <author@example.com> 1234567890 -0700',
%PersonIdent{name: "A U Thor", email: "author@example.com", when: whxn, tz_offset: tz}
)

# No space between the name and the opening bracket.
assert_person_ident(
'A U Thor<author@example.com> 1234567890 -0700',
%PersonIdent{name: "A U Thor", email: "author@example.com", when: whxn, tz_offset: tz}
)

# No space between the closing bracket and the timestamp either.
assert_person_ident(
'A U Thor<author@example.com>1234567890 -0700',
%PersonIdent{name: "A U Thor", email: "author@example.com", when: whxn, tz_offset: tz}
)

# Whitespace inside the brackets is kept as part of the email.
# NOTE(review): the expected name ends with a space, but the input as shown
# here has only one space before `<` (which the parser strips). Presumably the
# literal originally held more spaces -- confirm against the original file.
assert_person_ident(
' A U Thor < author@example.com > 1234567890 -0700',
%PersonIdent{
name: " A U Thor ",
email: " author@example.com ",
when: whxn,
tz_offset: tz
}
)

# Same input and expectation as an earlier case in this test.
assert_person_ident(
'A U Thor<author@example.com>1234567890 -0700',
%PersonIdent{name: "A U Thor", email: "author@example.com", when: whxn, tz_offset: tz}
)
end

# Extra text after the first name-email pair is ignored: the name and email
# come from the first pair, the timestamp and offset from the end of the line.
test "fuzzy cases" do
whxn = 1_234_567_890
tz = -420

# A second identity follows the first one.
assert_person_ident(
'A U Thor <author@example.com>, C O. Miter <comiter@example.com> 1234567890 -0700',
%PersonIdent{name: "A U Thor", email: "author@example.com", when: whxn, tz_offset: tz}
)

# Free-form text follows the first identity.
assert_person_ident(
'A U Thor <author@example.com> and others 1234567890 -0700',
%PersonIdent{name: "A U Thor", email: "author@example.com", when: whxn, tz_offset: tz}
)
end

# Lines with parts missing still parse as long as both angle brackets are
# present: a missing name or email becomes "", and when the timestamp and the
# offset are not both present, `when` and `tz_offset` are both expected to be 0.
test "incomplete cases" do
whxn = 1_234_567_890
tz = -420

# Empty email.
assert_person_ident('Me <> 1234567890 -0700', %PersonIdent{
name: "Me",
email: "",
when: whxn,
tz_offset: tz
})

# Empty name.
assert_person_ident(
' <me@example.com> 1234567890 -0700',
%PersonIdent{name: "", email: "me@example.com", when: whxn, tz_offset: tz}
)

# Empty name and email.
assert_person_ident(' <> 1234567890 -0700', %PersonIdent{
name: "",
email: "",
when: whxn,
tz_offset: tz
})

# From here on, the timestamp and/or the offset is missing.
assert_person_ident('<>', %PersonIdent{name: "", email: "", when: 0, tz_offset: 0})

assert_person_ident(' <>', %PersonIdent{name: "", email: "", when: 0, tz_offset: 0})

assert_person_ident('<me@example.com>', %PersonIdent{
name: "",
email: "me@example.com",
when: 0,
tz_offset: 0
})

assert_person_ident(' <me@example.com>', %PersonIdent{
name: "",
email: "me@example.com",
when: 0,
tz_offset: 0
})

assert_person_ident('Me <>', %PersonIdent{name: "Me", email: "", when: 0, tz_offset: 0})

assert_person_ident('Me <me@example.com>', %PersonIdent{
name: "Me",
email: "me@example.com",
when: 0,
tz_offset: 0
})

# A timestamp with no offset is not used: `when` is expected to be 0.
assert_person_ident('Me <me@example.com> 1234567890', %PersonIdent{
name: "Me",
email: "me@example.com",
when: 0,
tz_offset: 0
})

# Same as above with a trailing space.
assert_person_ident('Me <me@example.com> 1234567890 ', %PersonIdent{
name: "Me",
email: "me@example.com",
when: 0,
tz_offset: 0
})
end

# A line missing either angle bracket is rejected: the parser returns nil.
test "malformed cases" do
# No opening `<`.
assert_person_ident('Me me@example.com> 1234567890 -0700', nil)
# No closing `>`.
assert_person_ident('Me <me@example.com 1234567890 -0700', nil)
end
end

describe "sanitized/1" do
test "strips whitespace and non-parseable characters from raw string" do
assert PersonIdent.sanitized(" Baz>\n\u1234<Quux ") == "Baz\u1234Quux"
Expand Down
Loading

0 comments on commit ee82363

Please sign in to comment.