-
Notifications
You must be signed in to change notification settings - Fork 84
/
extractor.ex
420 lines (362 loc) · 14.1 KB
/
extractor.ex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
defmodule Gettext.Extractor do
@moduledoc false
# This module is responsible for extracting messages (it's called from the
# *gettext macros) and dumping those messages to POT files, merging with
# existing POT files if necessary.
#
# ## Ordering
#
# Ordering is mostly taken care of in merge_template/3, where we go over the
# messages in an existing POT file and merge them if necessary (thus
# keeping the order from the original file), then adding the messages from
# the new in-memory POT (sorted by name).
alias Gettext.Error
alias Gettext.ExtractorAgent
alias Gettext.Merger
alias Expo.PO
alias Expo.Message
alias Expo.Messages
@extracted_messages_flag "elixir-autogen"
@doc """
Turns message extraction on.

Delegates to `Gettext.ExtractorAgent.enable/0`.
"""
@spec enable() :: :ok
def enable(), do: ExtractorAgent.enable()
@doc """
Turns message extraction off.

Delegates to `Gettext.ExtractorAgent.disable/0`.
"""
@spec disable() :: :ok
def disable(), do: ExtractorAgent.disable()
@doc """
Tells whether messages are being extracted.

Always returns a boolean: when the extractor agent process is not registered,
the answer is `false`.
"""
@spec extracting?() :: boolean
def extracting?() do
  # Because the extractor agent may not be enabled during compilation
  # time (as it requires the optional Gettext compiler), we need to
  # check if the agent is up and running before querying it.
  # The explicit comparison with `nil` (instead of `pid && ...`) keeps the
  # result a real boolean, as promised by the spec, when the agent is down.
  Process.whereis(ExtractorAgent) != nil and ExtractorAgent.extracting?()
end
@doc """
Records a message found at the caller's location in the extractor agent.

`id` is either a `msgid` binary (singular message) or a
`{msgid, msgid_plural}` tuple (plural message). The format flag attached to
the message comes from the interpolation module configured on `backend`.

Note that this function doesn't perform any operation on the filesystem.
"""
@spec extract(Macro.Env.t(), module, binary, binary, binary | {binary, binary}, [binary]) :: :ok
def extract(
      %Macro.Env{file: file, line: line},
      backend,
      domain,
      msgctxt,
      id,
      extracted_comments
    ) do
  interpolation = backend.__gettext__(:interpolation)
  format_flag = interpolation.message_format()

  message = create_message_struct(id, msgctxt, file, line, extracted_comments, format_flag)

  ExtractorAgent.add_message(backend, domain, message)
end
@doc """
Returns a list of POT files based on the results of the extraction.

The result is a list of `{path, contents}` tuples to be written to disk.
Existing POT files are either purged from obsolete messages (in case no
extracted message ends up in that file) or merged with the extracted
messages; new POT files are returned for extracted messages that belong to a
POT file that doesn't exist yet.

This is a stateful operation. Once the POT files are generated, their
information is permanently removed from the extractor.
"""
@spec pot_files(atom, Keyword.t()) :: [{path :: String.t(), contents :: iodata}]
def pot_files(app, gettext_config) do
  backends = ExtractorAgent.pop_backends(app)
  warn_on_conflicting_backends(backends)

  # List the POT files already on disk before popping the extracted messages.
  pot_paths_on_disk = pot_files_for_backends(backends)
  extracted = ExtractorAgent.pop_message(backends)

  extracted
  |> create_po_structs_from_extracted_messages()
  |> merge_pot_files(pot_paths_on_disk, gettext_config)
end
# Emits a warning for every backend whose :priv directory is already used by
# a backend seen earlier in `backends`, since their POT files would override
# each other. Always returns :ok.
defp warn_on_conflicting_backends(backends) do
  Enum.reduce(backends, %{}, fn backend, seen ->
    priv = backend.__gettext__(:priv)

    case seen do
      %{^priv => other_backend} ->
        IO.warn(
          "the Gettext backend #{inspect(backend)} has the same :priv directory as " <>
            "#{inspect(other_backend)}, which means they will override each other. " <>
            "Please set the :priv option to different directories or use Gettext " <>
            "inside each backend"
        )

        # Keep the accumulator. Returning the result of IO.warn/1 (:ok) here
        # would make the `case` above raise on the next backend.
        seen

      %{} ->
        Map.put(seen, priv, backend)
    end
  end)

  :ok
end
# Lists every .pot file found (at any depth) under the :priv directory of
# each of the given `backends`.
defp pot_files_for_backends(backends) do
  Enum.flat_map(backends, fn backend ->
    pattern = Path.join(backend.__gettext__(:priv), "**/*.pot")
    Path.wildcard(pattern)
  end)
end
# Turns the extracted messages into a list of {pot_path, %Messages{}} tuples,
# one per backend/domain pair. `all_messages` looks like this:
#
#   %{MyBackend => %{"a_domain" => %{"a message id" => a_message}}}
#
defp create_po_structs_from_extracted_messages(all_messages) do
  for {backend, messages_by_domain} <- all_messages,
      {domain, messages_by_id} <- messages_by_domain do
    path = pot_path(backend, domain)
    po = messages_by_id |> Map.values() |> po_struct_from_messages()
    {path, po}
  end
end
# Path of the POT file for `domain` inside the :priv directory of `backend`.
defp pot_path(backend, domain) do
  priv = backend.__gettext__(:priv)
  filename = "#{domain}.pot"
  Path.join(priv, filename)
end
# Wraps `messages` in a %Messages{} struct. Messages are sorted by key and
# the references of each message are sorted too, so that regenerating a PO(T)
# file produces as few changes as possible.
defp po_struct_from_messages(messages) do
  sorted = Enum.sort_by(messages, &Message.key/1)
  %Messages{messages: Enum.map(sorted, &sort_references/1)}
end
# Returns `message` with its `references` list in sorted order.
defp sort_references(message) do
  %{message | references: Enum.sort(message.references)}
end
# Builds the message struct for an extracted message.
#
# A `{msgid, msgid_plural}` id yields a %Message.Plural{} with empty msgstr
# entries for forms 0 and 1; any other id yields a %Message.Singular{} with an
# empty msgstr. Both carry the autogen flag plus `format_flag`, and a single
# reference made of `file` (relative to the current directory) and `line`.
defp create_message_struct(
       {msgid, msgid_plural},
       msgctxt,
       file,
       line,
       extracted_comments,
       format_flag
     ) do
  reference = {Path.relative_to_cwd(file), line}
  context = if is_nil(msgctxt), do: nil, else: [msgctxt]

  %Message.Plural{
    msgid: [msgid],
    msgid_plural: [msgid_plural],
    msgctxt: context,
    msgstr: %{0 => [""], 1 => [""]},
    flags: [[@extracted_messages_flag, format_flag]],
    references: [[reference]],
    extracted_comments: extracted_comments
  }
end

defp create_message_struct(msgid, msgctxt, file, line, extracted_comments, format_flag) do
  reference = {Path.relative_to_cwd(file), line}
  context = if is_nil(msgctxt), do: nil, else: [msgctxt]

  %Message.Singular{
    msgid: [msgid],
    msgctxt: context,
    msgstr: [""],
    flags: [[@extracted_messages_flag, format_flag]],
    references: [[reference]],
    extracted_comments: extracted_comments
  }
end
# Made public for testing.
@doc false
def merge_pot_files(po_structs, pot_files, gettext_config) do
  # `pot_files` is a list of paths to existing .pot files, while `po_structs`
  # is a list of {path, %Messages{}} tuples for what has just been extracted.
  # Keying both by path lets Map.merge/3 tell apart the files to update (path
  # on both sides), the ones only on disk, and the ones to create.
  existing_by_path = Map.new(pot_files, fn path -> {path, :existing} end)
  extracted_by_path = Map.new(po_structs)

  # After Map.merge/3 we have something like:
  #   %{path => {:merged, :unchanged | %Messages{}}, path => %Messages{}, path => :existing}
  merged =
    Map.merge(existing_by_path, extracted_by_path, fn path, existing, extracted ->
      merge_existing_and_extracted(path, existing, extracted, gettext_config)
    end)

  # After tag_files/2 every value is one of {:merged, _}, {:unmerged, _} or
  # {:new, _}. Entries tagged with :unchanged need no dump and are dropped.
  merged
  |> Enum.map(fn entry -> tag_files(entry, gettext_config) end)
  |> Enum.reject(fn entry -> match?({_, {_, :unchanged}}, entry) end)
  |> Enum.map(fn entry -> dump_tagged_file(entry, gettext_config) end)
end
# Conflict resolver handed to Map.merge/3 by merge_pot_files/3: it runs when
# `path` has both an :existing file and a freshly extracted in-memory struct,
# and tags the outcome of merging the two with :merged.
defp merge_existing_and_extracted(path, :existing, extracted, gettext_config) do
  outcome = merge_or_unchanged(path, extracted, gettext_config)
  {:merged, outcome}
end
# Merges the POT file at `existing_path` with `new_po`. Returns :unchanged
# when composing the merged result gives exactly the current file contents,
# otherwise returns the merged %Messages{} struct.
defp merge_or_unchanged(existing_path, new_po, gettext_config) do
  {contents_on_disk, existing_po} = read_contents_and_parse(existing_path)
  merged_po = merge_template(existing_po, new_po, gettext_config)

  case merged_po |> PO.compose() |> IO.iodata_to_binary() do
    ^contents_on_disk -> :unchanged
    _different -> merged_po
  end
end
# Reads the POT file at `path` and returns `{raw_contents, parsed_messages}`.
#
# The file is read only once and the string already in memory is parsed, so
# both elements of the tuple are guaranteed to describe the same bytes (the
# caller compares the raw contents against the re-composed merge result).
# `file: path` is passed along so the parser knows which file the string came
# from. Raises if the file cannot be read or parsed.
defp read_contents_and_parse(path) do
  contents = File.read!(path)
  {contents, PO.parse_string!(contents, file: path)}
end
# "Tags" a {path, _} entry coming out of the Map.merge/3 step so that the
# three possible situations can be told apart:
#
#   * {path, {:merged, _}}   - a file existed and there is a new one too
#   * {path, {:unmerged, _}} - a file existed, nothing new was extracted for it
#   * {path, {:new, _}}      - no file existed, there is a new one
#
# Existing files with no new counterpart are "pruned", that is, merged with
# an empty %Messages{} struct to drop obsolete messages (see
# prune_unmerged/2); they are not simply discarded because the user could
# still have messages that were manually inserted in that file.
defp tag_files({path, value} = entry, gettext_config) do
  case value do
    {:merged, _} -> entry
    :existing -> {path, {:unmerged, prune_unmerged(path, gettext_config)}}
    new_po -> {path, {:new, new_po}}
  end
end
# Turns a tagged entry into a `{path, contents}` tuple; nothing is written to
# disk here. References are pruned through Merger.prune_references/2 in every
# case. New POT files additionally get headers and are prefixed with an
# informative comment; merged and unmerged files are composed as they are.
defp dump_tagged_file({path, {:new, new_po}}, gettext_config) do
  body =
    new_po
    |> Merger.prune_references(gettext_config)
    |> add_headers_to_new_po()
    |> PO.compose()

  {path, [new_pot_comment(), body]}
end

defp dump_tagged_file({path, {tag, po}}, gettext_config) when tag in [:unmerged, :merged] do
  pruned = Merger.prune_references(po, gettext_config)
  {path, PO.compose(pruned)}
end
# Merges the existing file at `path` with an empty set of extracted messages.
# Returns whatever merge_or_unchanged/3 returns for that merge.
defp prune_unmerged(path, gettext_config) do
  nothing_extracted = %Messages{messages: []}
  merge_or_unchanged(path, nothing_extracted, gettext_config)
end
# Returns the explanatory comment block that dump_tagged_file/2 places at the
# top of newly created POT files. The text is emitted verbatim, so it must
# not be reformatted.
defp new_pot_comment() do
"""
## This file is a PO Template file.
##
## "msgid"s here are often extracted from source code.
## Add new message manually only if they're dynamic
## message that can't be statically extracted.
##
## Run "mix gettext.extract" to bring this file up to
## date. Leave "msgstr"s empty as changing them here has no
## effect: edit them in PO (.po) files instead.
"""
end
# Gives a new POT struct a single empty header. Only structs whose `headers`
# list is empty are accepted.
defp add_headers_to_new_po(%Messages{headers: []} = po), do: struct!(po, headers: [""])
# Merges a %Messages{} struct representing an existing POT file with an
# in-memory-only %Messages{} struct representing the new POT file. Headers and
# top comments are taken from the existing file.
# Made public for testing.
@doc false
def merge_template(existing, new, gettext_config) do
  protected_pattern = gettext_config[:excluded_refs_from_purging]

  # Walk the existing messages in order so as to keep the existing order as
  # much as possible.
  kept_or_merged =
    Enum.flat_map(existing.messages, fn old_message ->
      extracted = Messages.find(new, old_message)

      cond do
        # Still extracted: refresh it from the extracted counterpart.
        extracted ->
          [merge_message(old_message, extracted)]

        # No longer extracted: keep it if it is protected or if it was not
        # autogenerated; autogenerated leftovers are dropped below.
        protected?(old_message, protected_pattern) || !autogenerated?(old_message) ->
          [old_message]

        true ->
          []
      end
    end)

  # Drop the messages that also appear in `existing`, leaving only the ones
  # that are unique to `new`.
  only_in_new = Enum.reject(new.messages, fn message -> Messages.find(existing, message) end)
  combined = kept_or_merged ++ only_in_new

  messages =
    if gettext_config[:sort_by_msgid] do
      Enum.sort_by(combined, fn message -> message.msgid end)
    else
      combined
    end

  %Messages{
    headers: existing.headers,
    top_comments: existing.top_comments,
    messages: messages
  }
end
# Merges an existing message (`old`, from the POT file) with its freshly
# extracted counterpart (`new`, which has no comments since it comes from the
# source code). Both must have an empty msgstr, otherwise Gettext.Error is
# raised by ensure_empty_msgstr!/1.
#
# The result keeps the msgid(s), msgstr and comments of `old`, and takes the
# msgctxt, references and extracted comments from `new`: the in-memory message
# has all the actual and current references, so the old ones don't matter.
# Flags are computed by merge_flags/2 for both singular and plural messages.
defp merge_message(
       %Message.Singular{} = old,
       %Message.Singular{comments: []} = new
     ) do
  ensure_empty_msgstr!(old)
  ensure_empty_msgstr!(new)

  %Message.Singular{
    msgid: old.msgid,
    msgstr: old.msgstr,
    msgctxt: new.msgctxt,
    flags: merge_flags(old, new),
    comments: old.comments,
    references: new.references,
    extracted_comments: new.extracted_comments
  }
end

defp merge_message(%Message.Plural{} = old, %Message.Plural{comments: []} = new) do
  ensure_empty_msgstr!(old)
  ensure_empty_msgstr!(new)

  %Message.Plural{
    msgid: old.msgid,
    msgctxt: new.msgctxt,
    msgid_plural: old.msgid_plural,
    msgstr: old.msgstr,
    # Same flag handling as singular messages: without it a re-extracted
    # plural message would never be marked as autogenerated again.
    flags: merge_flags(old, new),
    comments: old.comments,
    references: new.references,
    extracted_comments: new.extracted_comments
  }
end

# Takes all flags from `old` and only the `@extracted_messages_flag` flag from
# `new`, to avoid re-adding manually removed flags.
defp merge_flags(old, new) do
  if Message.has_flag?(new, @extracted_messages_flag) do
    Message.append_flag(old, @extracted_messages_flag).flags
  else
    old.flags
  end
end
# Raises Gettext.Error unless the msgstr of `message` is blank, and returns
# nil otherwise. For plural messages the msgstr must have both the 0 and 1
# forms and both must be blank; a plural message with any other msgstr shape
# raises as well.
defp ensure_empty_msgstr!(%Message.Singular{msgstr: msgstr} = message) do
  if not blank?(msgstr) do
    raise Error,
          "message with msgid '#{IO.iodata_to_binary(message.msgid)}' has a non-empty msgstr"
  end
end

defp ensure_empty_msgstr!(%Message.Plural{msgstr: %{0 => form0, 1 => form1}} = message) do
  if blank?(form0) and blank?(form1) do
    nil
  else
    raise Error,
          "plural message with msgid '#{IO.iodata_to_binary(message.msgid)}' has a non-empty msgstr"
  end
end

defp ensure_empty_msgstr!(%Message.Plural{} = message) do
  raise Error,
        "plural message with msgid '#{IO.iodata_to_binary(message.msgid)}' has a non-empty msgstr"
end
# A msgstr is blank when it is nil or when it is iodata of length zero.
defp blank?(nil), do: true
defp blank?(iodata), do: IO.iodata_length(iodata) == 0
# Tells whether `message` carries the flag that extraction puts on the
# messages it creates. The flag name comes from @extracted_messages_flag (the
# attribute used when messages are built and merged) rather than from a
# repeated string literal, so the two can never drift apart.
@spec autogenerated?(message :: Message.t()) :: boolean
defp autogenerated?(message) do
  Message.has_flag?(message, @extracted_messages_flag)
end
# A message that is protected from purging will never be removed by Gettext.
# Which messages are protected can be configured using Mix: the pattern is
# read from the :excluded_refs_from_purging option in merge_template/3.
#
# Returns true when any reference of the message has a path matching
# `pattern`, and false when there is no pattern or there are no references.
@spec protected?(message :: Message.t(), protected_pattern :: Regex.t() | nil) :: boolean
defp protected?(_message, nil), do: false

defp protected?(%{references: refs}, pattern) do
  # References are stored as a list of lists of `{path, line}` tuples (see
  # create_message_struct/6), so they are flattened before being inspected.
  # NOTE(review): a bare path without a line number is accepted too, on the
  # assumption that the PO parser can produce such references - confirm.
  refs
  |> List.flatten()
  |> Enum.any?(fn
    {path, _line} -> Regex.match?(pattern, path)
    path when is_binary(path) -> Regex.match?(pattern, path)
  end)
end
end