/
base.py
718 lines (589 loc) · 21.3 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
#
# Copyright © 2012–2022 Michal Čihař <michal@cihar.com>
#
# This file is part of Weblate <https://weblate.org/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""Base classes for file formats."""
import os
import tempfile
from copy import deepcopy
from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
from django.utils.functional import cached_property
from django.utils.translation import gettext as _
from weblate_language_data.countries import DEFAULT_LANGS
from weblate.trans.util import get_string
from weblate.utils.errors import add_breadcrumb
from weblate.utils.hash import calculate_hash
from weblate.utils.state import STATE_TRANSLATED
# Maps the first two characters of every DEFAULT_LANGS entry to an expanded
# "ll_CC" style code (used by get_language_posix_long).
# NOTE(review): the slicing assumes entries shaped like "ll_cc", i.e. a
# two-letter language, one separator character, then the country -- confirm
# against weblate_language_data.
EXPAND_LANGS = {
    locale[:2]: "{}_{}".format(locale[:2], locale[3:].upper())
    for locale in DEFAULT_LANGS
}

# Codes that get_language_android replaces verbatim before any other handling.
ANDROID_CODES = {"he": "iw", "id": "in", "yi": "ji"}

# Script based Chinese codes and the country based codes used instead of them
# by get_language_linux.
LEGACY_CODES = {
    "zh_Hans": "zh_CN",
    "zh_Hant": "zh_TW",
    "zh_Hans_SG": "zh_SG",
    "zh_Hant_HK": "zh_HK",
}

# Bare language codes that get_language_appstore expands to a region variant.
APPSTORE_CODES = {
    "ar": "ar-SA",
    "de": "de-DE",
    "fr": "fr-FR",
    "nl": "nl-NL",
    "pt": "pt-PT",
}
class UnitNotFound(Exception):
    """Raised when a unit lookup does not match anything in the store.

    The positional arguments are the lookup keys (this module passes the
    context and the source); ``__str__`` renders them into the message.
    """

    def __str__(self):
        """Return a message listing the lookup keys that carry a value.

        Empty strings and ``None`` are left out. Every remaining key goes
        through ``str()`` because ``str.join`` rejects non-string items, and
        the keys are not guaranteed to be strings (``find_unit`` defaults
        ``source`` to ``None``).
        """
        parts = [str(arg) for arg in self.args if arg is not None and arg != ""]
        return "Unit not found: {}".format(", ".join(parts))
class UpdateError(Exception):
    """Exception that keeps a ``cmd`` value next to the ``output`` it reports."""

    def __init__(self, cmd, output):
        """Use ``output`` as the exception message and remember both values."""
        super().__init__(output)
        self.cmd, self.output = cmd, output
class TranslationUnit:
    """Uniform wrapper around a single unit of a translation store.

    It hides the differences between plain units and ID/template based ones
    (and other backend API differences) behind one interface.
    """

    # When true, the source string is hashed into id_hash even if a template
    # is in use (see calculate_id_hash).
    id_hash_with_source: bool = False

    def __init__(self, parent, unit, template=None):
        """Wrap ``unit`` and, optionally, its ``template`` unit for ``parent``."""
        self.parent = parent
        self.unit = unit
        self.template = template
        # A template unit, when given, takes precedence as the main unit.
        self.mainunit = unit if template is None else template

    def _invalidate_target(self):
        """Forget the cached ``target`` so the next access computes it again."""
        self.__dict__.pop("target", None)

    @cached_property
    def locations(self):
        """Comma separated list of locations; empty in the base class."""
        return ""

    @cached_property
    def flags(self):
        """Flags or typecomments of the unit; empty in the base class."""
        return ""

    @cached_property
    def notes(self):
        """Notes attached to the unit; empty in the base class."""
        return ""

    @cached_property
    def source(self):
        """Source string of the unit; subclasses have to provide it."""
        raise NotImplementedError()

    @cached_property
    def target(self):
        """Target string of the unit; subclasses have to provide it."""
        raise NotImplementedError()

    @cached_property
    def context(self):
        """Context of the message; subclasses have to provide it.

        Some backends have to use the ID here to stay consistent with others.
        """
        raise NotImplementedError()

    @cached_property
    def previous_source(self):
        """Previous source of the message; empty in the base class."""
        return ""

    @classmethod
    def calculate_id_hash(cls, has_template: bool, source: str, context: str):
        """Return the hash identifying a string, used for quick lookup.

        Only the context is hashed when a template is in use and the class
        does not set ``id_hash_with_source``; otherwise source and context are
        hashed together.
        """
        if has_template and not cls.id_hash_with_source:
            return calculate_hash(context)
        return calculate_hash(source, context)

    @cached_property
    def id_hash(self):
        """Hash of this unit as computed by ``calculate_id_hash``."""
        uses_template = self.template is not None
        return self.calculate_id_hash(uses_template, self.source, self.context)

    def is_translated(self):
        """Tell whether the unit has a non-empty target."""
        return True if self.target else False

    def is_approved(self, fallback=False):
        """Tell whether the unit is approved; the base class echoes ``fallback``."""
        return fallback

    def is_fuzzy(self, fallback=False):
        """Tell whether the unit needs editing; the base class echoes ``fallback``."""
        return fallback

    def has_content(self):
        """Tell whether the unit has content; always true in the base class."""
        return True

    def is_readonly(self):
        """Tell whether the unit is read only; never true in the base class."""
        return False

    def set_target(self, target: Union[str, List[str]]):
        """Store a new target in the unit; subclasses have to provide it."""
        raise NotImplementedError()

    def set_state(self, state):
        """Store the fuzzy/approved state in the unit; subclasses provide it."""
        raise NotImplementedError()

    def has_unit(self) -> bool:
        """Tell whether an actual unit (not only a template) is wrapped."""
        return self.unit is not None

    def clone_template(self):
        """Make a deep copy of the template the wrapped unit and main unit."""
        duplicate = deepcopy(self.template)
        self.mainunit = duplicate
        self.unit = duplicate
class TranslationFormat:
    """Generic object defining file format loader.

    The base class provides unit wrapping, id_hash based indexes and lookup,
    language code formatting and helpers for saving and cleaning up a store.
    Everything touching the actual storage (``load``, ``save``, ``add_unit``,
    ``create_unit``, ``delete_unit``, ``create_new_file``, ...) raises
    ``NotImplementedError`` here and has to be supplied by subclasses.
    """

    name: str = ""
    format_id: str = ""
    # None is treated like True by has_template.
    monolingual: Optional[bool] = None
    check_flags: Tuple[str, ...] = ()
    # Wrapper class instantiated for every unit of the store.
    unit_class: Type[TranslationUnit] = TranslationUnit
    autoload: Tuple[str, ...] = ()
    can_add_unit: bool = True
    # Selects the get_language_<language_format> method in get_language_code.
    language_format: str = "posix"
    simple_filename: bool = True
    new_translation: Optional[Union[str, bytes]] = None
    autoaddon: Dict[str, Dict[str, str]] = {}
    create_empty_bilingual: bool = False
    bilingual_class = None
    create_style = "create"
    has_multiple_strings: bool = False
    # Plural sources tried in this order by get_plural.
    plural_preference: Optional[Tuple[int, ...]] = None

    @classmethod
    def get_identifier(cls):
        """Return the identifier of the format (``format_id``)."""
        return cls.format_id

    @classmethod
    def parse(
        cls,
        storefile,
        template_store=None,
        language_code: Optional[str] = None,
        source_language: Optional[str] = None,
        is_template: bool = False,
    ):
        """Parse store and returns TranslationFormat instance.

        This wrapper is needed for AutodetectFormat to be able to return instance of
        different class.
        """
        return cls(
            storefile,
            template_store=template_store,
            language_code=language_code,
            source_language=source_language,
            is_template=is_template,
        )

    def __init__(
        self,
        storefile,
        template_store=None,
        language_code: Optional[str] = None,
        source_language: Optional[str] = None,
        is_template: bool = False,
    ):
        """Create file format object, wrapping up translate-toolkit's store.

        ``storefile`` is either a file name or a file-like object; the store
        is loaded right away through ``load``.
        """
        # File-like objects lacking a mode attribute get a read mode assigned.
        if not isinstance(storefile, str) and not hasattr(storefile, "mode"):
            storefile.mode = "r"

        self.storefile = storefile
        self.language_code = language_code
        self.source_language = source_language

        # Remember template
        self.template_store = template_store
        self.is_template = is_template

        # Load store
        self.store = self.load(storefile, template_store)

        self.add_breadcrumb(
            "Loaded translation file {}".format(
                getattr(storefile, "filename", storefile)
            ),
            template_store=str(template_store),
            is_template=is_template,
        )

    def check_valid(self):
        """Check store validity, raising ValueError when ``is_valid`` fails."""
        if not self.is_valid():
            raise ValueError(
                _("Failed to load strings from the file, try choosing other format.")
            )

    def get_filenames(self):
        """Return a list with the name of the underlying file."""
        if isinstance(self.storefile, str):
            return [self.storefile]
        return [self.storefile.name]

    def load(self, storefile, template_store):
        """Load the backend store; has to be implemented by subclasses."""
        raise NotImplementedError()

    @classmethod
    def get_plural(cls, language, store=None):
        """Return matching plural object.

        The first plural of ``language`` whose source matches an entry of
        ``plural_preference`` (in preference order) wins; without a match, or
        without any preference, ``language.plural`` is returned. ``store`` is
        not used by this implementation.
        """
        if cls.plural_preference is not None:
            # Fetch all matching plurals
            plurals = language.plural_set.filter(source__in=cls.plural_preference)

            # Use first matching in the order of preference
            for source in cls.plural_preference:
                for plural in plurals:
                    if plural.source == source:
                        return plural

        # Fall back to default one
        return language.plural

    @cached_property
    def has_template(self):
        """Check whether class is using template."""
        return (
            self.monolingual or self.monolingual is None
        ) and self.template_store is not None

    @cached_property
    def _template_index(self):
        """ID based index for units."""
        return {unit.id_hash: unit for unit in self.template_units}

    def find_unit_template(
        self, context: str, source: str, id_hash: Optional[int] = None
    ) -> Optional[Any]:
        """Return the backend unit indexed under the hash, or None.

        The hash is calculated from ``context`` and ``source`` unless
        ``id_hash`` is passed in.
        """
        if id_hash is None:
            id_hash = self._calculate_string_hash(context, source)

        try:
            # The mono units always have only template set
            return self._template_index[id_hash].template
        except KeyError:
            return None

    def _find_unit_monolingual(self, context: str, source: str) -> Tuple[Any, bool]:
        """Find a unit in a template based store.

        Returns ``(unit, add)``; ``add`` is true when the unit had no backend
        unit yet and a copy of its template was made for it. Raises
        UnitNotFound when nothing matches.
        """
        # We search by ID when using template
        id_hash = self._calculate_string_hash(context, source)

        try:
            result = self._unit_index[id_hash]
        except KeyError:
            raise UnitNotFound(context, source)

        add = False
        if not result.has_unit():
            # We always need copy of template unit to translate
            result.clone_template()
            add = True
        return result, add

    @cached_property
    def _unit_index(self):
        """Context and source based index for units."""
        return {unit.id_hash: unit for unit in self.content_units}

    def _calculate_string_hash(self, context: str, source: str) -> int:
        """Calculates id hash for a string."""
        return self.unit_class.calculate_id_hash(
            self.has_template or self.is_template, get_string(source), context
        )

    def _find_unit_bilingual(self, context: str, source: str) -> Tuple[Any, bool]:
        """Find a unit in a store without template.

        Returns ``(unit, False)`` or raises UnitNotFound.
        """
        id_hash = self._calculate_string_hash(context, source)
        try:
            return (self._unit_index[id_hash], False)
        except KeyError:
            raise UnitNotFound(context, source)

    def find_unit(self, context: str, source: Optional[str] = None) -> Tuple[Any, bool]:
        """Find unit by context and source.

        Returns tuple (ttkit_unit, created) indicating whether returned unit is new one.
        """
        if self.has_template:
            return self._find_unit_monolingual(context, source)
        return self._find_unit_bilingual(context, source)

    def add_unit(self, ttkit_unit):
        """Add new unit to underlying store."""
        raise NotImplementedError()

    def update_header(self, **kwargs):
        """Update store header if available."""
        return

    def save_atomic(self, filename, callback):
        """Write ``filename`` through a temporary file in the same directory.

        ``callback`` receives the open temporary file and writes the content;
        the temporary file then replaces ``filename``. Whenever anything
        fails, the temporary file is removed and the exception propagates.
        """
        dirname, basename = os.path.split(filename)
        # A bare file name has an empty dirname: there is nothing to create
        # and os.makedirs("") would raise. exist_ok avoids failing when the
        # directory shows up between a check and the creation.
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        temp = tempfile.NamedTemporaryFile(prefix=basename, dir=dirname, delete=False)
        try:
            callback(temp)
            temp.close()
            os.replace(temp.name, filename)
        finally:
            # No-op after the close above; releases the handle when callback
            # raised before it was reached.
            temp.close()
            if os.path.exists(temp.name):
                os.unlink(temp.name)

    def save(self):
        """Save underlying store to disk."""
        raise NotImplementedError()

    @property
    def all_store_units(self):
        """Wrapper for all store units for possible filtering."""
        return self.store.units

    @cached_property
    def template_units(self):
        """All store units wrapped with only the template set."""
        return [self.unit_class(self, None, unit) for unit in self.all_store_units]

    def _get_all_bilingual_units(self):
        """Wrap every store unit as a plain (template-less) unit."""
        return [self.unit_class(self, unit) for unit in self.all_store_units]

    def _build_monolingual_unit(self, unit):
        """Pair a template store unit with the matching unit of this store.

        The own unit is looked up by the id_hash of ``unit`` and is None when
        this store does not contain it.
        """
        return self.unit_class(
            self,
            self.find_unit_template(unit.context, unit.source, unit.id_hash),
            unit.template,
        )

    def _get_all_monolingual_units(self):
        """Build one unit for every template unit of the template store."""
        return [
            self._build_monolingual_unit(unit)
            for unit in self.template_store.template_units
        ]

    @cached_property
    def all_units(self):
        """List of all units."""
        if not self.has_template:
            return self._get_all_bilingual_units()
        return self._get_all_monolingual_units()

    @property
    def content_units(self):
        """Units from ``all_units`` which report having content."""
        return [unit for unit in self.all_units if unit.has_content()]

    @staticmethod
    def mimetype():
        """Return most common mime type for format."""
        return "text/plain"

    @staticmethod
    def extension():
        """Return most common file extension for format."""
        return "txt"

    def is_valid(self):
        """Check whether store seems to be valid.

        Returns True, or raises ValueError when two content units share
        the same id_hash.
        """
        # Make sure we do not have a collision in id_hash
        hashes = set()
        for unit in self.content_units:
            if unit.id_hash in hashes:
                raise ValueError(f"Duplicate id_hash for unit {unit.unit}")
            hashes.add(unit.id_hash)
        return True

    @classmethod
    def is_valid_base_for_new(
        cls,
        base: str,
        monolingual: bool,
        errors: Optional[List] = None,
        fast: bool = False,
    ) -> bool:
        """Check whether base is valid."""
        raise NotImplementedError()

    @classmethod
    def get_language_code(cls, code: str, language_format: Optional[str] = None) -> str:
        """Do any possible formatting needed for language code.

        Dispatches to ``get_language_<language_format>``; the class level
        ``language_format`` is used when none is passed.
        """
        if not language_format:
            language_format = cls.language_format
        return getattr(cls, f"get_language_{language_format}")(code)

    @staticmethod
    def get_language_posix(code: str) -> str:
        """Return the code unchanged."""
        return code

    @staticmethod
    def get_language_bcp(code: str) -> str:
        """Return the code with underscores replaced by dashes."""
        return code.replace("_", "-")

    @staticmethod
    def get_language_posix_long(code: str) -> str:
        """Return the EXPAND_LANGS expansion of the code, if there is one."""
        return EXPAND_LANGS.get(code, code)

    @staticmethod
    def get_language_linux(code: str) -> str:
        """Linux doesn't use Hans/Hant, but rather TW/CN variants."""
        return LEGACY_CODES.get(code, code)

    @classmethod
    def get_language_bcp_long(cls, code: str) -> str:
        """Return the expanded code (see posix_long) with dashes."""
        return cls.get_language_bcp(cls.get_language_posix_long(code))

    @classmethod
    def get_language_android(cls, code: str) -> str:
        """Android doesn't use Hans/Hant, but rather TW/CN variants.

        Codes listed in ANDROID_CODES are replaced directly. Otherwise the
        linux style code is used: with a second part longer than two
        characters it becomes ``b+`` followed by the parts joined by ``+``,
        else the underscore is replaced by ``-r``.
        """
        # Exceptions
        if code in ANDROID_CODES:
            return ANDROID_CODES[code]

        # Base on Java
        sanitized = cls.get_language_linux(code)

        # Handle variants
        if "_" in sanitized and len(sanitized.split("_")[1]) > 2:
            return "b+{}".format(sanitized.replace("_", "+"))

        # Handle countries
        return sanitized.replace("_", "-r")

    @classmethod
    def get_language_bcp_legacy(cls, code: str) -> str:
        """BCP, but doesn't use Hans/Hant, but rather TW/CN variants."""
        return cls.get_language_bcp(cls.get_language_linux(code))

    @classmethod
    def get_language_appstore(cls, code: str) -> str:
        """App store language codes."""
        return cls.get_language_bcp(APPSTORE_CODES.get(code, code))

    @classmethod
    def get_language_filename(cls, mask: str, code: str) -> str:
        """
        Returns full filename of a language file.

        Every ``*`` in the file mask is replaced by the language code.
        """
        return mask.replace("*", code)

    @classmethod
    def add_language(
        cls,
        filename: str,
        language: str,
        base: str,
        callback: Optional[Callable] = None,
    ):
        """Add new language file."""
        # Create directory for a translation. A bare file name has no
        # directory part and os.makedirs("") would raise, so skip it then.
        dirname = os.path.dirname(filename)
        if dirname:
            os.makedirs(dirname, exist_ok=True)

        cls.create_new_file(filename, language, base, callback)

    @classmethod
    def get_new_file_content(cls):
        """Return content for a new file; the base class has none."""
        return None

    @classmethod
    def create_new_file(
        cls,
        filename: str,
        language: str,
        base: str,
        callback: Optional[Callable] = None,
    ):
        """Handle creation of new translation file."""
        raise NotImplementedError()

    def iterate_merge(self, fuzzy: str, only_translated: bool = True):
        """Iterate over units for merging.

        Yields ``(set_fuzzy, unit)`` pairs. Fuzzy units are skipped when
        ``fuzzy`` is empty; non-fuzzy units without translation are skipped
        when ``only_translated`` is set. ``set_fuzzy`` is true for units which
        were fuzzy, unless ``fuzzy`` is ``"approve"``.

        Note: This can change fuzzy state of units!
        """
        for unit in self.content_units:
            # Skip fuzzy (if asked for that)
            if unit.is_fuzzy():
                if not fuzzy:
                    continue
            elif only_translated and not unit.is_translated():
                continue

            # Unmark unit as fuzzy (to allow merge)
            set_fuzzy = False
            if fuzzy and unit.is_fuzzy():
                unit.set_state(STATE_TRANSLATED)
                if fuzzy != "approve":
                    set_fuzzy = True

            yield set_fuzzy, unit

    def create_unit(
        self,
        key: str,
        source: Union[str, List[str]],
        target: Optional[Union[str, List[str]]] = None,
    ):
        """Create a backend unit; has to be implemented by subclasses."""
        raise NotImplementedError()

    def new_unit(
        self,
        key: str,
        source: Union[str, List[str]],
        target: Optional[Union[str, List[str]]] = None,
        skip_build: bool = False,
    ):
        """Add new unit to monolingual store.

        Returns the wrapped unit, or None when ``skip_build`` is set. Already
        populated unit caches and indexes are extended with the new unit.
        """
        # Create backend unit object
        unit = self.create_unit(key, source, target)

        # Add it to the file
        self.add_unit(unit)

        if skip_build:
            return None

        # Build an unit object
        if self.has_template:
            if self.is_template:
                template_unit = unit
            else:
                # NOTE(review): _find_unit_monolingual returns a (unit, add)
                # tuple, so the tuple itself ends up as the template here --
                # confirm which unit was meant to be used.
                template_unit = self._find_unit_monolingual(key, source)
        else:
            template_unit = None
        result = self.unit_class(self, unit, template_unit)
        mono_unit = self.unit_class(self, None, unit)

        # Update cached lookups
        if "all_units" in self.__dict__:
            self.all_units.append(result)
        if "template_units" in self.__dict__:
            self.template_units.append(mono_unit)
        if "_unit_index" in self.__dict__:
            self._unit_index[result.id_hash] = result
        if "_template_index" in self.__dict__:
            self._template_index[mono_unit.id_hash] = mono_unit

        return result

    @classmethod
    def get_class(cls):
        """Return the backend class; has to be implemented by subclasses."""
        raise NotImplementedError()

    @classmethod
    def add_breadcrumb(cls, message, **data):
        """Record a breadcrumb in the "storage" category."""
        add_breadcrumb(category="storage", message=message, **data)

    def delete_unit(self, ttkit_unit) -> Optional[str]:
        """Delete a backend unit; has to be implemented by subclasses.

        The callers in this class collect a non-None return value as an
        additional changed file and save the store when None is returned.
        """
        raise NotImplementedError()

    def cleanup_unused(self) -> List[str]:
        """Removes unused strings, returning list of additional changed files."""
        if not self.template_store:
            return []
        existing = {unit.context for unit in self.template_store.template_units}
        changed = False

        result = []

        # Iterate over a snapshot: delete_unit may remove units from the very
        # list being walked, which would make the loop skip entries.
        for ttkit_unit in list(self.all_store_units):
            if self.unit_class(self, ttkit_unit, ttkit_unit).context not in existing:
                item = self.delete_unit(ttkit_unit)
                if item is not None:
                    result.append(item)
                else:
                    changed = True

        if changed:
            self.save()
        return result

    def cleanup_blank(self) -> List[str]:
        """
        Removes strings without translations.

        Returning list of additional changed files.
        """
        changed = False

        result = []

        # Iterate over a snapshot: delete_unit may remove units from the very
        # list being walked, which would make the loop skip entries.
        for ttkit_unit in list(self.all_store_units):
            target = self.unit_class(self, ttkit_unit, ttkit_unit).target
            if not target or (isinstance(target, list) and not any(target)):
                item = self.delete_unit(ttkit_unit)
                if item is not None:
                    result.append(item)
                else:
                    changed = True

        if changed:
            self.save()
        return result

    def remove_unit(self, ttkit_unit) -> List[str]:
        """High level wrapper for unit removal.

        Returns the list of additional changed files; the store is saved when
        ``delete_unit`` reports none.
        """
        item = self.delete_unit(ttkit_unit)
        if item is not None:
            return [item]
        self.save()
        return []

    @staticmethod
    def validate_context(context: str):
        """Validate a context; the base class accepts anything."""
        return
class EmptyFormat(TranslationFormat):
    """Format with an always empty store, meant for tests."""

    @classmethod
    def load(cls, storefile, template_store):
        """Return a fresh anonymous object whose ``units`` is an empty list."""
        store_type = type("", (object,), {"units": []})
        return store_type()

    def save(self):
        """Do nothing; there is nothing to write."""
        return None
class BilingualUpdateMixin:
    """Mixin adding an in-place update of a file based on a template."""

    @classmethod
    def do_bilingual_update(cls, in_file: str, out_file: str, template: str, **kwargs):
        """Produce ``out_file`` from ``in_file`` and ``template``.

        Has to be implemented by the class using the mixin.
        """
        raise NotImplementedError()

    @classmethod
    def update_bilingual(cls, filename: str, template: str, **kwargs):
        """Update ``filename`` in place using ``do_bilingual_update``.

        The result is written to a temporary file next to ``filename`` which
        then replaces it. When the update raises, ``filename`` is left as it
        was, the temporary file is removed and the exception propagates.
        """
        temp = tempfile.NamedTemporaryFile(
            # Only the base name may go into the prefix: the prefix is joined
            # onto ``dir``, so a relative path such as "po/cs.po" would
            # otherwise resolve to the non-existent "po/po/cs.po...".
            prefix=os.path.basename(filename),
            dir=os.path.dirname(filename),
            delete=False,
        )
        # Only the reserved name is needed; the update writes the file itself.
        temp.close()
        try:
            cls.do_bilingual_update(filename, temp.name, template, **kwargs)
            os.replace(temp.name, filename)
        finally:
            if os.path.exists(temp.name):
                os.unlink(temp.name)