From dbc170fb7549396d8118a65a693447ca4efca4f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Thu, 18 May 2023 12:45:42 +0200 Subject: [PATCH] unit: Add md5 index on source/target/context fields This improves exact string lookups, such as those done in automatic translation or consistency checks. Issue #9118 Issue #1994 --- weblate/trans/autotranslate.py | 5 ++- ...it_trans_unit_source_md5_index_and_more.py | 38 +++++++++++++++++++ weblate/trans/models/unit.py | 12 ++++-- weblate/utils/apps.py | 11 ++++-- 4 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 weblate/trans/migrations/0169_unit_trans_unit_source_md5_index_and_more.py diff --git a/weblate/trans/autotranslate.py b/weblate/trans/autotranslate.py index 68904895f205..2cec8f413c79 100644 --- a/weblate/trans/autotranslate.py +++ b/weblate/trans/autotranslate.py @@ -8,6 +8,7 @@ from django.conf import settings from django.core.exceptions import PermissionDenied from django.db import transaction +from django.db.models.functions import MD5 from weblate.machinery.models import MACHINERY from weblate.trans.models import Change, Component, Suggestion, Unit @@ -119,7 +120,9 @@ def process_others(self, source: Optional[int]): translations = { source: split_plural(target) for source, state, target in sources.filter( - source__in=self.get_units().values("source") + source__md5__in=self.get_units() + .annotate(source__md5=MD5("source")) + .values("source__md5") ).values_list("source", "state", "target") } diff --git a/weblate/trans/migrations/0169_unit_trans_unit_source_md5_index_and_more.py b/weblate/trans/migrations/0169_unit_trans_unit_source_md5_index_and_more.py new file mode 100644 index 000000000000..261a5c896aa4 --- /dev/null +++ b/weblate/trans/migrations/0169_unit_trans_unit_source_md5_index_and_more.py @@ -0,0 +1,38 @@ +# Copyright © Michal Čihař +# +# SPDX-License-Identifier: GPL-3.0-or-later + +# Generated by Django 4.2.1 on 2023-05-18 11:58 + +import 
django.db.models.functions.text +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("trans", "0168_unit_last_updated_unit_target_hash"), + ] + + operations = [ + migrations.AddIndex( + model_name="unit", + index=models.Index( + django.db.models.functions.text.MD5("source"), + name="trans_unit_source_md5_index", + ), + ), + migrations.AddIndex( + model_name="unit", + index=models.Index( + django.db.models.functions.text.MD5("target"), + name="trans_unit_target_md5_index", + ), + ), + migrations.AddIndex( + model_name="unit", + index=models.Index( + django.db.models.functions.text.MD5("context"), + name="trans_unit_context_md5_index", + ), + ), + ] diff --git a/weblate/trans/models/unit.py b/weblate/trans/models/unit.py index f8add4764f72..187565c25a78 100644 --- a/weblate/trans/models/unit.py +++ b/weblate/trans/models/unit.py @@ -9,7 +9,8 @@ from django.core.cache import cache from django.db import Error as DjangoDatabaseError from django.db import models, transaction -from django.db.models import Count, Max, Q +from django.db.models import Count, Max, Q, Value +from django.db.models.functions import MD5 from django.utils import timezone from django.utils.functional import cached_property from django.utils.translation import gettext, gettext_lazy, gettext_noop @@ -139,8 +140,8 @@ def same(self, unit, exclude=True): translation = unit.translation component = translation.component result = self.filter( - source=unit.source, - context=unit.context, + source__md5=MD5(Value(unit.source)), + context__md5=MD5(Value(unit.context)), translation__component__project_id=component.project_id, translation__language_id=translation.language_id, translation__component__source_language_id=component.source_language_id, @@ -340,6 +341,11 @@ class Meta: unique_together = [("translation", "id_hash")] verbose_name = "string" verbose_name_plural = "strings" + indexes = [ + models.Index(MD5("source"), 
name="trans_unit_source_md5_index"), + models.Index(MD5("target"), name="trans_unit_target_md5_index"), + models.Index(MD5("context"), name="trans_unit_context_md5_index"), + ] def __str__(self): if self.translation.is_template: diff --git a/weblate/utils/apps.py b/weblate/utils/apps.py index aa2d9a9e8c41..1491ac819313 100644 --- a/weblate/utils/apps.py +++ b/weblate/utils/apps.py @@ -5,6 +5,7 @@ from django.apps import AppConfig from django.core.checks import register from django.db.models import CharField, TextField +from django.db.models.functions import MD5 from django.db.models.lookups import Regex from weblate.utils.checks import ( @@ -55,17 +56,19 @@ def ready(self): init_error_collection() if using_postgresql(): - lookups = ( + lookups = [ (PostgreSQLSearchLookup,), (PostgreSQLSubstringLookup,), (PostgreSQLRegexLookup, "trgm_regex"), - ) + ] else: - lookups = ( + lookups = [ (MySQLSearchLookup,), (MySQLSearchLookup, "substring"), (Regex, "trgm_regex"), - ) + ] + + lookups.append((MD5,)) for lookup in lookups: CharField.register_lookup(*lookup)