Skip to content

Commit

Permalink
unit: Add md5 index on source/target/context fields
Browse files Browse the repository at this point in the history
This improves exact string lookups, such as those done in automatic
translation or consistency checks.

Issue #9118
Issue #1994
  • Loading branch information
nijel committed May 19, 2023
1 parent 38caf9a commit dbc170f
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 8 deletions.
5 changes: 4 additions & 1 deletion weblate/trans/autotranslate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.conf import settings
from django.core.exceptions import PermissionDenied
from django.db import transaction
from django.db.models.functions import MD5

from weblate.machinery.models import MACHINERY
from weblate.trans.models import Change, Component, Suggestion, Unit
Expand Down Expand Up @@ -119,7 +120,9 @@ def process_others(self, source: Optional[int]):
translations = {
source: split_plural(target)
for source, state, target in sources.filter(
source__in=self.get_units().values("source")
source__md5__in=self.get_units()
.annotate(source__md5=MD5("source"))
.values("source__md5")
).values_list("source", "state", "target")
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright © Michal Čihař <michal@weblate.org>
#
# SPDX-License-Identifier: GPL-3.0-or-later

# Generated by Django 4.2.1 on 2023-05-18 11:58

import django.db.models.functions.text
from django.db import migrations, models


class Migration(migrations.Migration):
    """Add MD5 expression indexes on the ``unit`` model.

    One index is created for each of the ``source``, ``target`` and
    ``context`` fields, each built from the MD5 database function applied
    to that field.
    """

    dependencies = [("trans", "0168_unit_last_updated_unit_target_hash")]

    # Every operation has the same shape; only the hashed field and the
    # index name vary, so they are listed as (field, index name) pairs.
    operations = [
        migrations.AddIndex(
            model_name="unit",
            index=models.Index(
                django.db.models.functions.text.MD5(hashed_field),
                name=index_name,
            ),
        )
        for hashed_field, index_name in (
            ("source", "trans_unit_source_md5_index"),
            ("target", "trans_unit_target_md5_index"),
            ("context", "trans_unit_context_md5_index"),
        )
    ]
12 changes: 9 additions & 3 deletions weblate/trans/models/unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from django.core.cache import cache
from django.db import Error as DjangoDatabaseError
from django.db import models, transaction
from django.db.models import Count, Max, Q
from django.db.models import Count, Max, Q, Value
from django.db.models.functions import MD5
from django.utils import timezone
from django.utils.functional import cached_property
from django.utils.translation import gettext, gettext_lazy, gettext_noop
Expand Down Expand Up @@ -139,8 +140,8 @@ def same(self, unit, exclude=True):
translation = unit.translation
component = translation.component
result = self.filter(
source=unit.source,
context=unit.context,
source__md5=MD5(Value(unit.source)),
context__md5=MD5(Value(unit.context)),
translation__component__project_id=component.project_id,
translation__language_id=translation.language_id,
translation__component__source_language_id=component.source_language_id,
Expand Down Expand Up @@ -340,6 +341,11 @@ class Meta:
unique_together = [("translation", "id_hash")]
verbose_name = "string"
verbose_name_plural = "strings"
indexes = [
models.Index(MD5("source"), name="trans_unit_source_md5_index"),
models.Index(MD5("target"), name="trans_unit_target_md5_index"),
models.Index(MD5("context"), name="trans_unit_context_md5_index"),
]

def __str__(self):
if self.translation.is_template:
Expand Down
11 changes: 7 additions & 4 deletions weblate/utils/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from django.apps import AppConfig
from django.core.checks import register
from django.db.models import CharField, TextField
from django.db.models.functions import MD5
from django.db.models.lookups import Regex

from weblate.utils.checks import (
Expand Down Expand Up @@ -55,17 +56,19 @@ def ready(self):
init_error_collection()

if using_postgresql():
lookups = (
lookups = [
(PostgreSQLSearchLookup,),
(PostgreSQLSubstringLookup,),
(PostgreSQLRegexLookup, "trgm_regex"),
)
]
else:
lookups = (
lookups = [
(MySQLSearchLookup,),
(MySQLSearchLookup, "substring"),
(Regex, "trgm_regex"),
)
]

lookups.append((MD5,))

for lookup in lookups:
CharField.register_lookup(*lookup)
Expand Down

0 comments on commit dbc170f

Please sign in to comment.