From 421097ab7dbd592790ea702b400d680bbc2df40d Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Mon, 27 Jan 2025 14:28:41 +0100 Subject: [PATCH] Add missing indices relevant for Repo Deletion There are a few very slow `DELETE` queries as part of repo deletion. The worst of those is deleting the newly created "shadow owner", which takes >1 minute. Running all the `on_delete` hooks using django (which has since been fixed) has revealed that the `SET NULL` of `Commit.author` is the culprit, as that column did not have any index previously. That is similar to the `SET NULL` on `Repository.fork`, except that query is not nearly as slow, probably because far fewer records need to be scanned. --- While at it, this also fixes a django warning related to the `default` not being a callable, and aligns the `Repository.fork` `SET NULL` constraint with the reality of the database. --- .../core/migrations/0064_missing_indices.py | 46 +++++++++++++++++++ shared/django_apps/core/models.py | 6 ++- 2 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 shared/django_apps/core/migrations/0064_missing_indices.py diff --git a/shared/django_apps/core/migrations/0064_missing_indices.py b/shared/django_apps/core/migrations/0064_missing_indices.py new file mode 100644 index 000000000..199ce605d --- /dev/null +++ b/shared/django_apps/core/migrations/0064_missing_indices.py @@ -0,0 +1,46 @@ +# Generated by Django 4.2.16 on 2025-01-27 13:18 + +import django.db.models.deletion +from django.contrib.postgres.operations import AddIndexConcurrently +from django.db import migrations, models + + +class Migration(migrations.Migration): + atomic = False + + dependencies = [ + ("core", "0063_increment_version"), + ] + + operations = [ + migrations.AlterField( + model_name="repository", + name="fork", + field=models.ForeignKey( + blank=True, + db_column="forkid", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="core.repository", + ), + ), + migrations.AlterField( 
+ model_name="repository", + name="languages", + field=django.contrib.postgres.fields.ArrayField( + base_field=models.CharField(), + blank=True, + default=list, + null=True, + size=None, + ), + ), + AddIndexConcurrently( + model_name="commit", + index=models.Index(fields=["author"], name="commits_author_cd2f50_idx"), + ), + AddIndexConcurrently( + model_name="repository", + index=models.Index(fields=["fork"], name="repos_forkid_4cd440_idx"), + ), + ] diff --git a/shared/django_apps/core/models.py b/shared/django_apps/core/models.py index eb3afddb5..69c632a81 100644 --- a/shared/django_apps/core/models.py +++ b/shared/django_apps/core/models.py @@ -108,12 +108,12 @@ class Languages(models.TextChoices): language = models.TextField( null=True, blank=True, choices=Languages.choices ) # Really an ENUM in db - languages = ArrayField(models.CharField(), default=[], blank=True, null=True) + languages = ArrayField(models.CharField(), default=list, blank=True, null=True) languages_last_updated = DateTimeWithoutTZField(null=True, blank=True) fork = models.ForeignKey( "core.Repository", db_column="forkid", - on_delete=models.DO_NOTHING, + on_delete=models.SET_NULL, null=True, blank=True, ) @@ -149,6 +149,7 @@ class Meta: app_label = CORE_APP_LABEL ordering = ["-repoid"] indexes = [ + models.Index(fields=["fork"]), models.Index( fields=["service_id", "author"], name="repos_service_id_author", @@ -287,6 +288,7 @@ class Meta: ) ] indexes = [ + models.Index(fields=["author"]), models.Index( fields=["repository", "-timestamp"], name="commits_repoid_timestamp_desc",