Merge branch 'main' into user-export

bookwyrm-social · Mar 26, 2024 · 6a67943 · 6a67943
2 parents d9bf848 + 9dfa218
commit 6a67943
Show file tree

Hide file tree

Showing 52 changed files with 7,837 additions and 2,268 deletions.
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-0.7.2
+0.7.3
diff --git a/bookwyrm/migrations/0197_author_search_vector.py b/bookwyrm/migrations/0197_author_search_vector.py
@@ -0,0 +1,41 @@
+# Generated by Django 3.2.25 on 2024-03-20 15:15
+
+import django.contrib.postgres.indexes
+from django.db import migrations
+import pgtrigger.compiler
+import pgtrigger.migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("bookwyrm", "0196_merge_pr3134_into_main"),
+    ]
+
+    operations = [
+        migrations.AddIndex(
+            model_name="author",
+            index=django.contrib.postgres.indexes.GinIndex(
+                fields=["search_vector"], name="bookwyrm_au_search__b050a8_gin"
+            ),
+        ),
+        pgtrigger.migrations.AddTrigger(
+            model_name="author",
+            trigger=pgtrigger.compiler.Trigger(
+                name="update_search_vector_on_author_edit",
+                sql=pgtrigger.compiler.UpsertTriggerSql(
+                    func="new.search_vector := setweight(to_tsvector('simple', new.name), 'A') || setweight(to_tsvector('simple', coalesce(array_to_string(new.aliases, ' '), '')), 'B');RETURN NEW;",
+                    hash="b97919016236d74d0ade51a0769a173ea269da64",
+                    operation='INSERT OR UPDATE OF "name", "aliases", "search_vector"',
+                    pgid="pgtrigger_update_search_vector_on_author_edit_c61cb",
+                    table="bookwyrm_author",
+                    when="BEFORE",
+                ),
+            ),
+        ),
+        migrations.RunSQL(
+            # Backfill: writing search_vector fires the trigger above for every Author.
+            sql="UPDATE bookwyrm_author SET search_vector = NULL;",
+            reverse_sql="UPDATE bookwyrm_author SET search_vector = NULL;",
+        ),
+    ]
diff --git a/bookwyrm/migrations/0198_book_search_vector_author_aliases.py b/bookwyrm/migrations/0198_book_search_vector_author_aliases.py
@@ -0,0 +1,57 @@
+# Generated by Django 3.2.25 on 2024-03-20 15:52
+
+from django.db import migrations
+import pgtrigger.compiler
+import pgtrigger.migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("bookwyrm", "0197_author_search_vector"),
+    ]
+
+    operations = [
+        pgtrigger.migrations.RemoveTrigger(
+            model_name="author",
+            name="reset_search_vector_on_author_edit",
+        ),
+        pgtrigger.migrations.RemoveTrigger(
+            model_name="book",
+            name="update_search_vector_on_book_edit",
+        ),
+        pgtrigger.migrations.AddTrigger(
+            model_name="author",
+            trigger=pgtrigger.compiler.Trigger(
+                name="reset_book_search_vector_on_author_edit",
+                sql=pgtrigger.compiler.UpsertTriggerSql(
+                    func="WITH updated_books AS (SELECT book_id FROM bookwyrm_book_authors WHERE author_id = new.id ) UPDATE bookwyrm_book SET search_vector = '' FROM updated_books WHERE id = updated_books.book_id;RETURN NEW;",
+                    hash="68422c0f29879c5802b82159dde45297eff53e73",
+                    operation='UPDATE OF "name", "aliases"',
+                    pgid="pgtrigger_reset_book_search_vector_on_author_edit_a50c7",
+                    table="bookwyrm_author",
+                    when="AFTER",
+                ),
+            ),
+        ),
+        pgtrigger.migrations.AddTrigger(
+            model_name="book",
+            trigger=pgtrigger.compiler.Trigger(
+                name="update_search_vector_on_book_edit",
+                sql=pgtrigger.compiler.UpsertTriggerSql(
+                    func="WITH author_names AS (SELECT array_to_string(bookwyrm_author.name || bookwyrm_author.aliases, ' ') AS name_and_aliases FROM bookwyrm_author LEFT JOIN bookwyrm_book_authors ON bookwyrm_author.id = bookwyrm_book_authors.author_id WHERE bookwyrm_book_authors.book_id = new.id ) SELECT setweight(coalesce(nullif(to_tsvector('english', new.title), ''), to_tsvector('simple', new.title)), 'A') || setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(name_and_aliases), ' '), '')), 'C') FROM author_names) || setweight(to_tsvector('english', coalesce(new.series, '')), 'D') INTO new.search_vector;RETURN NEW;",
+                    hash="9324f5ca76a6f5e63931881d62d11da11f595b2c",
+                    operation='INSERT OR UPDATE OF "title", "subtitle", "series", "search_vector"',
+                    pgid="pgtrigger_update_search_vector_on_book_edit_bec58",
+                    table="bookwyrm_book",
+                    when="BEFORE",
+                ),
+            ),
+        ),
+        migrations.RunSQL(
+            # Writing search_vector fires the book trigger above, so every Book's
+            # vector is recomputed and now includes Author aliases.
+            sql="UPDATE bookwyrm_book SET search_vector = NULL;",
+            reverse_sql="UPDATE bookwyrm_book SET search_vector = NULL;",
+        ),
+    ]
diff --git a/bookwyrm/models/author.py b/bookwyrm/models/author.py
@@ -3,6 +3,7 @@
 from typing import Tuple, Any
 
 from django.db import models
+from django.contrib.postgres.indexes import GinIndex
 import pgtrigger
 
 from bookwyrm import activitypub
@@ -71,11 +72,29 @@ def get_remote_id(self):
     class Meta:
         """sets up indexes and triggers"""
 
+        # pylint: disable=line-too-long
+
+        indexes = (GinIndex(fields=["search_vector"]),)
         triggers = [
             pgtrigger.Trigger(
-                name="reset_search_vector_on_author_edit",
+                name="update_search_vector_on_author_edit",
+                when=pgtrigger.Before,
+                operation=pgtrigger.Insert
+                | pgtrigger.UpdateOf("name", "aliases", "search_vector"),
+                func=format_trigger(
+                    """new.search_vector :=
+                    -- author name, with priority A
+                    setweight(to_tsvector('simple', new.name), 'A') ||
+                    -- author aliases, with priority B
+                    setweight(to_tsvector('simple', coalesce(array_to_string(new.aliases, ' '), '')), 'B');
+                    RETURN new;
+                """
+                ),
+            ),
+            pgtrigger.Trigger(
+                name="reset_book_search_vector_on_author_edit",
                 when=pgtrigger.After,
-                operation=pgtrigger.UpdateOf("name"),
+                operation=pgtrigger.UpdateOf("name", "aliases"),
                 func=format_trigger(
                     """WITH updated_books AS (
                          SELECT book_id
@@ -89,7 +108,7 @@ class Meta:
                     RETURN new;
                 """
                 ),
-            )
+            ),
         ]
 
     activity_serializer = activitypub.Author
diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py
@@ -246,24 +246,34 @@ class Meta:
                 operation=pgtrigger.Insert
                 | pgtrigger.UpdateOf("title", "subtitle", "series", "search_vector"),
                 func=format_trigger(
-                    """new.search_vector :=
-                         -- title, with priority A (parse in English, default to simple if empty)
-                         setweight(COALESCE(nullif(
-                                       to_tsvector('english', new.title), ''),
-                                       to_tsvector('simple', new.title)), 'A') ||
-                         -- subtitle, with priority B (always in English?)
-                         setweight(to_tsvector('english', COALESCE(new.subtitle, '')), 'B') ||
-                         -- list of authors, with priority C (TODO: add aliases?, bookwyrm-social#3063)
-                         (SELECT setweight(to_tsvector('simple', COALESCE(array_to_string(ARRAY_AGG(bookwyrm_author.name), ' '), '')), 'C')
-                           FROM bookwyrm_author
-                           LEFT JOIN bookwyrm_book_authors
-                               ON bookwyrm_author.id = bookwyrm_book_authors.author_id
-                           WHERE bookwyrm_book_authors.book_id = new.id
-                         ) ||
-                         --- last: series name, with lowest priority
-                         setweight(to_tsvector('english', COALESCE(new.series, '')), 'D');
-                       RETURN new;
-                """
+                    """
+                    WITH author_names AS (
+                        SELECT array_to_string(bookwyrm_author.name || bookwyrm_author.aliases, ' ') AS name_and_aliases
+                            FROM bookwyrm_author
+                        LEFT JOIN bookwyrm_book_authors
+                            ON bookwyrm_author.id = bookwyrm_book_authors.author_id
+                        WHERE bookwyrm_book_authors.book_id = new.id
+                    )
+                    SELECT
+                        -- title, with priority A (parse in English, default to simple if empty)
+                        setweight(COALESCE(nullif(
+                            to_tsvector('english', new.title), ''),
+                            to_tsvector('simple', new.title)), 'A') ||
+
+                        -- subtitle, with priority B (always in English?)
+                        setweight(to_tsvector('english', COALESCE(new.subtitle, '')), 'B') ||
+
+                        -- list of authors names and aliases (with priority C)
+                        (SELECT setweight(to_tsvector('simple', COALESCE(array_to_string(ARRAY_AGG(name_and_aliases), ' '), '')), 'C')
+                            FROM author_names
+                        ) ||
+
+                        --- last: series name, with lowest priority
+                        setweight(to_tsvector('english', COALESCE(new.series, '')), 'D')
+
+                        INTO new.search_vector;
+                    RETURN new;
+                    """
                 ),
             )
         ]

diff --git a/bookwyrm/templates/413.html b/bookwyrm/templates/413.html
@@ -8,7 +8,7 @@
     <h1 class="title">{% trans "File too large" %}</h1>
     <p class="content">{% trans "The file you are uploading is too large." %}</p>
     <p class="content">
-        {% blocktrans %}
+        {% blocktrans trimmed %}
         You can try using a smaller file, or ask your BookWyrm server administrator to increase the <code>DATA_UPLOAD_MAX_MEMORY_SIZE</code> setting.
         {% endblocktrans %}
     </p>

diff --git a/bookwyrm/tests/test_author_search.py b/bookwyrm/tests/test_author_search.py
@@ -0,0 +1,87 @@
+""" test searching for authors """
+from django.test import TestCase
+
+from django.contrib.postgres.search import SearchRank, SearchQuery
+from django.db.models import F
+
+from bookwyrm import models
+
+
+class AuthorSearch(TestCase):
+    """look for some authors"""
+
+    @classmethod
+    def setUpTestData(cls):
+        """create Bob, who has 'Alice' as an alias, and an author named Alice"""
+        cls.bob = models.Author.objects.create(
+            name="Bob", aliases=["Robertus", "Alice"]
+        )
+        cls.alice = models.Author.objects.create(name="Alice")
+
+    def test_search(self):
+        """searching by name returns only the author with that name"""
+        results = self._search("Bob")
+        self.assertEqual(len(results), 1)
+        self.assertEqual(results[0], self.bob)
+
+    def test_alias_priority(self):
+        """aliases should be included, with lower priority than name"""
+        results = self._search("Alice")
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0], self.alice)
+
+    def _search_first(self, query):
+        """wrapper around _search that returns only the top-ranked author"""
+        return self._search(query, return_first=True)
+
+    @staticmethod
+    def _search(query, *, return_first=False):
+        """authors matching query ordered by rank; only the best one if return_first"""
+        search_query = SearchQuery(query, config="simple")
+        min_confidence = 0
+
+        results = (
+            models.Author.objects.filter(search_vector=search_query)
+            .annotate(rank=SearchRank(F("search_vector"), search_query))
+            .filter(rank__gt=min_confidence)
+            .order_by("-rank")
+        )
+        if return_first:
+            return results.first()
+        return results
+
+
+class SearchVectorTest(TestCase):
+    """check search_vector is computed correctly"""
+
+    def test_search_vector_simple(self):
+        """simplest search vector"""
+        author = self._create_author("Mary")
+        self.assertEqual(author.search_vector, "'mary':1A")
+
+    def test_search_vector_aliases(self):
+        """author aliases should be included with lower priority"""
+        author = self._create_author("Mary", aliases=["Maria", "Example"])
+        self.assertEqual(author.search_vector, "'example':3B 'maria':2B 'mary':1A")
+
+    def test_search_vector_parse_author(self):
+        """author name and aliases are not stemmed or affected by stop words"""
+        author = self._create_author("Writes", aliases=["Reads"])
+        self.assertEqual(author.search_vector, "'reads':2B 'writes':1A")
+
+    def test_search_vector_on_update(self):
+        """make sure that search_vector is being set correctly on edit"""
+        author = self._create_author("Mary")
+        self.assertEqual(author.search_vector, "'mary':1A")
+
+        author.name = "Example"
+        author.save(broadcast=False)
+        author.refresh_from_db()
+        self.assertEqual(author.search_vector, "'example':1A")
+
+    @staticmethod
+    def _create_author(name, /, *, aliases=None):
+        """create an author and reload it so search_vector reflects the database"""
+        author = models.Author.objects.create(name=name, aliases=aliases or [])
+        author.refresh_from_db()
+        return author