Skip to content

Commit

Permalink
Merge branch 'main' into user-export
Browse files Browse the repository at this point in the history
  • Loading branch information
Minnozz committed Mar 26, 2024
2 parents d9bf848 + 9dfa218 commit 6a67943
Show file tree
Hide file tree
Showing 52 changed files with 7,837 additions and 2,268 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.7.2
0.7.3
41 changes: 41 additions & 0 deletions bookwyrm/migrations/0197_author_search_vector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Generated by Django 3.2.25 on 2024-03-20 15:15

import django.contrib.postgres.indexes
from django.db import migrations
import pgtrigger.compiler
import pgtrigger.migrations


class Migration(migrations.Migration):
    """Index author search vectors, keep them current with a trigger, and backfill."""

    # Runs after the 0196 merge migration.
    dependencies = [
        ("bookwyrm", "0196_merge_pr3134_into_main"),
    ]

    operations = [
        # GIN index on the author's search_vector column.
        migrations.AddIndex(
            model_name="author",
            index=django.contrib.postgres.indexes.GinIndex(
                fields=["search_vector"], name="bookwyrm_au_search__b050a8_gin"
            ),
        ),
        # BEFORE INSERT / UPDATE OF name, aliases, search_vector trigger on
        # bookwyrm_author. It overwrites new.search_vector with the author's
        # name (weight A) followed by the space-joined aliases (weight B),
        # both parsed with the 'simple' text search configuration.
        # NOTE(review): hash and pgid look like values generated by pgtrigger
        # from the trigger definition -- presumably they should not be edited
        # by hand; confirm against the pgtrigger documentation.
        pgtrigger.migrations.AddTrigger(
            model_name="author",
            trigger=pgtrigger.compiler.Trigger(
                name="update_search_vector_on_author_edit",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func="new.search_vector := setweight(to_tsvector('simple', new.name), 'A') || setweight(to_tsvector('simple', coalesce(array_to_string(new.aliases, ' '), '')), 'B');RETURN NEW;",
                    hash="b97919016236d74d0ade51a0769a173ea269da64",
                    operation='INSERT OR UPDATE OF "name", "aliases", "search_vector"',
                    pgid="pgtrigger_update_search_vector_on_author_edit_c61cb",
                    table="bookwyrm_author",
                    when="BEFORE",
                ),
            ),
        ),
        # Writing to search_vector fires the trigger added above (the column is
        # in its UPDATE OF list), so the NULL written here is replaced by the
        # computed vector for every existing row. The reverse statement is the
        # same UPDATE.
        migrations.RunSQL(
            # Calculate search vector for all Authors.
            sql="UPDATE bookwyrm_author SET search_vector = NULL;",
            reverse_sql="UPDATE bookwyrm_author SET search_vector = NULL;",
        ),
    ]
57 changes: 57 additions & 0 deletions bookwyrm/migrations/0198_book_search_vector_author_aliases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Generated by Django 3.2.25 on 2024-03-20 15:52

from django.db import migrations
import pgtrigger.compiler
import pgtrigger.migrations


class Migration(migrations.Migration):
    """Replace the author/book search triggers so book vectors include author aliases."""

    # Runs after the migration that added the author search vector trigger.
    dependencies = [
        ("bookwyrm", "0197_author_search_vector"),
    ]

    operations = [
        # Drop the author trigger under its old name; it is re-added below as
        # "reset_book_search_vector_on_author_edit".
        pgtrigger.migrations.RemoveTrigger(
            model_name="author",
            name="reset_search_vector_on_author_edit",
        ),
        # Drop the book trigger so it can be re-created with the new function
        # body further down.
        pgtrigger.migrations.RemoveTrigger(
            model_name="book",
            name="update_search_vector_on_book_edit",
        ),
        # AFTER UPDATE OF name, aliases trigger on bookwyrm_author. It sets
        # search_vector = '' on every book linked to the edited author through
        # bookwyrm_book_authors. search_vector is one of the columns the book
        # trigger below listens on, so that write makes the book trigger
        # recompute the vector.
        pgtrigger.migrations.AddTrigger(
            model_name="author",
            trigger=pgtrigger.compiler.Trigger(
                name="reset_book_search_vector_on_author_edit",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func="WITH updated_books AS (SELECT book_id FROM bookwyrm_book_authors WHERE author_id = new.id ) UPDATE bookwyrm_book SET search_vector = '' FROM updated_books WHERE id = updated_books.book_id;RETURN NEW;",
                    hash="68422c0f29879c5802b82159dde45297eff53e73",
                    operation='UPDATE OF "name", "aliases"',
                    pgid="pgtrigger_reset_book_search_vector_on_author_edit_a50c7",
                    table="bookwyrm_author",
                    when="AFTER",
                ),
            ),
        ),
        # BEFORE INSERT / UPDATE OF title, subtitle, series, search_vector
        # trigger on bookwyrm_book. It builds new.search_vector from:
        #   A: title (parsed as 'english'; falls back to 'simple' when the
        #      english vector is empty)
        #   B: subtitle ('english')
        #   C: names and aliases of all authors joined to the book ('simple')
        #   D: series ('english')
        # NOTE(review): hash and pgid look like pgtrigger-generated values --
        # presumably not meant to be edited by hand; confirm.
        pgtrigger.migrations.AddTrigger(
            model_name="book",
            trigger=pgtrigger.compiler.Trigger(
                name="update_search_vector_on_book_edit",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func="WITH author_names AS (SELECT array_to_string(bookwyrm_author.name || bookwyrm_author.aliases, ' ') AS name_and_aliases FROM bookwyrm_author LEFT JOIN bookwyrm_book_authors ON bookwyrm_author.id = bookwyrm_book_authors.author_id WHERE bookwyrm_book_authors.book_id = new.id ) SELECT setweight(coalesce(nullif(to_tsvector('english', new.title), ''), to_tsvector('simple', new.title)), 'A') || setweight(to_tsvector('english', coalesce(new.subtitle, '')), 'B') || (SELECT setweight(to_tsvector('simple', coalesce(array_to_string(array_agg(name_and_aliases), ' '), '')), 'C') FROM author_names) || setweight(to_tsvector('english', coalesce(new.series, '')), 'D') INTO new.search_vector;RETURN NEW;",
                    hash="9324f5ca76a6f5e63931881d62d11da11f595b2c",
                    operation='INSERT OR UPDATE OF "title", "subtitle", "series", "search_vector"',
                    pgid="pgtrigger_update_search_vector_on_book_edit_bec58",
                    table="bookwyrm_book",
                    when="BEFORE",
                ),
            ),
        ),
        # Touching search_vector fires the book trigger above for every row.
        # The reverse statement is the same UPDATE.
        migrations.RunSQL(
            # Recalculate search vector for all Books because it now includes
            # Author aliases.
            sql="UPDATE bookwyrm_book SET search_vector = NULL;",
            reverse_sql="UPDATE bookwyrm_book SET search_vector = NULL;",
        ),
    ]
25 changes: 22 additions & 3 deletions bookwyrm/models/author.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Tuple, Any

from django.db import models
from django.contrib.postgres.indexes import GinIndex
import pgtrigger

from bookwyrm import activitypub
Expand Down Expand Up @@ -71,11 +72,29 @@ def get_remote_id(self):
class Meta:
"""sets up indexes and triggers"""

# pylint: disable=line-too-long

indexes = (GinIndex(fields=["search_vector"]),)
triggers = [
pgtrigger.Trigger(
name="reset_search_vector_on_author_edit",
name="update_search_vector_on_author_edit",
when=pgtrigger.Before,
operation=pgtrigger.Insert
| pgtrigger.UpdateOf("name", "aliases", "search_vector"),
func=format_trigger(
"""new.search_vector :=
-- author name, with priority A
setweight(to_tsvector('simple', new.name), 'A') ||
-- author aliases, with priority B
setweight(to_tsvector('simple', coalesce(array_to_string(new.aliases, ' '), '')), 'B');
RETURN new;
"""
),
),
pgtrigger.Trigger(
name="reset_book_search_vector_on_author_edit",
when=pgtrigger.After,
operation=pgtrigger.UpdateOf("name"),
operation=pgtrigger.UpdateOf("name", "aliases"),
func=format_trigger(
"""WITH updated_books AS (
SELECT book_id
Expand All @@ -89,7 +108,7 @@ class Meta:
RETURN new;
"""
),
)
),
]

activity_serializer = activitypub.Author
46 changes: 28 additions & 18 deletions bookwyrm/models/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,24 +246,34 @@ class Meta:
operation=pgtrigger.Insert
| pgtrigger.UpdateOf("title", "subtitle", "series", "search_vector"),
func=format_trigger(
"""new.search_vector :=
-- title, with priority A (parse in English, default to simple if empty)
setweight(COALESCE(nullif(
to_tsvector('english', new.title), ''),
to_tsvector('simple', new.title)), 'A') ||
-- subtitle, with priority B (always in English?)
setweight(to_tsvector('english', COALESCE(new.subtitle, '')), 'B') ||
-- list of authors, with priority C (TODO: add aliases?, bookwyrm-social#3063)
(SELECT setweight(to_tsvector('simple', COALESCE(array_to_string(ARRAY_AGG(bookwyrm_author.name), ' '), '')), 'C')
FROM bookwyrm_author
LEFT JOIN bookwyrm_book_authors
ON bookwyrm_author.id = bookwyrm_book_authors.author_id
WHERE bookwyrm_book_authors.book_id = new.id
) ||
--- last: series name, with lowest priority
setweight(to_tsvector('english', COALESCE(new.series, '')), 'D');
RETURN new;
"""
"""
WITH author_names AS (
SELECT array_to_string(bookwyrm_author.name || bookwyrm_author.aliases, ' ') AS name_and_aliases
FROM bookwyrm_author
LEFT JOIN bookwyrm_book_authors
ON bookwyrm_author.id = bookwyrm_book_authors.author_id
WHERE bookwyrm_book_authors.book_id = new.id
)
SELECT
-- title, with priority A (parse in English, default to simple if empty)
setweight(COALESCE(nullif(
to_tsvector('english', new.title), ''),
to_tsvector('simple', new.title)), 'A') ||
-- subtitle, with priority B (always in English?)
setweight(to_tsvector('english', COALESCE(new.subtitle, '')), 'B') ||
-- list of authors names and aliases (with priority C)
(SELECT setweight(to_tsvector('simple', COALESCE(array_to_string(ARRAY_AGG(name_and_aliases), ' '), '')), 'C')
FROM author_names
) ||
--- last: series name, with lowest priority
setweight(to_tsvector('english', COALESCE(new.series, '')), 'D')
INTO new.search_vector;
RETURN new;
"""
),
)
]
Expand Down
2 changes: 1 addition & 1 deletion bookwyrm/templates/413.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<h1 class="title">{% trans "File too large" %}</h1>
<p class="content">{% trans "The file you are uploading is too large." %}</p>
<p class="content">
{% blocktrans %}
{% blocktrans trimmed %}
You can try using a smaller file, or ask your BookWyrm server administrator to increase the <code>DATA_UPLOAD_MAX_MEMORY_SIZE</code> setting.
{% endblocktrans %}
</p>
Expand Down
87 changes: 87 additions & 0 deletions bookwyrm/tests/test_author_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
""" test searching for authors """
from django.test import TestCase

from django.contrib.postgres.search import SearchRank, SearchQuery
from django.db.models import F

from bookwyrm import models


class AuthorSearch(TestCase):
    """look for some authors"""

    @classmethod
    def setUpTestData(cls):
        """create two authors; "Alice" is a name for one and an alias for the other"""
        cls.bob = models.Author.objects.create(
            name="Bob", aliases=["Robertus", "Alice"]
        )
        cls.alice = models.Author.objects.create(name="Alice")

    def test_search(self):
        """search for an author in the db"""
        results = self._search("Bob")
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0], self.bob)

    def test_search_alias(self):
        """an author can be found through an alias alone"""
        self.assertEqual(self._search_first("Robertus"), self.bob)

    def test_alias_priority(self):
        """aliases should be included, with lower priority than name"""
        results = self._search("Alice")
        self.assertEqual(len(results), 2)
        # name match (weight A) ranks above the alias match (weight B)
        self.assertEqual(results[0], self.alice)
        self.assertEqual(results[1], self.bob)

    def _search_first(self, query):
        """wrapper around _search that returns only the best match, or None"""
        return self._search(query, return_first=True)

    @staticmethod
    def _search(query, *, return_first=False):
        """author search

        Matches `query` against Author.search_vector using the 'simple'
        config and orders the matches by descending rank. Returns the
        queryset, or just its first element when `return_first` is set.
        """
        search_query = SearchQuery(query, config="simple")
        min_confidence = 0

        results = (
            models.Author.objects.filter(search_vector=search_query)
            .annotate(rank=SearchRank(F("search_vector"), search_query))
            .filter(rank__gt=min_confidence)
            .order_by("-rank")
        )
        if return_first:
            return results.first()
        return results


class SearchVectorTest(TestCase):
    """verify the search_vector value stored for authors"""

    def test_search_vector_simple(self):
        """a bare name produces a single weight-A lexeme"""
        mary = self._create_author("Mary")
        self.assertEqual(mary.search_vector, "'mary':1A")

    def test_search_vector_aliases(self):
        """aliases follow the name and carry the lower weight B"""
        expected = "'example':3B 'maria':2B 'mary':1A"
        mary = self._create_author("Mary", aliases=["Maria", "Example"])
        self.assertEqual(mary.search_vector, expected)

    def test_search_vector_parse_author(self):
        """names and aliases are neither stemmed nor filtered as stop words"""
        expected = "'reads':2B 'writes':1A"
        writer = self._create_author("Writes", aliases=["Reads"])
        self.assertEqual(writer.search_vector, expected)

    def test_search_vector_on_update(self):
        """editing the name must refresh search_vector"""
        person = self._create_author("Mary")
        self.assertEqual(person.search_vector, "'mary':1A")

        # rename, persist without broadcasting, then reload from the db
        person.name = "Example"
        person.save(broadcast=False)
        person.refresh_from_db()
        self.assertEqual(person.search_vector, "'example':1A")

    @staticmethod
    def _create_author(name, /, *, aliases=None):
        """create an author and reload it so db-side values are visible"""
        alias_list = aliases if aliases else []
        created = models.Author.objects.create(name=name, aliases=alias_list)
        created.refresh_from_db()
        return created
Loading

0 comments on commit 6a67943

Please sign in to comment.