Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed #33788 -- Added TrigramStrictWordSimilarity() and TrigramStrictWordDistance() on PostgreSQL. #15776

Merged
merged 1 commit into from Jun 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Expand Up @@ -636,6 +636,7 @@ answer newbie questions, and generally made Django that much better:
Mathieu Agopian <mathieu.agopian@gmail.com>
Matías Bordese
Matt Boersma <matt@sprout.org>
Matt Brewer <matt.brewer693@gmail.com>
Matt Croydon <http://www.postneo.com/>
Matt Deacalion Stevens <matt@dirtymonkey.co.uk>
Matt Dennenbaum
Expand Down
12 changes: 11 additions & 1 deletion django/contrib/postgres/apps.py
Expand Up @@ -11,7 +11,13 @@
from django.utils.translation import gettext_lazy as _

from .indexes import OpClass
from .lookups import SearchLookup, TrigramSimilar, TrigramWordSimilar, Unaccent
from .lookups import (
SearchLookup,
TrigramSimilar,
TrigramStrictWordSimilar,
TrigramWordSimilar,
Unaccent,
)
from .serializers import RangeSerializer
from .signals import register_type_handlers

Expand All @@ -37,6 +43,8 @@ def uninstall_if_needed(setting, value, enter, **kwargs):
TextField._unregister_lookup(TrigramSimilar)
CharField._unregister_lookup(TrigramWordSimilar)
TextField._unregister_lookup(TrigramWordSimilar)
CharField._unregister_lookup(TrigramStrictWordSimilar)
TextField._unregister_lookup(TrigramStrictWordSimilar)
# Disconnect this receiver until the next time this app is installed
# and ready() connects it again to prevent unnecessary processing on
# each setting change.
Expand Down Expand Up @@ -73,5 +81,7 @@ def ready(self):
TextField.register_lookup(TrigramSimilar)
CharField.register_lookup(TrigramWordSimilar)
TextField.register_lookup(TrigramWordSimilar)
CharField.register_lookup(TrigramStrictWordSimilar)
TextField.register_lookup(TrigramStrictWordSimilar)
MigrationWriter.register_serializer(RANGE_TYPES, RangeSerializer)
IndexExpression.register_wrappers(OrderBy, OpClass, Collate)
5 changes: 5 additions & 0 deletions django/contrib/postgres/lookups.py
Expand Up @@ -63,3 +63,8 @@ class TrigramSimilar(PostgresOperatorLookup):
# Lookup exposed to querysets under the name ``trigram_word_similar``.
# The postgres app registers it on CharField and TextField in ready() and
# unregisters it again when the app is uninstalled.
class TrigramWordSimilar(PostgresOperatorLookup):
lookup_name = "trigram_word_similar"
# Operator text is ``%>`` with the percent sign doubled -- presumably so a
# literal "%" survives %-style parameter interpolation; TODO confirm.
postgres_operator = "%%>"


# Lookup exposed to querysets under the name ``trigram_strict_word_similar``.
# Per the accompanying docs it behaves like ``trigram_word_similar`` except
# that extent boundaries are forced to match word boundaries. The postgres
# app registers it on CharField and TextField in ready().
class TrigramStrictWordSimilar(PostgresOperatorLookup):
lookup_name = "trigram_strict_word_similar"
# Operator text is ``%>>`` with the percent sign doubled -- presumably so a
# literal "%" survives %-style parameter interpolation; TODO confirm.
postgres_operator = "%%>>"
9 changes: 9 additions & 0 deletions django/contrib/postgres/search.py
Expand Up @@ -366,5 +366,14 @@ class TrigramWordDistance(TrigramWordBase):
arg_joiner = " <<-> "


# Expression for the trigram strict word distance between two arguments.
# The accompanying tests expect 0.5 and 0.5555556 where the strict word
# similarity is 0.5 and 0.44444445, i.e. one minus the similarity.
class TrigramStrictWordDistance(TrigramWordBase):
# No SQL function name: together with the joiner below this presumably
# renders the two arguments as an infix ``<<<->`` operator expression
# rather than a function call -- NOTE(review): confirm against the base
# class's template.
function = ""
arg_joiner = " <<<-> "


# Expression whose SQL function name is WORD_SIMILARITY; argument handling
# is inherited from TrigramWordBase (not visible in this hunk).
class TrigramWordSimilarity(TrigramWordBase):
function = "WORD_SIMILARITY"


# Expression whose SQL function name is STRICT_WORD_SIMILARITY. Per the
# accompanying docs it is like TrigramWordSimilarity except that extent
# boundaries are forced to match word boundaries. Argument handling is
# inherited from TrigramWordBase (not visible in this hunk).
class TrigramStrictWordSimilarity(TrigramWordBase):
function = "STRICT_WORD_SIMILARITY"
25 changes: 25 additions & 0 deletions docs/ref/contrib/postgres/lookups.txt
Expand Up @@ -7,6 +7,9 @@ Trigram similarity

.. fieldlookup:: trigram_similar

``trigram_similar``
-------------------

The ``trigram_similar`` lookup allows you to perform trigram lookups,
measuring the number of trigrams (three consecutive characters) shared, using a
dedicated PostgreSQL extension. A trigram lookup is given an expression and
Expand All @@ -27,6 +30,9 @@ The ``trigram_similar`` lookup can be used on

.. fieldlookup:: trigram_word_similar

``trigram_word_similar``
------------------------

The ``trigram_word_similar`` lookup allows you to perform trigram word
similarity lookups using a dedicated PostgreSQL extension. It can be
approximately understood as measuring the greatest number of trigrams shared
Expand All @@ -46,6 +52,25 @@ The ``trigram_word_similar`` lookup can be used on
>>> Sentence.objects.filter(name__trigram_word_similar='Middlesborough')
['<Sentence: Gumby rides on the path of Middlesbrough>']

.. fieldlookup:: trigram_strict_word_similar

``trigram_strict_word_similar``
-------------------------------
felixxm marked this conversation as resolved.
Show resolved Hide resolved

.. versionadded:: 4.2
felixxm marked this conversation as resolved.
Show resolved Hide resolved

Similar to :lookup:`trigram_word_similar`, except that it forces extent
boundaries to match word boundaries.

To use it, add ``'django.contrib.postgres'`` to your :setting:`INSTALLED_APPS`
and activate the `pg_trgm extension`_ on PostgreSQL. You can install the
extension using the
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
operation.

The ``trigram_strict_word_similar`` lookup can be used on
:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`.

.. _`pg_trgm extension`: https://www.postgresql.org/docs/current/pgtrgm.html

``Unaccent``
Expand Down
28 changes: 25 additions & 3 deletions docs/ref/contrib/postgres/search.txt
Expand Up @@ -286,9 +286,9 @@ Trigram similarity
==================

Another approach to searching is trigram similarity. A trigram is a group of
three consecutive characters. In addition to the :lookup:`trigram_similar` and
:lookup:`trigram_word_similar` lookups, you can use a couple of other
expressions.
three consecutive characters. In addition to the :lookup:`trigram_similar`,
:lookup:`trigram_word_similar`, and :lookup:`trigram_strict_word_similar`
lookups, you can use a couple of other expressions.

To use them, you need to activate the `pg_trgm extension
<https://www.postgresql.org/docs/current/pgtrgm.html>`_ on PostgreSQL. You can
Expand Down Expand Up @@ -334,6 +334,18 @@ Usage example::
... ).filter(similarity__gt=0.3).order_by('-similarity')
[<Author: Katy Stevens>]

``TrigramStrictWordSimilarity``
-------------------------------

.. class:: TrigramStrictWordSimilarity(string, expression, **extra)

.. versionadded:: 4.2

Accepts a string or expression, and a field name or expression. Returns the
trigram strict word similarity between the two arguments. Similar to
:class:`TrigramWordSimilarity() <TrigramWordSimilarity>`, except that it forces
extent boundaries to match word boundaries.

``TrigramDistance``
-------------------

Expand Down Expand Up @@ -371,3 +383,13 @@ Usage example::
... distance=TrigramWordDistance(test, 'name'),
... ).filter(distance__lte=0.7).order_by('distance')
[<Author: Katy Stevens>]

``TrigramStrictWordDistance``
-----------------------------

.. class:: TrigramStrictWordDistance(string, expression, **extra)

.. versionadded:: 4.2

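Accepts a string or expression, and a field name or expression. Returns the
trigram strict word distance between the two arguments. Similar to
:class:`TrigramWordDistance() <TrigramWordDistance>`, except that it forces
extent boundaries to match word boundaries.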
7 changes: 6 additions & 1 deletion docs/releases/4.2.txt
Expand Up @@ -65,7 +65,12 @@ Minor features
:mod:`django.contrib.postgres`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* ...
* The new :lookup:`trigram_strict_word_similar` lookup, and the
:class:`TrigramStrictWordSimilarity()
<django.contrib.postgres.search.TrigramStrictWordSimilarity>` and
:class:`TrigramStrictWordDistance()
<django.contrib.postgres.search.TrigramStrictWordDistance>` expressions allow
using trigram strict word similarity.

:mod:`django.contrib.redirects`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
48 changes: 48 additions & 0 deletions tests/postgres_tests/test_trigram.py
Expand Up @@ -7,6 +7,8 @@
from django.contrib.postgres.search import (
TrigramDistance,
TrigramSimilarity,
TrigramStrictWordDistance,
TrigramStrictWordSimilarity,
TrigramWordDistance,
TrigramWordSimilarity,
)
Expand Down Expand Up @@ -43,6 +45,25 @@ def test_trigram_word_search(self):
self.Model.objects.filter(field__trigram_word_similar="Middlesborough"),
[obj],
)
self.assertSequenceEqual(
self.Model.objects.filter(field__trigram_word_similar="Middle"),
[obj],
)

def test_trigram_strict_word_search_matched(self):
    """
    The strict lookup finds a row for a misspelled whole word but returns
    nothing for a bare prefix of that word.
    """
    sentence = self.Model.objects.create(
        field="Gumby rides on the path of Middlesbrough",
    )
    # (search term, rows the lookup is expected to return)
    cases = (
        ("Middlesborough", [sentence]),
        ("Middle", []),
    )
    for term, expected in cases:
        self.assertSequenceEqual(
            self.Model.objects.filter(field__trigram_strict_word_similar=term),
            expected,
        )

def test_trigram_similarity(self):
search = "Bat sat on cat."
Expand Down Expand Up @@ -75,6 +96,19 @@ def test_trigram_word_similarity(self):
],
)

def test_trigram_strict_word_similarity(self):
    """
    Annotating with TrigramStrictWordSimilarity yields the expected scores,
    ordered from most to least similar.
    """
    term = "matt"
    strict_score = TrigramStrictWordSimilarity(term, "field")
    # NOTE(review): the filter uses the non-strict ``trigram_word_similar``
    # lookup while the annotation is the strict function. The second
    # expected row scores below 0.5, so switching the filter to the strict
    # lookup may drop it -- confirm before changing.
    rows = (
        self.Model.objects.filter(field__trigram_word_similar=term)
        .annotate(word_similarity=strict_score)
        .values("field", "word_similarity")
        .order_by("-word_similarity")
    )
    expected = [
        {"field": "Cat sat on mat.", "word_similarity": 0.5},
        {"field": "Matthew", "word_similarity": 0.44444445},
    ]
    self.assertSequenceEqual(rows, expected)

def test_trigram_similarity_alternate(self):
# Round result of distance because PostgreSQL uses greater precision.
self.assertQuerysetEqual(
Expand Down Expand Up @@ -104,6 +138,20 @@ def test_trigram_word_similarity_alternate(self):
],
)

def test_trigram_strict_word_distance(self):
    """
    Annotating with TrigramStrictWordDistance and keeping distances of at
    most 0.7 yields the expected rows, nearest first.
    """
    strict_distance = TrigramStrictWordDistance("matt", "field")
    rows = (
        self.Model.objects.annotate(word_distance=strict_distance)
        .filter(word_distance__lte=0.7)
        .values("field", "word_distance")
        .order_by("word_distance")
    )
    expected = [
        {"field": "Cat sat on mat.", "word_distance": 0.5},
        {"field": "Matthew", "word_distance": 0.5555556},
    ]
    self.assertSequenceEqual(rows, expected)


class TrigramTextFieldTest(TrigramTest):
"""
Expand Down