Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refs #28455 - Implement a method by which querysets may internally be opted out of cloning operations. #14675

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
19 changes: 16 additions & 3 deletions django/contrib/contenttypes/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,8 @@ def _apply_rel_filters(self, queryset):
Filter the queryset for the instance this manager is bound to.
"""
db = self._db or router.db_for_read(self.model, instance=self.instance)
return queryset.using(db).filter(**self.core_filters)
with queryset._avoid_cloning():
return queryset.using(db).filter(**self.core_filters)

def _remove_prefetched_objects(self):
try:
Expand All @@ -573,8 +574,13 @@ def get_queryset(self):
return self._apply_rel_filters(queryset)

def get_prefetch_queryset(self, instances, queryset=None):
# We only want to disable cloning if we own the creation of the
# QuerySet instance, otherwise we may subsequently re-enable
# cloning when we shouldn't.
cloning_disabled = queryset is None

if queryset is None:
queryset = super().get_queryset()
queryset = super().get_queryset()._disable_cloning()

queryset._add_hints(instance=instances[0])
queryset = queryset.using(queryset._db or self._db)
Expand All @@ -594,8 +600,15 @@ def get_prefetch_queryset(self, instances, queryset=None):
# instances' PK in order to match up instances:
object_id_converter = instances[0]._meta.pk.to_python
content_type_id_field_name = '%s_id' % self.content_type_field_name
queryset = queryset.filter(query)

# We owned the instantiation of the QuerySet, so we can restore
# subsequent cloning operations which were prevented within this method.
if cloning_disabled:
queryset._enable_cloning()

return (
queryset.filter(query),
queryset,
lambda relobj: (
object_id_converter(getattr(relobj, self.object_id_field_name)),
getattr(relobj, content_type_id_field_name),
Expand Down
71 changes: 57 additions & 14 deletions django/db/models/fields/related_descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,13 @@ def get_queryset(self, **hints):
return self.field.remote_field.model._base_manager.db_manager(hints=hints).all()

def get_prefetch_queryset(self, instances, queryset=None):
# We only want to disable cloning if we own the creation of the
# QuerySet instance, otherwise we may subsequently re-enable
# cloning when we shouldn't.
cloning_disabled = queryset is None

if queryset is None:
queryset = self.get_queryset()
queryset = self.get_queryset()._disable_cloning()
queryset._add_hints(instance=instances[0])

rel_obj_attr = self.field.get_foreign_related_value
Expand All @@ -140,6 +145,11 @@ def get_prefetch_queryset(self, instances, queryset=None):
query = {'%s__in' % self.field.related_query_name(): instances}
queryset = queryset.filter(**query)

# We owned the instantiation of the QuerySet, so we can restore
# subsequent cloning operations which were prevented within this method.
if cloning_disabled:
queryset._enable_cloning()

# Since we're going to assign directly in the cache,
# we must manage the reverse relation cache manually.
if not remote_field.multiple:
Expand Down Expand Up @@ -363,8 +373,13 @@ def get_queryset(self, **hints):
return self.related.related_model._base_manager.db_manager(hints=hints).all()

def get_prefetch_queryset(self, instances, queryset=None):
# We only want to disable cloning if we own the creation of the
# QuerySet instance, otherwise we may subsequently re-enable
# cloning when we shouldn't.
cloning_disabled = queryset is None

if queryset is None:
queryset = self.get_queryset()
queryset = self.get_queryset()._disable_cloning()
queryset._add_hints(instance=instances[0])

rel_obj_attr = self.related.field.get_local_related_value
Expand All @@ -373,6 +388,11 @@ def get_prefetch_queryset(self, instances, queryset=None):
query = {'%s__in' % self.related.field.name: instances}
queryset = queryset.filter(**query)

# We owned the instantiation of the QuerySet, so we can restore
# subsequent cloning operations which were prevented within this method.
if cloning_disabled:
queryset._enable_cloning()

# Since we're going to assign directly in the cache,
# we must manage the reverse relation cache manually.
for rel_obj in queryset:
Expand Down Expand Up @@ -589,11 +609,12 @@ def _apply_rel_filters(self, queryset):
"""
db = self._db or router.db_for_read(self.model, instance=self.instance)
empty_strings_as_null = connections[db].features.interprets_empty_strings_as_nulls
queryset._add_hints(instance=self.instance)
if self._db:
queryset = queryset.using(self._db)
queryset._defer_next_filter = True
queryset = queryset.filter(**self.core_filters)
with queryset._avoid_cloning():
queryset._add_hints(instance=self.instance)
if self._db:
queryset = queryset.using(self._db)
queryset._defer_next_filter = True
queryset = queryset.filter(**self.core_filters)
for field in self.field.foreign_related_fields:
val = getattr(self.instance, field.attname)
if val is None or (val == '' and empty_strings_as_null):
Expand Down Expand Up @@ -631,8 +652,13 @@ def get_queryset(self):
return self._apply_rel_filters(queryset)

def get_prefetch_queryset(self, instances, queryset=None):
# We only want to disable cloning if we own the creation of the
# QuerySet instance, otherwise we may subsequently re-enable
# cloning when we shouldn't.
cloning_disabled = queryset is None

if queryset is None:
queryset = super().get_queryset()
queryset = super().get_queryset()._disable_cloning()

queryset._add_hints(instance=instances[0])
queryset = queryset.using(queryset._db or self._db)
Expand All @@ -643,6 +669,11 @@ def get_prefetch_queryset(self, instances, queryset=None):
query = {'%s__in' % self.field.name: instances}
queryset = queryset.filter(**query)

# We owned the instantiation of the QuerySet, so we can restore
# subsequent cloning operations which were prevented within this method.
if cloning_disabled:
queryset._enable_cloning()

# Since we just bypassed this class' get_queryset(), we must manage
# the reverse relation manually.
for rel_obj in queryset:
Expand Down Expand Up @@ -906,11 +937,12 @@ def _apply_rel_filters(self, queryset):
"""
Filter the queryset for the instance this manager is bound to.
"""
queryset._add_hints(instance=self.instance)
if self._db:
queryset = queryset.using(self._db)
queryset._defer_next_filter = True
return queryset._next_is_sticky().filter(**self.core_filters)
with queryset._avoid_cloning():
queryset._add_hints(instance=self.instance)
if self._db:
queryset = queryset.using(self._db)
queryset._defer_next_filter = True
return queryset._next_is_sticky().filter(**self.core_filters)

def _remove_prefetched_objects(self):
try:
Expand All @@ -926,8 +958,13 @@ def get_queryset(self):
return self._apply_rel_filters(queryset)

def get_prefetch_queryset(self, instances, queryset=None):
# We only want to disable cloning if we own the creation of the
# QuerySet instance, otherwise we may subsequently re-enable
# cloning when we shouldn't.
cloning_disabled = queryset is None

if queryset is None:
queryset = super().get_queryset()
queryset = super().get_queryset()._disable_cloning()

queryset._add_hints(instance=instances[0])
queryset = queryset.using(queryset._db or self._db)
Expand All @@ -949,6 +986,12 @@ def get_prefetch_queryset(self, instances, queryset=None):
queryset = queryset.extra(select={
'_prefetch_related_val_%s' % f.attname:
'%s.%s' % (qn(join_table), qn(f.column)) for f in fk.local_related_fields})

# We owned the instantiation of the QuerySet, so we can restore
# subsequent cloning operations which were prevented within this method.
if cloning_disabled:
queryset._enable_cloning()

return (
queryset,
lambda result: tuple(
Expand Down
64 changes: 62 additions & 2 deletions django/db/models/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,28 @@ def __iter__(self):
yield row[0]


class PreventQuerySetCloning:
"""
Temporarily prevent the given QuerySet from creating new QuerySet instances
on each mutating operation (e.g: filter(), exclude() etc), instead modifying
the QuerySet in-place.

Note that we are intentionally not using @contextlib.contextmanager for
performance reasons. Refs #28455.
"""

__slots__ = ("queryset",)

def __init__(self, queryset):
self.queryset = queryset

def __enter__(self):
return self.queryset._disable_cloning()

def __exit__(self, exc_type, exc_val, exc_tb):
self.queryset._enable_cloning()


class QuerySet:
"""Represent a lazy database lookup for a set of objects."""

Expand All @@ -190,6 +212,7 @@ def __init__(self, model=None, query=None, using=None, hints=None):
self._fields = None
self._defer_next_filter = False
self._deferred_filter = None
self._cloning_enabled = True

@property
def query(self):
Expand Down Expand Up @@ -1328,12 +1351,49 @@ def _batched_insert(self, objs, fields, batch_size, ignore_conflicts=False):
self._insert(item, fields=fields, using=self.db, ignore_conflicts=ignore_conflicts)
return inserted_rows

def _disable_cloning(self):
"""
Prevent calls to _chain() from creating a new QuerySet via _clone().
All subsequent QuerySet mutations will occur on this instance until
_enable_cloning() is used.
"""
self._cloning_enabled = False
return self

def _enable_cloning(self):
"""
Allow calls to _chain() to create a new QuerySet via _clone().
Restores the default behaviour where any QuerySet mutation will
return a new QuerySet instance. Necessary only when there has been
a _disable_cloning() call previously.
"""
self._cloning_enabled = True
return self

def _avoid_cloning(self):
"""
Temporarily prevent QuerySet _clone() operations, restoring the default
behaviour on exit. For the duration of the context managed statement,
all operations (e.g: .filter(), .exclude(), etc) will mutate the same
QuerySet instance.
"""
# Note that we are intentionally not using @contextlib.contextmanager
# performance reasons. Refs #28455.
return PreventQuerySetCloning(self)

def _chain(self):
"""
Return a copy of the current QuerySet that's ready for another
operation.

If the QuerySet has opted in to in-place mutations via _disable_cloning()
temporarily, the copy doesn't occur and instead the same QuerySet instance
will be modified.
"""
obj = self._clone()
if not self._cloning_enabled:
obj = self
else:
obj = self._clone()
if obj._sticky_filter:
obj.query.filter_is_sticky = True
obj._sticky_filter = False
Expand Down Expand Up @@ -1953,7 +2013,7 @@ def prefetch_one_level(instances, prefetcher, lookup, level):
else:
manager = getattr(obj, to_attr)
if leaf and lookup.queryset is not None:
qs = manager._apply_rel_filters(lookup.queryset)
qs = manager._apply_rel_filters(lookup.queryset._chain())
else:
qs = manager.get_queryset()
qs._result_cache = vals
Expand Down
38 changes: 38 additions & 0 deletions tests/queries/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -3995,3 +3995,41 @@ def test_ticket_23622(self):
set(Ticket23605A.objects.filter(qy).values_list('pk', flat=True))
)
self.assertSequenceEqual(Ticket23605A.objects.filter(qx), [a2])


class QuerySetCloningTests(TestCase):
@classmethod
def setUpTestData(cls):
NamedCategory.objects.create(id=1, name='first')
NamedCategory.objects.create(id=2, name='second')
NamedCategory.objects.create(id=3, name='third')
NamedCategory.objects.create(id=4, name='fourth')

def test_context_manager(self):
"""_avoid_cloning() makes modifications apply to the original QuerySet"""
qs = NamedCategory.objects.all()
with qs._avoid_cloning():
qs2 = qs.filter(name__in={'first', 'second'}).exclude(name='second')
self.assertIs(qs2, qs)
qs3 = qs2.exclude(name__in={'third', 'fourth'})
# qs3 is not a mutation of qs2 (which is actually also qs) but a new
# instance entirely.
self.assertIsNot(qs3, qs)
self.assertIsNot(qs3, qs2)

def test_explicit_toggling(self):
qs = NamedCategory.objects.filter(name__in={'first', 'second'})
qs2 = qs._disable_cloning()
# The _disable_cloning() method doesn't return a new QuerySet, but
# toggles the value on the current instance. qs2 can be ignored.
self.assertIs(qs2, qs)
qs3 = qs.filter(name__in={'first', 'second'})
qs3 = qs3.exclude(name='second')
qs3._enable_cloning()
# these are still both references to the same QuerySet, despite re-binding
# as if they were normal chained operations providing new QuerySet instances.
self.assertIs(qs3, qs)
qs3 = qs3.filter(name='second')
# Cloning has been re-enabled so subsequent operations yield a new QuerySet.
# qs3 is now all of the filters applied to qs + an additional filter.
self.assertIsNot(qs3, qs)