Permalink
Browse files

Refactored Query.join()

Refactored both the implementation and signature of Query.join() to
better match current needs in the code. This refactoring cascades
to some other parts, too. The most significant of these are the changes
in qs.combine() and compiler.select_related_descent().
  • Loading branch information...
akaariai committed Sep 16, 2012
1 parent adf1e6c commit b42280f45c7cd0fcde835ee4665e9d67327fda1d
Showing with 72 additions and 80 deletions.
  1. +7 −11 django/db/models/sql/compiler.py
  2. +3 −0 django/db/models/sql/constants.py
  3. +62 −69 django/db/models/sql/query.py
@@ -6,7 +6,7 @@
from django.db.models.constants import LOOKUP_SEP
from django.db.models.query_utils import select_related_descend
from django.db.models.sql.constants import (SINGLE, MULTI, ORDER_DIR,
GET_ITERATOR_CHUNK_SIZE)
GET_ITERATOR_CHUNK_SIZE, REUSE_ALL)
from django.db.models.sql.datastructures import EmptyResultSet
from django.db.models.sql.expressions import SQLEvaluator
from django.db.models.sql.query import get_order_dir, Query
@@ -457,7 +457,7 @@ def _setup_joins(self, pieces, opts, alias):
if not alias:
alias = self.query.get_initial_alias()
field, target, opts, joins, _, _ = self.query.setup_joins(pieces,
opts, alias, False)
opts, alias, REUSE_ALL)
# We will later on need to promote those joins that were added to the
# query afresh above.
joins_to_promote = [j for j in joins if self.query.alias_refcount[j] < 2]
@@ -573,7 +573,7 @@ def get_grouping(self):
return result, params
def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
used=None, requested=None, restricted=None, nullable=None):
requested=None, restricted=None, nullable=None):
"""
Fill in the information needed for a select_related query. The current
depth is measured as the number of connections away from the root model
@@ -589,8 +589,6 @@ def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
root_alias = self.query.get_initial_alias()
self.query.related_select_cols = []
self.query.related_select_fields = []
if not used:
used = set()
only_load = self.query.get_loaded_field_names()
# Setup for the case when only particular related fields should be
@@ -635,8 +633,7 @@ def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
alias = self.query.join((alias, table, f.column,
f.rel.get_related_field().column),
promote=promote, reuse=used)
used.add(alias)
promote=promote)
columns, aliases = self.get_default_columns(start_alias=alias,
opts=f.rel.to._meta, as_pairs=True)
self.query.related_select_cols.extend(columns)
@@ -647,7 +644,7 @@ def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
next = False
new_nullable = f.null or promote
self.fill_related_selections(f.rel.to._meta, alias, cur_depth + 1,
used, next, restricted, new_nullable)
next, restricted, new_nullable)
if restricted:
related_fields = [
@@ -682,14 +679,13 @@ def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
int_opts = int_model._meta
alias = self.query.join(
(alias, int_opts.db_table, lhs_col, int_opts.pk.column),
promote=True, reuse=used
promote=True,
)
alias_chain.append(alias)
alias = self.query.join(
(alias, table, f.rel.get_related_field().column, f.column),
promote=True
)
used.add(alias)
columns, aliases = self.get_default_columns(start_alias=alias,
opts=model._meta, as_pairs=True, local_only=True)
self.query.related_select_cols.extend(columns)
@@ -701,7 +697,7 @@ def fill_related_selections(self, opts=None, root_alias=None, cur_depth=1,
new_nullable = True
self.fill_related_selections(model._meta, table, cur_depth+1,
used, next, restricted, new_nullable)
next, restricted, new_nullable)
def deferred_to_columns(self):
"""
@@ -34,3 +34,6 @@
'ASC': ('ASC', 'DESC'),
'DESC': ('DESC', 'ASC'),
}
# A marker for join-reusability.
REUSE_ALL = 'REUSE_ALL'

This comment has been minimized.

Show comment
Hide comment
@alex

alex Sep 16, 2012

Use object() for this, it's more obviously a marker IMO.

@alex

alex Sep 16, 2012

Use object() for this, it's more obviously a marker IMO.

@@ -20,7 +20,7 @@
from django.db.models.fields import FieldDoesNotExist
from django.db.models.sql import aggregates as base_aggregates_module
from django.db.models.sql.constants import (QUERY_TERMS, ORDER_DIR, SINGLE,
ORDER_PATTERN, JoinInfo)
ORDER_PATTERN, JoinInfo, REUSE_ALL)
from django.db.models.sql.datastructures import EmptyResultSet, Empty, MultiJoin
from django.db.models.sql.expressions import SQLEvaluator
from django.db.models.sql.where import (WhereNode, Constraint, EverythingNode,
@@ -462,24 +462,41 @@ def combine(self, rhs, connector):
self.remove_inherited_models()
# Work out how to relabel the rhs aliases, if necessary.
change_map = {}
used = set()
conjunction = (connector == AND)
# Determine which existing joins can be reused. We set the reusability
# to match how queries are constructed by .filter() calls - the default
# for AND is two chained .filter() calls, and in that case we will
# be creating new joins for each (m2m) table. For OR however we know
# that the queries MUST be constructed using a single .filter() call,
# and to mimic this we will reuse all joins.
#
# An example is fk.filter(Q(m2m__a=1) | Q(m2m__a=2)) -> same filter call
# means reused join for m2m. So, when doing fk.filter(m2m__a=1) |
# fk.filter(m2m__a=2) we reuse joins. For the AND case we mimic the joins
# generated by fk.filter(m2m__a=1).filter(m2m__a=2).
#
# Note that we will be creating duplicate joins for non-m2m joins in
# the AND case. This is something that could be fixed later on.
reusability = None if conjunction else REUSE_ALL
# Make sure the first table is already in the query - this is the same
# table on both sides.
self.get_initial_alias()
# Add the joins in the rhs query into the new query.
first = True
for alias in rhs.tables:
for alias in rhs.tables[1:]:
if not rhs.alias_refcount[alias]:
# An unused alias.
continue
table, _, join_type, lhs, lhs_col, col, _ = rhs.alias_map[alias]
table, _, join_type, lhs, lhs_col, col, nullable = rhs.alias_map[alias]
promote = join_type == self.LOUTER
# If the left side of the join was already relabeled, use the
# updated alias.
lhs = change_map.get(lhs, lhs)
new_alias = self.join((lhs, table, lhs_col, col),
conjunction and not first, used, promote, not conjunction)
used.add(new_alias)
reuse=reusability, promote=promote,
outer_if_first=not conjunction, nullable=nullable)
change_map[alias] = new_alias
first = False
# So that we don't exclude valid results in an "or" query combination,
# all joins exclusive to either the lhs or the rhs must be converted
@@ -765,9 +782,11 @@ def change_aliases(self, change_map):
col.relabel_aliases(change_map)
# 2. Rename the alias in the internal table/alias datastructures.
for k, aliases in self.join_map.items():
for ident, aliases in self.join_map.items():
del self.join_map[ident]
aliases = tuple([change_map.get(a, a) for a in aliases])
self.join_map[k] = aliases
ident = (change_map.get(ident[0], ident[0]),) + ident[1:]
self.join_map[ident] = aliases
for old_alias, new_alias in six.iteritems(change_map):
alias_data = self.alias_map[old_alias]
alias_data = alias_data._replace(rhs_alias=new_alias)
@@ -842,8 +861,8 @@ def count_active_tables(self):
"""
return len([1 for count in six.itervalues(self.alias_refcount) if count])
def join(self, connection, always_create=False,
promote=False, outer_if_first=False, nullable=False, reuse=None):
def join(self, connection, reuse=REUSE_ALL, promote=False,
outer_if_first=False, nullable=False):
"""
Returns an alias for the join in 'connection', either reusing an
existing alias for that join or creating a new one. 'connection' is a
@@ -853,16 +872,10 @@ def join(self, connection, always_create=False,
lhs.lhs_col = table.col
If 'always_create' is True and 'reuse' is None, a new alias is always
created, regardless of whether one already exists or not. If
'always_create' is True and 'reuse' is a set, an alias in 'reuse' that
matches the connection will be returned, if possible. If
'always_create' is False, the first existing alias that matches the
'connection' is returned, if any. Otherwise a new join is created.
If 'exclusions' is specified, it is something satisfying the container
protocol ("foo in exclusions" must work) and specifies a list of
aliases that should not be returned, even if they satisfy the join.
The 'reuse' parameter can be used in three ways: it can be REUSE_ALL,
which means all joins (matching the connection) are reusable; it can
be a set containing the reusable joins; or it can be None, which means
a new join is always created.
If 'promote' is True, the join type for the alias will be LOUTER (if
the alias previously existed, the join type will be promoted from INNER
@@ -879,32 +892,18 @@ def join(self, connection, always_create=False,
is a candidate for promotion (to "left outer") when combining querysets.
"""
lhs, table, lhs_col, col = connection
if lhs in self.alias_map:
lhs_table = self.alias_map[lhs].table_name
else:
lhs_table = lhs
if reuse and always_create and table in self.table_map:
# Convert the 'reuse' to case to be "exclude everything but the
# reusable set, minus exclusions, for this table".
exclusions = set(self.table_map[table]).difference(reuse)
always_create = False
existing = self.join_map.get(connection, ())
if reuse == REUSE_ALL:
reuse = existing
elif reuse is None:
reuse = set()
else:
exclusions = set()
t_ident = (lhs_table, table, lhs_col, col)
if not always_create:
for alias in self.join_map.get(t_ident, ()):
if alias not in exclusions:
if lhs_table and not self.alias_refcount[self.alias_map[alias].lhs_alias]:
# The LHS of this join tuple is no longer part of the
# query, so skip this possibility.
continue
if self.alias_map[alias].lhs_alias != lhs:
continue
self.ref_alias(alias)
if promote or (lhs and self.alias_map[lhs].join_type == self.LOUTER):
self.promote_joins([alias])
return alias
reuse = [a for a in existing if a in reuse]
for alias in reuse:
self.ref_alias(alias)
if promote or (lhs and self.alias_map[lhs].join_type == self.LOUTER):
self.promote_joins([alias])
return alias
# No reuse is possible, so we need a new alias.
alias, _ = self.table_alias(table, True)
@@ -923,10 +922,10 @@ def join(self, connection, always_create=False,
join_type = self.INNER
join = JoinInfo(table, alias, join_type, lhs, lhs_col, col, nullable)
self.alias_map[alias] = join
if t_ident in self.join_map:
self.join_map[t_ident] += (alias,)
if connection in self.join_map:
self.join_map[connection] += (alias,)
else:
self.join_map[t_ident] = (alias,)
self.join_map[connection] = (alias,)
return alias
def setup_inherited_models(self):
@@ -1003,7 +1002,7 @@ def add_aggregate(self, aggregate, model, alias, is_summary):
# then we need to explore the joins that are required.
field, source, opts, join_list, last, _ = self.setup_joins(
field_list, opts, self.get_initial_alias(), False)
field_list, opts, self.get_initial_alias(), REUSE_ALL)
# Process the join chain to see if it can be trimmed
col, _, join_list = self.trim_joins(source, join_list, last, False)
@@ -1114,8 +1113,8 @@ def add_filter(self, filter_expr, connector=AND, negate=False, trim=False,
try:
field, target, opts, join_list, last, extra_filters = self.setup_joins(
parts, opts, alias, True, allow_many, allow_explicit_fk=True,
can_reuse=can_reuse, negate=negate,
parts, opts, alias, can_reuse, allow_many,
allow_explicit_fk=True, negate=negate,
process_extras=process_extras)
except MultiJoin as e:
self.split_exclude(filter_expr, LOOKUP_SEP.join(parts[:e.level]),
@@ -1265,9 +1264,8 @@ def add_q(self, q_object, used_aliases=None, force_having=False):
if self.filter_is_sticky:
self.used_aliases = used_aliases
def setup_joins(self, names, opts, alias, dupe_multis, allow_many=True,
allow_explicit_fk=False, can_reuse=None, negate=False,
process_extras=True):
def setup_joins(self, names, opts, alias, can_reuse, allow_many=True,
allow_explicit_fk=False, negate=False, process_extras=True):
"""
Compute the necessary table joins for the passage through the fields
given in 'names'. 'opts' is the Options class for the current model
@@ -1353,16 +1351,14 @@ def setup_joins(self, names, opts, alias, dupe_multis, allow_many=True,
target)
int_alias = self.join((alias, table1, from_col1, to_col1),
dupe_multis, nullable=True,
reuse=can_reuse)
reuse=can_reuse, nullable=True)
if int_alias == table2 and from_col2 == to_col2:
joins.append(int_alias)
alias = int_alias
else:
alias = self.join(
(int_alias, table2, from_col2, to_col2),
dupe_multis, nullable=True,
reuse=can_reuse)
reuse=can_reuse, nullable=True)
joins.extend([int_alias, alias])
elif field.rel:
# One-to-one or many-to-one field
@@ -1408,11 +1404,9 @@ def setup_joins(self, names, opts, alias, dupe_multis, allow_many=True,
target)
int_alias = self.join((alias, table1, from_col1, to_col1),
dupe_multis, nullable=True,
reuse=can_reuse)
reuse=can_reuse, nullable=True)
alias = self.join((int_alias, table2, from_col2, to_col2),
dupe_multis, nullable=True,
reuse=can_reuse)
reuse=can_reuse, nullable=True)
joins.extend([int_alias, alias])
else:
# One-to-many field (ForeignKey defined on the target model)
@@ -1436,8 +1430,7 @@ def setup_joins(self, names, opts, alias, dupe_multis, allow_many=True,
opts, target)
alias = self.join((alias, table, from_col, to_col),
dupe_multis, nullable=True,
reuse=can_reuse)
reuse=can_reuse, nullable=True)
joins.append(alias)
if pos != len(names) - 1:
@@ -1603,8 +1596,8 @@ def add_fields(self, field_names, allow_m2m=True):
try:
for name in field_names:
field, target, u2, joins, u3, u4 = self.setup_joins(
name.split(LOOKUP_SEP), opts, alias, False, allow_m2m,
True)
name.split(LOOKUP_SEP), opts, alias, REUSE_ALL,
allow_m2m, True)
final_alias = joins[-1]
col = target.column
if len(joins) > 1:
@@ -1896,7 +1889,7 @@ def set_start(self, start):
opts = self.model._meta
alias = self.get_initial_alias()
field, col, opts, joins, last, extra = self.setup_joins(
start.split(LOOKUP_SEP), opts, alias, False)
start.split(LOOKUP_SEP), opts, alias, REUSE_ALL)
select_col = self.alias_map[joins[1]].lhs_join_col
select_alias = alias

0 comments on commit b42280f

Please sign in to comment.