diff --git a/datajoint/__init__.py b/datajoint/__init__.py index 7cac45a0e..45df9c9c8 100644 --- a/datajoint/__init__.py +++ b/datajoint/__init__.py @@ -16,7 +16,8 @@ __all__ = ['__author__', '__version__', 'config', 'conn', 'kill', 'Connection', 'Heading', 'BaseRelation', 'FreeRelation', 'Not', 'schema', - 'Manual', 'Lookup', 'Imported', 'Computed', 'Part'] + 'Manual', 'Lookup', 'Imported', 'Computed', 'Part', + 'AndList', 'OrList'] class key: @@ -57,7 +58,7 @@ class DataJointError(Exception): * modify the local copy of %s that datajoint just saved for you * put a file named %s with the same configuration format in your home * specify the environment variables DJ_USER, DJ_HOST, DJ_PASS - """) + """ % (LOCALCONFIG, GLOBALCONFIG)) local_config_file = os.path.expanduser(LOCALCONFIG) logger.log(logging.INFO, "No config found. Generating {0:s}".format(local_config_file)) config.save(local_config_file) @@ -69,7 +70,7 @@ class DataJointError(Exception): from .connection import conn, Connection from .base_relation import BaseRelation from .user_relations import Manual, Lookup, Imported, Computed, Part -from .relational_operand import Not +from .relational_operand import Not, AndList, OrList from .heading import Heading from .schema import Schema as schema from .kill import kill diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index c910925e6..8b4c652ec 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -3,7 +3,7 @@ import logging import datetime import random -from .relational_operand import RelationalOperand +from .relational_operand import RelationalOperand, AndList from . 
import DataJointError from .base_relation import FreeRelation @@ -56,13 +56,12 @@ def target(self): """ return self - def populate(self, restriction=None, suppress_errors=False, - reserve_jobs=False, order="original"): + def populate(self, *restrictions, suppress_errors=False, reserve_jobs=False, order="original"): """ rel.populate() calls rel._make_tuples(key) for every primary key in self.populated_from for which there is not already a tuple in rel. - :param restriction: restriction on rel.populated_from - target + :param restrictions: a list of restrictions each restrict (rel.populated_from - target.proj()) :param suppress_errors: suppresses error if true :param reserve_jobs: if true, reserves job to populate in asynchronous fashion :param order: "original"|"reverse"|"random" - the order of execution @@ -77,13 +76,13 @@ def populate(self, restriction=None, suppress_errors=False, todo = self.populated_from if not isinstance(todo, RelationalOperand): raise DataJointError('Invalid populated_from value') - todo.restrict(restriction) + todo.restrict(AndList(restrictions)) error_list = [] if suppress_errors else None - jobs = self.connection.jobs[self.target.database] - table_name = self.target.table_name - keys = (todo - self.target.project()).fetch.keys() + jobs = self.connection.jobs[self.target.database] if reserve_jobs else None + todo -= self.target.proj() + keys = todo.fetch.keys() if order == "reverse": keys = list(keys) keys.reverse() @@ -94,12 +93,12 @@ def populate(self, restriction=None, suppress_errors=False, raise DataJointError('Invalid order specification') for key in keys: - if not reserve_jobs or jobs.reserve(table_name, key): + if not reserve_jobs or jobs.reserve(self.target.table_name, key): self.connection.start_transaction() if key in self.target: # already populated self.connection.cancel_transaction() if reserve_jobs: - jobs.complete(table_name, key) + jobs.complete(self.target.table_name, key) else: logger.info('Populating: ' + str(key)) try: 
@@ -107,7 +106,7 @@ def populate(self, restriction=None, suppress_errors=False, except Exception as error: self.connection.cancel_transaction() if reserve_jobs: - jobs.error(table_name, key, error_message=str(error)) + jobs.error(self.target.table_name, key, error_message=str(error)) if not suppress_errors: raise else: @@ -116,7 +115,7 @@ def populate(self, restriction=None, suppress_errors=False, else: self.connection.commit_transaction() if reserve_jobs: - jobs.complete(table_name, key) + jobs.complete(self.target.table_name, key) return error_list def progress(self, restriction=None, display=True): diff --git a/datajoint/fetch.py b/datajoint/fetch.py index ffbea5c75..96c7b9754 100644 --- a/datajoint/fetch.py +++ b/datajoint/fetch.py @@ -190,7 +190,7 @@ def keys(self, **kwargs): """ Iterator that returns primary keys. """ - yield from self._relation.project().fetch.set_behavior(**dict(self.behavior, as_dict=True, **kwargs)) + yield from self._relation.proj().fetch.set_behavior(**dict(self.behavior, as_dict=True, **kwargs)) def __getitem__(self, item): """ diff --git a/datajoint/heading.py b/datajoint/heading.py index a08e3f3af..0e81e0ce4 100644 --- a/datajoint/heading.py +++ b/datajoint/heading.py @@ -184,7 +184,7 @@ def init_from_database(self, conn, database, table_name): attr['dtype'] = numeric_types[(t, is_unsigned)] self.attributes = OrderedDict([(q['name'], Attribute(**q)) for q in attributes]) - def project(self, *attribute_list, **renamed_attributes): + def proj(self, *attribute_list, **renamed_attributes): """ derive a new heading by selecting, renaming, or computing attributes. In relational algebra these operators are known as project, rename, and expand. 
diff --git a/datajoint/relational_operand.py b/datajoint/relational_operand.py index 6188c3989..362a8fb21 100644 --- a/datajoint/relational_operand.py +++ b/datajoint/relational_operand.py @@ -11,45 +11,77 @@ logger = logging.getLogger(__name__) -class AndList(Sequence): +class AndList(list): """ A list of restrictions to by applied to a relation. The restrictions are ANDed. Each restriction can be a list or set or a relation whose elements are ORed. - But the elements that are lists can contain + But the elements that are lists can contain other AndLists. + + Example: + rel2 = rel & dj.AndList((cond1, cond2, cond3)) + is equivalent to + rel2 = rel & cond1 & cond2 & cond3 """ + pass - def __init__(self, heading): - self.heading = heading - self._list = [] - def __len__(self): - return len(self._list) - - def __getitem__(self, i): - return self._list[i] - - def add(self, *args): - # remove Nones and duplicates - args = [r for r in args if r is not None and r not in self] - if args: - if any(is_empty_set(r) for r in args): - # if any condition is an empty list, return FALSE - self._list = ['FALSE'] - else: - self._list.extend(args) +class OrList(list): + """ + A list of restrictions to be applied to a relation. The restrictions are ORed. + A tuple is kept if it satisfies any one of the restrictions. + But the elements that are lists can contain other AndLists. + + Example: + rel2 = rel & dj.OrList((cond1, cond2, cond3)) + is equivalent to + rel2 = rel & [cond1, cond2, cond3] + + Since OrList is just a thin subclass of list, it is not necessary and is only provided + for consistency with AndList. + """ + pass + +class RelationalOperand(metaclass=abc.ABCMeta): + """ + RelationalOperand implements relational algebra and fetch methods. + RelationalOperand objects reference other relation objects linked by operators. + The leaves of this tree of objects are base relations. + When fetching data from the database, this tree of objects is compiled into an SQL expression. 
+ It is a mixin class that provides relational operators, iteration, and fetch capability. + RelationalOperand operators are: restrict, pro, and join. + """ + + _restrictions = None + + @property + def restrictions(self): + if self._restrictions is None: + self._restrictions = AndList() + return self._restrictions + + def clear_restrictions(self): + self._restrictions = None + + @property + def primary_key(self): + return self.heading.primary_key + + @property def where_clause(self): """ convert to a WHERE clause string """ - def make_condition(arg, _negate=False): if isinstance(arg, str): return arg, _negate elif isinstance(arg, AndList): - return '(' + ' AND '.join([make_condition(element)[0] for element in arg]) + ')', _negate + if arg: + return '(' + ' AND '.join([make_condition(element)[0] for element in arg]) + ')', _negate + else: + return 'FALSE' if _negate else 'TRUE', False - # semijoin or antijoin + # semijoin or antijoin elif isinstance(arg, RelationalOperand): common_attributes = [q for q in self.heading.names if q in arg.heading.names] if not common_attributes: @@ -71,20 +103,26 @@ def make_condition(arg, _negate=False): # element of a record array condition = ['`%s`=%s' % (k, arg[k]) for k in arg.dtype.fields if k in self.heading] else: - raise DataJointError('invalid restriction type') + raise DataJointError('Invalid restriction type') return ' AND '.join(condition) if condition else 'TRUE', _negate - if not self: + if len(self.restrictions) == 0: # an empty list -> no WHERE clause return '' + # An empty or-list in the restrictions immediately causes an empty result + assert isinstance(self.restrictions, AndList) + if any(is_empty_or_list(r) for r in self.restrictions): + return ' WHERE FALSE' + conditions = [] - for item in self: + for item in self.restrictions: negate = isinstance(item, Not) if negate: - item = item.restriction + item = item.restriction # NOT is added below if isinstance(item, (list, tuple, set, np.ndarray)): - # sets of 
conditions are ORed - item = '(' + ') OR ('.join([make_condition(q)[0] for q in item]) + ')' + # process an OR list, skipping members that are themselves empty OR lists + temp = [make_condition(q)[0] for q in item if not is_empty_or_list(q)] + item = '(' + ') OR ('.join(temp) + ')' if temp else 'FALSE' else: item, negate = make_condition(item, negate) if not item: @@ -92,39 +130,6 @@ def make_condition(arg, _negate=False): conditions.append(('NOT (%s)' if negate else '(%s)') % item) return ' WHERE ' + ' AND '.join(conditions) - def __repr__(self): - return 'AND List: ' + repr(self._list) - - -class RelationalOperand(metaclass=abc.ABCMeta): - """ - RelationalOperand implements relational algebra and fetch methods. - RelationalOperand objects reference other relation objects linked by operators. - The leaves of this tree of objects are base relations. - When fetching data from the database, this tree of objects is compiled into an SQL expression. - It is a mixin class that provides relational operators, iteration, and fetch capability. - RelationalOperand operators are: restrict, pro, and join. - """ - - _restrictions = None - - @property - def restrictions(self): - if self._restrictions is None: - self._restrictions = AndList(self.heading) - return self._restrictions - - def clear_restrictions(self): - self._restrictions = None - - @property - def primary_key(self): - return self.heading.primary_key - - @property - def where_clause(self): - return self.restrictions.where_clause() - # --------- abstract properties ----------- @property @@ -171,15 +176,21 @@ def __mod__(self, attributes=None): """ relational projection operator. See RelationalOperand.project """ - return self.project(*attributes) + return self.proj(*attributes) - def project(self, *attributes, **renamed_attributes): + def project(self, *args, **kwargs): + """ + alias for self.proj() for backward compatibility + """ + return self.proj(*args, **kwargs) + + def proj(self, *attributes, **renamed_attributes): """ Relational projection operator. 
:param attributes: a list of attribute names to be included in the result. :return: a new relation with selected fields Primary key attributes are always selected and cannot be excluded. - Therefore obj.project() produces a relation with only the primary key attributes. + Therefore obj.proj() produces a relation with only the primary key attributes. If attributes includes the string '*', all attributes are selected. Each attribute can only be used once in attributes or renamed_attributes. Therefore, the projected relation cannot have more attributes than the original relation. @@ -203,44 +214,112 @@ def aggregate(self, group, *attributes, **renamed_attributes): def __iand__(self, restriction): """ - in-place restriction by a single condition + in-place restriction + + See relational_operand.restrict for more detail. """ self.restrict(restriction) + return self def __and__(self, restriction): """ relational restriction or semijoin :return: a restricted copy of the argument + + See relational_operand.restrict for more detail. """ ret = copy(self) ret.clear_restrictions() - ret.restrict(restriction, *list(self.restrictions)) + ret.restrict(self.restrictions) + ret.restrict(restriction) return ret - def restrict(self, *restrictions): + def __isub__(self, restriction): """ - In-place restriction. Primarily intended for datajoint's internal use. - Users are encouraged to use self & restriction to apply a restriction. - Each condition in restrictions is applied and the conditions are combined with AND. - However, each member of restrictions can be a list of conditions, which are combined with OR. - :param restrictions: list of restrictions. + in-place inverted restriction + + See relational_operand.restrict for more detail. 
""" - self.restrictions.add(*restrictions) + self.restrict(Not(restriction)) + return self + + def __sub__(self, restriction): + """ + inverse restriction aka antijoin + :return: a restricted copy of the argument + + See relational_operand.restrict for more detail. + """ + return self & Not(restriction) + + def restrict(self, restriction): + """ + In-place restriction. Restricts the relation to a subset of its original tuples. + rel.restrict(restriction) is equivalent to rel = rel & restriction or rel &= restriction + rel.restrict(Not(restriction)) is equivalent to rel = rel - restriction or rel -= restriction + The primary key of the result is unaffected. + Successive restrictions are combined using the logical AND. + The AndList class is provided to play the role of successive restrictions. + Any relation, collection, or sequence other than an AndList are treated as OrLists. + However, the class OrList is still provided for cases when explicitness is required. + Inverse restriction is accomplished by either using the subtraction operator or the Not class. + + The expressions in each row equivalent: + rel & 'TRUE' rel + rel & 'FALSE' the empty relation + rel - cond rel & Not(cond) + rel - 'TRUE' rel & 'FALSE' + rel - 'FALSE' rel + rel & AndList((cond1,cond2)) rel & cond1 & cond2 + rel & AndList() rel + rel & [cond1, cond2] rel & OrList((cond1, cond2)) + rel & [] rel & 'FALSE' + rel & None rel & 'FALSE' + rel & any_empty_relation rel & 'FALSE' + rel - AndList((cond1,cond2)) rel & [Not(cond1), Not(cond2)] + rel - [cond1, cond2] rel & Not(cond1) & Not(cond2) + rel - AndList() rel & 'FALSE' + rel - [] rel + rel - None rel + rel - any_empty_relation rel + + When arg is another relation, the restrictions rel & arg and rel - arg become the relational semijoin and + antijoin operators, respectively. + Then, rel & arg restricts rel to tuples that match at least one tuple in arg (hence arg is treated as an OrList). 
+ Conversely, rel - arg restricts rel to tuples that do not match any tuples in arg. + Two tuples match when their common attributes have equal values or when they have no common attributes. + All shared attributes must be in the primary key of either rel or arg or both or an error will be raised. + + relational_operand.restrict is the only access point that modifies restrictions. All other operators must + ultimately call restrict() + + :param restriction: a sequence or an array (treated as OR list), another relation, an SQL condition string, or + an AndList. + """ + if isinstance(restriction, AndList): + self.restrictions.extend(restriction) + elif is_empty_or_list(restriction): + self.clear_restrictions() + self.restrictions.append('FALSE') + else: + self.restrictions.append(restriction) + + @property + def fetch1(self): + return Fetch1(self) + + @property + def fetch(self): + return Fetch(self) def attributes_in_restrictions(self): """ :return: list of attributes that are probably used in the restrictions. 
This is used internally for optimizing SQL statements """ - s = self.restrictions.where_clause() # avoid calling multiple times + s = self.where_clause return set(name for name in self.heading.names if name in s) - def __sub__(self, restriction): - """ - inverted restriction aka antijoin - """ - return self & (None if is_empty_set(restriction) else Not(restriction)) - @abc.abstractmethod def _repr_helper(self): """ @@ -253,7 +332,7 @@ def __repr__(self): if self._restrictions: ret += ' & %r' % self._restrictions else: - rel = self.project(*self.heading.non_blobs) # project out blobs + rel = self.proj(*self.heading.non_blobs) # project out blobs limit = config['display.limit'] width = config['display.width'] @@ -262,7 +341,7 @@ def __repr__(self): widths = {f: min(max([len(f)] + [len(str(e)) for e in tups[f]])+4,width) for f in columns} - templates = {f:'%%-%d.%ds' % (widths[f], widths[f]) for f in columns} + templates = {f: '%%-%d.%ds' % (widths[f], widths[f]) for f in columns} repr_string = ' '.join([templates[column] % column for column in columns]) + '\n' repr_string += ' '.join(['+' + '-' * (widths[column] - 2) + '+' for column in columns]) + '\n' for tup in tups: @@ -276,7 +355,7 @@ def __repr__(self): def _repr_html_(self): limit = config['display.limit'] - rel = self.project(*self.heading.non_blobs) # project out blobs + rel = self.proj(*self.heading.non_blobs) # project out blobs columns = rel.heading.names content = dict( head='