From 1557a98be3d71cb2b79d0228dd3c128229d431a1 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 25 Aug 2015 14:03:16 -0500 Subject: [PATCH 1/5] minor cleanup --- datajoint/__init__.py | 12 ++++++------ datajoint/schema.py | 12 ++++++------ datajoint/user_relations.py | 13 +++++++------ 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/datajoint/__init__.py b/datajoint/__init__.py index aea688382..0163fc343 100644 --- a/datajoint/__init__.py +++ b/datajoint/__init__.py @@ -16,17 +16,17 @@ __all__ = ['__author__', '__version__', 'config', 'Connection', 'Heading', 'Relation', 'FreeRelation', 'Not', - 'Relation', 'schema', - 'Manual', 'Lookup', 'Imported', 'Computed', 'Part', + 'schema', + 'Relation', 'Manual', 'Lookup', 'Imported', 'Computed', 'Part', 'conn', 'kill'] -# define an object that identifies the primary key in RelationalOperand.__getitem__ -class PrimaryKey: +class key: + """ + object that allows requesting the primary key in Fetch.__getitem__ + """ pass -key = PrimaryKey - class DataJointError(Exception): """ diff --git a/datajoint/schema.py b/datajoint/schema.py index bdf985c61..8e0727a37 100644 --- a/datajoint/schema.py +++ b/datajoint/schema.py @@ -45,15 +45,15 @@ def __call__(self, cls): :param cls: class to be decorated """ - def process_relation_class(class_object, context): + def process_relation_class(relation_class, context): """ assign schema properties to the relation class and declare the table """ - class_object.database = self.database - class_object._connection = self.connection - class_object._heading = Heading() - class_object._context = context - instance = class_object() + relation_class.database = self.database + relation_class._connection = self.connection + relation_class._heading = Heading() + relation_class._context = context + instance = relation_class() instance.heading # trigger table declaration instance._prepare() diff --git a/datajoint/user_relations.py b/datajoint/user_relations.py index 2e140f62f..b8adb0f44 100644 --- a/datajoint/user_relations.py +++ b/datajoint/user_relations.py @@ -2,13 +2,14 @@ Hosts the table tiers, user relations should be derived from. """ -from datajoint.relation import Relation +import abc +from .relation import Relation from .autopopulate import AutoPopulate from .utils import from_camel_case from . import DataJointError -class Part(Relation): +class Part(Relation, metaclass=abc.ABCMeta): @property def master(self): @@ -22,7 +23,7 @@ def table_name(self): return self.master().table_name + '__' + from_camel_case(self.__class__.__name__) -class Manual(Relation): +class Manual(Relation, metaclass=abc.ABCMeta): """ Inherit from this class if the table's values are entered manually. """ @@ -35,7 +36,7 @@ def table_name(self): return from_camel_case(self.__class__.__name__) -class Lookup(Relation): +class Lookup(Relation, metaclass=abc.ABCMeta): """ Inherit from this class if the table's values are for lookup. This is currently equivalent to defining the table as Manual and serves semantic @@ -57,7 +58,7 @@ def _prepare(self): self.insert(self.contents, ignore_errors=True) -class Imported(Relation, AutoPopulate): +class Imported(Relation, AutoPopulate, metaclass=abc.ABCMeta): """ Inherit from this class if the table's values are imported from external data sources. The inherited class must at least provide the function `_make_tuples`. @@ -71,7 +72,7 @@ def table_name(self): return "_" + from_camel_case(self.__class__.__name__) -class Computed(Relation, AutoPopulate): +class Computed(Relation, AutoPopulate, metaclass=abc.ABCMeta): """ Inherit from this class if the table's values are computed from other relations in the schema. The inherited class must at least provide the function `_make_tuples`. From fc4c0391795f7a81ca249896858798cc3bcd22e3 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 25 Aug 2015 14:06:08 -0500 Subject: [PATCH 2/5] minor cleanup --- datajoint/__init__.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/datajoint/__init__.py b/datajoint/__init__.py index 0163fc343..47ce89b30 100644 --- a/datajoint/__init__.py +++ b/datajoint/__init__.py @@ -14,11 +14,9 @@ __author__ = "Dimitri Yatsenko, Edgar Walker, and Fabian Sinz at Baylor College of Medicine" __version__ = "0.2" __all__ = ['__author__', '__version__', - 'config', - 'Connection', 'Heading', 'Relation', 'FreeRelation', 'Not', - 'schema', - 'Relation', 'Manual', 'Lookup', 'Imported', 'Computed', 'Part', - 'conn', 'kill'] + 'config', 'conn', 'kill', + 'Connection', 'Heading', 'Relation', 'FreeRelation', 'Not', 'schema', + 'Manual', 'Lookup', 'Imported', 'Computed', 'Part'] class key: From 3f5fdee79d594c56b094e0bf6328f117ce5e672d Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 25 Aug 2015 15:08:24 -0500 Subject: [PATCH 3/5] Now Relation._prepare() is called after all the Part relations are declared. --- datajoint/relation.py | 41 ++++++++++++++++++++++++----------------- datajoint/schema.py | 12 +++++++++--- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/datajoint/relation.py b/datajoint/relation.py index c93826c3a..2b2133cf4 100644 --- a/datajoint/relation.py +++ b/datajoint/relation.py @@ -56,14 +56,20 @@ def heading(self): if self._heading is None: self._heading = Heading() # instance-level heading if not self._heading: - if not self.is_declared: - self.connection.query( - declare(self.full_table_name, self.definition, self._context)) - if self.is_declared: - self.connection.erm.load_dependencies(self.full_table_name) - self._heading.init_from_database(self.connection, self.database, self.table_name) + self.declare() return self._heading + def declare(self): + """ + load the table heading. If the table is not declared, use self.definition to declare + """ + if not self.is_declared: + self.connection.query( + declare(self.full_table_name, self.definition, self._context)) + if self.is_declared: + self.connection.erm.load_dependencies(self.full_table_name) + self._heading.init_from_database(self.connection, self.database, self.table_name) + @property def from_clause(self): """ @@ -115,7 +121,6 @@ def descendants(self): for table in self.connection.erm.get_descendants(self.full_table_name)) return [relation for relation in relations if relation.is_declared] - def _repr_helper(self): return "%s.%s()" % (self.__module__, self.__class__.__name__) @@ -133,7 +138,7 @@ def full_table_name(self): def insert(self, rows, **kwargs): """ - Inserts a collection of tuples. Additional keyword arguments are passed to insert1. + Insert a collection of tuples. Additional keyword arguments are passed to insert1. :param iter: Must be an iterator that generates a sequence of valid arguments for insert. """ @@ -154,19 +159,19 @@ def insert1(self, tup, replace=False, ignore_errors=False): heading = self.heading if isinstance(tup, np.void): # np.array insert - for fieldname in tup.dtype.fields: - if fieldname not in heading: - raise KeyError(u'{0:s} is not in the attribute list'.format(fieldname)) + for field in tup.dtype.fields: + if field not in heading: + raise KeyError(u'{0:s} is not in the attribute list'.format(field)) value_list = ','.join([repr(tup[name]) if not heading[name].is_blob else '%s' for name in heading if name in tup.dtype.fields]) args = tuple(pack(tup[name]) for name in heading if name in tup.dtype.fields and heading[name].is_blob) attribute_list = '`' + '`,`'.join(q for q in heading if q in tup.dtype.fields) + '`' - elif isinstance(tup, Mapping): # dict-based insert - for fieldname in tup.keys(): - if fieldname not in heading: - raise KeyError(u'{0:s} is not in the attribute list'.format(fieldname)) + elif isinstance(tup, Mapping): # dict-based insert + for field in tup.keys(): + if field not in heading: + raise KeyError(u'{0:s} is not in the attribute list'.format(field)) value_list = ','.join(repr(tup[name]) if not heading[name].is_blob else '%s' for name in heading if name in tup) args = tuple(pack(tup[name]) for name in heading @@ -175,9 +180,11 @@ def insert1(self, tup, replace=False, ignore_errors=False): else: # positional insert try: - if len(tup) != len(self.heading): + if len(tup) != len(heading): raise DataJointError( - 'Tuple size does not match the number of relation attributes') + 'Incorrect number of attributes: ' + '{given} given; {expected} expected'.format( + given=len(tup), expected=len(heading))) except TypeError: raise DataJointError('Datatype %s cannot be inserted' % type(tup)) else: diff --git a/datajoint/schema.py b/datajoint/schema.py index 8e0727a37..2d82aa25c 100644 --- a/datajoint/schema.py +++ b/datajoint/schema.py @@ -53,9 +53,7 @@ def process_relation_class(relation_class, context): relation_class._connection = self.connection relation_class._heading = Heading() relation_class._context = context - instance = relation_class() - instance.heading # trigger table declaration - instance._prepare() + relation_class().declare() if issubclass(cls, Part): raise DataJointError('The schema decorator should not apply to part relations') @@ -63,6 +61,7 @@ def process_relation_class(relation_class, context): process_relation_class(cls, context=self.context) # Process subordinate relations + parts = list() for name in (name for name in dir(cls) if not name.startswith('_')): part = getattr(cls, name) try: @@ -71,10 +70,17 @@ def process_relation_class(relation_class, context): pass else: if is_sub: + parts.append(part) part._master = cls process_relation_class(part, context=dict(self.context, **{cls.__name__: cls})) elif issubclass(part, Relation): raise DataJointError('Part relations must subclass from datajoint.Part') + + # invoke _prepare() + cls()._prepare() + for part in parts: + part()._prepare() + return cls @property From 4c046dee85b40c40f8fcfce1fa2c1f71b2a37f9e Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 27 Aug 2015 10:54:53 -0500 Subject: [PATCH 4/5] added the ability to control the order of _make_tuples calls from AutoPopulate.populate --- datajoint/autopopulate.py | 21 ++++++++++++++++++--- datajoint/kill.py | 2 +- datajoint/relation.py | 16 +++++++++++----- datajoint/schema.py | 2 +- 4 files changed, 31 insertions(+), 10 deletions(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index 7d2b43f95..4ac656cc3 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -2,6 +2,7 @@ import abc import logging import datetime +import random from .relational_operand import RelationalOperand from . import DataJointError from .relation import FreeRelation @@ -52,7 +53,8 @@ def target(self): """ return self - def populate(self, restriction=None, suppress_errors=False, reserve_jobs=False): + def populate(self, restriction=None, suppress_errors=False, + reserve_jobs=False, order="original"): """ rel.populate() calls rel._make_tuples(key) for every primary key in self.populated_from for which there is not already a tuple in rel. @@ -61,18 +63,31 @@ def populate(self, restriction=None, suppress_errors=False, reserve_jobs=False): :param suppress_errors: suppresses error if true :param reserve_jobs: currently not implemented :param batch: batch size of a single job + :param order: "original"|"reverse"|"random" - the order of execution """ - error_list = [] if suppress_errors else None if not isinstance(self.populated_from, RelationalOperand): raise DataJointError('Invalid populated_from value') if self.connection.in_transaction: raise DataJointError('Populate cannot be called during a transaction.') + valid_order = ['original', 'reverse', 'random'] + if order not in valid_order: + raise DataJointError('The order argument must be one of %s' % str(valid_order)) + + error_list = [] if suppress_errors else None + jobs = self.connection.jobs[self.target.database] table_name = self.target.table_name unpopulated = (self.populated_from & restriction) - self.target.project() - for key in unpopulated.fetch.keys(): + keys = unpopulated.fetch.keys() + if order == "reverse": + keys = list(keys).reverse() + elif order == "random": + keys = list(keys) + random.shuffle(keys) + + for key in keys: if not reserve_jobs or jobs.reserve(table_name, key): self.connection.start_transaction() if key in self.target: # already populated diff --git a/datajoint/kill.py b/datajoint/kill.py index 82c9ac38d..b4c0c526c 100644 --- a/datajoint/kill.py +++ b/datajoint/kill.py @@ -32,7 +32,7 @@ def kill(restriction=None, connection=None): except TypeError as err: print(process) - response = input('process to kill or "q" to quit)') + response = input('process to kill or "q" to quit > ') if response == 'q': break if response: diff --git a/datajoint/relation.py b/datajoint/relation.py index 2b2133cf4..80eb570f7 100644 --- a/datajoint/relation.py +++ b/datajoint/relation.py @@ -229,7 +229,8 @@ def delete(self): relations[dep] &= r.project() if name in restrict_by_me else r.restrictions do_delete = False # indicate if there is anything to delete - print('The contents of the following tables are about to be deleted:') + if config['safemode']: + print('The contents of the following tables are about to be deleted:') for relation in relations.values(): count = len(relation) if count: @@ -238,10 +239,15 @@ def delete(self): print(relation.full_table_name, '(%d tuples)' % count) else: relations.pop(relation.full_table_name) - if do_delete and (not config['safemode'] or user_choice("Proceed?", default='no') == 'yes'): - with self.connection.transaction: - for r in reversed(list(relations.values())): - r.delete_quick() + if not do_delete: + if config['safemode']: + print('Nothing to delete') + else: + if not config['safemode'] or user_choice("Proceed?", default='no') == 'yes': + with self.connection.transaction: + for r in reversed(list(relations.values())): + r.delete_quick() + print('Done') def drop_quick(self): """ diff --git a/datajoint/schema.py b/datajoint/schema.py index 2d82aa25c..a96f1678e 100644 --- a/datajoint/schema.py +++ b/datajoint/schema.py @@ -76,7 +76,7 @@ def process_relation_class(relation_class, context): elif issubclass(part, Relation): raise DataJointError('Part relations must subclass from datajoint.Part') - # invoke _prepare() + # invoke Relation._prepare() on class and its part relations. cls()._prepare() for part in parts: part()._prepare() From c8e0474253cb509db77febc0c7af129ba42e7fb4 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 27 Aug 2015 12:58:16 -0500 Subject: [PATCH 5/5] minor --- datajoint/relation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datajoint/relation.py b/datajoint/relation.py index 097e6c15d..5442e4d44 100644 --- a/datajoint/relation.py +++ b/datajoint/relation.py @@ -274,8 +274,7 @@ def size_on_disk(self): """ ret = self.connection.query( 'SHOW TABLE STATUS FROM `{database}` WHERE NAME="{table}"'.format( - database=self.database, table=self.table_name), as_dict=True - ).fetchone() + database=self.database, table=self.table_name), as_dict=True).fetchone() return ret['Data_length'] + ret['Index_length'] # --------- functionality used by the decorator ---------