Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 15 additions & 25 deletions datajoint/erd.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,11 @@

import logging
from collections import defaultdict
import pyparsing as pp
import networkx as nx
from networkx import DiGraph
from functools import cmp_to_key
import operator

from collections import OrderedDict

# use pygraphviz if available
try:
from networkx import pygraphviz_layout
Expand All @@ -20,37 +17,21 @@

import matplotlib.pyplot as plt
from inspect import isabstract
from .base_relation import BaseRelation
from .user_relations import UserRelation, Part

logger = logging.getLogger(__name__)


def get_concrete_descendants(cls):
def get_concrete_subclasses(cls):
desc = []
child= cls.__subclasses__()
for c in child:
if not isabstract(c):
desc.append(c)
desc.extend(get_concrete_descendants(c))
desc.extend(get_concrete_subclasses(c))
return desc


def parse_base_relations(rels):
name_map = {}
for r in rels:
try:
name_map[r().full_table_name] = '{module}.{cls}'.format(module=r.__module__, cls=r.__name__)
except TypeError:
# skip if failed to instantiate BaseRelation derivative
pass
return name_map


def get_table_relation_name_map():
rels = get_concrete_descendants(BaseRelation)
return parse_base_relations(rels)


class ERD(DiGraph):
"""
A directed graph representing dependencies between Relations within and across
Expand All @@ -65,15 +46,24 @@ def node_labels(self):
"""
:return: dictionary of key : label pairs for plotting
"""
name_map = get_table_relation_name_map()
def full_class_name(user_class):
if issubclass(user_class, Part):
return '{module}.{master}.{cls}'.format(
module=user_class.__module__,
master=user_class.master.__name__,
cls=user_class.__name__)
else:
return '{module}.{cls}'.format(
module=user_class.__module__,
cls=user_class.__name__)

name_map = {rel.full_table_name: full_class_name(rel) for rel in get_concrete_subclasses(UserRelation)}
return {k: self.get_label(k, name_map) for k in self.nodes()}

def get_label(self, node, name_map=None):
label = self.node[node].get('label', '')
if label.strip():
return label

# it's not efficient to recreate name-map on every call!
if name_map is not None and node in name_map:
return name_map[node]
# no other name exists, so just use full table now
Expand Down
19 changes: 11 additions & 8 deletions datajoint/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,18 @@ def process_relation_class(relation_class, context):

process_relation_class(cls, context=self.context)

# Process subordinate relations
parts = list()
is_part = lambda x: inspect.isclass(x) and issubclass(x, Part)
# Process part relations
def is_part(x):
return inspect.isclass(x) and issubclass(x, Part)

for var, part in inspect.getmembers(cls, is_part):
parts.append(part)
part._master = cls
# TODO: look into local namespace for the subclasses
process_relation_class(part, context=dict(self.context, **{cls.__name__: cls}))
parts = list()
for part in dir(cls):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why add more lines of code unnecessarily? Since we are using inspect, we should just use inspect.getmembers rather than manually traversing dir and filtering with an if statement.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please correct me if I am wrong, but I think inspect.getmembers actually evaluates every member. So if there is a computed property, e.g. _populated_from, it will actually trigger its computation. In contrast, the proposed implementation looks at the names first and only evaluates members if they start with a capital letter. So it avoids extra computations.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see your concern. As long as you are invoking inspect.getmembers on a class and there is no member that implements __get__, no additional computation is performed. However, I see that you have reintroduced the classproperty concept, and if such a thing is present, inspect.getmembers will indeed evaluate the class property.

Also, since _populated_from is directly set equal to a relation object, there is no real trigger of computation caused by just accessing the member, if that's what you are concerned with.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought property implemented __get__. No?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default AutoPopulate._populated_from is a property that loads dependencies, looks up parents, and returns the join of the parents.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The property, and thus _populated_from, doesn't run the function unless it's accessed on an instance. That's why I was saying that as long as you invoke inspect.getmembers on a class (which is what we'd do anyway), the content of _populated_from will not be evaluated.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then I don't feel too strongly about it. It seems that inspect is designed for inspecting objects during debugging with no regard for performance. inspect.getmembers seems to be doing a lot more work than necessary.

if part[0].isupper():
part = getattr(cls, part)
if is_part(part):
parts.append(part)
part._master = cls
process_relation_class(part, context=dict(self.context, **{cls.__name__: cls}))

# invoke Relation._prepare() on class and its part relations.
cls()._prepare()
Expand Down
78 changes: 51 additions & 27 deletions datajoint/user_relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,83 @@
Hosts the table tiers, user relations should be derived from.
"""

import abc
from .base_relation import BaseRelation
from .autopopulate import AutoPopulate
from .utils import from_camel_case
from . import DataJointError


class Part(BaseRelation, metaclass=abc.ABCMeta):
class classproperty:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we reintroducing classproperty? You have ditched this idea in the past since the concept of classproperty is not a native one to Python.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but I think that last time the case for using them was not as compelling. Let's review that again. Frankly, I did not understand classproperties very well last time and it seemed like then we found an equally good solution without them.

In this case, introducing class properties substantially simplified the code in erd for looking up the master class.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Last time I brought up the case for stuff like table_name, as I felt many of these values do not depend on the instance but still need to be computed. We ultimately avoided using classproperty and ended up converting all such cases into instance-level properties, requiring us to instantiate the class to access some of these properties. This probably means that we can rewrite the schema to take advantage of class properties.

Anyway, I'm just giving you a bit of a hard time, and I agree that the use of classproperty is nice (after all, I pushed for it last time). I just felt compelled to comment on this :P

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I had not realized in my ignorance was that to downstream users, class properties can be used the same way as instance properties. At the time, I was under the impression that class properties always had to be called on the class and could not be called on the instance. I also did not realize that when subclassing, each subclass gets its own class property and not just one shared across all subclasses. These features, combined, make it acceptable to use classproperties. Sorry I did not know this. I thought classproperty would introduce all sorts of complications but after checking everything, it seems safe.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, now I realize that my arguments against classmethods and classproperties were invalid. Thankfully, changing things to classmethods now does not break any user code.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why doesn't Python3 have a standard classproperty?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I have no idea why there is no classproperty... Anyway, I'm glad to reintroduce classproperty for our use. Given that it's not standard, I still think it'd make sense to keep its use internal to DataJoint, and not expose it directly in user interfaces.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps it is because there might be confusion with properties of the metaclass. I would like to find a discussion explaining this.


def __init__(self, f):
self.f = f

def __get__(self, obj, owner):
return self.f(owner)


class UserRelation(BaseRelation):
    """
    Base class for user-defined relations: each subclass is a dedicated class
    interfacing one base relation.

    UserRelation is initialized by the decorator generated by schema().
    """
    # Class-level state; all three start out unset (None).
    _connection = None
    _context = None
    _heading = None

    @classproperty
    def connection(cls):
        """
        :return: the value currently stored in the class attribute ``_connection``.
        """
        stored = cls._connection
        return stored

    @classproperty
    def full_table_name(cls):
        """
        :return: the backtick-quoted name built from ``cls.database`` and ``cls.table_name``.

        NOTE(review): ``database`` and ``table_name`` are not defined in this class;
        they must be supplied elsewhere (e.g. by subclasses) -- confirm.
        """
        template = r"`{0:s}`.`{1:s}`"
        return template.format(cls.database, cls.table_name)


class Part(UserRelation):
"""
Inherit from this class if the table's values are details of an entry in another relation
and if this table is populated by this relation. For example, the entries inheriting from
dj.Part could be single entries of a matrix, while the parent table refers to the entire matrix.
Part relations are implemented as classes inside classes.
"""
_master = None

@property
def master(self):
if not hasattr(self, '_master'):
raise DataJointError(
'Part relations must be declared inside a base relation class')
return self._master
@classproperty
def master(cls):
return cls._master

@property
def table_name(self):
return self.master().table_name + '__' + from_camel_case(self.__class__.__name__)
@classproperty
def table_name(cls):
return cls.master.table_name + '__' + from_camel_case(cls.__name__)


class Manual(BaseRelation, metaclass=abc.ABCMeta):
class Manual(UserRelation):
"""
Inherit from this class if the table's values are entered manually.
"""

@property
def table_name(self):
@classproperty
def table_name(cls):
"""
:returns: the table name of the table formatted for mysql.
"""
return from_camel_case(self.__class__.__name__)
return from_camel_case(cls.__name__)


class Lookup(BaseRelation, metaclass=abc.ABCMeta):
class Lookup(UserRelation):
"""
Inherit from this class if the table's values are for lookup. This is
currently equivalent to defining the table as Manual and serves semantic
purposes only.
"""

@property
def table_name(self):
@classproperty
def table_name(cls):
"""
:returns: the table name of the table formatted for mysql.
"""
return '#' + from_camel_case(self.__class__.__name__)
return '#' + from_camel_case(cls.__name__)

def _prepare(self):
"""
Expand All @@ -64,29 +88,29 @@ def _prepare(self):
self.insert(self.contents, skip_duplicates=True)


class Imported(BaseRelation, AutoPopulate, metaclass=abc.ABCMeta):
class Imported(UserRelation, AutoPopulate):
"""
Inherit from this class if the table's values are imported from external data sources.
The inherited class must at least provide the function `_make_tuples`.
"""

@property
def table_name(self):
@classproperty
def table_name(cls):
"""
:returns: the table name of the table formatted for mysql.
"""
return "_" + from_camel_case(self.__class__.__name__)
return "_" + from_camel_case(cls.__name__)


class Computed(BaseRelation, AutoPopulate, metaclass=abc.ABCMeta):
class Computed(UserRelation, AutoPopulate):
"""
Inherit from this class if the table's values are computed from other relations in the schema.
The inherited class must at least provide the function `_make_tuples`.
"""

@property
def table_name(self):
@classproperty
def table_name(cls):
"""
:returns: the table name of the table formatted for mysql.
"""
return "__" + from_camel_case(self.__class__.__name__)
return "__" + from_camel_case(cls.__name__)