From 29e56b4d5b6f520c19eab77a6698b7b55780cc33 Mon Sep 17 00:00:00 2001 From: asherpasha Date: Tue, 26 May 2020 18:31:51 -0400 Subject: [PATCH 01/39] Trying to upgrade on Python 3.8 --- .gitignore | 1 + docs/source/conf.py | 1 - intermine.egg-info/PKG-INFO | 30 +++++++++++++++++++++++++ intermine.egg-info/SOURCES.txt | 26 +++++++++++++++++++++ intermine.egg-info/dependency_links.txt | 1 + intermine.egg-info/top_level.txt | 1 + intermine/__init__.py | 2 +- intermine/model.py | 3 ++- requirements.txt | 14 +++++------- samples/alleles.py | 14 ++++++------ tests/acceptance.py | 4 ++-- 11 files changed, 77 insertions(+), 20 deletions(-) create mode 100644 intermine.egg-info/PKG-INFO create mode 100644 intermine.egg-info/SOURCES.txt create mode 100644 intermine.egg-info/dependency_links.txt create mode 100644 intermine.egg-info/top_level.txt diff --git a/.gitignore b/.gitignore index 6b926d49..44023b0c 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ current-docs.zip intermine-docs-1.00.03.zip build MANIFEST +.idea diff --git a/docs/source/conf.py b/docs/source/conf.py index 4742dd8e..0ef3b3d6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -196,4 +196,3 @@ # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True -\n diff --git a/intermine.egg-info/PKG-INFO b/intermine.egg-info/PKG-INFO new file mode 100644 index 00000000..abfc9350 --- /dev/null +++ b/intermine.egg-info/PKG-INFO @@ -0,0 +1,30 @@ +Metadata-Version: 1.1 +Name: intermine +Version: 1.12.0 +Summary: InterMine WebService client +Home-page: http://www.intermine.org +Author: InterMine team +Author-email: all@intermine.org +License: LGPL, BSD +Description: InterMine Webservice Client + ---------------------------- + + A Python API to access bioinformatics + data warehouses powered by the InterMine platform. 
+ + +Keywords: webservice,genomic,bioinformatics +Platform: UNKNOWN +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Science/Research +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL) +Classifier: License :: OSI Approved :: BSD License +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Scientific/Engineering :: Bio-Informatics +Classifier: Topic :: Scientific/Engineering :: Information Analysis +Classifier: Operating System :: OS Independent +Provides: intermine diff --git a/intermine.egg-info/SOURCES.txt b/intermine.egg-info/SOURCES.txt new file mode 100644 index 00000000..5bd729c2 --- /dev/null +++ b/intermine.egg-info/SOURCES.txt @@ -0,0 +1,26 @@ +LICENSE-BSD +LICENSE-LGPL +MANIFEST.in +README.md +setup.py +intermine/__init__.py +intermine/bar_chart.py +intermine/constraints.py +intermine/decorators.py +intermine/errors.py +intermine/idresolution.py +intermine/model.py +intermine/pathfeatures.py +intermine/query.py +intermine/query_manager.py +intermine/registry.py +intermine/results.py +intermine/util.py +intermine/webservice.py +intermine.egg-info/PKG-INFO +intermine.egg-info/SOURCES.txt +intermine.egg-info/dependency_links.txt +intermine.egg-info/top_level.txt +intermine/lists/__init__.py +intermine/lists/list.py +intermine/lists/listmanager.py \ No newline at end of file diff --git a/intermine.egg-info/dependency_links.txt b/intermine.egg-info/dependency_links.txt new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/intermine.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/intermine.egg-info/top_level.txt b/intermine.egg-info/top_level.txt new file mode 100644 index 00000000..b4eea31e --- /dev/null +++ 
b/intermine.egg-info/top_level.txt @@ -0,0 +1 @@ +intermine diff --git a/intermine/__init__.py b/intermine/__init__.py index d891f299..5878cbea 100644 --- a/intermine/__init__.py +++ b/intermine/__init__.py @@ -1 +1 @@ -VERSION = "1.11.0" +VERSION = "1.12.0" diff --git a/intermine/model.py b/intermine/model.py index e9fedca0..d849cbb4 100644 --- a/intermine/model.py +++ b/intermine/model.py @@ -397,7 +397,8 @@ def field_dict(self): @property def parent_classes(self): """The flattened list of parent classes, with the parts""" - all_parents = [pc for pc in p.parent_classes for p in self.parts] + for p in self.parts: + all_parents = [pc for pc in p.parent_classes] return all_parents + self.parts diff --git a/requirements.txt b/requirements.txt index 17060f62..82b4332d 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,6 @@ - -requests>=2.20.0 -Lxml==4.2.1 -Unittest2==1.1.0 -Numpy==1.14.5 -Pandas==0.22.0 -matplotlib==2.2.2 -autopep8>=1.4.4 +requests==2.23.0 +numpy==1.18.4 +matplotlib==3.2.1 +simplejson==3.17.0 +pandas==1.0.3 +lxml==4.5.1 diff --git a/samples/alleles.py b/samples/alleles.py index 2288cb14..b0b1c7fd 100644 --- a/samples/alleles.py +++ b/samples/alleles.py @@ -44,24 +44,24 @@ def fit_to_cell(a): return a.ljust(col_width) if len( order_by("symbol") for row in q.rows(): - print row + print(row) gene_symbols = ["zen", "eve", "bib", "h"] filter_genes = (s.model.Gene.symbol == (gene_symbols).add_columns(s.model.Gene.alleles)) for gene in s.query(s.model.Gene).filter(filter_genes): - print summary % (gene.symbol, len(gene.alleles)) - print hrule + print(summary % (gene.symbol, len(gene.alleles))) + print(hrule) iterhelper = itertools.groupby(sorted(map(lambda a: a.symbol, gene.alleles)), lines_of(cols)) for k, line_of_alleles in (iterhelper): - print sep.join(map(fit_to_cell, line_of_alleles)) + print(sep.join(map(fit_to_cell, line_of_alleles))) - print "\nAllele Classes:" + print("\nAllele Classes:") allele_classes = [(key, len(list(group))) for 
key, group in itertools.groupby( sorted(map(lambda x: x.alleleClass, gene.alleles)))] for pair in reversed(sorted(allele_classes, key=lambda g: g[1])): - print "%s (%d)" % pair + print("%s (%d)" % pair) - print hrule + print(hrule) diff --git a/tests/acceptance.py b/tests/acceptance.py index d85123de..baf3094b 100644 --- a/tests/acceptance.py +++ b/tests/acceptance.py @@ -13,5 +13,5 @@ q.add_constraint('Gene.symbol', 'ONE OF', ['eve', 'zen']) q.add_join('Gene.alleles') q.add_path_description('Gene', 'One of those gene-y things') -print q.to_xml() -print q.to_formatted_xml() +print(q.to_xml()) +print(q.to_formatted_xml()) From fd3f1f41dea36a778f9764021b0df635bb4cb6c6 Mon Sep 17 00:00:00 2001 From: asherpasha Date: Wed, 27 May 2020 14:30:55 -0400 Subject: [PATCH 02/39] Updated requirements.txt --- requirements.txt | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 82b4332d..d837d583 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,17 @@ -requests==2.23.0 -numpy==1.18.4 +certifi==2020.4.5.1 +chardet==3.0.4 +cycler==0.10.0 +Cython==0.29.19 +idna==2.9 +kiwisolver==1.2.0 +lxml==4.5.1 matplotlib==3.2.1 -simplejson==3.17.0 +numpy==1.18.4 pandas==1.0.3 -lxml==4.5.1 +pyparsing==2.4.7 +python-dateutil==2.8.1 +pytz==2020.1 +requests==2.23.0 +simplejson==3.17.0 +six==1.15.0 +urllib3==1.25.9 From decfb69e41ec97dc838349b6e1059f70dc7b2bd3 Mon Sep 17 00:00:00 2001 From: asherpasha Date: Wed, 27 May 2020 16:04:41 -0400 Subject: [PATCH 03/39] Ran dos2unix. Flymine http to https. 
--- README.md | 2 +- docs/make.bat | 72 +- intermine/constraints.py | 2494 +++++++++++----------- intermine/decorators.py | 38 +- intermine/errors.py | 30 +- intermine/idresolution.py | 192 +- intermine/model.py | 2240 +++++++++---------- intermine/pathfeatures.py | 298 +-- intermine/query.py | 4134 ++++++++++++++++++------------------ intermine/registry.py | 302 +-- intermine/results.py | 114 +- intermine/util.py | 66 +- intermine/webservice.py | 4 +- samples/alleles.py | 134 +- setup.py | 412 ++-- tests/acceptance.py | 2 +- tests/live_lists.py | 1322 ++++++------ tests/live_registry.py | 42 +- tests/live_results.py | 436 ++-- tests/live_summary_test.py | 94 +- tests/live_widgets.py | 40 +- tests/server.py | 160 +- tests/test_core.py | 2766 ++++++++++++------------ tests/test_lists.py | 110 +- tests/test_templates.py | 428 ++-- 25 files changed, 7972 insertions(+), 7960 deletions(-) diff --git a/README.md b/README.md index e784a87c..970c15d2 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ to perform queries. Some examples of sites that are powered by InterMine software, and thus offer a compatible webservice API, are: - * [FlyMine](http://www.flymine.org) + * [FlyMine](https://www.flymine.org) * [MouseMine](http://www.mousemine.org) * [YeastMine](http://yeastmine.yeastgenome.org) * [ZebrafishMine](http://zebrafishmine.org) diff --git a/docs/make.bat b/docs/make.bat index 243f5397..a3ac2700 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -1,36 +1,36 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build -set SPHINXPROJ=Intermine - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. 
Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% - -:end -popd +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build +set SPHINXPROJ=Intermine + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/intermine/constraints.py b/intermine/constraints.py index c99b85d1..57056680 100644 --- a/intermine/constraints.py +++ b/intermine/constraints.py @@ -1,1247 +1,1247 @@ -import re -import string -from intermine.pathfeatures import PathFeature, PATH_PATTERN -from intermine.util import ReadableException - - -class Constraint(PathFeature): - """ - A class representing constraints on a query - =========================================== - - All constraints inherit from this class, which - simply defines the type of element for the - purposes of serialisation. 
- """ - child_type = "constraint" - - -class LogicNode(object): - """ - A class representing nodes in a logic graph - =========================================== - - Objects which can be represented as nodes - in the AST of a constraint logic graph should - inherit from this class, which defines - methods for overloading built-in operations. - """ - - def __add__(self, other): - """ - Overloads + - =========== - - Logic may be defined by using addition to sum - logic nodes:: - - > query.set_logic(con_a + con_b + con_c) - > str(query.logic) - ... A and B and C - - """ - if not isinstance(other, LogicNode): - return NotImplemented - else: - return LogicGroup(self, 'AND', other) - - def __and__(self, other): - """ - Overloads & - =========== - - Logic may be defined by using the & operator:: - - > query.set_logic(con_a & con_b) - > sr(query.logic) - ... A and B - - """ - if not isinstance(other, LogicNode): - return NotImplemented - else: - return LogicGroup(self, 'AND', other) - - def __or__(self, other): - """ - Overloads | - =========== - - Logic may be defined by using the | operator:: - - > query.set_logic(con_a | con_b) - > str(query.logic) - ... A or B - - """ - if not isinstance(other, LogicNode): - return NotImplemented - else: - return LogicGroup(self, 'OR', other) - - -class LogicGroup(LogicNode): - """ - A logic node that represents two sub-nodes joined in some way - ============================================================= - - A logic group is a logic node with two child nodes, which are - either connected by AND or by OR logic. - """ - - LEGAL_OPS = frozenset(['AND', 'OR']) - - def __init__(self, left, op, right, parent=None): - """ - Constructor - =========== - - Makes a new node composes of two nodes (left and right), - and some operator. - - Groups may have a reference to their parent. 
- """ - if op not in self.LEGAL_OPS: - raise TypeError(op + " is not a legal logical operation") - self.parent = parent - self.left = left - self.right = right - self.op = op - for node in [self.left, self.right]: - if isinstance(node, LogicGroup): - node.parent = self - - def __repr__(self): - """ - Provide a sensible representation of a node - """ - return '<' + self.__class__.__name__ + ': ' + str(self) + '>' - - def __str__(self): - """ - Provide a human readable version of the group. The - string version should be able to be parsed back into the - original logic group. - """ - core = ' '.join(map(str, [self.left, self.op.lower(), self.right])) - if self.parent and self.op != self.parent.op: - return '(' + core + ')' - else: - return core - - def get_codes(self): - """ - Get a list of all constraint codes used in this group. - """ - codes = [] - for node in [self.left, self.right]: - if isinstance(node, LogicGroup): - codes.extend(node.get_codes()) - else: - codes.append(node.code) - return codes - - -class LogicParseError(ReadableException): - """ - An error representing problems in parsing constraint logic. - """ - pass - - -class EmptyLogicError(ValueError): - """ - An error representing the fact that an the logic - string to be parsed was empty - """ - pass - - -class LogicParser(object): - """ - Parses logic strings into logic groups - ====================================== - - Instances of this class are used to parse logic strings into - abstract syntax trees, and then logic groups. This aims to provide - robust parsing of logic strings, with the ability to identify syntax - errors in such strings. - """ - - def __init__(self, query): - """ - Constructor - =========== - - Parsers need access to the query they are parsing for, in - order to reference the constraints on the query. 
- - @param query: The parent query object - @type query: intermine.query.Query - """ - self._query = query - - def get_constraint(self, code): - """ - Get the constraint with the given code - ====================================== - - This method fetches the constraint from the - parent query with the matching code. - - @see: intermine.query.Query.get_constraint - @rtype: intermine.constraints.CodedConstraint - """ - return self._query.get_constraint(code) - - def get_priority(self, op): - """ - Get the priority for a given operator - ===================================== - - Operators have a specific precedence, from highest - to lowest: - - () - - AND - - OR - - This method returns an integer which can be - used to compare operator priorities. - - @rtype: int - """ - return { - "AND": 2, - "OR": 1, - "(": 3, - ")": 3 - }.get(op) - - ops = { - "AND": "AND", - "&": "AND", - "&&": "AND", - "OR": "OR", - "|": "OR", - "||": "OR", - "(": "(", - ")": ")" - } - - def parse(self, logic_str): - """ - Parse a logic string into an abstract syntax tree - ================================================= - - Takes a string such as "A and B or C and D", and parses it - into a structure which represents this logic as a binary - abstract syntax tree. The above string would parse to - "(A and B) or (C and D)", as AND binds more tightly than OR. - - Note that only singly rooted trees are parsed. 
- - @param logic_str: The logic defininition as a string - @type logic_str: string - - @rtype: LogicGroup - - @raise LogicParseError: if there is a syntax error in the logic - """ - def flatten(l): - """Flatten out a list which contains both values and sublists""" - ret = [] - for item in l: - if isinstance(item, list): - ret.extend(item) - else: - ret.append(item) - return ret - - def canonical(x, d): - if x in d: - return d[x] - else: - return re.split("\b", x) - - def dedouble(x): - if re.search("[()]", x): - return list(x) - else: - return x - - logic_str = logic_str.upper() - tokens = [t for t in re.split("\\s+", logic_str) if t] - if not tokens: - raise EmptyLogicError() - tokens = flatten([canonical(x, self.ops) for x in tokens]) - tokens = flatten([dedouble(x) for x in tokens]) - self.check_syntax(tokens) - postfix_tokens = self.infix_to_postfix(tokens) - abstract_syntax_tree = self.postfix_to_tree(postfix_tokens) - return abstract_syntax_tree - - def check_syntax(self, infix_tokens): - """ - Check the syntax for errors before parsing - ========================================== - - Syntax is checked before parsing to provide better errors, - which should hopefully lead to more informative error messages. - - This checks for: - - correct operator positions (cannot put two codes next to each - other without intervening operators) - - correct grouping (all brackets are matched, - and contain valid expressions) - - @param infix_tokens: The input parsed into a list of tokens. - @type infix_tokens: iterable - - @raise LogicParseError: if there is a problem. 
- """ - need_an_op = False - need_binary_op_or_closing_bracket = False - processed = [] - open_brackets = 0 - for token in infix_tokens: - if token not in self.ops: - if need_an_op: - raise LogicParseError("Expected an operator after: '" - + ' '.join(processed) + "'" - + " - but got: '" + token + "'") - if need_binary_op_or_closing_bracket: - raise LogicParseError("Logic grouping error after: '" - + ' '.join(processed) + "'" - + " - expected an operator " - "or a closing bracket") - - need_an_op = True - else: - need_an_op = False - if token == "(": - if processed and processed[-1] not in self.ops: - raise LogicParseError("Logic grouping error after: '" - + ' '.join(processed) + "'" - + " - got an unexpeced " - "opening bracket") - if need_binary_op_or_closing_bracket: - raise LogicParseError("Logic grouping error after: '" - + ' '.join(processed) + "'" - + " - expected an operator or " - "a closing bracket") - - open_brackets += 1 - elif token == ")": - need_binary_op_or_closing_bracket = True - open_brackets -= 1 - else: - need_binary_op_or_closing_bracket = False - processed.append(token) - if open_brackets != 0: - if open_brackets < 0: - message = "Unmatched closing bracket in: " - else: - message = "Unmatched opening bracket in: " - raise LogicParseError(message + '"' + ' '.join(infix_tokens) - + '"') - - def infix_to_postfix(self, infix_tokens): - """ - Convert a list of infix tokens to postfix notation - ================================================== - - Take in a set of infix tokens and return the set parsed - to a postfix sequence. 
- - @param infix_tokens: The list of tokens - @type infix_tokens: iterable - - @rtype: list - """ - stack = [] - postfix_tokens = [] - for token in infix_tokens: - if token not in self.ops: - postfix_tokens.append(token) - else: - op = token - if op == "(": - stack.append(token) - elif op == ")": - while stack: - last_op = stack.pop() - if last_op == "(": - if stack: - previous_op = stack.pop() - if previous_op != "(": - postfix_tokens.append(previous_op) - break - else: - postfix_tokens.append(last_op) - else: - while stack and (self.get_priority(stack[-1]) - <= self.get_priority(op)): - prev_op = stack.pop() - if prev_op != "(": - postfix_tokens.append(prev_op) - stack.append(op) - while stack: - postfix_tokens.append(stack.pop()) - return postfix_tokens - - def postfix_to_tree(self, postfix_tokens): - """ - Convert a set of structured tokens to a single LogicGroup - ========================================================= - - Convert a set of tokens in postfix notation to a single - LogicGroup object. - - @param postfix_tokens: A list of tokens in postfix notation. - @type postfix_tokens: list - - @rtype: LogicGroup - - @raise AssertionError: is the tree doesn't have a unique root. - """ - stack = [] - try: - for token in postfix_tokens: - if token not in self.ops: - stack.append(self.get_constraint(token)) - else: - op = token - right = stack.pop() - left = stack.pop() - stack.append(LogicGroup(left, op, right)) - assert len(stack) == 1, "Tree doesn't have a unique root" - return stack.pop() - except IndexError: - raise EmptyLogicError() - - -class CodedConstraint(Constraint, LogicNode): - """ - A parent class for all constraints that have codes - ================================================== - - Constraints that have codes are the principal logical - filters on queries, and need to be refered to individually - (hence the codes). They will all have a logical operation they - embody, and so have a reference to an operator. 
- - This class is not meant to be instantiated directly, but instead - inherited from to supply default behaviour. - """ - - OPS = set([]) - - def __init__(self, path, op, code="A"): - """ - Constructor - =========== - - @param path: The path to constrain - @type path: string - - @param op: The operation to apply - must be in the OPS set - @type op: string - """ - if op not in self.OPS: - raise TypeError(op + " not in " + str(self.OPS)) - self.op = op - self.code = code - super(CodedConstraint, self).__init__(path) - - def get_codes(self): - return [self.code] - - def __str__(self): - """ - Stringify to the code they are refered to by. - """ - return self.code - - def to_string(self): - """ - Provide a human readable representation of the logic. - This method is called by repr. - """ - s = super(CodedConstraint, self).to_string() - return " ".join([s, self.op]) - - def to_dict(self): - """ - Return a dict object which can be used to construct a - DOM element with the appropriate attributes. - """ - d = super(CodedConstraint, self).to_dict() - d.update(op=self.op, code=self.code) - return d - - -class UnaryConstraint(CodedConstraint): - """ - Constraints which have just a path and an operator - ================================================== - - These constraints are simple assertions about the - object/value refered to by the path. The set of valid - operators is: - - IS NULL - - IS NOT NULL - - """ - OPS = set(['IS NULL', 'IS NOT NULL']) - - -class BinaryConstraint(CodedConstraint): - """ - Constraints which have an operator and a value - ============================================== - - These constraints assert a relationship between the - value represented by the path (it must be a representation - of a value, ie an Attribute) and another value - ie. the - operator takes two parameters. - - In all case the 'left' side of the relationship is the path, - and the 'right' side is the supplied value. 
- - Valid operators are: - - = (equal to) - - != (not equal to) - - < (less than) - - > (greater than) - - <= (less than or equal to) - - >= (greater than or equal to) - - LIKE (same as equal to, but with implied wildcards) - - CONTAINS (same as equal to, but with implied wildcards) - - NOT LIKE (same as not equal to, but with implied wildcards) - - """ - OPS = set(['=', '!=', '<', '>', '<=', '>=', - 'LIKE', 'NOT LIKE', 'CONTAINS']) - - def __init__(self, path, op, value, code="A"): - """ - Constructor - =========== - - @param path: The path to constrain - @type path: string - - @param op: The relationship between the value represented by the path - and the value provided (must be a valid operator) - @type op: string - - @param value: The value to compare the stored value to - @type value: string or number - - @param code: The code for this constraint (default = "A") - @type code: string - """ - self.value = value - super(BinaryConstraint, self).__init__(path, op, code) - - def to_string(self): - """ - Provide a human readable representation of the logic. - This method is called by repr. - """ - s = super(BinaryConstraint, self).to_string() - return " ".join([s, str(self.value)]) - - def to_dict(self): - """ - Return a dict object which can be used to construct a - DOM element with the appropriate attributes. - """ - d = super(BinaryConstraint, self).to_dict() - d.update(value=str(self.value)) - return d - - -class ListConstraint(CodedConstraint): - """ - Constraints which refer to an objects membership of lists - ========================================================= - - These constraints assert a membership relationship between the - object represented by the path (it must always be an object, ie. - a Reference or a Class) and a List. Lists are collections of - objects in the database which are stored in InterMine - datawarehouses. 
These lists must be set up before the query is run, either - manually in the webapp or by using the webservice API list - upload feature. - - Valid operators are: - - IN - - NOT IN - - """ - OPS = set(['IN', 'NOT IN']) - - def __init__(self, path, op, list_name, code="A"): - if hasattr(list_name, 'to_query'): - q = list_name.to_query() - list_name1 = q.service.create_list(q) - self.list_name = list_name1.name - elif hasattr(list_name, "name"): - self.list_name = list_name.name - else: - self.list_name = list_name - super(ListConstraint, self).__init__(path, op, code) - - def to_string(self): - """ - Provide a human readable representation of the logic. - This method is called by repr. - """ - s = super(ListConstraint, self).to_string() - return " ".join([s, str(self.list_name)]) - - def to_dict(self): - """ - Return a dict object which can be used to construct a - DOM element with the appropriate attributes. - """ - d = super(ListConstraint, self).to_dict() - d.update(value=str(self.list_name)) - return d - - -class LoopConstraint(CodedConstraint): - """ - Constraints with refer to object identity - ========================================= - - These constraints assert that two paths refer to the same - object. - - Valid operators: - - IS - - IS NOT - - The operators IS and IS NOT map to the ops "=" and "!=" when they - are used in XML serialisation. 
- - """ - OPS = set(['IS', 'IS NOT']) - SERIALISED_OPS = {'IS': '=', 'IS NOT': '!='} - - def __init__(self, path, op, loopPath, code="A"): - """ - Constructor - =========== - - @param path: The path to constrain - @type path: string - - @param op: The relationship between the path and the path provided - (must be a valid operator) - @type op: string - - @param loopPath: The path to check for identity against - @type loopPath: string - - @param code: The code for this constraint (default = "A") - @type code: string - """ - self.loopPath = loopPath - super(LoopConstraint, self).__init__(path, op, code) - - def to_string(self): - """ - Provide a human readable representation of the logic. - This method is called by repr. - """ - s = super(LoopConstraint, self).to_string() - return " ".join([s, self.loopPath]) - - def to_dict(self): - """ - Return a dict object which can be used to construct a - DOM element with the appropriate attributes. - """ - d = super(LoopConstraint, self).to_dict() - d.update(loopPath=self.loopPath, op=self.SERIALISED_OPS[self.op]) - return d - - -class TernaryConstraint(BinaryConstraint): - """ - Constraints for broad, general searching over all fields - ======================================================== - - These constraints request a wide-ranging search for matching - fields over all aspects of an object, including up to coercion - from related classes. - - Valid operators: - - LOOKUP - - To aid disambiguation, Ternary constaints accept an extra_value as - well as the main value. - """ - OPS = set(['LOOKUP']) - - def __init__(self, path, op, value, extra_value=None, code="A"): - """ - Constructor - =========== - - @param path: The path to constrain. Here is must be a class, - or a reference to a class. - @type path: string - - @param op: The relationship between the path and the path provided - (must be a valid operator) - @type op: string - - @param value: The value to check other fields against. 
- @type value: string - - @param extra_value: A further value for disambiguation. The meaning - of this value varies by class and configuration. - For example, if the class of the object is Gene, - then extra_value will refer to the Organism. - @type extra_value: string - - @param code: The code for this constraint (default = "A") - @type code: string - """ - self.extra_value = extra_value - super(TernaryConstraint, self).__init__(path, op, value, code) - - def to_string(self): - """ - Provide a human readable representation of the logic. - This method is called by repr. - """ - s = super(TernaryConstraint, self).to_string() - if self.extra_value is None: - return s - else: - return " ".join([s, 'IN', self.extra_value]) - - def to_dict(self): - """ - Return a dict object which can be used to construct a - DOM element with the appropriate attributes. - """ - d = super(TernaryConstraint, self).to_dict() - if self.extra_value is not None: - d.update(extraValue=self.extra_value) - return d - - -class MultiConstraint(CodedConstraint): - """ - Constraints for checking membership of a set of values - ====================================================== - - These constraints require the value they constrain to be - either a member of a set of values, or not a member. - - Valid operators: - - ONE OF - - NONE OF - - These constraints are similar in use to List constraints, with - the following differences: - - The list in this case is a defined set of values that is passed - along with the query itself, rather than anything stored - independently on a server. - - The object of the constaint is the value of an attribute, rather - than an object's identity. - """ - OPS = set(['ONE OF', 'NONE OF']) - - def __init__(self, path, op, values, code="A"): - """ - Constructor - =========== - - @param path: The path to constrain. Here it must be an attribute of - some object. 
- @type path: string - - @param op: The relationship between the path and the path provided - (must be a valid operator) - @type op: string - - @param values: The set of values which the object of the constraint - either must or must not belong to. - @type values: set or list - - @param code: The code for this constraint (default = "A") - @type code: string - """ - if not isinstance(values, (set, list)): - raise TypeError( - "values must be a set or a list, not " + str(type(values))) - self.values = values - super(MultiConstraint, self).__init__(path, op, code) - - def to_string(self): - """ - Provide a human readable representation of the logic. - This method is called by repr. - """ - s = super(MultiConstraint, self).to_string() - return ' '.join([s, str(self.values)]) - - def to_dict(self): - """ - Return a dict object which can be used to construct a - DOM element with the appropriate attributes. - """ - d = super(MultiConstraint, self).to_dict() - d.update(value=self.values) - return d - - -class RangeConstraint(MultiConstraint): - """ - Constraints for testing where a value lies relative to a set of ranges - ====================================================================== - - These constraints require that the value of the path they constrain - should lie in relationship to the set of values passed according to - the specific operator. - - Valid operators: - - OVERLAPS : The value overlaps at least one of the given ranges - - WITHIN : The value is wholly outside the given set of ranges - - CONTAINS : The value contains all the given ranges - - DOES NOT CONTAIN : The value does not contain all the given ranges - - OUTSIDE : Some part is outside the given set of ranges - - DOES NOT OVERLAP : The value does not overlap with any of the ranges - - For example: - - 4 WITHIN [1..5, 20..25] => True - - The format of the ranges depends on the value being constrained and what - range parsers have been configured on the target server. 
A common range - parser for biological mines is the one for Locations: - - Gene.chromosomeLocation OVERLAPS [2X:54321..67890, 3R:12345..456789] - - """ - OPS = set(['OVERLAPS', 'DOES NOT OVERLAP', 'WITHIN', - 'OUTSIDE', 'CONTAINS', 'DOES NOT CONTAIN']) - - -class IsaConstraint(MultiConstraint): - """ - Constraints for testing the class of a value, as a disjoint union - ====================================================================== - - These constraints require that the value of the path they constrain - should be an instance of one of the classes provided. - - Valid operators: - - ISA : The value is an instance of one of the provided classes. - - For example: - - SequenceFeature ISA [Exon, Intron] - - """ - OPS = set(['ISA']) - - -class SubClassConstraint(Constraint): - """ - Constraints on the class of a reference - ======================================= - - If an object has a reference X to another object of type A, - and type B extends type A, then any object of type B may be - the value of the reference X. If you only want to see X's - which are B's, this may be achieved with subclass constraints, - which allow the type of an object to be limited to one of the - subclasses (at any depth) of the class type required - by the attribute. - - These constraints do not use operators. Since they cannot be - conditional (eg. "A is a B or A is a C" would not be possible - in an InterMine query), they do not have codes - and cannot be referenced in logic expressions. - """ - - def __init__(self, path, subclass): - """ - Constructor - =========== - - @param path: The path to constrain. This must refer to a class or a - reference to a class. - @type path: str - - @param subclass: The class to subclass the path to. 
This must be a - simple class name (not a dotted name) - @type subclass: str - """ - if not PATH_PATTERN.match(subclass): - raise TypeError - self.subclass = subclass - super(SubClassConstraint, self).__init__(path) - - def to_string(self): - """ - Provide a human readable representation of the logic. - This method is called by repr. - """ - s = super(SubClassConstraint, self).to_string() - return s + ' ISA ' + self.subclass - - def to_dict(self): - """ - Return a dict object which can be used to construct a - DOM element with the appropriate attributes. - """ - d = super(SubClassConstraint, self).to_dict() - d.update(type=self.subclass) - return d - - -class TemplateConstraint(object): - """ - A mixin to supply the behaviour and state of constraints on templates - ===================================================================== - - Constraints on templates can also be designated as "on", "off" or - "locked", which refers to whether they are active or not. Inactive - constraints are still configured, but behave as if absent for the purpose - of results. In addition, template constraints can be editable or not. - Only values for editable constraints can be provided when requesting - results, and only constraints that can participate in logic expressions - can be editable. - """ - REQUIRED = "locked" - OPTIONAL_ON = "on" - OPTIONAL_OFF = "off" - - def __init__(self, editable=True, optional="locked"): - """ - Constructor - =========== - - @param editable: Whether or not this constraint should accept new - values. - @type editable: bool - - @param optional: Whether a value for this constraint must be provided - when running. 
- @type optional: "locked", "on" or "off" - """ - self.editable = editable - if optional == TemplateConstraint.REQUIRED: - self.optional = False - self.switched_on = True - else: - self.optional = True - if optional == TemplateConstraint.OPTIONAL_ON: - self.switched_on = True - elif optional == TemplateConstraint.OPTIONAL_OFF: - self.switched_on = False - else: - raise TypeError("Bad value for optional") - - @property - def required(self): - """ - True if a value must be provided for this constraint. - - @rtype: bool - """ - return not self.optional - - @property - def switched_off(self): - """ - True if this constraint is currently inactive. - - @rtype: bool - """ - return not self.switched_on - - def get_switchable_status(self): - """ - Returns either "locked", "on" or "off". - """ - if not self.optional: - return "locked" - else: - if self.switched_on: - return "on" - else: - return "off" - - def switch_on(self): - """ - Make sure this constraint is active - =================================== - - @raise ValueError: if the constraint is not editable and optional - """ - if self.editable and self.optional: - self.switched_on = True - else: - raise ValueError("This constraint is not switchable") - - def switch_off(self): - """ - Make sure this constraint is inactive - ===================================== - - @raise ValueError: if the constraint is not editable and optional - """ - if self.editable and self.optional: - self.switched_on = False - else: - raise ValueError("This constraint is not switchable") - - def to_string(self): - """ - Provide a template specific human readable representation of the - constraint. This method is called by repr. - """ - if self.editable: - editable = "editable" - else: - editable = "non-editable" - return '(' + editable + ", " + self.get_switchable_status() + ')' - - def separate_arg_sets(self, args): - """ - A static function to use when building template constraints. 
- ============================================================ - - dict -> (dict, dict) - - Splits a dictionary of arguments into two separate dictionaries, one - with arguments for the main constraint, and one with arguments for the - template portion of the behaviour - """ - c_args = {} - t_args = {} - for k, v in list(args.items()): - if k == "editable": - t_args[k] = v == "true" - elif k == "optional": - t_args[k] = v - else: - c_args[k] = v - return (c_args, t_args) - - -class TemplateUnaryConstraint(UnaryConstraint, TemplateConstraint): - def __init__(self, *a, **d): - (c_args, t_args) = self.separate_arg_sets(d) - UnaryConstraint.__init__(self, *a, **c_args) - TemplateConstraint.__init__(self, **t_args) - - def to_string(self): - """ - Provide a template specific human readable representation of the - constraint. This method is called by repr. - """ - return(UnaryConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) - - -class TemplateBinaryConstraint(BinaryConstraint, TemplateConstraint): - def __init__(self, *a, **d): - (c_args, t_args) = self.separate_arg_sets(d) - BinaryConstraint.__init__(self, *a, **c_args) - TemplateConstraint.__init__(self, **t_args) - - def to_string(self): - """ - Provide a template specific human readable representation of the - constraint. This method is called by repr. - """ - return(BinaryConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) - - -class TemplateListConstraint(ListConstraint, TemplateConstraint): - def __init__(self, *a, **d): - (c_args, t_args) = self.separate_arg_sets(d) - ListConstraint.__init__(self, *a, **c_args) - TemplateConstraint.__init__(self, **t_args) - - def to_string(self): - """ - Provide a template specific human readable representation of the - constraint. This method is called by repr. 
- """ - return(ListConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) - - -class TemplateLoopConstraint(LoopConstraint, TemplateConstraint): - def __init__(self, *a, **d): - (c_args, t_args) = self.separate_arg_sets(d) - LoopConstraint.__init__(self, *a, **c_args) - TemplateConstraint.__init__(self, **t_args) - - def to_string(self): - """ - Provide a template specific human readable representation of the - constraint. This method is called by repr. - """ - return(LoopConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) - - -class TemplateTernaryConstraint(TernaryConstraint, TemplateConstraint): - def __init__(self, *a, **d): - (c_args, t_args) = self.separate_arg_sets(d) - TernaryConstraint.__init__(self, *a, **c_args) - TemplateConstraint.__init__(self, **t_args) - - def to_string(self): - """ - Provide a template specific human readable representation of the - constraint. This method is called by repr. - """ - return(TernaryConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) - - -class TemplateMultiConstraint(MultiConstraint, TemplateConstraint): - def __init__(self, *a, **d): - (c_args, t_args) = self.separate_arg_sets(d) - MultiConstraint.__init__(self, *a, **c_args) - TemplateConstraint.__init__(self, **t_args) - - def to_string(self): - """ - Provide a template specific human readable representation of the - constraint. This method is called by repr. - """ - return(MultiConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) - - -class TemplateRangeConstraint(RangeConstraint, TemplateConstraint): - def __init__(self, *a, **d): - (c_args, t_args) = self.separate_arg_sets(d) - RangeConstraint.__init__(self, *a, **c_args) - TemplateConstraint.__init__(self, **t_args) - - def to_string(self): - """ - Provide a template specific human readable representation of the - constraint. This method is called by repr. 
- """ - return(RangeConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) - - -class TemplateIsaConstraint(IsaConstraint, TemplateConstraint): - def __init__(self, *a, **d): - (c_args, t_args) = self.separate_arg_sets(d) - IsaConstraint.__init__(self, *a, **c_args) - TemplateConstraint.__init__(self, **t_args) - - def to_string(self): - """ - Provide a template specific human readable representation of the - constraint. This method is called by repr. - """ - return(IsaConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) - - -class TemplateSubClassConstraint(SubClassConstraint, TemplateConstraint): - def __init__(self, *a, **d): - (c_args, t_args) = self.separate_arg_sets(d) - SubClassConstraint.__init__(self, *a, **c_args) - TemplateConstraint.__init__(self, **t_args) - - def to_string(self): - """ - Provide a template specific human readable representation of the - constraint. This method is called by repr. - """ - return(SubClassConstraint.to_string(self) - + " " + TemplateConstraint.to_string(self)) - - -class ConstraintFactory(object): - """ - A factory for creating constraints from a set of arguments. - =========================================================== - - A constraint factory is responsible for finding an appropriate - constraint class for the given arguments and instantiating the - constraint. - """ - CONSTRAINT_CLASSES = set([ - UnaryConstraint, BinaryConstraint, TernaryConstraint, - MultiConstraint, SubClassConstraint, LoopConstraint, - ListConstraint, RangeConstraint, IsaConstraint]) - - def __init__(self): - """ - Constructor - =========== - - Creates a new ConstraintFactory - """ - self._codes = iter(string.ascii_uppercase) - self.reference_ops = (TernaryConstraint.OPS | RangeConstraint.OPS - | ListConstraint.OPS | IsaConstraint.OPS) - - def get_next_code(self): - """ - Return the available constraint code. 
- - @return: A single uppercase character - @rtype: str - """ - return next(self._codes) - - def make_constraint(self, *args, **kwargs): - """ - Create a constraint from a set of arguments. - ============================================ - - Finds a suitable constraint class, and instantiates it. - - @rtype: Constraint - """ - for CC in self.CONSTRAINT_CLASSES: - try: - c = CC(*args, **kwargs) - if hasattr(c, "code") and c.code == "A": - c.code = self.get_next_code() - return c - except TypeError as e: - pass - raise TypeError("No matching constraint class found for " - + str(args) + ", " + str(kwargs)) - - -class TemplateConstraintFactory(ConstraintFactory): - """ - A factory for creating constraints with template specific characteristics. - ========================================================================== - - A constraint factory is responsible for finding an appropriate - constraint class for the given arguments and instantiating the - constraint. TemplateConstraintFactories make constraints with the - extra set of TemplateConstraint qualities. - """ - CONSTRAINT_CLASSES = set([ - TemplateUnaryConstraint, TemplateBinaryConstraint, - TemplateTernaryConstraint, TemplateMultiConstraint, - TemplateSubClassConstraint, TemplateLoopConstraint, - TemplateListConstraint, TemplateRangeConstraint, TemplateIsaConstraint - ]) +import re +import string +from intermine.pathfeatures import PathFeature, PATH_PATTERN +from intermine.util import ReadableException + + +class Constraint(PathFeature): + """ + A class representing constraints on a query + =========================================== + + All constraints inherit from this class, which + simply defines the type of element for the + purposes of serialisation. 
+ """ + child_type = "constraint" + + +class LogicNode(object): + """ + A class representing nodes in a logic graph + =========================================== + + Objects which can be represented as nodes + in the AST of a constraint logic graph should + inherit from this class, which defines + methods for overloading built-in operations. + """ + + def __add__(self, other): + """ + Overloads + + =========== + + Logic may be defined by using addition to sum + logic nodes:: + + > query.set_logic(con_a + con_b + con_c) + > str(query.logic) + ... A and B and C + + """ + if not isinstance(other, LogicNode): + return NotImplemented + else: + return LogicGroup(self, 'AND', other) + + def __and__(self, other): + """ + Overloads & + =========== + + Logic may be defined by using the & operator:: + + > query.set_logic(con_a & con_b) + > sr(query.logic) + ... A and B + + """ + if not isinstance(other, LogicNode): + return NotImplemented + else: + return LogicGroup(self, 'AND', other) + + def __or__(self, other): + """ + Overloads | + =========== + + Logic may be defined by using the | operator:: + + > query.set_logic(con_a | con_b) + > str(query.logic) + ... A or B + + """ + if not isinstance(other, LogicNode): + return NotImplemented + else: + return LogicGroup(self, 'OR', other) + + +class LogicGroup(LogicNode): + """ + A logic node that represents two sub-nodes joined in some way + ============================================================= + + A logic group is a logic node with two child nodes, which are + either connected by AND or by OR logic. + """ + + LEGAL_OPS = frozenset(['AND', 'OR']) + + def __init__(self, left, op, right, parent=None): + """ + Constructor + =========== + + Makes a new node composes of two nodes (left and right), + and some operator. + + Groups may have a reference to their parent. 
+ """ + if op not in self.LEGAL_OPS: + raise TypeError(op + " is not a legal logical operation") + self.parent = parent + self.left = left + self.right = right + self.op = op + for node in [self.left, self.right]: + if isinstance(node, LogicGroup): + node.parent = self + + def __repr__(self): + """ + Provide a sensible representation of a node + """ + return '<' + self.__class__.__name__ + ': ' + str(self) + '>' + + def __str__(self): + """ + Provide a human readable version of the group. The + string version should be able to be parsed back into the + original logic group. + """ + core = ' '.join(map(str, [self.left, self.op.lower(), self.right])) + if self.parent and self.op != self.parent.op: + return '(' + core + ')' + else: + return core + + def get_codes(self): + """ + Get a list of all constraint codes used in this group. + """ + codes = [] + for node in [self.left, self.right]: + if isinstance(node, LogicGroup): + codes.extend(node.get_codes()) + else: + codes.append(node.code) + return codes + + +class LogicParseError(ReadableException): + """ + An error representing problems in parsing constraint logic. + """ + pass + + +class EmptyLogicError(ValueError): + """ + An error representing the fact that an the logic + string to be parsed was empty + """ + pass + + +class LogicParser(object): + """ + Parses logic strings into logic groups + ====================================== + + Instances of this class are used to parse logic strings into + abstract syntax trees, and then logic groups. This aims to provide + robust parsing of logic strings, with the ability to identify syntax + errors in such strings. + """ + + def __init__(self, query): + """ + Constructor + =========== + + Parsers need access to the query they are parsing for, in + order to reference the constraints on the query. 
+ + @param query: The parent query object + @type query: intermine.query.Query + """ + self._query = query + + def get_constraint(self, code): + """ + Get the constraint with the given code + ====================================== + + This method fetches the constraint from the + parent query with the matching code. + + @see: intermine.query.Query.get_constraint + @rtype: intermine.constraints.CodedConstraint + """ + return self._query.get_constraint(code) + + def get_priority(self, op): + """ + Get the priority for a given operator + ===================================== + + Operators have a specific precedence, from highest + to lowest: + - () + - AND + - OR + + This method returns an integer which can be + used to compare operator priorities. + + @rtype: int + """ + return { + "AND": 2, + "OR": 1, + "(": 3, + ")": 3 + }.get(op) + + ops = { + "AND": "AND", + "&": "AND", + "&&": "AND", + "OR": "OR", + "|": "OR", + "||": "OR", + "(": "(", + ")": ")" + } + + def parse(self, logic_str): + """ + Parse a logic string into an abstract syntax tree + ================================================= + + Takes a string such as "A and B or C and D", and parses it + into a structure which represents this logic as a binary + abstract syntax tree. The above string would parse to + "(A and B) or (C and D)", as AND binds more tightly than OR. + + Note that only singly rooted trees are parsed. 
+ + @param logic_str: The logic defininition as a string + @type logic_str: string + + @rtype: LogicGroup + + @raise LogicParseError: if there is a syntax error in the logic + """ + def flatten(l): + """Flatten out a list which contains both values and sublists""" + ret = [] + for item in l: + if isinstance(item, list): + ret.extend(item) + else: + ret.append(item) + return ret + + def canonical(x, d): + if x in d: + return d[x] + else: + return re.split("\b", x) + + def dedouble(x): + if re.search("[()]", x): + return list(x) + else: + return x + + logic_str = logic_str.upper() + tokens = [t for t in re.split("\\s+", logic_str) if t] + if not tokens: + raise EmptyLogicError() + tokens = flatten([canonical(x, self.ops) for x in tokens]) + tokens = flatten([dedouble(x) for x in tokens]) + self.check_syntax(tokens) + postfix_tokens = self.infix_to_postfix(tokens) + abstract_syntax_tree = self.postfix_to_tree(postfix_tokens) + return abstract_syntax_tree + + def check_syntax(self, infix_tokens): + """ + Check the syntax for errors before parsing + ========================================== + + Syntax is checked before parsing to provide better errors, + which should hopefully lead to more informative error messages. + + This checks for: + - correct operator positions (cannot put two codes next to each + other without intervening operators) + - correct grouping (all brackets are matched, + and contain valid expressions) + + @param infix_tokens: The input parsed into a list of tokens. + @type infix_tokens: iterable + + @raise LogicParseError: if there is a problem. 
+ """ + need_an_op = False + need_binary_op_or_closing_bracket = False + processed = [] + open_brackets = 0 + for token in infix_tokens: + if token not in self.ops: + if need_an_op: + raise LogicParseError("Expected an operator after: '" + + ' '.join(processed) + "'" + + " - but got: '" + token + "'") + if need_binary_op_or_closing_bracket: + raise LogicParseError("Logic grouping error after: '" + + ' '.join(processed) + "'" + + " - expected an operator " + "or a closing bracket") + + need_an_op = True + else: + need_an_op = False + if token == "(": + if processed and processed[-1] not in self.ops: + raise LogicParseError("Logic grouping error after: '" + + ' '.join(processed) + "'" + + " - got an unexpeced " + "opening bracket") + if need_binary_op_or_closing_bracket: + raise LogicParseError("Logic grouping error after: '" + + ' '.join(processed) + "'" + + " - expected an operator or " + "a closing bracket") + + open_brackets += 1 + elif token == ")": + need_binary_op_or_closing_bracket = True + open_brackets -= 1 + else: + need_binary_op_or_closing_bracket = False + processed.append(token) + if open_brackets != 0: + if open_brackets < 0: + message = "Unmatched closing bracket in: " + else: + message = "Unmatched opening bracket in: " + raise LogicParseError(message + '"' + ' '.join(infix_tokens) + + '"') + + def infix_to_postfix(self, infix_tokens): + """ + Convert a list of infix tokens to postfix notation + ================================================== + + Take in a set of infix tokens and return the set parsed + to a postfix sequence. 
+ + @param infix_tokens: The list of tokens + @type infix_tokens: iterable + + @rtype: list + """ + stack = [] + postfix_tokens = [] + for token in infix_tokens: + if token not in self.ops: + postfix_tokens.append(token) + else: + op = token + if op == "(": + stack.append(token) + elif op == ")": + while stack: + last_op = stack.pop() + if last_op == "(": + if stack: + previous_op = stack.pop() + if previous_op != "(": + postfix_tokens.append(previous_op) + break + else: + postfix_tokens.append(last_op) + else: + while stack and (self.get_priority(stack[-1]) + <= self.get_priority(op)): + prev_op = stack.pop() + if prev_op != "(": + postfix_tokens.append(prev_op) + stack.append(op) + while stack: + postfix_tokens.append(stack.pop()) + return postfix_tokens + + def postfix_to_tree(self, postfix_tokens): + """ + Convert a set of structured tokens to a single LogicGroup + ========================================================= + + Convert a set of tokens in postfix notation to a single + LogicGroup object. + + @param postfix_tokens: A list of tokens in postfix notation. + @type postfix_tokens: list + + @rtype: LogicGroup + + @raise AssertionError: is the tree doesn't have a unique root. + """ + stack = [] + try: + for token in postfix_tokens: + if token not in self.ops: + stack.append(self.get_constraint(token)) + else: + op = token + right = stack.pop() + left = stack.pop() + stack.append(LogicGroup(left, op, right)) + assert len(stack) == 1, "Tree doesn't have a unique root" + return stack.pop() + except IndexError: + raise EmptyLogicError() + + +class CodedConstraint(Constraint, LogicNode): + """ + A parent class for all constraints that have codes + ================================================== + + Constraints that have codes are the principal logical + filters on queries, and need to be refered to individually + (hence the codes). They will all have a logical operation they + embody, and so have a reference to an operator. 
+ + This class is not meant to be instantiated directly, but instead + inherited from to supply default behaviour. + """ + + OPS = set([]) + + def __init__(self, path, op, code="A"): + """ + Constructor + =========== + + @param path: The path to constrain + @type path: string + + @param op: The operation to apply - must be in the OPS set + @type op: string + """ + if op not in self.OPS: + raise TypeError(op + " not in " + str(self.OPS)) + self.op = op + self.code = code + super(CodedConstraint, self).__init__(path) + + def get_codes(self): + return [self.code] + + def __str__(self): + """ + Stringify to the code they are refered to by. + """ + return self.code + + def to_string(self): + """ + Provide a human readable representation of the logic. + This method is called by repr. + """ + s = super(CodedConstraint, self).to_string() + return " ".join([s, self.op]) + + def to_dict(self): + """ + Return a dict object which can be used to construct a + DOM element with the appropriate attributes. + """ + d = super(CodedConstraint, self).to_dict() + d.update(op=self.op, code=self.code) + return d + + +class UnaryConstraint(CodedConstraint): + """ + Constraints which have just a path and an operator + ================================================== + + These constraints are simple assertions about the + object/value refered to by the path. The set of valid + operators is: + - IS NULL + - IS NOT NULL + + """ + OPS = set(['IS NULL', 'IS NOT NULL']) + + +class BinaryConstraint(CodedConstraint): + """ + Constraints which have an operator and a value + ============================================== + + These constraints assert a relationship between the + value represented by the path (it must be a representation + of a value, ie an Attribute) and another value - ie. the + operator takes two parameters. + + In all case the 'left' side of the relationship is the path, + and the 'right' side is the supplied value. 
+ + Valid operators are: + - = (equal to) + - != (not equal to) + - < (less than) + - > (greater than) + - <= (less than or equal to) + - >= (greater than or equal to) + - LIKE (same as equal to, but with implied wildcards) + - CONTAINS (same as equal to, but with implied wildcards) + - NOT LIKE (same as not equal to, but with implied wildcards) + + """ + OPS = set(['=', '!=', '<', '>', '<=', '>=', + 'LIKE', 'NOT LIKE', 'CONTAINS']) + + def __init__(self, path, op, value, code="A"): + """ + Constructor + =========== + + @param path: The path to constrain + @type path: string + + @param op: The relationship between the value represented by the path + and the value provided (must be a valid operator) + @type op: string + + @param value: The value to compare the stored value to + @type value: string or number + + @param code: The code for this constraint (default = "A") + @type code: string + """ + self.value = value + super(BinaryConstraint, self).__init__(path, op, code) + + def to_string(self): + """ + Provide a human readable representation of the logic. + This method is called by repr. + """ + s = super(BinaryConstraint, self).to_string() + return " ".join([s, str(self.value)]) + + def to_dict(self): + """ + Return a dict object which can be used to construct a + DOM element with the appropriate attributes. + """ + d = super(BinaryConstraint, self).to_dict() + d.update(value=str(self.value)) + return d + + +class ListConstraint(CodedConstraint): + """ + Constraints which refer to an objects membership of lists + ========================================================= + + These constraints assert a membership relationship between the + object represented by the path (it must always be an object, ie. + a Reference or a Class) and a List. Lists are collections of + objects in the database which are stored in InterMine + datawarehouses. 
These lists must be set up before the query is run, either + manually in the webapp or by using the webservice API list + upload feature. + + Valid operators are: + - IN + - NOT IN + + """ + OPS = set(['IN', 'NOT IN']) + + def __init__(self, path, op, list_name, code="A"): + if hasattr(list_name, 'to_query'): + q = list_name.to_query() + list_name1 = q.service.create_list(q) + self.list_name = list_name1.name + elif hasattr(list_name, "name"): + self.list_name = list_name.name + else: + self.list_name = list_name + super(ListConstraint, self).__init__(path, op, code) + + def to_string(self): + """ + Provide a human readable representation of the logic. + This method is called by repr. + """ + s = super(ListConstraint, self).to_string() + return " ".join([s, str(self.list_name)]) + + def to_dict(self): + """ + Return a dict object which can be used to construct a + DOM element with the appropriate attributes. + """ + d = super(ListConstraint, self).to_dict() + d.update(value=str(self.list_name)) + return d + + +class LoopConstraint(CodedConstraint): + """ + Constraints with refer to object identity + ========================================= + + These constraints assert that two paths refer to the same + object. + + Valid operators: + - IS + - IS NOT + + The operators IS and IS NOT map to the ops "=" and "!=" when they + are used in XML serialisation. 
+ + """ + OPS = set(['IS', 'IS NOT']) + SERIALISED_OPS = {'IS': '=', 'IS NOT': '!='} + + def __init__(self, path, op, loopPath, code="A"): + """ + Constructor + =========== + + @param path: The path to constrain + @type path: string + + @param op: The relationship between the path and the path provided + (must be a valid operator) + @type op: string + + @param loopPath: The path to check for identity against + @type loopPath: string + + @param code: The code for this constraint (default = "A") + @type code: string + """ + self.loopPath = loopPath + super(LoopConstraint, self).__init__(path, op, code) + + def to_string(self): + """ + Provide a human readable representation of the logic. + This method is called by repr. + """ + s = super(LoopConstraint, self).to_string() + return " ".join([s, self.loopPath]) + + def to_dict(self): + """ + Return a dict object which can be used to construct a + DOM element with the appropriate attributes. + """ + d = super(LoopConstraint, self).to_dict() + d.update(loopPath=self.loopPath, op=self.SERIALISED_OPS[self.op]) + return d + + +class TernaryConstraint(BinaryConstraint): + """ + Constraints for broad, general searching over all fields + ======================================================== + + These constraints request a wide-ranging search for matching + fields over all aspects of an object, including up to coercion + from related classes. + + Valid operators: + - LOOKUP + + To aid disambiguation, Ternary constaints accept an extra_value as + well as the main value. + """ + OPS = set(['LOOKUP']) + + def __init__(self, path, op, value, extra_value=None, code="A"): + """ + Constructor + =========== + + @param path: The path to constrain. Here is must be a class, + or a reference to a class. + @type path: string + + @param op: The relationship between the path and the path provided + (must be a valid operator) + @type op: string + + @param value: The value to check other fields against. 
+ @type value: string + + @param extra_value: A further value for disambiguation. The meaning + of this value varies by class and configuration. + For example, if the class of the object is Gene, + then extra_value will refer to the Organism. + @type extra_value: string + + @param code: The code for this constraint (default = "A") + @type code: string + """ + self.extra_value = extra_value + super(TernaryConstraint, self).__init__(path, op, value, code) + + def to_string(self): + """ + Provide a human readable representation of the logic. + This method is called by repr. + """ + s = super(TernaryConstraint, self).to_string() + if self.extra_value is None: + return s + else: + return " ".join([s, 'IN', self.extra_value]) + + def to_dict(self): + """ + Return a dict object which can be used to construct a + DOM element with the appropriate attributes. + """ + d = super(TernaryConstraint, self).to_dict() + if self.extra_value is not None: + d.update(extraValue=self.extra_value) + return d + + +class MultiConstraint(CodedConstraint): + """ + Constraints for checking membership of a set of values + ====================================================== + + These constraints require the value they constrain to be + either a member of a set of values, or not a member. + + Valid operators: + - ONE OF + - NONE OF + + These constraints are similar in use to List constraints, with + the following differences: + - The list in this case is a defined set of values that is passed + along with the query itself, rather than anything stored + independently on a server. + - The object of the constaint is the value of an attribute, rather + than an object's identity. + """ + OPS = set(['ONE OF', 'NONE OF']) + + def __init__(self, path, op, values, code="A"): + """ + Constructor + =========== + + @param path: The path to constrain. Here it must be an attribute of + some object. 
+ @type path: string + + @param op: The relationship between the path and the path provided + (must be a valid operator) + @type op: string + + @param values: The set of values which the object of the constraint + either must or must not belong to. + @type values: set or list + + @param code: The code for this constraint (default = "A") + @type code: string + """ + if not isinstance(values, (set, list)): + raise TypeError( + "values must be a set or a list, not " + str(type(values))) + self.values = values + super(MultiConstraint, self).__init__(path, op, code) + + def to_string(self): + """ + Provide a human readable representation of the logic. + This method is called by repr. + """ + s = super(MultiConstraint, self).to_string() + return ' '.join([s, str(self.values)]) + + def to_dict(self): + """ + Return a dict object which can be used to construct a + DOM element with the appropriate attributes. + """ + d = super(MultiConstraint, self).to_dict() + d.update(value=self.values) + return d + + +class RangeConstraint(MultiConstraint): + """ + Constraints for testing where a value lies relative to a set of ranges + ====================================================================== + + These constraints require that the value of the path they constrain + should lie in relationship to the set of values passed according to + the specific operator. + + Valid operators: + - OVERLAPS : The value overlaps at least one of the given ranges + - WITHIN : The value is wholly outside the given set of ranges + - CONTAINS : The value contains all the given ranges + - DOES NOT CONTAIN : The value does not contain all the given ranges + - OUTSIDE : Some part is outside the given set of ranges + - DOES NOT OVERLAP : The value does not overlap with any of the ranges + + For example: + + 4 WITHIN [1..5, 20..25] => True + + The format of the ranges depends on the value being constrained and what + range parsers have been configured on the target server. 
A common range + parser for biological mines is the one for Locations: + + Gene.chromosomeLocation OVERLAPS [2X:54321..67890, 3R:12345..456789] + + """ + OPS = set(['OVERLAPS', 'DOES NOT OVERLAP', 'WITHIN', + 'OUTSIDE', 'CONTAINS', 'DOES NOT CONTAIN']) + + +class IsaConstraint(MultiConstraint): + """ + Constraints for testing the class of a value, as a disjoint union + ====================================================================== + + These constraints require that the value of the path they constrain + should be an instance of one of the classes provided. + + Valid operators: + - ISA : The value is an instance of one of the provided classes. + + For example: + + SequenceFeature ISA [Exon, Intron] + + """ + OPS = set(['ISA']) + + +class SubClassConstraint(Constraint): + """ + Constraints on the class of a reference + ======================================= + + If an object has a reference X to another object of type A, + and type B extends type A, then any object of type B may be + the value of the reference X. If you only want to see X's + which are B's, this may be achieved with subclass constraints, + which allow the type of an object to be limited to one of the + subclasses (at any depth) of the class type required + by the attribute. + + These constraints do not use operators. Since they cannot be + conditional (eg. "A is a B or A is a C" would not be possible + in an InterMine query), they do not have codes + and cannot be referenced in logic expressions. + """ + + def __init__(self, path, subclass): + """ + Constructor + =========== + + @param path: The path to constrain. This must refer to a class or a + reference to a class. + @type path: str + + @param subclass: The class to subclass the path to. 
This must be a + simple class name (not a dotted name) + @type subclass: str + """ + if not PATH_PATTERN.match(subclass): + raise TypeError + self.subclass = subclass + super(SubClassConstraint, self).__init__(path) + + def to_string(self): + """ + Provide a human readable representation of the logic. + This method is called by repr. + """ + s = super(SubClassConstraint, self).to_string() + return s + ' ISA ' + self.subclass + + def to_dict(self): + """ + Return a dict object which can be used to construct a + DOM element with the appropriate attributes. + """ + d = super(SubClassConstraint, self).to_dict() + d.update(type=self.subclass) + return d + + +class TemplateConstraint(object): + """ + A mixin to supply the behaviour and state of constraints on templates + ===================================================================== + + Constraints on templates can also be designated as "on", "off" or + "locked", which refers to whether they are active or not. Inactive + constraints are still configured, but behave as if absent for the purpose + of results. In addition, template constraints can be editable or not. + Only values for editable constraints can be provided when requesting + results, and only constraints that can participate in logic expressions + can be editable. + """ + REQUIRED = "locked" + OPTIONAL_ON = "on" + OPTIONAL_OFF = "off" + + def __init__(self, editable=True, optional="locked"): + """ + Constructor + =========== + + @param editable: Whether or not this constraint should accept new + values. + @type editable: bool + + @param optional: Whether a value for this constraint must be provided + when running. 
+ @type optional: "locked", "on" or "off" + """ + self.editable = editable + if optional == TemplateConstraint.REQUIRED: + self.optional = False + self.switched_on = True + else: + self.optional = True + if optional == TemplateConstraint.OPTIONAL_ON: + self.switched_on = True + elif optional == TemplateConstraint.OPTIONAL_OFF: + self.switched_on = False + else: + raise TypeError("Bad value for optional") + + @property + def required(self): + """ + True if a value must be provided for this constraint. + + @rtype: bool + """ + return not self.optional + + @property + def switched_off(self): + """ + True if this constraint is currently inactive. + + @rtype: bool + """ + return not self.switched_on + + def get_switchable_status(self): + """ + Returns either "locked", "on" or "off". + """ + if not self.optional: + return "locked" + else: + if self.switched_on: + return "on" + else: + return "off" + + def switch_on(self): + """ + Make sure this constraint is active + =================================== + + @raise ValueError: if the constraint is not editable and optional + """ + if self.editable and self.optional: + self.switched_on = True + else: + raise ValueError("This constraint is not switchable") + + def switch_off(self): + """ + Make sure this constraint is inactive + ===================================== + + @raise ValueError: if the constraint is not editable and optional + """ + if self.editable and self.optional: + self.switched_on = False + else: + raise ValueError("This constraint is not switchable") + + def to_string(self): + """ + Provide a template specific human readable representation of the + constraint. This method is called by repr. + """ + if self.editable: + editable = "editable" + else: + editable = "non-editable" + return '(' + editable + ", " + self.get_switchable_status() + ')' + + def separate_arg_sets(self, args): + """ + A static function to use when building template constraints. 
+ ============================================================ + + dict -> (dict, dict) + + Splits a dictionary of arguments into two separate dictionaries, one + with arguments for the main constraint, and one with arguments for the + template portion of the behaviour + """ + c_args = {} + t_args = {} + for k, v in list(args.items()): + if k == "editable": + t_args[k] = v == "true" + elif k == "optional": + t_args[k] = v + else: + c_args[k] = v + return (c_args, t_args) + + +class TemplateUnaryConstraint(UnaryConstraint, TemplateConstraint): + def __init__(self, *a, **d): + (c_args, t_args) = self.separate_arg_sets(d) + UnaryConstraint.__init__(self, *a, **c_args) + TemplateConstraint.__init__(self, **t_args) + + def to_string(self): + """ + Provide a template specific human readable representation of the + constraint. This method is called by repr. + """ + return(UnaryConstraint.to_string(self) + + " " + TemplateConstraint.to_string(self)) + + +class TemplateBinaryConstraint(BinaryConstraint, TemplateConstraint): + def __init__(self, *a, **d): + (c_args, t_args) = self.separate_arg_sets(d) + BinaryConstraint.__init__(self, *a, **c_args) + TemplateConstraint.__init__(self, **t_args) + + def to_string(self): + """ + Provide a template specific human readable representation of the + constraint. This method is called by repr. + """ + return(BinaryConstraint.to_string(self) + + " " + TemplateConstraint.to_string(self)) + + +class TemplateListConstraint(ListConstraint, TemplateConstraint): + def __init__(self, *a, **d): + (c_args, t_args) = self.separate_arg_sets(d) + ListConstraint.__init__(self, *a, **c_args) + TemplateConstraint.__init__(self, **t_args) + + def to_string(self): + """ + Provide a template specific human readable representation of the + constraint. This method is called by repr. 
+ """ + return(ListConstraint.to_string(self) + + " " + TemplateConstraint.to_string(self)) + + +class TemplateLoopConstraint(LoopConstraint, TemplateConstraint): + def __init__(self, *a, **d): + (c_args, t_args) = self.separate_arg_sets(d) + LoopConstraint.__init__(self, *a, **c_args) + TemplateConstraint.__init__(self, **t_args) + + def to_string(self): + """ + Provide a template specific human readable representation of the + constraint. This method is called by repr. + """ + return(LoopConstraint.to_string(self) + + " " + TemplateConstraint.to_string(self)) + + +class TemplateTernaryConstraint(TernaryConstraint, TemplateConstraint): + def __init__(self, *a, **d): + (c_args, t_args) = self.separate_arg_sets(d) + TernaryConstraint.__init__(self, *a, **c_args) + TemplateConstraint.__init__(self, **t_args) + + def to_string(self): + """ + Provide a template specific human readable representation of the + constraint. This method is called by repr. + """ + return(TernaryConstraint.to_string(self) + + " " + TemplateConstraint.to_string(self)) + + +class TemplateMultiConstraint(MultiConstraint, TemplateConstraint): + def __init__(self, *a, **d): + (c_args, t_args) = self.separate_arg_sets(d) + MultiConstraint.__init__(self, *a, **c_args) + TemplateConstraint.__init__(self, **t_args) + + def to_string(self): + """ + Provide a template specific human readable representation of the + constraint. This method is called by repr. + """ + return(MultiConstraint.to_string(self) + + " " + TemplateConstraint.to_string(self)) + + +class TemplateRangeConstraint(RangeConstraint, TemplateConstraint): + def __init__(self, *a, **d): + (c_args, t_args) = self.separate_arg_sets(d) + RangeConstraint.__init__(self, *a, **c_args) + TemplateConstraint.__init__(self, **t_args) + + def to_string(self): + """ + Provide a template specific human readable representation of the + constraint. This method is called by repr. 
+ """ + return(RangeConstraint.to_string(self) + + " " + TemplateConstraint.to_string(self)) + + +class TemplateIsaConstraint(IsaConstraint, TemplateConstraint): + def __init__(self, *a, **d): + (c_args, t_args) = self.separate_arg_sets(d) + IsaConstraint.__init__(self, *a, **c_args) + TemplateConstraint.__init__(self, **t_args) + + def to_string(self): + """ + Provide a template specific human readable representation of the + constraint. This method is called by repr. + """ + return(IsaConstraint.to_string(self) + + " " + TemplateConstraint.to_string(self)) + + +class TemplateSubClassConstraint(SubClassConstraint, TemplateConstraint): + def __init__(self, *a, **d): + (c_args, t_args) = self.separate_arg_sets(d) + SubClassConstraint.__init__(self, *a, **c_args) + TemplateConstraint.__init__(self, **t_args) + + def to_string(self): + """ + Provide a template specific human readable representation of the + constraint. This method is called by repr. + """ + return(SubClassConstraint.to_string(self) + + " " + TemplateConstraint.to_string(self)) + + +class ConstraintFactory(object): + """ + A factory for creating constraints from a set of arguments. + =========================================================== + + A constraint factory is responsible for finding an appropriate + constraint class for the given arguments and instantiating the + constraint. + """ + CONSTRAINT_CLASSES = set([ + UnaryConstraint, BinaryConstraint, TernaryConstraint, + MultiConstraint, SubClassConstraint, LoopConstraint, + ListConstraint, RangeConstraint, IsaConstraint]) + + def __init__(self): + """ + Constructor + =========== + + Creates a new ConstraintFactory + """ + self._codes = iter(string.ascii_uppercase) + self.reference_ops = (TernaryConstraint.OPS | RangeConstraint.OPS + | ListConstraint.OPS | IsaConstraint.OPS) + + def get_next_code(self): + """ + Return the available constraint code. 
+ + @return: A single uppercase character + @rtype: str + """ + return next(self._codes) + + def make_constraint(self, *args, **kwargs): + """ + Create a constraint from a set of arguments. + ============================================ + + Finds a suitable constraint class, and instantiates it. + + @rtype: Constraint + """ + for CC in self.CONSTRAINT_CLASSES: + try: + c = CC(*args, **kwargs) + if hasattr(c, "code") and c.code == "A": + c.code = self.get_next_code() + return c + except TypeError as e: + pass + raise TypeError("No matching constraint class found for " + + str(args) + ", " + str(kwargs)) + + +class TemplateConstraintFactory(ConstraintFactory): + """ + A factory for creating constraints with template specific characteristics. + ========================================================================== + + A constraint factory is responsible for finding an appropriate + constraint class for the given arguments and instantiating the + constraint. TemplateConstraintFactories make constraints with the + extra set of TemplateConstraint qualities. 
+ """ + CONSTRAINT_CLASSES = set([ + TemplateUnaryConstraint, TemplateBinaryConstraint, + TemplateTernaryConstraint, TemplateMultiConstraint, + TemplateSubClassConstraint, TemplateLoopConstraint, + TemplateListConstraint, TemplateRangeConstraint, TemplateIsaConstraint + ]) diff --git a/intermine/decorators.py b/intermine/decorators.py index 387da83c..be90d93b 100644 --- a/intermine/decorators.py +++ b/intermine/decorators.py @@ -1,19 +1,19 @@ -from functools import wraps -from intermine.errors import ServiceError - - -def requires_version(required): - - error_fmt = "Service must be at version %s, but is at %s" - - def decorator(f): - - @wraps(f) - def wrapper(self, *args, **kwargs): - if self.version < required: - raise ServiceError(error_fmt % (required, self.version)) - return f(self, *args, **kwargs) - - return wrapper - - return decorator +from functools import wraps +from intermine.errors import ServiceError + + +def requires_version(required): + + error_fmt = "Service must be at version %s, but is at %s" + + def decorator(f): + + @wraps(f) + def wrapper(self, *args, **kwargs): + if self.version < required: + raise ServiceError(error_fmt % (required, self.version)) + return f(self, *args, **kwargs) + + return wrapper + + return decorator diff --git a/intermine/errors.py b/intermine/errors.py index 592a25ae..d608f002 100644 --- a/intermine/errors.py +++ b/intermine/errors.py @@ -1,15 +1,15 @@ -from intermine.util import ReadableException - - -class UnimplementedError(Exception): - pass - - -class ServiceError(ReadableException): - """Errors in the creation and use of the Service object""" - pass - - -class WebserviceError(IOError): - """Errors from interaction with the webservice""" - pass +from intermine.util import ReadableException + + +class UnimplementedError(Exception): + pass + + +class ServiceError(ReadableException): + """Errors in the creation and use of the Service object""" + pass + + +class WebserviceError(IOError): + """Errors from interaction 
with the webservice""" + pass diff --git a/intermine/idresolution.py b/intermine/idresolution.py index e8c48a4d..13f4024e 100644 --- a/intermine/idresolution.py +++ b/intermine/idresolution.py @@ -1,96 +1,96 @@ -import weakref -import time - -# Use core json for 2.6+, simplejson for <=2.5 -try: - import json -except ImportError: - import simplejson as json - - -def get_json(service, path, key): - text = service.opener.read(service.root + path) - data = json.loads(text) - if data['error'] is not None: - raise Exception(data['error']) - if key not in data: - raise Exception(key + " not returned from " + path) - return data[key] - - -ONE_MINUTE = 60 - -COMPLETED = set(["SUCCESS", "ERROR"]) - - -class Job(object): - """ - A Representation of an Identifier Resolution Job - ================================================ - - Users can submit requests to resolve sets of IDs to - objects in the data-store. These jobs begin in a PENDING - state, and transition through RUNNING to either SUCCESS - or ERROR. - - Upon completion, the results of the job may be fetched, and the - job may be deleted on the server. - """ - - INITIAL_DECAY = 1.25 - INITIAL_BACKOFF = 0.05 - MAX_BACKOFF = ONE_MINUTE - - def __init__(self, service, uid): - self.service = weakref.proxy(service) - self.uid = uid - self.status = None - self.backoff = Job.INITIAL_BACKOFF - self.decay = Job.INITIAL_DECAY - self.max_backoff = Job.MAX_BACKOFF - if self.uid is None: - raise Exception("No uid found") - - def poll(self): - """ - Check to see if the job has been completed, updating the - status of the job in the process. - - @return: Boolean Whether or not the job is complete. - """ - if self.status not in COMPLETED: - backoff = self.backoff - self.backoff = min(self.max_backoff, backoff * self.decay) - time.sleep(backoff) - self.status = self.fetch_status() - return self.status in COMPLETED - - def fetch_status(self): - """ - Retrieve the results of this completed job from the server. 
- - @rtype: dict - """ - return get_json(self.service, - "/ids/{0}/status".format(self.uid), "status") - - def delete(self): - """ - Delete the job from the server. - - The job should not be used again once this method has been invoked. - """ - path = "/ids/" + self.uid - response = self.service.opener.delete(self.service.root + path) - response_data = json.loads(response) - if response_data['error'] is not None: - raise Exception(response_data['error']) - - def fetch_results(self): - """ - Retrieve the current status of this job from the server. - - @rtype String - """ - return get_json(self.service, - "/ids/{0}/result".format(self.uid), "results") +import weakref +import time + +# Use core json for 2.6+, simplejson for <=2.5 +try: + import json +except ImportError: + import simplejson as json + + +def get_json(service, path, key): + text = service.opener.read(service.root + path) + data = json.loads(text) + if data['error'] is not None: + raise Exception(data['error']) + if key not in data: + raise Exception(key + " not returned from " + path) + return data[key] + + +ONE_MINUTE = 60 + +COMPLETED = set(["SUCCESS", "ERROR"]) + + +class Job(object): + """ + A Representation of an Identifier Resolution Job + ================================================ + + Users can submit requests to resolve sets of IDs to + objects in the data-store. These jobs begin in a PENDING + state, and transition through RUNNING to either SUCCESS + or ERROR. + + Upon completion, the results of the job may be fetched, and the + job may be deleted on the server. 
+ """ + + INITIAL_DECAY = 1.25 + INITIAL_BACKOFF = 0.05 + MAX_BACKOFF = ONE_MINUTE + + def __init__(self, service, uid): + self.service = weakref.proxy(service) + self.uid = uid + self.status = None + self.backoff = Job.INITIAL_BACKOFF + self.decay = Job.INITIAL_DECAY + self.max_backoff = Job.MAX_BACKOFF + if self.uid is None: + raise Exception("No uid found") + + def poll(self): + """ + Check to see if the job has been completed, updating the + status of the job in the process. + + @return: Boolean Whether or not the job is complete. + """ + if self.status not in COMPLETED: + backoff = self.backoff + self.backoff = min(self.max_backoff, backoff * self.decay) + time.sleep(backoff) + self.status = self.fetch_status() + return self.status in COMPLETED + + def fetch_status(self): + """ + Retrieve the results of this completed job from the server. + + @rtype: dict + """ + return get_json(self.service, + "/ids/{0}/status".format(self.uid), "status") + + def delete(self): + """ + Delete the job from the server. + + The job should not be used again once this method has been invoked. + """ + path = "/ids/" + self.uid + response = self.service.opener.delete(self.service.root + path) + response_data = json.loads(response) + if response_data['error'] is not None: + raise Exception(response_data['error']) + + def fetch_results(self): + """ + Retrieve the current status of this job from the server. 
+ + @rtype String + """ + return get_json(self.service, + "/ids/{0}/result".format(self.uid), "results") diff --git a/intermine/model.py b/intermine/model.py index d849cbb4..ab409725 100644 --- a/intermine/model.py +++ b/intermine/model.py @@ -1,1120 +1,1120 @@ -from xml.dom import minidom -import weakref -import re -import logging - -from intermine.util import openAnything, ReadableException - -try: - from functools import reduce -except ImportError: - pass - -logging.basicConfig() - -""" -Classes representing the data model -=================================== - -Representations of tables and columns, and behaviour -for validating connections between them. - -""" - -__author__ = "Alex Kalderimis" -__organization__ = "InterMine" -__license__ = "LGPL" -__contact__ = "dev@intermine.org" - - -class Field(object): - """ - A class representing columns on database tables - =============================================== - - The base class for attributes, references and collections. All - columns in DB tables are represented by fields - - SYNOPSIS - -------- - - >>> service = Service("http://www.flymine.org/query/service") - >>> model = service.model - >>> cd = model.get_class("Gene") - >>> print "Gene has", len(cd.fields), "fields" - >>> for field in gene_cd.fields: - ... 
print " - ", field - Gene has 45 fields - - CDSs is a group of CDS objects, which link back to this as gene - - GLEANRsymbol is a String - - UTRs is a group of UTR objects, which link back to this as gene - - alleles is a group of Allele objects, which link back - to this as gene - - chromosome is a Chromosome - - chromosomeLocation is a Location - - clones is a group of CDNAClone objects, which link - back to this as gene - - crossReferences is a group of CrossReference objects, - which link back to this as subject - - cytoLocation is a String - - dataSets is a group of DataSet objects, - which link back to this as bioEntities - - downstreamIntergenicRegion is a IntergenicRegion - - exons is a group of Exon objects, - which link back to this as gene - - flankingRegions is a group of GeneFlankingRegion objects, - which link back to this as gene - - goAnnotation is a group of GOAnnotation objects - - homologues is a group of Homologue objects, - which link back to this as gene - - id is a Integer - - interactions is a group of Interaction objects, - which link back to this as gene - - length is a Integer - ... 
- - @see: L{Attribute} - @see: L{Reference} - @see: L{Collection} - """ - - def __init__(self, name, type_name, class_origin): - """ - Constructor - DO NOT USE - ======================== - - THIS CLASS IS NOT MEANT TO BE INSTANTIATED DIRECTLY - - you are unlikely to need to do - so anyway: it is recommended you access fields - through the classes generated by the model - - @param name: The name of the reference - @param type_name: The name of the model.Class this refers to - @param class_origin: The model.Class this was declared in - - """ - self.name = name - self.type_name = type_name - self.type_class = None - self.declared_in = class_origin - - def __repr__(self): - return self.name + " is a " + self.type_name - - def __str__(self): - return self.name - - @property - def fieldtype(self): - raise Exception("Fields should never be directly instantiated") - - -class Attribute(Field): - """ - Attributes represent columns that contain actual data - ===================================================== - - The Attribute class inherits all the behaviour of L{intermine.model.Field} - """ - - @property - def fieldtype(self): - return "attribute" - - -class Reference(Field): - """ - References represent columns that refer to records in other tables - ================================================================== - - In addition the the behaviour and properties of Field, references - may also have a reverse reference, if the other record points - back to this one as well. 
And all references will have their - type upgraded to a type_class during parsing - """ - - def __init__(self, name, type_name, class_origin, reverse_ref=None): - """ - Constructor - =========== - - In addition to the a parameters of Field, Reference also - takes an optional reverse reference name (str) - - @param name: The name of the reference - @param type_name: The name of the model.Class this refers to - @param class_origin: The model.Class this was declared in - @param reverse_ref: The name of the reverse reference (default: None) - - """ - self.reverse_reference_name = reverse_ref - super(Reference, self).__init__(name, type_name, class_origin) - self.reverse_reference = None - - def __repr__(self): - """ - Return a string representation - ============================== - - @rtype: str - """ - s = super(Reference, self).__repr__() - if self.reverse_reference is None: - return s - else: - return (s + ", which links back to this as " + - self.reverse_reference.name) - - @property - def fieldtype(self): - return "reference" - - -class Collection(Reference): - """ - Collections are references which refer to groups of objects - =========================================================== - - Collections have all the same behaviour and properties as References - """ - - def __repr__(self): - """Return a string representation""" - ret = super(Collection, self).__repr__().replace( - " is a ", " is a group of ") - if self.reverse_reference is None: - return ret + " objects" - else: - return ret.replace(", which links", " objects, which link") - - @property - def fieldtype(self): - return "collection" - - -class Class(object): - """ - An abstraction of database tables in the data model - =================================================== - - These objects refer to the table objects in the - InterMine ORM layer. 
- - SYNOPSIS - -------- - - >>> service = Service("http://www.flymine.org/query/service") - >>> model = service.model - >>> - >>> if "Gene" in model.classes: - ... gene_cd = model.get_class("Gene") - ... print "Gene has", len(gene_cd.fields), "fields" - ... for field in gene_cd.fields: - ... print " - ", field.name - - OVERVIEW - -------- - - Each class can have attributes (columns) of various types, - and can have references to other classes (tables), on either - a one-to-one (references) or one-to-many (collections) basis - - Classes should not be instantiated by hand, but rather used - as part of the model they belong to. - - """ - - def __init__(self, name, parents, model, interface=True): - """ - Constructor - Creates a new Class descriptor - ============================================ - - >>> cd = intermine.model.Class("Gene", ["SequenceFeature"]) - - - This constructor is called when deserialising the - model - you should have no need to create Classes by hand - - @param name: The name of this class - @param parents: a list of parental names - - """ - self.name = name - self.parents = parents - self.model = model - self.parent_classes = [] - self.is_interface = interface - self.field_dict = {} - self.has_id = "Object" not in parents - if self.has_id: - # All InterMineObject classes have an id attribute. - id_field = Attribute("id", "Integer", self) - self.field_dict["id"] = id_field - - def __repr__(self): - return "<%s.%s %s.%s>" % (self.__module__, self.__class__.__name__, - self.model.package_name if - hasattr(self.model, 'package_name') - else "__test__", self.name) - - @property - def fields(self): - """ - The fields of this class - ======================== - - The fields are returned sorted by name. 
Fields - includes all Attributes, References and Collections - - @rtype: list(L{Field}) - """ - return sorted(list(self.field_dict.values()), - key=lambda field: field.name) - - def __iter__(self): - for f in list(self.field_dict.values()): - yield f - - def __contains__(self, item): - if isinstance(item, Field): - return item in list(self.field_dict.values()) - else: - return str(item) in self.field_dict - - @property - def attributes(self): - """ - The fields of this class which contain data - =========================================== - - @rtype: list(L{Attribute}) - """ - return [x for x in self.fields if isinstance(x, Attribute)] - - @property - def references(self): - """ - fields which reference other objects - ==================================== - - @rtype: list(L{Reference}) - """ - def isRef(x): return isinstance( - x, Reference) and not isinstance(x, Collection) - return list(filter(isRef, self.fields)) - - @property - def collections(self): - """ - fields which reference many other objects - ========================================= - - @rtype: list(L{Collection}) - """ - return [x for x in self.fields if isinstance(x, Collection)] - - def get_field(self, name): - """ - Get a field by name - =================== - - The standard way of retrieving a field - - @raise ModelError: if the Class does not have such a field - - @rtype: subclass of L{intermine.model.Field} - """ - if name in self.field_dict: - return self.field_dict[name] - else: - raise ModelError("There is no field called %s in %s" % - (name, self.name)) - - def isa(self, other): - """ - Check if self is, or inherits from other - ======================================== - - This method validates statements about inheritance. 
- Returns true if the "other" is, or is within the - ancestry of, this class - - Other can be passed as a name (str), or as the class object itself - - @rtype: boolean - """ - if isinstance(other, Class): - other_name = other.name - else: - other_name = other - if self.name == other_name: - return True - if other_name in self.parents: - return True - for p in self.parent_classes: - if p.isa(other): - return True - return False - - -class ComposedClass(Class): - """ - An abstraction of dynamic objects that are in two classes - ========================================================== - - These objects are structural unions of two or more different data-types. - """ - - def __init__(self, parts, model): - self.is_interface = True - self.parts = parts - self.model = weakref.proxy(model) - - @property - def parents(self): - return reduce(lambda ps, cls: ps + cls.parents, self.parts, []) - - @property - def name(self): - return '_'.join(c.name for c in self.parts) - - @property - def has_id(self): - return "Object" not in self.parents - - @property - def field_dict(self): - """The combined field dictionary of all parts""" - fields = {} - if self.has_id: - # All InterMineObject classes have an id attribute. - fields["id"] = Attribute("id", "Integer", self) - for p in self.parts: - fields.update(p.field_dict) - return fields - - @property - def parent_classes(self): - """The flattened list of parent classes, with the parts""" - for p in self.parts: - all_parents = [pc for pc in p.parent_classes] - return all_parents + self.parts - - -class Path(object): - """ - A class representing a validated dotted string path - =================================================== - - A path represents a connection between records and fields - - SYNOPSIS - -------- - - >>> service = Service("http://www.flymine.org/query/service") - model = service.model - path = model.make_path("Gene.organism.name") - path.is_attribute() - ... 
True - >>> path2 = model.make_path("Gene.proteins") - path2.is_attribute() - ... False - >>> path2.is_reference() - ... True - >>> path2.get_class() - ... - - OVERVIEW - -------- - - This class is used for performing validation on dotted path strings. - The simple act of parsing it into existence will validate the path - to some extent, but there are additional methods for verifying certain - relationships as well - """ - - def __init__(self, path, model, subclasses={}): - """ - Constructor - =========== - - >>> path = Path("Gene.name", model) - - You will not need to use this constructor directly. Instead, - use the "make_path" method on the model to construct paths for you. - - @param path: the dotted path string (eg: Gene.proteins.name) - @type path: str - @param model: the model to validate the path against - @type model: L{Model} - @param subclasses: a dict which maps - subclasses (defaults to an empty dict) - @type subclasses: dict - """ - self.model = weakref.proxy(model) - self.subclasses = subclasses - if isinstance(path, Class): - self._string = path.name - self.parts = [path] - else: - self._string = str(path) - self.parts = model.parse_path_string(str(path), subclasses) - - def __str__(self): - return self._string - - def __repr__(self): - return ('<' + self.__module__ + "." + self.__class__.__name__ + - ": " + self._string + '>') - - def prefix(self): - """ - The path one step above this path. - ================================== - - >>> p1 = Path("Gene.exons.name", model) - >>> p2 = p1.prefix() - >>> print p2 - ... Gene.exons - - """ - parts = list(self.parts) - parts.pop() - if len(parts) < 1: - raise PathParseError(str(self) + " does not have a prefix") - s = ".".join([x.name for x in parts]) - return Path(s, self.model._unproxied(), self.subclasses) - - def append(self, *elements): - """ - Construct a new path by adding elements to the end of this one. 
- =============================================================== - - >>> p1 = Path("Gene.exons", model) - >>> p2 = p1.append("name") - >>> print p2 - ... Gene.exons.name - - This is the inverse of prefix. - """ - s = str(self) + "." + ".".join(elements) - return Path(s, self.model._unproxied(), self.subclasses) - - @property - def root(self): - """ - The descriptor for the first part of the string. - This should always a class descriptor. - - @rtype: L{intermine.model.Class} - """ - return self.parts[0] - - @property - def end(self): - """ - The descriptor for the last part of the string. - - @rtype: L{model.Class} or L{model.Field} - """ - return self.parts[-1] - - def get_class(self): - """ - Return the class object for this path, if it refers to a class - or a reference. Attribute paths return None - - @rtype: L{model.Class} - """ - if self.is_class(): - return self.end - elif self.is_reference(): - if str(self) in self.subclasses: - return self.model.get_class(self.subclasses[str(self)]) - return self.end.type_class - else: - return None - - end_class = property(get_class) - - def is_reference(self): - """ - Return true if the path is a reference, - eg: Gene.organism or Gene.proteins - Note: Collections are ALSO references - - @rtype: boolean - """ - return isinstance(self.end, Reference) - - def is_class(self): - """ - Return true if the path just refers to a class, eg: Gene - - @rtype: boolean - """ - return isinstance(self.end, Class) - - def is_attribute(self): - """ - Return true if the path refers to an attribute, eg: Gene.length - - @rtype: boolean - """ - return isinstance(self.end, Attribute) - - def __eq__(self, other): - return str(self) == str(other) - - def __hash__(self): - i = hash(str(self)) - return (reduce(lambda a, b: a ^ b, [hash(k) ^ hash(v) - for k, v in list(self.subclasses.items())], i)) - - -class ConstraintTree(object): - - def __init__(self, op, left, right): - self.op = op - self.left = left - self.right = right - - def 
__and__(self, other): - return ConstraintTree('AND', self, other) - - def __or__(self, other): - return ConstraintTree('OR', self, other) - - def __iter__(self): - for n in [self.left, self.right]: - for subn in n: - yield subn - - def as_logic(self, codes=None, start='A'): - if codes is None: - codes = (chr(c) for c in range(ord(start), ord('Z'))) - return ("(%s %s %s)" % (self.left.as_logic(codes), - self.op, self.right.as_logic(codes))) - - -class ConstraintNode(ConstraintTree): - - def __init__(self, *args, **kwargs): - self.vargs = args - self.kwargs = kwargs - - def __iter__(self): - yield self - - def as_logic(self, codes=None, start='A'): - if codes is None: - codes = (chr(c) for c in range(ord(start), ord('Z'))) - return next(codes) - - -class CodelessNode(ConstraintNode): - - def as_logic(self, code=None, start='A'): - return '' - - -class Column(object): - """ - A representation of a path in a query that can be constrained - ============================================================= - - Column objects allow constraints to be constructed in something - close to a declarative style - """ - - def __init__(self, path, model, subclasses={}, query=None, parent=None): - self._model = model - self._query = query - self._subclasses = subclasses - self._parent = parent - self.filter = self.where # alias - if isinstance(path, Path): - self._path = path - else: - self._path = model.make_path(path, subclasses) - self._branches = {} - - def select(self, *cols): - """ - Create a new query with this column as the base class, - selecting the given fields. - - If no fields are given, then just this column will be selected. - """ - q = self._model.service.new_query(str(self)) - if len(cols): - q.select(*cols) - else: - q.select(self) - return q - - def where(self, *args, **kwargs): - """ - Create a new query based on this column, - filtered with the given constraint. 
- - also available as "filter" - """ - q = self.select() - return q.where(*args, **kwargs) - - def __len__(self): - """ - Return the number of values in this column. - """ - return self.select().count() - - def __iter__(self): - """ - Iterate over the things this column represents. - - In the case of an attribute column, that is the values it may have. - In the caseof a reference or class column, - it is the objects that this path may refer to. - """ - q = self.select() - if self._path.is_attribute(): - for row in q.rows(): - yield row[0] - else: - for obj in q: - yield obj - - def __getattr__(self, name): - if name in self._branches: - return self._branches[name] - cld = self._path.get_class() - if cld is not None: - try: - fld = cld.get_field(name) - branch = Column(str(self) + "." + name, self._model, - self._subclasses, self._query, self) - self._branches[name] = branch - return branch - except ModelError as e: - raise AttributeError(str(e)) - raise AttributeError("No attribute '" + name + "'") - - def __str__(self): - return str(self._path) - - def __mod__(self, other): - if isinstance(other, tuple): - return ConstraintNode(str(self), 'LOOKUP', *other) - else: - return ConstraintNode(str(self), 'LOOKUP', str(other)) - - def __rshift__(self, other): - return CodelessNode(str(self), str(other)) - - __lshift__ = __rshift__ - - def __eq__(self, other): - if other is None: - return ConstraintNode(str(self), "IS NULL") - elif isinstance(other, Column): - return ConstraintNode(str(self), "IS", str(other)) - elif hasattr(other, "make_list_constraint"): - return other.make_list_constraint(str(self), "IN") - elif isinstance(other, list): - return ConstraintNode(str(self), "ONE OF", other) - else: - return ConstraintNode(str(self), "=", other) - - def __ne__(self, other): - if other is None: - return ConstraintNode(str(self), "IS NOT NULL") - elif isinstance(other, Column): - return ConstraintNode(str(self), "IS NOT", str(other)) - elif hasattr(other, 
"make_list_constraint"): - return other.make_list_constraint(str(self), "NOT IN") - elif isinstance(other, list): - return ConstraintNode(str(self), "NONE OF", other) - else: - return ConstraintNode(str(self), "!=", other) - - def __xor__(self, other): - if hasattr(other, "make_list_constraint"): - return other.make_list_constraint(str(self), "NOT IN") - elif isinstance(other, list): - return ConstraintNode(str(self), "NONE OF", other) - raise TypeError("Invalid argument for xor: %r" % other) - - def in_(self, other): - if hasattr(other, "make_list_constraint"): - return other.make_list_constraint(str(self), "IN") - elif isinstance(other, list): - return ConstraintNode(str(self), "ONE OF", other) - raise TypeError("Invalid argument for in_: %r" % other) - - def __lt__(self, other): - if isinstance(other, Column): - self._parent._subclasses[str(self)] = str(other) - self._parent._branches = {} - return CodelessNode(str(self), str(other)) - try: - return self.in_(other) - except TypeError: - return ConstraintNode(str(self), "<", other) - - def __le__(self, other): - if isinstance(other, Column): - return CodelessNode(str(self), str(other)) - try: - return self.in_(other) - except TypeError: - return ConstraintNode(str(self), "<=", other) - - def __gt__(self, other): - return ConstraintNode(str(self), ">", other) - - def __ge__(self, other): - return ConstraintNode(str(self), ">=", other) - - -class Model(object): - """ - A class for representing the data model of an InterMine datawarehouse - ===================================================================== - - An abstraction of the database schema - - SYNOPSIS - -------- - - >>> service = Service("http://www.flymine.org/query/service") - >>> model = service.model - >>> model.get_class("Gene") - - - OVERVIEW - -------- - - This class represents the data model - ie. an abstraction - of the database schema. 
It can be used to introspect what - data is available and how it is inter-related - """ - - NUMERIC_TYPES = frozenset(["int", "Integer", "float", "Float", - "double", "Double", "long", - "Long", "short", "Short"]) - - LOG = logging.getLogger('Model') - - def __init__(self, source, service=None): - """ - Constructor - =========== - - >>> model = Model(xml) - - You will most like not need to create a model directly, - instead get one from the Service object: - - @see: L{intermine.webservice.Service} - - @param source: the model.xml, as a local file, string, or url - """ - assert source is not None - self.source = source - - if service is not None: - self.service = weakref.proxy(service) - else: - self.service = None - - self.classes = {} - self.parse_model(source) - self.vivify() - - # Make sugary aliases - self.table = self.column - - def parse_model(self, source): - """ - Create classes, attributes, references and - collections from the model.xml - ===================================================================== - - The xml can be provided as a file, url or string. This method - is called during instantiation - it does not need to be called - directly. - - @param source: the model.xml, as a local file, string, or url - @raise ModelParseError: if there is a problem parsing the source - """ - try: - io = openAnything(source) - src = io.read() - # Handle binary and text streams equally. 
- if hasattr(src, 'decode'): - src = src.decode('utf8') - self.LOG.debug("model = [{0}]".format(src)) - doc = minidom.parseString(src) - for node in doc.getElementsByTagName('model'): - self.name = node.getAttribute('name') - self.package_name = node.getAttribute('package') - assert node.nextSibling is None, "More than one model element" - error = "No model name or package name" - assert self.name and self.package_name, error - - - for c in doc.getElementsByTagName('class'): - class_name = c.getAttribute('name') - assert class_name, "Name not defined in" + c.toxml() - - def strip_java_prefix(x): - return re.sub(r'.*\.', '', x) - parents = [strip_java_prefix(p) for p in c.getAttribute( - 'extends').split(' ') if len(p)] - interface = c.getAttribute('is-interface') == 'true' - cl = Class(class_name, parents, self, interface) - self.LOG.debug('Created {0}'.format(cl.name)) - for a in c.getElementsByTagName('attribute'): - name = a.getAttribute('name') - type_name = strip_java_prefix(a.getAttribute('type')) - at = Attribute(name, type_name, cl) - cl.field_dict[name] = at - self.LOG.debug('set {0}.{1}'.format(cl.name, at.name)) - for r in c.getElementsByTagName('reference'): - name = r.getAttribute('name') - type_name = r.getAttribute('referenced-type') - linked_field_name = r.getAttribute('reverse-reference') - ref = Reference(name, type_name, cl, linked_field_name) - cl.field_dict[name] = ref - self.LOG.debug('set {0}.{1}'.format(cl.name, ref.name)) - for co in c.getElementsByTagName('collection'): - name = co.getAttribute('name') - type_name = co.getAttribute('referenced-type') - linked_field_name = co.getAttribute('reverse-reference') - col = Collection(name, type_name, cl, linked_field_name) - cl.field_dict[name] = col - self.LOG.debug('set {0}.{1}'.format(cl.name, col.name)) - self.classes[class_name] = cl - except Exception as error: - model_src = src if src is not None else source - raise ModelParseError("Error parsing model", model_src, error) - finally: - if 
io is not None: - io.close() - - def vivify(self): - """ - Make names point to instances and insert inherited fields - ========================================================= - - This method ensures the model is internally consistent. This method - is called during instantiaton. It does not need to be called - directly. - - @raise ModelError: if the names point to non-existent objects - """ - for c in list(self.classes.values()): - c.parent_classes = self.to_ancestry(c) - self.LOG.debug("{0.name} < {0.parent_classes}".format(c)) - for pc in c.parent_classes: - c.field_dict.update(pc.field_dict) - for f in c.fields: - f.type_class = self.classes.get(f.type_name) - if (hasattr(f, 'reverse_reference_name') and - f.reverse_reference_name != ''): - rrn = f.reverse_reference_name - f.reverse_reference = f.type_class.field_dict[rrn] - - def to_ancestry(self, cd): - """ - Returns the lineage of the class - ================================ - - >>> classes = Model.to_ancestry(cd) - - Returns the class' parents, and all the class' parents' parents - - @rtype: list(L{intermine.model.Class}) - """ - parents = cd.parents - self.LOG.debug('{0} < {1}'.format(cd.name, cd.parents)) - def defined(x): return x is not None # weeds out the java classes - def to_class(x): return self.classes.get(x) - ancestry = list(filter(defined, list(map(to_class, parents)))) - for ancestor in ancestry: - self.LOG.debug('{0} is ancestor of {1}'.format(ancestor, cd.name)) - ancestry.extend(self.to_ancestry(ancestor)) - return ancestry - - def to_classes(self, classnames): - """ - take a list of class names and return a list of classes - ======================================================= - - >>> classes = model.to_classes(["Gene", "Protein", "Organism"]) - - This simply maps from a list of strings to a list of - classes in the calling model. 
- - @raise ModelError: if the list of class names - includes ones that don't exist - - @rtype: list(L{intermine.model.Class}) - """ - return list(map(self.get_class, classnames)) - - def column(self, path, *rest): - return Column(path, self, *rest) - - def __getattr__(self, name): - return self.column(name) - - def get_class(self, name): - """ - Get a class by its name, or by a dotted path - ============================================ - - >>> model = Model("http://www.flymine.org/query/service/model") - >>> model.get_class("Gene") - - >>> model.get_class("Gene.proteins") - - - This is the recommended way of retrieving a class from - the model. As well as handling class names, you can also - pass in a path such as "Gene.proteins" and get the - corresponding class back () - - @raise ModelError: if the class name refers to a non-existant object - - @rtype: L{intermine.model.Class} - """ - if name.find(',') != -1: - names = name.split(',') - classes = [self.get_class(n) for n in names] - return ComposedClass(classes, self) - elif name.find(".") != -1: - path = self.make_path(name) - if path.is_attribute(): - raise ModelError("'" + str(path) + "' is not a class") - else: - return path.get_class() - elif name in self.classes: - return self.classes[name] - else: - raise ModelError("'" + name + "' is not a class in this model") - - def make_path(self, path, subclasses={}): - """ - Return a path object for the given path string - ============================================== - - >>> path = model.make_path("Gene.organism.name") - - - This is recommended manner of constructing path objects. - - @type path: str - @type subclasses: dict - - @raise PathParseError: if there is a problem parsing the path string - - @rtype: L{intermine.model.Path} - """ - return Path(path, self, subclasses) - - def validate_path(self, path_string, subclasses={}): - """ - Validate a path - =============== - - >>> try: - ... model.validate_path("Gene.symbol") - ... return "path is valid" - ... 
except PathParseError: - ... return "path is invalid" - "path is valid" - - When you don't need to interrogate relationships - between paths, simply using this method to validate - a path string is enough. It guarantees that there - is a descriptor for each section of the string, - with the appropriate relationships - - @raise PathParseError: if there is a problem parsing the path string - """ - try: - self.parse_path_string(path_string, subclasses) - return True - except PathParseError as e: - raise PathParseError("Error parsing '%s' (subclasses: %s)" - % (path_string, str(subclasses)), e) - - def parse_path_string(self, path_string, subclasses={}): - """ - Parse a path string into a list of descriptors - one for each section - ===================================================================== - - >>> parts = Model.parse_path_string(string) - - This method is used when making paths from a model, and - when validating path strings. It probably won't need to - be called directly. - - @see: L{intermine.model.Model.make_path} - @see: L{intermine.model.Model.validate_path} - @see: L{intermine.model.Path} - """ - descriptors = [] - names = path_string.split('.') - root_name = names.pop(0) - - root_descriptor = self.get_class(root_name) - descriptors.append(root_descriptor) - - if root_name in subclasses: - current_class = self.get_class(subclasses[root_name]) - else: - current_class = root_descriptor - - for field_name in names: - field = current_class.get_field(field_name) - descriptors.append(field) - - if isinstance(field, Reference): - key = '.'.join([x.name for x in descriptors]) - if key in subclasses: - current_class = self.get_class(subclasses[key]) - else: - current_class = field.type_class - else: - current_class = None - - return descriptors - - def _unproxied(self): - return self - - -class ModelError(ReadableException): - pass - - -class PathParseError(ModelError): - pass - - -class ModelParseError(ModelError): - - def __init__(self, message, source, 
cause=None): - self.source = source - super(ModelParseError, self).__init__(message, cause) - - def __str__(self): - base = repr(self.message) + ":" + repr(self.source) - if self.cause is None: - return base - else: - return base + repr(self.cause) +from xml.dom import minidom +import weakref +import re +import logging + +from intermine.util import openAnything, ReadableException + +try: + from functools import reduce +except ImportError: + pass + +logging.basicConfig() + +""" +Classes representing the data model +=================================== + +Representations of tables and columns, and behaviour +for validating connections between them. + +""" + +__author__ = "Alex Kalderimis" +__organization__ = "InterMine" +__license__ = "LGPL" +__contact__ = "dev@intermine.org" + + +class Field(object): + """ + A class representing columns on database tables + =============================================== + + The base class for attributes, references and collections. All + columns in DB tables are represented by fields + + SYNOPSIS + -------- + + >>> service = Service("https://www.flymine.org/query/service") + >>> model = service.model + >>> cd = model.get_class("Gene") + >>> print "Gene has", len(cd.fields), "fields" + >>> for field in gene_cd.fields: + ... 
print " - ", field + Gene has 45 fields + - CDSs is a group of CDS objects, which link back to this as gene + - GLEANRsymbol is a String + - UTRs is a group of UTR objects, which link back to this as gene + - alleles is a group of Allele objects, which link back + to this as gene + - chromosome is a Chromosome + - chromosomeLocation is a Location + - clones is a group of CDNAClone objects, which link + back to this as gene + - crossReferences is a group of CrossReference objects, + which link back to this as subject + - cytoLocation is a String + - dataSets is a group of DataSet objects, + which link back to this as bioEntities + - downstreamIntergenicRegion is a IntergenicRegion + - exons is a group of Exon objects, + which link back to this as gene + - flankingRegions is a group of GeneFlankingRegion objects, + which link back to this as gene + - goAnnotation is a group of GOAnnotation objects + - homologues is a group of Homologue objects, + which link back to this as gene + - id is a Integer + - interactions is a group of Interaction objects, + which link back to this as gene + - length is a Integer + ... 
+ + @see: L{Attribute} + @see: L{Reference} + @see: L{Collection} + """ + + def __init__(self, name, type_name, class_origin): + """ + Constructor - DO NOT USE + ======================== + + THIS CLASS IS NOT MEANT TO BE INSTANTIATED DIRECTLY + + you are unlikely to need to do + so anyway: it is recommended you access fields + through the classes generated by the model + + @param name: The name of the reference + @param type_name: The name of the model.Class this refers to + @param class_origin: The model.Class this was declared in + + """ + self.name = name + self.type_name = type_name + self.type_class = None + self.declared_in = class_origin + + def __repr__(self): + return self.name + " is a " + self.type_name + + def __str__(self): + return self.name + + @property + def fieldtype(self): + raise Exception("Fields should never be directly instantiated") + + +class Attribute(Field): + """ + Attributes represent columns that contain actual data + ===================================================== + + The Attribute class inherits all the behaviour of L{intermine.model.Field} + """ + + @property + def fieldtype(self): + return "attribute" + + +class Reference(Field): + """ + References represent columns that refer to records in other tables + ================================================================== + + In addition the the behaviour and properties of Field, references + may also have a reverse reference, if the other record points + back to this one as well. 
And all references will have their + type upgraded to a type_class during parsing + """ + + def __init__(self, name, type_name, class_origin, reverse_ref=None): + """ + Constructor + =========== + + In addition to the a parameters of Field, Reference also + takes an optional reverse reference name (str) + + @param name: The name of the reference + @param type_name: The name of the model.Class this refers to + @param class_origin: The model.Class this was declared in + @param reverse_ref: The name of the reverse reference (default: None) + + """ + self.reverse_reference_name = reverse_ref + super(Reference, self).__init__(name, type_name, class_origin) + self.reverse_reference = None + + def __repr__(self): + """ + Return a string representation + ============================== + + @rtype: str + """ + s = super(Reference, self).__repr__() + if self.reverse_reference is None: + return s + else: + return (s + ", which links back to this as " + + self.reverse_reference.name) + + @property + def fieldtype(self): + return "reference" + + +class Collection(Reference): + """ + Collections are references which refer to groups of objects + =========================================================== + + Collections have all the same behaviour and properties as References + """ + + def __repr__(self): + """Return a string representation""" + ret = super(Collection, self).__repr__().replace( + " is a ", " is a group of ") + if self.reverse_reference is None: + return ret + " objects" + else: + return ret.replace(", which links", " objects, which link") + + @property + def fieldtype(self): + return "collection" + + +class Class(object): + """ + An abstraction of database tables in the data model + =================================================== + + These objects refer to the table objects in the + InterMine ORM layer. 
+ + SYNOPSIS + -------- + + >>> service = Service("https://www.flymine.org/query/service") + >>> model = service.model + >>> + >>> if "Gene" in model.classes: + ... gene_cd = model.get_class("Gene") + ... print "Gene has", len(gene_cd.fields), "fields" + ... for field in gene_cd.fields: + ... print " - ", field.name + + OVERVIEW + -------- + + Each class can have attributes (columns) of various types, + and can have references to other classes (tables), on either + a one-to-one (references) or one-to-many (collections) basis + + Classes should not be instantiated by hand, but rather used + as part of the model they belong to. + + """ + + def __init__(self, name, parents, model, interface=True): + """ + Constructor - Creates a new Class descriptor + ============================================ + + >>> cd = intermine.model.Class("Gene", ["SequenceFeature"]) + + + This constructor is called when deserialising the + model - you should have no need to create Classes by hand + + @param name: The name of this class + @param parents: a list of parental names + + """ + self.name = name + self.parents = parents + self.model = model + self.parent_classes = [] + self.is_interface = interface + self.field_dict = {} + self.has_id = "Object" not in parents + if self.has_id: + # All InterMineObject classes have an id attribute. + id_field = Attribute("id", "Integer", self) + self.field_dict["id"] = id_field + + def __repr__(self): + return "<%s.%s %s.%s>" % (self.__module__, self.__class__.__name__, + self.model.package_name if + hasattr(self.model, 'package_name') + else "__test__", self.name) + + @property + def fields(self): + """ + The fields of this class + ======================== + + The fields are returned sorted by name. 
Fields + includes all Attributes, References and Collections + + @rtype: list(L{Field}) + """ + return sorted(list(self.field_dict.values()), + key=lambda field: field.name) + + def __iter__(self): + for f in list(self.field_dict.values()): + yield f + + def __contains__(self, item): + if isinstance(item, Field): + return item in list(self.field_dict.values()) + else: + return str(item) in self.field_dict + + @property + def attributes(self): + """ + The fields of this class which contain data + =========================================== + + @rtype: list(L{Attribute}) + """ + return [x for x in self.fields if isinstance(x, Attribute)] + + @property + def references(self): + """ + fields which reference other objects + ==================================== + + @rtype: list(L{Reference}) + """ + def isRef(x): return isinstance( + x, Reference) and not isinstance(x, Collection) + return list(filter(isRef, self.fields)) + + @property + def collections(self): + """ + fields which reference many other objects + ========================================= + + @rtype: list(L{Collection}) + """ + return [x for x in self.fields if isinstance(x, Collection)] + + def get_field(self, name): + """ + Get a field by name + =================== + + The standard way of retrieving a field + + @raise ModelError: if the Class does not have such a field + + @rtype: subclass of L{intermine.model.Field} + """ + if name in self.field_dict: + return self.field_dict[name] + else: + raise ModelError("There is no field called %s in %s" % + (name, self.name)) + + def isa(self, other): + """ + Check if self is, or inherits from other + ======================================== + + This method validates statements about inheritance. 
+ Returns true if the "other" is, or is within the + ancestry of, this class + + Other can be passed as a name (str), or as the class object itself + + @rtype: boolean + """ + if isinstance(other, Class): + other_name = other.name + else: + other_name = other + if self.name == other_name: + return True + if other_name in self.parents: + return True + for p in self.parent_classes: + if p.isa(other): + return True + return False + + +class ComposedClass(Class): + """ + An abstraction of dynamic objects that are in two classes + ========================================================== + + These objects are structural unions of two or more different data-types. + """ + + def __init__(self, parts, model): + self.is_interface = True + self.parts = parts + self.model = weakref.proxy(model) + + @property + def parents(self): + return reduce(lambda ps, cls: ps + cls.parents, self.parts, []) + + @property + def name(self): + return '_'.join(c.name for c in self.parts) + + @property + def has_id(self): + return "Object" not in self.parents + + @property + def field_dict(self): + """The combined field dictionary of all parts""" + fields = {} + if self.has_id: + # All InterMineObject classes have an id attribute. + fields["id"] = Attribute("id", "Integer", self) + for p in self.parts: + fields.update(p.field_dict) + return fields + + @property + def parent_classes(self): + """The flattened list of parent classes, with the parts""" + for p in self.parts: + all_parents = [pc for pc in p.parent_classes] + return all_parents + self.parts + + +class Path(object): + """ + A class representing a validated dotted string path + =================================================== + + A path represents a connection between records and fields + + SYNOPSIS + -------- + + >>> service = Service("https://www.flymine.org/query/service") + model = service.model + path = model.make_path("Gene.organism.name") + path.is_attribute() + ... 
True + >>> path2 = model.make_path("Gene.proteins") + path2.is_attribute() + ... False + >>> path2.is_reference() + ... True + >>> path2.get_class() + ... + + OVERVIEW + -------- + + This class is used for performing validation on dotted path strings. + The simple act of parsing it into existence will validate the path + to some extent, but there are additional methods for verifying certain + relationships as well + """ + + def __init__(self, path, model, subclasses={}): + """ + Constructor + =========== + + >>> path = Path("Gene.name", model) + + You will not need to use this constructor directly. Instead, + use the "make_path" method on the model to construct paths for you. + + @param path: the dotted path string (eg: Gene.proteins.name) + @type path: str + @param model: the model to validate the path against + @type model: L{Model} + @param subclasses: a dict which maps + subclasses (defaults to an empty dict) + @type subclasses: dict + """ + self.model = weakref.proxy(model) + self.subclasses = subclasses + if isinstance(path, Class): + self._string = path.name + self.parts = [path] + else: + self._string = str(path) + self.parts = model.parse_path_string(str(path), subclasses) + + def __str__(self): + return self._string + + def __repr__(self): + return ('<' + self.__module__ + "." + self.__class__.__name__ + + ": " + self._string + '>') + + def prefix(self): + """ + The path one step above this path. + ================================== + + >>> p1 = Path("Gene.exons.name", model) + >>> p2 = p1.prefix() + >>> print p2 + ... Gene.exons + + """ + parts = list(self.parts) + parts.pop() + if len(parts) < 1: + raise PathParseError(str(self) + " does not have a prefix") + s = ".".join([x.name for x in parts]) + return Path(s, self.model._unproxied(), self.subclasses) + + def append(self, *elements): + """ + Construct a new path by adding elements to the end of this one. 
+ =============================================================== + + >>> p1 = Path("Gene.exons", model) + >>> p2 = p1.append("name") + >>> print p2 + ... Gene.exons.name + + This is the inverse of prefix. + """ + s = str(self) + "." + ".".join(elements) + return Path(s, self.model._unproxied(), self.subclasses) + + @property + def root(self): + """ + The descriptor for the first part of the string. + This should always a class descriptor. + + @rtype: L{intermine.model.Class} + """ + return self.parts[0] + + @property + def end(self): + """ + The descriptor for the last part of the string. + + @rtype: L{model.Class} or L{model.Field} + """ + return self.parts[-1] + + def get_class(self): + """ + Return the class object for this path, if it refers to a class + or a reference. Attribute paths return None + + @rtype: L{model.Class} + """ + if self.is_class(): + return self.end + elif self.is_reference(): + if str(self) in self.subclasses: + return self.model.get_class(self.subclasses[str(self)]) + return self.end.type_class + else: + return None + + end_class = property(get_class) + + def is_reference(self): + """ + Return true if the path is a reference, + eg: Gene.organism or Gene.proteins + Note: Collections are ALSO references + + @rtype: boolean + """ + return isinstance(self.end, Reference) + + def is_class(self): + """ + Return true if the path just refers to a class, eg: Gene + + @rtype: boolean + """ + return isinstance(self.end, Class) + + def is_attribute(self): + """ + Return true if the path refers to an attribute, eg: Gene.length + + @rtype: boolean + """ + return isinstance(self.end, Attribute) + + def __eq__(self, other): + return str(self) == str(other) + + def __hash__(self): + i = hash(str(self)) + return (reduce(lambda a, b: a ^ b, [hash(k) ^ hash(v) + for k, v in list(self.subclasses.items())], i)) + + +class ConstraintTree(object): + + def __init__(self, op, left, right): + self.op = op + self.left = left + self.right = right + + def 
__and__(self, other): + return ConstraintTree('AND', self, other) + + def __or__(self, other): + return ConstraintTree('OR', self, other) + + def __iter__(self): + for n in [self.left, self.right]: + for subn in n: + yield subn + + def as_logic(self, codes=None, start='A'): + if codes is None: + codes = (chr(c) for c in range(ord(start), ord('Z'))) + return ("(%s %s %s)" % (self.left.as_logic(codes), + self.op, self.right.as_logic(codes))) + + +class ConstraintNode(ConstraintTree): + + def __init__(self, *args, **kwargs): + self.vargs = args + self.kwargs = kwargs + + def __iter__(self): + yield self + + def as_logic(self, codes=None, start='A'): + if codes is None: + codes = (chr(c) for c in range(ord(start), ord('Z'))) + return next(codes) + + +class CodelessNode(ConstraintNode): + + def as_logic(self, code=None, start='A'): + return '' + + +class Column(object): + """ + A representation of a path in a query that can be constrained + ============================================================= + + Column objects allow constraints to be constructed in something + close to a declarative style + """ + + def __init__(self, path, model, subclasses={}, query=None, parent=None): + self._model = model + self._query = query + self._subclasses = subclasses + self._parent = parent + self.filter = self.where # alias + if isinstance(path, Path): + self._path = path + else: + self._path = model.make_path(path, subclasses) + self._branches = {} + + def select(self, *cols): + """ + Create a new query with this column as the base class, + selecting the given fields. + + If no fields are given, then just this column will be selected. + """ + q = self._model.service.new_query(str(self)) + if len(cols): + q.select(*cols) + else: + q.select(self) + return q + + def where(self, *args, **kwargs): + """ + Create a new query based on this column, + filtered with the given constraint. 
+ + also available as "filter" + """ + q = self.select() + return q.where(*args, **kwargs) + + def __len__(self): + """ + Return the number of values in this column. + """ + return self.select().count() + + def __iter__(self): + """ + Iterate over the things this column represents. + + In the case of an attribute column, that is the values it may have. + In the caseof a reference or class column, + it is the objects that this path may refer to. + """ + q = self.select() + if self._path.is_attribute(): + for row in q.rows(): + yield row[0] + else: + for obj in q: + yield obj + + def __getattr__(self, name): + if name in self._branches: + return self._branches[name] + cld = self._path.get_class() + if cld is not None: + try: + fld = cld.get_field(name) + branch = Column(str(self) + "." + name, self._model, + self._subclasses, self._query, self) + self._branches[name] = branch + return branch + except ModelError as e: + raise AttributeError(str(e)) + raise AttributeError("No attribute '" + name + "'") + + def __str__(self): + return str(self._path) + + def __mod__(self, other): + if isinstance(other, tuple): + return ConstraintNode(str(self), 'LOOKUP', *other) + else: + return ConstraintNode(str(self), 'LOOKUP', str(other)) + + def __rshift__(self, other): + return CodelessNode(str(self), str(other)) + + __lshift__ = __rshift__ + + def __eq__(self, other): + if other is None: + return ConstraintNode(str(self), "IS NULL") + elif isinstance(other, Column): + return ConstraintNode(str(self), "IS", str(other)) + elif hasattr(other, "make_list_constraint"): + return other.make_list_constraint(str(self), "IN") + elif isinstance(other, list): + return ConstraintNode(str(self), "ONE OF", other) + else: + return ConstraintNode(str(self), "=", other) + + def __ne__(self, other): + if other is None: + return ConstraintNode(str(self), "IS NOT NULL") + elif isinstance(other, Column): + return ConstraintNode(str(self), "IS NOT", str(other)) + elif hasattr(other, 
"make_list_constraint"): + return other.make_list_constraint(str(self), "NOT IN") + elif isinstance(other, list): + return ConstraintNode(str(self), "NONE OF", other) + else: + return ConstraintNode(str(self), "!=", other) + + def __xor__(self, other): + if hasattr(other, "make_list_constraint"): + return other.make_list_constraint(str(self), "NOT IN") + elif isinstance(other, list): + return ConstraintNode(str(self), "NONE OF", other) + raise TypeError("Invalid argument for xor: %r" % other) + + def in_(self, other): + if hasattr(other, "make_list_constraint"): + return other.make_list_constraint(str(self), "IN") + elif isinstance(other, list): + return ConstraintNode(str(self), "ONE OF", other) + raise TypeError("Invalid argument for in_: %r" % other) + + def __lt__(self, other): + if isinstance(other, Column): + self._parent._subclasses[str(self)] = str(other) + self._parent._branches = {} + return CodelessNode(str(self), str(other)) + try: + return self.in_(other) + except TypeError: + return ConstraintNode(str(self), "<", other) + + def __le__(self, other): + if isinstance(other, Column): + return CodelessNode(str(self), str(other)) + try: + return self.in_(other) + except TypeError: + return ConstraintNode(str(self), "<=", other) + + def __gt__(self, other): + return ConstraintNode(str(self), ">", other) + + def __ge__(self, other): + return ConstraintNode(str(self), ">=", other) + + +class Model(object): + """ + A class for representing the data model of an InterMine datawarehouse + ===================================================================== + + An abstraction of the database schema + + SYNOPSIS + -------- + + >>> service = Service("https://www.flymine.org/query/service") + >>> model = service.model + >>> model.get_class("Gene") + + + OVERVIEW + -------- + + This class represents the data model - ie. an abstraction + of the database schema. 
It can be used to introspect what + data is available and how it is inter-related + """ + + NUMERIC_TYPES = frozenset(["int", "Integer", "float", "Float", + "double", "Double", "long", + "Long", "short", "Short"]) + + LOG = logging.getLogger('Model') + + def __init__(self, source, service=None): + """ + Constructor + =========== + + >>> model = Model(xml) + + You will most like not need to create a model directly, + instead get one from the Service object: + + @see: L{intermine.webservice.Service} + + @param source: the model.xml, as a local file, string, or url + """ + assert source is not None + self.source = source + + if service is not None: + self.service = weakref.proxy(service) + else: + self.service = None + + self.classes = {} + self.parse_model(source) + self.vivify() + + # Make sugary aliases + self.table = self.column + + def parse_model(self, source): + """ + Create classes, attributes, references and + collections from the model.xml + ===================================================================== + + The xml can be provided as a file, url or string. This method + is called during instantiation - it does not need to be called + directly. + + @param source: the model.xml, as a local file, string, or url + @raise ModelParseError: if there is a problem parsing the source + """ + try: + io = openAnything(source) + src = io.read() + # Handle binary and text streams equally. 
+ if hasattr(src, 'decode'): + src = src.decode('utf8') + self.LOG.debug("model = [{0}]".format(src)) + doc = minidom.parseString(src) + for node in doc.getElementsByTagName('model'): + self.name = node.getAttribute('name') + self.package_name = node.getAttribute('package') + assert node.nextSibling is None, "More than one model element" + error = "No model name or package name" + assert self.name and self.package_name, error + + + for c in doc.getElementsByTagName('class'): + class_name = c.getAttribute('name') + assert class_name, "Name not defined in" + c.toxml() + + def strip_java_prefix(x): + return re.sub(r'.*\.', '', x) + parents = [strip_java_prefix(p) for p in c.getAttribute( + 'extends').split(' ') if len(p)] + interface = c.getAttribute('is-interface') == 'true' + cl = Class(class_name, parents, self, interface) + self.LOG.debug('Created {0}'.format(cl.name)) + for a in c.getElementsByTagName('attribute'): + name = a.getAttribute('name') + type_name = strip_java_prefix(a.getAttribute('type')) + at = Attribute(name, type_name, cl) + cl.field_dict[name] = at + self.LOG.debug('set {0}.{1}'.format(cl.name, at.name)) + for r in c.getElementsByTagName('reference'): + name = r.getAttribute('name') + type_name = r.getAttribute('referenced-type') + linked_field_name = r.getAttribute('reverse-reference') + ref = Reference(name, type_name, cl, linked_field_name) + cl.field_dict[name] = ref + self.LOG.debug('set {0}.{1}'.format(cl.name, ref.name)) + for co in c.getElementsByTagName('collection'): + name = co.getAttribute('name') + type_name = co.getAttribute('referenced-type') + linked_field_name = co.getAttribute('reverse-reference') + col = Collection(name, type_name, cl, linked_field_name) + cl.field_dict[name] = col + self.LOG.debug('set {0}.{1}'.format(cl.name, col.name)) + self.classes[class_name] = cl + except Exception as error: + model_src = src if src is not None else source + raise ModelParseError("Error parsing model", model_src, error) + finally: + if 
io is not None: + io.close() + + def vivify(self): + """ + Make names point to instances and insert inherited fields + ========================================================= + + This method ensures the model is internally consistent. This method + is called during instantiaton. It does not need to be called + directly. + + @raise ModelError: if the names point to non-existent objects + """ + for c in list(self.classes.values()): + c.parent_classes = self.to_ancestry(c) + self.LOG.debug("{0.name} < {0.parent_classes}".format(c)) + for pc in c.parent_classes: + c.field_dict.update(pc.field_dict) + for f in c.fields: + f.type_class = self.classes.get(f.type_name) + if (hasattr(f, 'reverse_reference_name') and + f.reverse_reference_name != ''): + rrn = f.reverse_reference_name + f.reverse_reference = f.type_class.field_dict[rrn] + + def to_ancestry(self, cd): + """ + Returns the lineage of the class + ================================ + + >>> classes = Model.to_ancestry(cd) + + Returns the class' parents, and all the class' parents' parents + + @rtype: list(L{intermine.model.Class}) + """ + parents = cd.parents + self.LOG.debug('{0} < {1}'.format(cd.name, cd.parents)) + def defined(x): return x is not None # weeds out the java classes + def to_class(x): return self.classes.get(x) + ancestry = list(filter(defined, list(map(to_class, parents)))) + for ancestor in ancestry: + self.LOG.debug('{0} is ancestor of {1}'.format(ancestor, cd.name)) + ancestry.extend(self.to_ancestry(ancestor)) + return ancestry + + def to_classes(self, classnames): + """ + take a list of class names and return a list of classes + ======================================================= + + >>> classes = model.to_classes(["Gene", "Protein", "Organism"]) + + This simply maps from a list of strings to a list of + classes in the calling model. 
+ + @raise ModelError: if the list of class names + includes ones that don't exist + + @rtype: list(L{intermine.model.Class}) + """ + return list(map(self.get_class, classnames)) + + def column(self, path, *rest): + return Column(path, self, *rest) + + def __getattr__(self, name): + return self.column(name) + + def get_class(self, name): + """ + Get a class by its name, or by a dotted path + ============================================ + + >>> model = Model("https://www.flymine.org/query/service/model") + >>> model.get_class("Gene") + + >>> model.get_class("Gene.proteins") + + + This is the recommended way of retrieving a class from + the model. As well as handling class names, you can also + pass in a path such as "Gene.proteins" and get the + corresponding class back () + + @raise ModelError: if the class name refers to a non-existant object + + @rtype: L{intermine.model.Class} + """ + if name.find(',') != -1: + names = name.split(',') + classes = [self.get_class(n) for n in names] + return ComposedClass(classes, self) + elif name.find(".") != -1: + path = self.make_path(name) + if path.is_attribute(): + raise ModelError("'" + str(path) + "' is not a class") + else: + return path.get_class() + elif name in self.classes: + return self.classes[name] + else: + raise ModelError("'" + name + "' is not a class in this model") + + def make_path(self, path, subclasses={}): + """ + Return a path object for the given path string + ============================================== + + >>> path = model.make_path("Gene.organism.name") + + + This is recommended manner of constructing path objects. + + @type path: str + @type subclasses: dict + + @raise PathParseError: if there is a problem parsing the path string + + @rtype: L{intermine.model.Path} + """ + return Path(path, self, subclasses) + + def validate_path(self, path_string, subclasses={}): + """ + Validate a path + =============== + + >>> try: + ... model.validate_path("Gene.symbol") + ... return "path is valid" + ... 
except PathParseError: + ... return "path is invalid" + "path is valid" + + When you don't need to interrogate relationships + between paths, simply using this method to validate + a path string is enough. It guarantees that there + is a descriptor for each section of the string, + with the appropriate relationships + + @raise PathParseError: if there is a problem parsing the path string + """ + try: + self.parse_path_string(path_string, subclasses) + return True + except PathParseError as e: + raise PathParseError("Error parsing '%s' (subclasses: %s)" + % (path_string, str(subclasses)), e) + + def parse_path_string(self, path_string, subclasses={}): + """ + Parse a path string into a list of descriptors - one for each section + ===================================================================== + + >>> parts = Model.parse_path_string(string) + + This method is used when making paths from a model, and + when validating path strings. It probably won't need to + be called directly. + + @see: L{intermine.model.Model.make_path} + @see: L{intermine.model.Model.validate_path} + @see: L{intermine.model.Path} + """ + descriptors = [] + names = path_string.split('.') + root_name = names.pop(0) + + root_descriptor = self.get_class(root_name) + descriptors.append(root_descriptor) + + if root_name in subclasses: + current_class = self.get_class(subclasses[root_name]) + else: + current_class = root_descriptor + + for field_name in names: + field = current_class.get_field(field_name) + descriptors.append(field) + + if isinstance(field, Reference): + key = '.'.join([x.name for x in descriptors]) + if key in subclasses: + current_class = self.get_class(subclasses[key]) + else: + current_class = field.type_class + else: + current_class = None + + return descriptors + + def _unproxied(self): + return self + + +class ModelError(ReadableException): + pass + + +class PathParseError(ModelError): + pass + + +class ModelParseError(ModelError): + + def __init__(self, message, source, 
cause=None): + self.source = source + super(ModelParseError, self).__init__(message, cause) + + def __str__(self): + base = repr(self.message) + ":" + repr(self.source) + if self.cause is None: + return base + else: + return base + repr(self.cause) diff --git a/intermine/pathfeatures.py b/intermine/pathfeatures.py index 5b4c8d1b..6ed76704 100644 --- a/intermine/pathfeatures.py +++ b/intermine/pathfeatures.py @@ -1,149 +1,149 @@ -import re - -PATTERN_STR = "^(?:\\w+\\.)*\\w+$" -PATH_PATTERN = re.compile(PATTERN_STR) - - -class PathFeature(object): - def __init__(self, path): - if path is None: - raise ValueError("path must not be None") - try: - path = path.name - except Exception: - pass - if not PATH_PATTERN.match(path): - raise TypeError( - "Path '" + path + - "' does not match expected pattern" + PATTERN_STR) - self.path = path - - def __repr__(self): - return "<" + self.__class__.__name__ + ": " + self.to_string() + ">" - - def to_string(self): - return str(self.path) - - def to_dict(self): - return {'path': self.path} - - @property - def child_type(self): - raise AttributeError() - - -class Join(PathFeature): - valid_join_styles = ['OUTER', 'INNER'] - INNER = "INNER" - OUTER = "OUTER" - child_type = 'join' - - def __init__(self, path, style='OUTER'): - if style.upper() not in Join.valid_join_styles: - raise TypeError("Unknown join style: " + style) - self.style = style.upper() - super(Join, self).__init__(path) - - def to_dict(self): - d = super(Join, self).to_dict() - d.update(style=self.style) - return d - - def __repr__(self): - return('<' + self.__class__.__name__ - + ' '.join([':', self.path, self.style]) + '>') - - -class PathDescription(PathFeature): - child_type = 'pathDescription' - - def __init__(self, path, description): - self.description = description - super(PathDescription, self).__init__(path) - - def to_dict(self): - d = super(PathDescription, self).to_dict() - d.update(description=self.description) - return d - - -class 
SortOrder(PathFeature): - ASC = "asc" - DESC = "desc" - DIRECTIONS = frozenset(["asc", "desc"]) - - def __init__(self, path, order): - try: - order = order.lower() - except Exception: - pass - - if order not in self.DIRECTIONS: - raise TypeError("Order must be one of " + str(self.DIRECTIONS) - + " - not " + order) - self.order = order - super(SortOrder, self).__init__(path) - - def __str__(self): - return self.path + " " + self.order - - def to_string(self): - return str(self) - - -class SortOrderList(object): - """ - A container implementation for holding sort orders - ================================================== - - This class exists to hold the sort order information for a - query. It handles appending elements, and the stringification - of the sort order. - """ - - def __init__(self, *sos): - self.sort_orders = [] - self.append(*sos) - - def append(self, *sos): - """ - Add sort order elements to the sort order list. - =============================================== - - Elements can be provided as a SortOrder object or - as a tuple of arguments (path, direction). 
- """ - for so in sos: - if isinstance(so, SortOrder): - self.sort_orders.append(so) - elif isinstance(so, tuple): - self.sort_orders.append(SortOrder(*so)) - else: - raise TypeError( - "Sort orders must be either SortOrder instances," - + " or tuples of arguments: I got:" + so + sos) - - def __repr__(self): - return '<' + self.__class__.__name__ + ': [' + str(self) + ']>' - - def __str__(self): - return " ".join(map(str, self.sort_orders)) - - def clear(self): - self.sort_orders = [] - - def is_empty(self): - return len(self.sort_orders) == 0 - - def __len__(self): - return len(self.sort_orders) - - def __next__(self): - """2.x to 3.x bridge""" - return self.next() - - def next(self): - return next(self.sort_orders) - - def __iter__(self): - return iter(self.sort_orders) +import re + +PATTERN_STR = "^(?:\\w+\\.)*\\w+$" +PATH_PATTERN = re.compile(PATTERN_STR) + + +class PathFeature(object): + def __init__(self, path): + if path is None: + raise ValueError("path must not be None") + try: + path = path.name + except Exception: + pass + if not PATH_PATTERN.match(path): + raise TypeError( + "Path '" + path + + "' does not match expected pattern" + PATTERN_STR) + self.path = path + + def __repr__(self): + return "<" + self.__class__.__name__ + ": " + self.to_string() + ">" + + def to_string(self): + return str(self.path) + + def to_dict(self): + return {'path': self.path} + + @property + def child_type(self): + raise AttributeError() + + +class Join(PathFeature): + valid_join_styles = ['OUTER', 'INNER'] + INNER = "INNER" + OUTER = "OUTER" + child_type = 'join' + + def __init__(self, path, style='OUTER'): + if style.upper() not in Join.valid_join_styles: + raise TypeError("Unknown join style: " + style) + self.style = style.upper() + super(Join, self).__init__(path) + + def to_dict(self): + d = super(Join, self).to_dict() + d.update(style=self.style) + return d + + def __repr__(self): + return('<' + self.__class__.__name__ + + ' '.join([':', self.path, self.style]) + 
'>') + + +class PathDescription(PathFeature): + child_type = 'pathDescription' + + def __init__(self, path, description): + self.description = description + super(PathDescription, self).__init__(path) + + def to_dict(self): + d = super(PathDescription, self).to_dict() + d.update(description=self.description) + return d + + +class SortOrder(PathFeature): + ASC = "asc" + DESC = "desc" + DIRECTIONS = frozenset(["asc", "desc"]) + + def __init__(self, path, order): + try: + order = order.lower() + except Exception: + pass + + if order not in self.DIRECTIONS: + raise TypeError("Order must be one of " + str(self.DIRECTIONS) + + " - not " + order) + self.order = order + super(SortOrder, self).__init__(path) + + def __str__(self): + return self.path + " " + self.order + + def to_string(self): + return str(self) + + +class SortOrderList(object): + """ + A container implementation for holding sort orders + ================================================== + + This class exists to hold the sort order information for a + query. It handles appending elements, and the stringification + of the sort order. + """ + + def __init__(self, *sos): + self.sort_orders = [] + self.append(*sos) + + def append(self, *sos): + """ + Add sort order elements to the sort order list. + =============================================== + + Elements can be provided as a SortOrder object or + as a tuple of arguments (path, direction). 
+ """ + for so in sos: + if isinstance(so, SortOrder): + self.sort_orders.append(so) + elif isinstance(so, tuple): + self.sort_orders.append(SortOrder(*so)) + else: + raise TypeError( + "Sort orders must be either SortOrder instances," + + " or tuples of arguments: I got:" + so + sos) + + def __repr__(self): + return '<' + self.__class__.__name__ + ': [' + str(self) + ']>' + + def __str__(self): + return " ".join(map(str, self.sort_orders)) + + def clear(self): + self.sort_orders = [] + + def is_empty(self): + return len(self.sort_orders) == 0 + + def __len__(self): + return len(self.sort_orders) + + def __next__(self): + """2.x to 3.x bridge""" + return self.next() + + def next(self): + return next(self.sort_orders) + + def __iter__(self): + return iter(self.sort_orders) diff --git a/intermine/query.py b/intermine/query.py index 8d8f7668..03c16438 100644 --- a/intermine/query.py +++ b/intermine/query.py @@ -1,2067 +1,2067 @@ -import intermine.constraints as constraints -from intermine.model import Column, Class, Model, Reference, ConstraintNode -import re -from copy import deepcopy -from pandas import DataFrame -from xml.dom import minidom, getDOMImplementation -from pandas import DataFrame - -from intermine.util import openAnything, ReadableException -from intermine.pathfeatures import PathDescription, Join, SortOrder -from intermine.pathfeatures import SortOrderList - -try: - from functools import reduce -except ImportError: - pass -""" -Classes representing queries against webservices -================================================ - -Representations of queries, and templates. 
- -""" - -__author__ = "Alex Kalderimis" -__organization__ = "InterMine" -__license__ = "LGPL" -__contact__ = "dev@intermine.org" - -LOGIC_OPS = ["and", "or"] -LOGIC_PRODUCT = [(x, y) for x in LOGIC_OPS for y in LOGIC_OPS] - - -class Query(object): - """ - A Class representing a structured database query - ================================================ - - Objects of this class have properties that model the - attributes of the query, and methods for performing - the request. - - SYNOPSIS - -------- - - example: - - >>> service = Service("http://www.flymine.org/query/service") - >>> query = service.new_query() - >>> - >>> query.add_view("Gene.symbol", "Gene.pathways.name", "Gene.proteins.symbol") - >>> query.add_sort_order("Gene.pathways.name") - >>> - >>> query.add_constraint("Gene", "LOOKUP", "eve") - >>> query.add_constraint("Gene.pathways.name", "=", "Phosphate*") - >>> - >>> query.set_logic("A or B") - >>> - >>> for row in query.rows(): - ... handle_row(row) - - OR, using an SQL style DSL: - - >>> s = Service("www.flymine.org/query") - >>> query = s.query("Gene").\\ - ... select("*", "pathways.*").\\ - ... where("symbol", "=", "H").\\ - ... outerjoin("pathways").\\ - ... order_by("symbol") - >>> for row in query.rows(start=10, size=5): - ... handle_row(row) - - OR, for a more SQL-alchemy, ORM style: - - >>> for gene in s.query(s.model.Gene).filter(s.model.Gene.symbol == ["zen", "H", "eve"]).add_columns(s.model.Gene.alleles): - ... handle(gene) - - Query objects represent structured requests for information over the - database housed at the datawarehouse whose webservice you are querying. - They utilise some of the concepts of relational databases, within an - object-related ORM context. If you don't know what that means, don't - worry: you don't need to write SQL, and the queries will be fast. - - To make things slightly more familiar to those with knowledge of SQL, - some syntactical sugar is provided to make constructing queries a bit - more recognisable. 
- - PRINCIPLES - ---------- - - The data model represents tables in the databases as classes, with records - within tables as instances of that class. The columns of the database are - the fields of that object:: - - The Gene table - showing two records/objects - +---------------------------------------------------+ - | id | symbol | length | cyto-location | organism | - +----------------------------------------+----------+ - | 01 | eve | 1539 | 46C10-46C10 | 01 | - +----------------------------------------+----------+ - | 02 | zen | 1331 | 84A5-84A5 | 01 | - +----------------------------------------+----------+ - ... - - The organism table - showing one record/object - +----------------------------------+ - | id | name | taxon id | - +----------------------------------+ - | 01 | D. melanogaster | 7227 | - +----------------------------------+ - - Columns that contain a meaningful value are known as 'attributes' (in the - tables above, that is everything except the id columns). The other columns - (such as "organism" in the gene table) are ones that reference records of - other tables (ie. other objects), and are called references. You can refer - to any field in any class, that has a connection, however tenuous, with a - table, by using dotted path notation:: - - Gene.organism.name -> the name column in the organism table, referenced - by a record in the gene table - - These paths, and the connections between records and tables they represent, - are the basis for the structure of InterMine queries. 
- - THE STUCTURE OF A QUERY - ----------------------- - - A query has two principle sets of properties: - - its view: the set of output columns - - its constraints: the set of rules for what to include - - A query must have at least one output column in its view, but constraints - are optional - if you don't include any, you will get back every record - from the table (every object of that type) - - In addition, the query must be coherent: if you have information about - an organism, and you want a list of genes, then the "Gene" table - should be the basis for your query, and as such the Gene class, which - represents this table, should be the root of all the paths that appear in - it: - - So, to take a simple example:: - - I have an organism name, and I want a list of genes: - - The view is the list of things I want to know about those genes: - - >>> query.add_view("Gene.name") - >>> query.add_view("Gene.length") - >>> query.add_view("Gene.proteins.sequence.length") - - Note I can freely mix attributes and references, as long as every view ends - in an attribute (a meaningful value). As a short-cut I can also write: - - >>> query.add_views("Gene.name", "Gene.length", "Gene.proteins.sequence.length") - - or: - - >>> query.add_views("Gene.name Gene.length Gene.proteins.sequence.length") - - They are all equivalent. You can also use common SQL style shortcuts such - as "*" for all attribute fields: - - >>> query.add_views("Gene.*") - - You can also use "select" as a synonymn for "add_view" - - Now I can add my constraints. As, we mentioned, I have information about an - organism, so: - - >>> query.add_constraint("Gene.organism.name", "=", "D. 
melanogaster") - - (note, here I can use "where" as a synonymn for "add_constraint") - - If I run this query, I will get literally millions of results - - it needs to be filtered further: - - >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500) - - If that doesn't restrict things enough I can add more filters: - - >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"]) - - Now I am guaranteed to get only information on genes I am interested in. - - Note, though, that because I have included the link (or "join") from - Gene -> Protein, this, by default, means that I only want genes that have - protein information associated with them. If in fact I want information on - all genes, and just want to know the protein information if it is - available, then I can specify that with: - - >>> query.add_join("Gene.proteins", "OUTER") - - And if perhaps my query is not as simple as a strict cumulative filter, - but I want all D. mel genes that EITHER have a short protein sequence OR - come from one of my favourite genes (as unlikely as that sounds), I can - specify the logic for that too: - - >>> query.set_logic("A and (B or C)") - - Each letter refers to one of the constraints - the codes are assigned in - the order you add the constraints. If you want to be absolutely certain - about the constraints you mean, you can use the constraint objects - themselves: - - >>> gene_is_eve = query.add_constraint("Gene.symbol", "=", "eve") - >>> gene_is_zen = query.add_constraint("Gene.symbol", "=", "zne") - >>> - >>> query.set_logic(gene_is_eve | gene_is_zen) - - By default the logic is a straight cumulative filter - (ie: A and B and C and D and ...) - - Putting it all together: - - >>> query.add_view("Gene.name", "Gene.length", "Gene.proteins.sequence.length") - >>> query.add_constraint("Gene.organism.name", "=", "D. 
melanogaster") - >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500) - >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"]) - >>> query.add_join("Gene.proteins", "OUTER") - >>> query.set_logic("A and (B or C)") - - This can be made more concise and readable with a little DSL sugar: - - >>> query = service.query("Gene") - >>> query.select("name", "length", "proteins.sequence.length").\ - ... where('organism.name' '=', 'D. melanogaster').\ - ... where("proteins.sequence.length", "<", 500).\ - ... where('symbol', 'ONE OF', ['eve', 'h', 'zen']).\ - ... outerjoin('proteins').\ - ... set_logic("A and (B or C)") - - And the query is defined. - - Result Processing: Rows - ----------------------- - - calling ".rows()" on a query will return an iterator of rows, where each - row is a ResultRow object, which can be treated as both a list and a - dictionary. - - Which means you can refer to columns by name: - - >>> for row in query.rows(): - ... print "name is %s" % (row["name"]) - ... print "length is %d" % (row["length"]) - - As well as using list indices: - - >>> for row in query.rows(): - ... print "The first column is %s" % (row[0]) - - Iterating over a row iterates over the cell values as a list: - - >>> for row in query.rows(): - ... for column in row: - ... do_something(column) - - Here each row will have a gene name, a gene length, and a sequence length - eg: - - >>> print row.to_l - ["even skipped", "1359", "376"] - - To make that clearer, you can ask for a dictionary instead of a list: - - >>> for row in query.rows() - ... print row.to_d - {"Gene.name":"even skipped","Gene.length":"1359","Gene.proteins.sequence.length":"376"} - - - If you just want the raw results, for printing to a file, or for piping to - another program, you can request the results in one of these - formats: 'json', 'rr', 'tsv', 'jsonobjects', 'jsonrows', 'list', 'dict', - 'csv' - - >>> for row in query.result("", size = ) - ... 
print(row) - - - Result Processing: Results - -------------------------- - - Results can also be processed on a record by record basis. If you have a - query that has output columns of "Gene.symbol", "Gene.pathways.name" and - "Gene.proteins.proteinDomains.primaryIdentifier", then processing it by - records will return one object per gene, and that gene will have a property - named "pathways" which contains objects which have a name property. - Likewise there will be a proteins property which holds a list of - proteinDomains which all have a primaryIdentifier property, and so on. - This allows a more object orientated approach to database records, - familiar to users of other ORMs. - - This is the format used when you choose to iterate over a query directly, - or can be explicitly chosen by invoking L{intermine.query.Query.results}: - - >>> for gene in query: - ... print gene.name, map(lambda x: x.name, gene.pathways) - - The structure of the object and the information it contains depends - entirely on the output columns selected. The values may be None, of course, - but also any valid values of an object (according to the data model) will - also be None if they were not selected for output. Attempts to access - invalid properties (such as gene.favourite_colour) will cause exceptions - to be thrown. - - Getting us to Generate your Code - -------------------------------- - - Not that you have to actually write any of this! The webapp will happily - generate the code for any query (and template) you can build in it. A good - way to get started is to use the webapp to generate your code, and then - run it as scripts to speed up your queries. You can always tinker with and - edit the scripts you download. - - To get generated queries, look for the "python" link at the bottom of - query-builder and template form pages, it looks a bit like this:: - - . 
+=====================================+============= - | | - | Perl | Python | Java [Help] | - | | - +============================================== - - """ - - SO_SPLIT_PATTERN = re.compile("\\s*(asc|desc)\\s*", re.I) - LOGIC_SPLIT_PATTERN = re.compile("\\s*(?:and|or|\\(|\\))\\s*", re.I) - TRAILING_OP_PATTERN = re.compile("\\s*(and|or)\\s*$", re.I) - LEADING_OP_PATTERN = re.compile("^\\s*(and|or)\\s*", re.I) - ORPHANED_OP_PATTERN = re.compile( - "(?:\\(\\s*(?:and|or)\\s*|\\s*(?:and|or)\\s*\\))", re.I) - - def __init__(self, model, service=None, validate=True, root=None): - """ - Construct a new Query - ===================== - - Construct a new query for making database queries - against an InterMine data warehouse. - - Normally you would not need to use this constructor - directly, but instead use the factory method on - intermine.webservice.Service, which will handle construction - for you. - - @param model: an instance of L{intermine.model.Model}. Required - @param service: an instance of l{intermine.service.Service}. Optional, - but you will not be able to make requests without one. - @param validate: a boolean - defaults to True. If set to false, the - query will not try and validate itself. You should not set this to - false. 
- - """ - self.model = model - if root is None: - self.root = root - else: - self.root = model.make_path(root).root - - self.name = '' - self.description = '' - self.service = service - self.prefetch_depth = service.prefetch_depth if service is not None else 1 - self.prefetch_id_only = service.prefetch_id_only if service is not None else False - self.do_verification = validate - self.path_descriptions = [] - self.joins = [] - self.constraint_dict = {} - self.uncoded_constraints = [] - self.views = [] - self._sort_order_list = SortOrderList() - self._logic_parser = constraints.LogicParser(self) - self._logic = None - self.constraint_factory = constraints.ConstraintFactory() - - # Set up sugary aliases - self.c = self.column - self.filter = self.where - self.add_column = self.add_view - self.add_columns = self.add_view - self.add_views = self.add_view - self.add_to_select = self.add_view - self.order_by = self.add_sort_order - self.all = self.get_results_list - self.size = self.count - self.summarize = self.summarise - - def __iter__(self): - """Return an iterator over all the objects returned by this query""" - return self.results("jsonobjects") - - def __len__(self): - """Return the number of rows this query will return.""" - return self.count() - - def __sub__(self, other): - """ - Construct a new list from the symmetric difference of these things - """ - return self.service._list_manager.subtract([self], [other]) - - def __xor__(self, other): - """Calculate the symmetric difference of this query and another""" - return self.service._list_manager.xor([self, other]) - - def __and__(self, other): - """ - Intersect this query and another query or list - """ - return self.service._list_manager.intersect([self, other]) - - def __or__(self, other): - """ - Return the union of this query and another query or list. 
- """ - return self.service._list_manager.union([self, other]) - - def __add__(self, other): - """ - Return the union of this query and another query or list - """ - return self.service._list_manager.union([self, other]) - - @classmethod - def from_xml(cls, xml, *args, **kwargs): - """ - Deserialise a query serialised to XML - ===================================== - - This method is used to instantiate serialised queries. - It is used by intermine.webservice.Service objects - to instantiate Template objects and it can be used - to read in queries you have saved to a file. - - @param xml: The xml as a file name, url, or string - - @raise QueryParseError: if the query cannot be parsed - @raise ModelError: if the query has illegal paths in it - @raise ConstraintError: if the constraints don't make sense - - @rtype: L{Query} - """ - obj = cls(*args, **kwargs) - obj.do_verification = False - f = openAnything(xml) - doc = minidom.parse(f) - f.close() - - queries = doc.getElementsByTagName('query') - if len(queries) != 1: - raise QueryParseError( - "wrong number of queries in xml. " + - "Only one element is allowed. 
Found %d" % len(queries)) - q = queries[0] - obj.name = q.getAttribute('name') - obj.description = q.getAttribute('longDescription') - obj.add_view(q.getAttribute('view')) - for p in q.getElementsByTagName('pathDescription'): - path = p.getAttribute('pathString') - description = p.getAttribute('description') - obj.add_path_description(path, description) - for j in q.getElementsByTagName('join'): - path = j.getAttribute('path') - style = j.getAttribute('style') - obj.add_join(path, style) - for c in q.getElementsByTagName('constraint'): - args = {} - args['path'] = c.getAttribute('path') - if args['path'] is None: - if c.parentNode.tagName != "node": - msg = "Constraints must have a path" - raise QueryParseError(msg) - args['path'] = c.parentNode.getAttribute('path') - args['op'] = c.getAttribute('op') - args['value'] = c.getAttribute('value') - args['code'] = c.getAttribute('code') - args['subclass'] = c.getAttribute('type') - args['editable'] = c.getAttribute('editable') - args['optional'] = c.getAttribute('switchable') - args['extra_value'] = c.getAttribute('extraValue') - args['loopPath'] = c.getAttribute('loopPath') - values = [] - for val_e in c.getElementsByTagName('value'): - texts = [] - for node in val_e.childNodes: - if node.nodeType == node.TEXT_NODE: - texts.append(node.data) - values.append(' '.join(texts)) - if len(values) > 0: - args["values"] = values - args = dict((k, v) for k, v in list(args.items()) - if v is not None and v != '') - if "loopPath" in args: - args["op"] = {"=": "IS", "!=": "IS NOT"}.get(args["op"]) - con = obj.add_constraint(**args) - if not con: - raise ConstraintError("error adding constraint with args: " + - args) - - def group(iterator, count): - itr = iter(iterator) - while True: - try: - yield tuple([next(itr) for i in range(count)]) - except StopIteration: - return - - if q.getAttribute('sortOrder') is not None: - sos = Query.SO_SPLIT_PATTERN.split(q.getAttribute('sortOrder')) - if len(sos) == 1: - if sos[0] in obj.views: # 
Be tolerant of irrelevant sort-orders - obj.add_sort_order(sos[0]) - else: - sos.pop() # Get rid of empty string at end - for path, direction in group(sos, 2): - if path in obj.views: # Be tolerant of irrelevant so. - obj.add_sort_order(path, direction) - - if q.getAttribute('constraintLogic') is not None: - obj._set_questionable_logic(q.getAttribute('constraintLogic')) - - obj.verify() - - return obj - - def _set_questionable_logic(self, questionable_logic): - """Attempts to sanity check the logic argument before it is set""" - logic = questionable_logic - used_codes = set(self.constraint_dict.keys()) - logic_codes = set(Query.LOGIC_SPLIT_PATTERN.split(questionable_logic)) - if "" in logic_codes: - logic_codes.remove("") - irrelevant_codes = logic_codes - used_codes - for c in irrelevant_codes: - pattern = re.compile("\\b" + c + "\\b", re.I) - logic = pattern.sub("", logic) - # Remove empty groups - logic = re.sub("\\((:?and|or|\\s)*\\)", "", logic) - # Remove trailing and leading operators - logic = Query.LEADING_OP_PATTERN.sub("", logic) - logic = Query.TRAILING_OP_PATTERN.sub("", logic) - for x in range(2): # repeat, as this process can leave doubles - for left, right in LOGIC_PRODUCT: - if left == right: - repl = left - else: - repl = "and" - pattern = re.compile(left + "\\s*" + right, re.I) - logic = pattern.sub(repl, logic) - logic = Query.ORPHANED_OP_PATTERN.sub( - lambda x: "(" if "(" in x.group(0) else ")", logic) - logic = logic.strip().lstrip() - logic = Query.LEADING_OP_PATTERN.sub("", logic) - logic = Query.TRAILING_OP_PATTERN.sub("", logic) - try: - if len(logic) > 0 and logic not in ["and", "or"]: - self.set_logic(logic) - except Exception as e: - raise Exception("Error parsing logic string " + repr( - questionable_logic) + " (which is " + repr( - logic) + " after irrelevant codes have been removed)" - + " with available codes: " + repr( - list(used_codes)) + " because: " + e.message) - - def __str__(self): - """Return the XML serialisation of this 
query""" - return self.to_xml() - - def verify(self): - """ - Validate the query - ================== - - Invalid queries will fail to run, and it is not always - obvious why. The validation routine checks to see that - the query will not cause errors on execution, and tries to - provide informative error messages. - - This method is called immediately after a query is fully - deserialised. - - @raise ModelError: if the paths are invalid - @raise QueryError: if there are errors in query construction - @raise ConstraintError: if there are errors in constraint construction - - """ - self.verify_views() - self.verify_constraint_paths() - self.verify_join_paths() - self.verify_pd_paths() - self.validate_sort_order() - self.do_verification = True - - def select(self, *paths): - """ - Replace the current selection of output columns with this one - ============================================================= - - example:: - - query.select("*", "proteins.name") - - This method is intended to provide an API familiar to those - with experience of SQL or other ORM layers. This method, in - contrast to other view manipulation methods, replaces - the selection of output columns, rather than appending to it. - - Note that any sort orders that are no longer in the view will - be removed. - - @param paths: The output columns to add - """ - self.views = [] - self.add_view(*paths) - so_elems = self._sort_order_list - self._sort_order_list = SortOrderList() - - for so in so_elems: - if so.path in self.views: - self._sort_order_list.append(so) - return self - - def add_view(self, *paths): - """ - Add one or more views to the list of output columns - =================================================== - - example:: - - query.add_view("Gene.name Gene.organism.name") - - This is the main method for adding views to the list - of output columns. 
As well as appending views, it - will also split a single, space or comma delimited - string into multiple paths, and flatten out lists, or any - combination. It will also immediately try to validate - the views. - - Output columns must be valid paths according to the - data model, and they must represent attributes of tables - - Also available as: - - add_views - - add_column - - add_columns - - add_to_select - - @see: intermine.model.Model - @see: intermine.model.Path - @see: intermine.model.Attribute - """ - views = [] - for p in paths: - if isinstance(p, (set, list)): - views.extend(list(p)) - elif isinstance(p, Class): - views.append(p.name + ".*") - elif isinstance(p, Column): - if p._path.is_attribute(): - views.append(str(p)) - else: - views.append(str(p) + ".*") - elif isinstance(p, Reference): - views.append(p.name + ".*") - else: - views.extend(re.split("(?:,?\\s+|,)", str(p))) - - views = list(map(self.prefix_path, views)) - - views_to_add = [] - for view in views: - if view.endswith(".*"): - view = re.sub("\\.\\*$", "", view) - scd = self.get_subclass_dict() - - def expand(p, level, id_only=False): - if level > 0: - path = self.model.make_path(p, scd) - cd = path.end_class - - def add_f(x): - return p + "." + x.name - - vs = [p + ".id"] if id_only and cd.has_id else [ - add_f(a) for a in cd.attributes - ] - next_level = level - 1 - rs_and_cs = list(cd.references) + list(cd.collections) - for r in rs_and_cs: - rp = add_f(r) - if next_level: - self.outerjoin(rp) - vs.extend( - expand(rp, next_level, self.prefetch_id_only)) - return vs - else: - return [] - - depth = self.prefetch_depth - views_to_add.extend(expand(view, depth)) - else: - views_to_add.append(view) - - if self.do_verification: - self.verify_views(views_to_add) - - self.views.extend(views_to_add) - - return self - - def prefix_path(self, path): - if self.root is None: - if self.do_verification: # eg. 
not when building from XML - if path.endswith(".*"): - trimmed = re.sub("\\.\\*$", "", path) - else: - trimmed = path - self.root = self.model.make_path(trimmed, - self.get_subclass_dict()).root - return path - else: - if path.startswith(self.root.name): - return path - else: - return self.root.name + "." + path - - def clear_view(self): - """ - Clear the output column list - ============================ - - Deletes all entries currently in the view list. - """ - self.views = [] - - def verify_views(self, views=None): - """ - Check to see if the views given are valid - ========================================= - - This method checks to see if the views: - - are valid according to the model - - represent attributes - - @see: L{intermine.model.Attribute} - - @raise intermine.model.ModelError: if the paths are invalid - @raise ConstraintError: if the paths are not attributes - """ - if views is None: - views = self.views - for path in views: - path = self.model.make_path(path, self.get_subclass_dict()) - if not path.is_attribute(): - raise ConstraintError("'" + str(path) + - "' does not represent an attribute") - - def add_constraint(self, *args, **kwargs): - """ - Add a constraint (filter on records) - ==================================== - - example:: - - query.add_constraint("Gene.symbol", "=", "zen") - - This method will try to make a constraint from the arguments - given, trying each of the classes it knows of in turn - to see if they accept the arguments. This allows you - to add constraints of different types without having to know - or care what their classes or implementation details are. - All constraints derive from intermine.constraints.Constraint, - and they all have a path attribute, but are otherwise diverse. 
- - Before adding the constraint to the query, this method - will also try to check that the constraint is valid by - calling Query.verify_constraint_paths() - - @see: L{intermine.constraints} - - @rtype: L{intermine.constraints.Constraint} - """ - if len(args) == 1 and len(kwargs) == 0: - if isinstance(args[0], tuple): - con = self.constraint_factory.make_constraint(*args[0]) - else: - try: - con = self.constraint_factory.make_constraint( - *args[0].vargs, **args[0].kwargs) - except AttributeError: - con = args[0] - else: - if len(args) == 0 and len(kwargs) == 1: - k, v = list(kwargs.items())[0] - d = {"path": k} - if v in constraints.UnaryConstraint.OPS: - d["op"] = v - else: - d["op"] = "=" - d["value"] = v - kwargs = d - - if len(args) and args[0] in self.constraint_factory.reference_ops: - args = [self.root] + list(args) - - con = self.constraint_factory.make_constraint(*args, **kwargs) - - con.path = self.prefix_path(con.path) - if self.do_verification: - self.verify_constraint_paths([con]) - if hasattr(con, "code"): - self.constraint_dict[con.code] = con - else: - self.uncoded_constraints.append(con) - - return con - - def where(self, *cons, **kwargs): - """ - Return a new query like this one but with an additional constraint - ================================================================== - - In contrast to add_constraint, this method returns - a new object with the given comstraint added, it does not - mutate the Query it is invoked on. 
- - Also available as Query.filter - """ - c = self.clone() - try: - for conset in cons: - codeds = c.coded_constraints - lstr = str(c.get_logic()) + " AND " if codeds else "" - start_c = chr(ord(codeds[-1].code) + 1) if codeds else 'A' - for con in conset: - c.add_constraint(*con.vargs, **con.kwargs) - try: - c.set_logic(lstr + conset.as_logic(start=start_c)) - except constraints.EmptyLogicError: - pass - for path, value in list(kwargs.items()): - c.add_constraint(path, "=", value) - except AttributeError: - c.add_constraint(*cons, **kwargs) - return c - - def column(self, col): - """ - Return a Column object suitable for using to construct constraints with - ======================================================================= - - This method is part of the SQLAlchemy style API. - - Also available as Query.c - """ - return self.model.column( - self.prefix_path(str(col)), self.get_subclass_dict(), self) - - def verify_constraint_paths(self, cons=None): - """ - Check that the constraints are valid - ==================================== - - This method will check the path attribute of each constraint. 
- In addition it will: - - Check that BinaryConstraints and MultiConstraints have an - Attribute as their path - - Check that TernaryConstraints have a Reference as theirs - - Check that SubClassConstraints have a correct subclass relationship - - Check that LoopConstraints have a valid loopPath, of a compatible - type - - Check that ListConstraints refer to an object - - Don't even try to check RangeConstraints: these have variable - semantics - - @param cons: The constraints to check - (defaults to all constraints on the query) - - @raise ModelError: if the paths are not valid - @raise ConstraintError: if the constraints do not satisfy the above - rules - - """ - if cons is None: - cons = self.constraints - for con in cons: - pathA = self.model.make_path(con.path, self.get_subclass_dict()) - if isinstance(con, constraints.RangeConstraint): - # No verification done on these, beyond checking its path, of course. - pass - elif isinstance(con, constraints.IsaConstraint): - if pathA.get_class() is None: - raise ConstraintError( - "'" + str(pathA) + - "' does not represent a class, or a reference to a class" - ) - for c in con.values: - if c not in self.model.classes: - raise ConstraintError("Illegal constraint: " + repr( - con) + " '" + str(c) + - "' is not a class in this model") - elif isinstance(con, constraints.TernaryConstraint): - if pathA.get_class() is None: - raise ConstraintError( - "'" + str(pathA) + - "' does not represent a class, or a reference to a class" - ) - elif isinstance(con, constraints.BinaryConstraint) or isinstance( - con, constraints.MultiConstraint): - if not pathA.is_attribute(): - raise ConstraintError("'" + str(pathA) + - "' does not represent an attribute") - elif isinstance(con, constraints.SubClassConstraint): - pathB = self.model.make_path(con.subclass, - self.get_subclass_dict()) - if not pathB.get_class().isa(pathA.get_class()): - raise ConstraintError("'" + con.subclass + - "' is not a subclass of '" + con.path - + "'") - elif 
isinstance(con, constraints.LoopConstraint): - pathB = self.model.make_path(con.loopPath, - self.get_subclass_dict()) - for path in [pathA, pathB]: - if not path.get_class(): - raise ConstraintError("'" + str(path) + - "' does not refer to an object") - (classA, classB) = (pathA.get_class(), pathB.get_class()) - if not classA.isa(classB) and not classB.isa(classA): - raise ConstraintError( - "the classes are of incompatible types: " + str( - classA) + "," + str(classB)) - elif isinstance(con, constraints.ListConstraint): - if not pathA.get_class(): - raise ConstraintError("'" + str(pathA) + - "' does not refer to an object") - - @property - def constraints(self): - """ - Returns the constraints of the query - ==================================== - - Query.constraints S{->} list(intermine.constraints.Constraint) - - Constraints are returned in the order of their code (normally - the order they were added to the query) and with any - subclass contraints at the end. - - @rtype: list(Constraint) - """ - ret = sorted( - list(self.constraint_dict.values()), key=lambda con: con.code) - ret.extend(self.uncoded_constraints) - return ret - - def get_constraint(self, code): - """ - Returns the constraint with the given code - ========================================== - - Returns the constraint with the given code, if if exists. 
- If no such constraint exists, it throws a ConstraintError - - @return: the constraint corresponding to the given code - @rtype: L{intermine.constraints.CodedConstraint} - """ - if code in self.constraint_dict: - return self.constraint_dict[code] - else: - raise ConstraintError("There is no constraint with the code '" + - code + "' on this query") - - def add_join(self, *args, **kwargs): - """ - Add a join statement to the query - ================================= - - example:: - - query.add_join("Gene.proteins", "OUTER") - - A join statement is used to determine if references should - restrict the result set by only including those references - exist. For example, if one had a query with the view:: - - "Gene.name", "Gene.proteins.name" - - Then in the normal case (that of an INNER join), we would only - get Genes that also have at least one protein that they reference. - Simply by asking for this output column you are placing a - restriction on the information you get back. - - If in fact you wanted all genes, regardless of whether they had - proteins associated with them or not, but if they did - you would rather like to know _what_ proteins, then you need - to specify this reference to be an OUTER join:: - - query.add_join("Gene.proteins", "OUTER") - - Now you will get many more rows of results, some of which will - have "null" values where the protein name would have been, - - This method will also attempt to validate the join by calling - Query.verify_join_paths(). Joins must have a valid path, the - style can be either INNER or OUTER (defaults to OUTER, - as the user does not need to specify inner joins, since all - references start out as inner joins), and the path - must be a reference. 
- - @raise ModelError: if the path is invalid - @raise TypeError: if the join style is invalid - - @rtype: L{intermine.pathfeatures.Join} - """ - join = Join(*args, **kwargs) - join.path = self.prefix_path(join.path) - if self.do_verification: - self.verify_join_paths([join]) - self.joins.append(join) - return self - - def outerjoin(self, column): - """Alias for add_join(column, "OUTER")""" - return self.add_join(str(column), "OUTER") - - def verify_join_paths(self, joins=None): - """ - Check that the joins are valid - ============================== - - Joins must have valid paths, and they must refer to references. - - @raise ModelError: if the paths are invalid - @raise QueryError: if the paths are not references - """ - if joins is None: - joins = self.joins - for join in joins: - path = self.model.make_path(join.path, self.get_subclass_dict()) - if not path.is_reference(): - raise QueryError("'" + join.path + "' is not a reference") - - def add_path_description(self, *args, **kwargs): - """ - Add a path description to the query - =================================== - - example:: - - query.add_path_description("Gene.proteins.proteinDomains", "Protein Domain") - - This allows you to alias the components of long paths to - improve the way they display column headers in a variety of - circumstances. In the above example, if the view included the unwieldy - path "Gene.proteins.proteinDomains.primaryIdentifier", it would - (depending on the mine) be displayed as - "Protein Domain > DB Identifer". These setting are taken into account - by the webservice when generating column headers for flat-file results - with the columnheaders parameter given, and always supplied when - requesting jsontable results. 
- - @rtype: L{intermine.pathfeatures.PathDescription} - - """ - path_description = PathDescription(*args, **kwargs) - path_description.path = self.prefix_path(path_description.path) - if self.do_verification: - self.verify_pd_paths([path_description]) - self.path_descriptions.append(path_description) - return path_description - - def verify_pd_paths(self, pds=None): - """ - Check that the path of the path description is valid - ==================================================== - - Checks for consistency with the data model - - @raise ModelError: if the paths are invalid - """ - if pds is None: - pds = self.path_descriptions - for pd in pds: - self.model.validate_path(pd.path, self.get_subclass_dict()) - - @property - def coded_constraints(self): - """ - Returns the list of constraints that have a code - ================================================ - - Query.coded_constraints S{->} list(intermine.constraints.CodedConstraint) - - This returns an up to date list of the constraints that can - be used in a logic expression. The only kind of constraint - that this excludes, at present, is SubClassConstraints - - @rtype: list(L{intermine.constraints.CodedConstraint}) - """ - return sorted( - list(self.constraint_dict.values()), key=lambda con: con.code) - - def get_logic(self): - """ - Returns the logic expression for the query - ========================================== - - This returns the up to date logic expression. The default - value is the representation of all coded constraints and'ed together. - - If the logic is empty and there are no constraints, returns an - empty string. - - The LogicGroup object stringifies to a string that can be parsed to - obtain itself (eg: "A and (B or C or D)"). 
- - @rtype: L{intermine.constraints.LogicGroup} - """ - if self._logic is None: - if len(self.coded_constraints) > 0: - return reduce(lambda x, y: x + y, self.coded_constraints) - else: - return "" - else: - return self._logic - - def set_logic(self, value): - """ - Sets the Logic given the appropriate input - ========================================== - - example:: - - Query.set_logic("A and (B or C)") - - This sets the logic to the appropriate value. If the value is - already a LogicGroup, it is accepted, otherwise - the string is tokenised and parsed. - - The logic is then validated with a call to validate_logic() - - raise LogicParseError: if there is a syntax error in the logic - """ - if isinstance(value, constraints.LogicGroup): - logic = value - else: - try: - logic = self._logic_parser.parse(value) - except constraints.EmptyLogicError: - if self.coded_constraints: - raise - else: - return self - if self.do_verification: - self.validate_logic(logic) - self._logic = logic - return self - - def validate_logic(self, logic=None): - """ - Validates the query logic - ========================= - - Attempts to validate the logic by checking - that every coded_constraint is included - at least once - - @raise QueryError: if not every coded constraint is represented - """ - if logic is None: - logic = self._logic - logic_codes = set(logic.get_codes()) - for con in self.coded_constraints: - if con.code not in logic_codes: - raise QueryError("Constraint " + con.code + repr( - con) + " is not mentioned in the logic: " + str(logic)) - - def get_default_sort_order(self): - """ - Gets the sort order when none has been specified - ================================================ - - This method is called to determine the sort order if - none is specified - - @raise QueryError: if the view is empty - - @rtype: L{intermine.pathfeatures.SortOrderList} - """ - try: - v0 = self.views[0] - for j in self.joins: - if j.style == "OUTER": - if v0.startswith(j.path): - return "" - 
return SortOrderList((self.views[0], SortOrder.ASC)) - except IndexError: - raise QueryError("Query view is empty") - - def get_sort_order(self): - """ - Return a sort order for the query - ================================= - - This method returns the sort order if set, otherwise - it returns the default sort order - - @raise QueryError: if the view is empty - - @rtype: L{intermine.pathfeatures.SortOrderList} - """ - if self._sort_order_list.is_empty(): - return self.get_default_sort_order() - else: - return self._sort_order_list - - def add_sort_order(self, path, direction=SortOrder.ASC): - """ - Adds a sort order to the query - ============================== - - example:: - - Query.add_sort_order("Gene.name", "DESC") - - This method adds a sort order to the query. - A query can have multiple sort orders, which are - assessed in sequence. - - If a query has two sort-orders, for example, - the first being "Gene.organism.name asc", - and the second being "Gene.name desc", you would have - the list of genes grouped by organism, with the - lists within those groupings in reverse alphabetical - order by gene name. 
- - This method will try to validate the sort order - by calling validate_sort_order() - - Also available as Query.order_by - """ - so = SortOrder(str(path), direction) - so.path = self.prefix_path(so.path) - if self.do_verification: - self.validate_sort_order(so) - self._sort_order_list.append(so) - return self - - def validate_sort_order(self, *so_elems): - """ - Check the validity of the sort order - ==================================== - - Checks that the sort order paths are: - - valid paths - - in the view - - @raise QueryError: if the sort order is not in the view - @raise ModelError: if the path is invalid - - """ - if not so_elems: - so_elems = self._sort_order_list - from_paths = self._from_paths() - for so in so_elems: - p = self.model.make_path(so.path, self.get_subclass_dict()) - if p.prefix() not in from_paths: - raise QueryError("Sort order element %s is not in the query" % - so.path) - - def _from_paths(self): - scd = self.get_subclass_dict() - froms = set( - [self.model.make_path(x, scd).prefix() for x in self.views]) - for c in self.constraints: - p = self.model.make_path(c.path, scd) - if p.is_attribute(): - froms.add(p.prefix()) - else: - froms.add(p) - return froms - - def get_subclass_dict(self): - """ - Return the current mapping of class to subclass - =============================================== - - This method returns a mapping of classes used - by the model for assessing whether certain paths are valid. For - intance, if you subclass MicroArrayResult to be FlyAtlasResult, - you can refer to the .presentCall attributes of fly atlas results. - MicroArrayResults do not have this attribute, and a path such as:: - - Gene.microArrayResult.presentCall - - would be marked as invalid unless the dictionary is provided. - - Users most likely will not need to ever call this method. 
- - @rtype: dict(string, string) - """ - subclass_dict = {} - for c in self.constraints: - if isinstance(c, constraints.SubClassConstraint): - subclass_dict[c.path] = c.subclass - return subclass_dict - - def results(self, row="object", start=0, size=None, summary_path=None): - """ - Return an iterator over result rows - =================================== - - Usage:: - - >>> query = service.model.Gene.select("symbol", "length") - >>> total = 0 - >>> for gene in query.results(): - ... print gene.symbol # handle strings - ... total += gene.length # handle numbers - >>> for row in query.results(row="rr"): - ... print row["symbol"] # handle strings by dict index - ... total += row["length"] # handle numbers by dict index - ... print row["Gene.symbol"] # handle strings by full dict index - ... total += row["Gene.length"] # handle numbers by full dict index - ... print row[0] # handle strings by list index - ... total += row[1] # handle numbers by list index - >>> for d in query.results(row="dict"): - ... print row["Gene.symbol"] # handle strings - ... total += row["Gene.length"] # handle numbers - >>> for l in query.results(row="list"): - ... print row[0] # handle strings - ... total += row[1] # handle numbers - >>> import csv - >>> csv_reader = csv.reader(q.results(row="csv"), delimiter=",", quotechar='"') - >>> for row in csv_reader: - ... print row[0] # handle strings - ... length_sum += int(row[1]) # handle numbers - >>> tsv_reader = csv.reader(q.results(row="tsv"), delimiter="\t") - >>> for row in tsv_reader: - ... print row[0] # handle strings - ... length_sum += int(row[1]) # handle numbers - - This is the general method that allows access to any of the available - result formats. The example above shows the ways these differ in terms - of accessing fields of the rows, as well as dealing with different - data types. 
Results can either be retrieved as typed values - (jsonobjects, rr ['ResultRows'], dict, list), or as lists of strings - (csv, tsv) which then require further parsing. The default format for - this method is "objects", where information is grouped by its - relationships. The other main format is "rr", which stands for - 'ResultRows', and can be accessed directly through the L{rows} method. - - Note that when requesting object based results (the default), if your - query contains any kind of collection, it is highly likely that start - and size won't do what you think, as they operate only on the - underlying rows used to build up the returned objects. If you want rows - back, you are recommeded to use the simpler rows method. - - If no views have been specified, all attributes of the root class - are selected for output. - - @param row: The format for each result. One of "object", "rr", - "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects" - @type row: string - @param start: the index of the first result to return (default = 0) - @type start: int - @param size: The maximum number of results to return (default = all) - @type size: int - @param summary_path: A column name to optionally summarise. Specifying - a path will force "jsonrows" format, and return - an iterator over a list of dictionaries. Use this - when you are interested in processing a summary - in order of greatest count to smallest. 
- @type summary_path: str or L{intermine.model.Path} - - @rtype: L{intermine.webservice.ResultIterator} - - @raise WebserviceError: if the request is unsuccessful - """ - - to_run = self.clone() - - if len(to_run.views) == 0: - to_run.add_view(to_run.root) - - if "object" in row: - for c in self.coded_constraints: - p = to_run.column(c.path)._path - from_p = p if p.end_class is not None else p.prefix() - if not [v for v in to_run.views if v.startswith(str(from_p))]: - if p.is_attribute(): - to_run.add_view(p) - else: - to_run.add_view(p.append("id")) - - path = to_run.get_results_path() - params = to_run.to_query_params() - params["start"] = start - if size: - params["size"] = size - if summary_path: - params["summaryPath"] = to_run.prefix_path(summary_path) - row = "jsonrows" - - view = to_run.views - cld = to_run.root - if (row == "dataframe"): - row = "dict" - - return to_run.service.get_results(path, params, row, view, cld) - - def dataframe(self, start=0, size=None): - dict = {} - query = self.results(row="dict", start=start, size=size) - for i in query.view: - dict[i] = [] - for row in query: - for i in dict: - dict[i].append(row[i]) - df = DataFrame(data=dict) - return df - - def rows(self, start=0, size=None, row="rr"): - """ - Return the results as rows of data - ================================== - - This is a shortcut for results("rr") - - Usage:: - - >>> for row in query.rows(start=10, size=10): - ... 
print row["proteins.name"] - - @param start: the index of the first result to return (default = 0) - @type start: int - @param size: The maximum number of results to return (default = all) - @type size: int - @rtype: iterable - """ - return self.results(row=row, start=start, size=size) - - def dataframe(self, start=0, size=None): - """ - Returns a pandas.DataFrame - ================================== - - Usage:: - >>> query.dataframe() - - @param start: the index of the first result to return (default = 0) - @type start: int - @param size: The maximum number of results to return (default = all) - @type size: int - @rtype: dataframe - - """ - dict = {} - query = self.results(row="dict", start=start, size=size) - for i in query.view: - dict[i] = [] - for row in query: - for i in dict: - dict[i].append(row[i]) - df = DataFrame(data=dict) - return df - - def summarise(self, summary_path, **kwargs): - """ - Return a summary of the results for this column. - ================================================ - - Usage:: - >>> query = service.select("Gene.*", "organism.*").where("Gene", "IN", "my-list") - >>> print query.summarise("length")["average"] - ... 12345.67890 - >>> print query.summarise("organism.name")["Drosophila simulans"] - ... 98 - - This method allows you to get statistics summarising the information - from just one column of a query. For numerical columns you get - dictionary with four keys ('average', 'stdev', 'max', 'min'), and for - non-numerical columns you get a dictionary where each item is a key - and the values are the number of occurrences of this value in the - column. - - Any key word arguments will be passed to the underlying results call - - so you can limit the result size to the top 100 items by passing - "size = 100" as part of the call. 
- - @see: L{intermine.query.Query.results} - - @param summary_path: The column to summarise (either in long or short - form) - @type summary_path: str or L{intermine.model.Path} - - @rtype: dict - This method is sugar for particular combinations of calls to - L{results}. - """ - p = self.model.make_path( - self.prefix_path(summary_path), self.get_subclass_dict()) - results = self.results(summary_path=summary_path, **kwargs) - if p.end.type_name in Model.NUMERIC_TYPES: - return dict((k, float(v)) for k, v in list(next(results).items())) - else: - return dict((r["item"], r["count"]) for r in results) - - def one(self, row="jsonobjects"): - """Return one result, and raise an error if the result size is not 1""" - if row == "jsonobjects": - if self.count() == 1: - return self.first(row) - else: - ret = None - for obj in self.results(): - if ret is not None: - raise QueryError("More than one result received") - else: - ret = obj - if ret is None: - raise QueryError("No results received") - - return ret - else: - c = self.count() - if (c != 1): - raise QueryError("Result size is not one: got %d results" % - (c)) - else: - return self.first(row) - - def first(self, row="jsonobjects", start=0, **kw): - """Return the first result, or None if the results are empty""" - if row == "jsonobjects": - size = None - else: - size = 1 - try: - return next(self.results(row, start=start, size=size, **kw)) - except StopIteration: - return None - - def get_results_list(self, *args, **kwargs): - """ - Get a list of result rows - ========================= - - This method is a shortcut so that you do not have to - do a list comprehension yourself on the iterator that - is normally returned. If you have a very large result - set (and these can get up to 100's of thousands or rows - pretty easily) you will not want to - have the whole list in memory at once, but there may - be other circumstances when you might want to keep the whole - list in one place. 
- - It takes all the same arguments and parameters as Query.results - - Also available as Query.all - - @see: L{intermine.query.Query.results} - - """ - return list(self.results(*args, **kwargs)) - - def get_row_list(self, start=0, size=None): - return self.get_results_list("rr", start, size) - - def count(self): - """ - Return the total number of rows this query returns - ================================================== - - Obtain the number of rows a particular query will - return, without having to fetch and parse all the - actual data. This method makes a request to the server - to report the count for the query, and is sugar for a - results call. - - Also available as Query.size - - @rtype: int - @raise WebserviceError: if the request is unsuccessful. - """ - count_str = "" - for row in self.results(row="count"): - count_str += row - try: - return int(count_str) - except ValueError: - raise ResultError("Server returned a non-integer count: " + - count_str) - - def get_list_upload_uri(self): - """ - Returns the uri to use to create a list from this query - ======================================================= - - Query.get_list_upload_uri() -> str - - This method is used internally when performing list operations - on queries. - - @rtype: str - """ - return self.service.root + self.service.QUERY_LIST_UPLOAD_PATH - - def get_list_append_uri(self): - """ - Returns the uri to use to create a list from this query - ======================================================= - - Query.get_list_append_uri() -> str - - This method is used internally when performing list operations - on queries. 
- - @rtype: str - """ - return self.service.root + self.service.QUERY_LIST_APPEND_PATH - - def get_results_path(self): - """ - Returns the path section pointing to the REST resource - ====================================================== - - Query.get_results_path() -> str - - Internally, this just calls a constant property - in intermine.service.Service - - @rtype: str - """ - return self.service.QUERY_PATH - - def children(self): - """ - Returns the child objects of the query - ====================================== - - This method is used during the serialisation of queries - to xml. It is unlikely you will need access to this as a whole. - Consider using "path_descriptions", "joins", "constraints" instead - - @see: Query.path_descriptions - @see: Query.joins - @see: Query.constraints - - @return: the child element of this query - @rtype: list - """ - return sum([self.path_descriptions, self.joins, self.constraints], []) - - def to_query(self): - """ - Implementation of trait that allows use of these objects as queries - (casting). - """ - return self - - def make_list_constraint(self, path, op): - """ - Implementation of trait that allows use of these objects in list - constraints - """ - l = self.service.create_list(self) - return ConstraintNode(path, op, l.name) - - def to_query_params(self): - """ - Returns the parameters to be passed to the webservice - ===================================================== - - The query is responsible for producing its own query - parameters. These consist simply of: - - query: the xml representation of the query - - @rtype: dict - - """ - xml = self.to_xml() - params = {'query': xml} - return params - - def to_Node(self): - """ - Returns a DOM node representing the query - ========================================= - - This is an intermediate step in the creation of the - xml serialised version of the query. You probably - won't need to call this directly. 
- - @rtype: xml.minidom.Node - """ - impl = getDOMImplementation() - doc = impl.createDocument(None, "query", None) - query = doc.documentElement - - query.setAttribute('name', self.name) - query.setAttribute('model', self.model.name) - query.setAttribute('view', ' '.join(self.views)) - query.setAttribute('sortOrder', str(self.get_sort_order())) - query.setAttribute('longDescription', self.description) - if len(self.coded_constraints) > 1: - query.setAttribute('constraintLogic', str(self.get_logic())) - - for c in self.children(): - element = doc.createElement(c.child_type) - for name, value in list(c.to_dict().items()): - if isinstance(value, (set, list)): - for v in value: - subelement = doc.createElement(name) - text = doc.createTextNode(v) - subelement.appendChild(text) - element.appendChild(subelement) - else: - element.setAttribute(name, value) - query.appendChild(element) - return query - - def to_xml(self): - """ - Return an XML serialisation of the query - ======================================== - - This method serialises the current state of the query to an - xml string, suitable for storing, or sending over the - internet to the webservice. - - @return: the serialised xml string - @rtype: string - """ - n = self.to_Node() - return n.toxml() - - def to_formatted_xml(self): - """ - Return a readable XML serialisation of the query - ================================================ - - This method serialises the current state of the query to an - xml string, suitable for storing, or sending over the - internet to the webservice, only more readably. - - @return: the serialised xml string - @rtype: string - """ - n = self.to_Node() - return n.toprettyxml() - - def clone(self): - """ - Performs a deep clone - ===================== - - This method will produce a clone that is independent, - and can be altered without affecting the original, - but starts off with the exact same state as it. 
- - The only shared elements should be the model - and the service, which are shared by all queries - that refer to the same webservice. - - @return: same class as caller - """ - newobj = self.__class__(self.model) - for attr in [ - "joins", "views", "_sort_order_list", "_logic", - "path_descriptions", "constraint_dict", "uncoded_constraints" - ]: - setattr(newobj, attr, deepcopy(getattr(self, attr))) - - for attr in [ - "name", "description", "service", "do_verification", - "constraint_factory", "root" - ]: - setattr(newobj, attr, getattr(self, attr)) - return newobj - - -class Template(Query): - """ - A Class representing a predefined query - ======================================= - - Templates are ways of saving queries - and allowing others to run them - simply. They are the main interface - to querying in the webapp - - SYNOPSIS - -------- - - example:: - - service = Service("http://www.flymine.org/query/service") - template = service.get_template("Gene_Pathways") - for row in template.results(A={"value":"eve"}): - process_row(row) - ... - - A template is a subclass of query that comes predefined. They - are typically retrieved from the webservice and run by specifying - the values for their existing constraints. They are a concise - and powerful way of running queries in the webapp. - - Being subclasses of query, everything is true of them that is true - of a query. They are just less work, as you don't have to design each - one. Also, you can store your own templates in the web-app, and then - access them as a private webservice method, from anywhere, making them - a kind of query in the cloud - for this you will need to authenticate - by providing log in details to the service. - - The most significant difference is how constraint values are specified - for each set of results. - - @see: L{Template.results} - - """ - - def __init__(self, *args, **kwargs): - """ - Constructor - =========== - - Instantiation is identical that of queries. 
As with queries, - these are best obtained from the intermine.webservice.Service - factory methods. - - @see: L{intermine.webservice.Service.get_template} - """ - super(Template, self).__init__(*args, **kwargs) - self.constraint_factory = constraints.TemplateConstraintFactory() - self.title = '' - - @classmethod - def from_xml(cls, xml, *args, **kwargs): - """ - Deserialise a template query serialised to XML - ============================================== - - This method is used to instantiate serialised templates. - It is used by intermine.webservice.Service objects - to instantiate Template objects and it can be used - to read in templates you have saved to a file. - - @param xml: The xml as a file name, url, or string - - @raise QueryParseError: if the query cannot be parsed - - @rtype: L{Template} - """ - # Extract all Query (superclass) fields - obj = super(Template, cls).from_xml(xml, *args, **kwargs) - - # Extract fields specific to Template, like title - obj.do_verification = False - f = openAnything(xml) - doc = minidom.parse(f) - f.close() - - templates = doc.getElementsByTagName('template') - if len(templates) != 1: - raise QueryParseError("wrong number of templates in xml. " + - "Only one