diff --git a/doc/rdflib3/__init__.py b/doc/rdflib3/__init__.py deleted file mode 100644 index 75ebe2a..0000000 --- a/doc/rdflib3/__init__.py +++ /dev/null @@ -1,77 +0,0 @@ -"""\ -A pure Python package providing the core RDF constructs. - -The packages is intended to provide the core RDF types and interfaces -for working with RDF. The package defines a plugin interface for -parsers, stores, and serializers that other packages can use to -implement parsers, stores, and serializers that will plug into the -rdflib package. - -The primary interface `rdflib` exposes to work with RDF is -`rdflib.graph.Graph`. - -A tiny example: - - >>> import rdflib - - >>> g = rdflib.Graph() - >>> result = g.parse("http://www.w3.org/2000/10/swap/test/meet/white.rdf") - - >>> print("graph has %s statements." % len(g)) - graph has 19 statements. - >>> - >>> for s, p, o in g: - ... if (s, p, o) not in g: - ... raise Exception("It better be!") - - >>> s = g.serialize(format='n3') - -""" -__docformat__ = "restructuredtext en" - -# The format of the __version__ line is matched by a regex in setup.py -__version__ = "3.4.0-dev" -__date__ = "2012/09/25" - -__all__ = [ - 'URIRef', - 'BNode', - 'Literal', - 'Variable', - - 'Namespace', - - 'Graph', - 'ConjunctiveGraph', - - 'RDF', - 'RDFS', - 'OWL', - 'XSD', - - 'util', - ] - -import sys -# generator expressions require 2.4 -assert sys.version_info >= (2, 4, 0), "rdflib requires Python 2.4 or higher" -del sys - -import logging -_LOGGER = logging.getLogger("rdflib") -_LOGGER.info("version: %s" % __version__) - - -from rdflib.term import URIRef, BNode, Literal, Variable - -from rdflib.namespace import Namespace - -from rdflib.graph import Graph, ConjunctiveGraph - -from rdflib.namespace import RDF, RDFS, OWL, XSD - -from rdflib import plugin -from rdflib import query - -from rdflib import util - diff --git a/doc/rdflib3/__init__.pyc b/doc/rdflib3/__init__.pyc deleted file mode 100644 index fac786f..0000000 Binary files a/doc/rdflib3/__init__.pyc and /dev/null differ diff --git a/doc/rdflib3/collection.py b/doc/rdflib3/collection.py deleted file mode 100644 index 24f3b3b..0000000 --- a/doc/rdflib3/collection.py +++ /dev/null @@ -1,262 +0,0 @@ -from rdflib.namespace import RDF -from rdflib.term import BNode -from rdflib.term import Literal -from rdflib.graph import Graph -from rdflib.py3compat import format_doctest_out - -__all__ = ['Collection'] - -class Collection(object): - __doc__ = format_doctest_out(""" - See 3.3.5 Emulating container types: http://docs.python.org/ref/sequence-types.html#l2h-232 - - >>> from rdflib.graph import Graph - >>> listName = BNode() - >>> g = Graph('IOMemory') - >>> listItem1 = BNode() - >>> listItem2 = BNode() - >>> g.add((listName,RDF.first,Literal(1))) - >>> g.add((listName,RDF.rest,listItem1)) - >>> g.add((listItem1,RDF.first,Literal(2))) - >>> g.add((listItem1,RDF.rest,listItem2)) - >>> g.add((listItem2,RDF.rest,RDF.nil)) - >>> g.add((listItem2,RDF.first,Literal(3))) - >>> c=Collection(g,listName) - >>> print(list(c)) - [rdflib.term.Literal(%(u)s'1', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')), rdflib.term.Literal(%(u)s'2', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')), rdflib.term.Literal(%(u)s'3', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer'))] - >>> 1 in c - True - >>> len(c) - 3 - >>> c._get_container(1) == listItem1 - True - >>> c.index(Literal(2)) == 1 - True - """) - def __init__(self, graph, uri, seq=[]): - self.graph = graph - self.uri = uri or BNode() - for item in seq: - self.append(item) - - def n3(self): - """ - >>> from rdflib.graph import Graph - >>> listName = BNode() - >>> g = Graph('IOMemory') - >>> listItem1 = BNode() - >>> listItem2 = BNode() - >>> g.add((listName,RDF.first,Literal(1))) - >>> g.add((listName,RDF.rest,listItem1)) - >>> g.add((listItem1,RDF.first,Literal(2))) - >>> g.add((listItem1,RDF.rest,listItem2)) - >>> g.add((listItem2,RDF.rest,RDF.nil)) - >>> g.add((listItem2,RDF.first,Literal(3))) - >>> c=Collection(g,listName) - >>> print(c.n3()) - ( "1"^^ "2"^^ "3"^^ ) - """ - return "( %s )"%(' '.join([i.n3() for i in self])) - - def _get_container(self, index): - """Gets the first, rest holding node at index.""" - assert isinstance(index, int) - graph = self.graph - container = self.uri - i = 0 - while i>> from rdflib.namespace import RDF, RDFS - >>> from rdflib import Graph - >>> from pprint import pformat - >>> g=Graph() - >>> a=BNode('foo') - >>> b=BNode('bar') - >>> c=BNode('baz') - >>> g.add((a,RDF.first,RDF.type)) - >>> g.add((a,RDF.rest,b)) - >>> g.add((b,RDF.first,RDFS.label)) - >>> g.add((b,RDF.rest,c)) - >>> g.add((c,RDF.first,RDFS.comment)) - >>> g.add((c,RDF.rest,RDF.nil)) - >>> len(g) - 6 - >>> def listAncestry(node,graph): - ... for i in graph.subjects(RDF.rest,node): - ... yield i - >>> [str(node.n3()) for node in g.transitiveClosure(listAncestry,RDF.nil)] - ['_:baz', '_:bar', '_:foo'] - >>> lst=Collection(g,a) - >>> len(lst) - 3 - >>> b==lst._get_container(1) - True - >>> c==lst._get_container(2) - True - >>> del lst[1] - >>> len(lst) - 2 - >>> len(g) - 4 - - """ - self[key] # to raise any potential key exceptions - graph = self.graph - current = self._get_container(key) - assert current - if len(self)==1 and key>0: - pass - elif key==len(self)-1: - #the tail - priorLink = self._get_container(key-1) - self.graph.set((priorLink,RDF.rest,RDF.nil)) - graph.remove((current, None, None)) - else: - next = self._get_container(key+1) - prior = self._get_container(key-1) - assert next and prior - graph.remove((current, None, None)) - graph.set((prior, RDF.rest, next)) - - def __iter__(self): - """Iterator over items in Collections""" - return self.graph.items(self.uri) - - def append(self, item): - """ - >>> from rdflib.graph import Graph - >>> listName = BNode() - >>> g = Graph() - >>> c=Collection(g,listName,[Literal(1),Literal(2)]) - >>> links = [list(g.subjects(object=i,predicate=RDF.first))[0] for i in c] - >>> len([i for i in links if (i,RDF.rest,RDF.nil) in g]) - 1 - - """ - container = self.uri - graph = self.graph - #iterate to the end of the linked list - rest = graph.value(container, RDF.rest) - while rest: - if rest == RDF.nil: - #the end, append to the end of the linked list - node = BNode() - graph.set((container, RDF.rest, node)) - container=node - break - else: - #move down one link - if container != self.uri: - rest = graph.value(rest, RDF.rest) - if not rest == RDF.nil: - container=rest - graph.add((container, RDF.first, item)) - graph.add((container, RDF.rest, RDF.nil)) - - def clear(self): - container = self.uri - graph = self.graph - while container: - rest = graph.value(container, RDF.rest) - graph.remove((container, RDF.first, None)) - graph.remove((container, RDF.rest, None)) - container = rest -def test(): - import doctest - doctest.testmod() - -if __name__=="__main__": - test() - - g = Graph() - - c = Collection(g, BNode()) - - assert len(c)==0 - - c = Collection(g, BNode(), [Literal("1"), Literal("2"), Literal("3"), Literal("4")]) - - assert len(c)==4 - - assert c[1]==Literal("2"), c[1] - - del c[1] - - assert list(c)==[Literal("1"), Literal("3"), Literal("4")], list(c) - - try: - del c[500] - except IndexError, i: - pass - - c.append(Literal("5")) - - print(list(c)) - - for i in c: - print(i) - - del c[3] - - c.clear() - - assert len(c)==0 - diff --git a/doc/rdflib3/collection.pyc b/doc/rdflib3/collection.pyc deleted file mode 100644 index a3fd54f..0000000 Binary files a/doc/rdflib3/collection.pyc and /dev/null differ diff --git a/doc/rdflib3/compare.py b/doc/rdflib3/compare.py deleted file mode 100644 index 40db40c..0000000 --- a/doc/rdflib3/compare.py +++ /dev/null @@ -1,319 +0,0 @@ -# -*- coding: utf-8 -*- -import sys -if sys.version_info[:2] > (2,4): # No doctest.skip in Python 2.4 - __doc__ = """ -A collection of utilities for canonicalizing and inspecting graphs. - -Among other things, they solve of the problem of deterministic bnode -comparisons. - -Warning: the time to canonicalize bnodes may increase exponentially on larger -graphs. Use with care! - -Example of comparing two graphs:: - - >>> g1 = Graph().parse(format='n3', data=''' - ... @prefix : . - ... :rel - ... , - ... [ :label "Same" ], - ... , - ... [ :label "A" ] . - ... ''') - >>> g2 = Graph().parse(format='n3', data=''' - ... @prefix : . - ... :rel - ... , - ... [ :label "Same" ], - ... , - ... [ :label "B" ] . - ... ''') - >>> - >>> iso1 = to_isomorphic(g1) - >>> iso2 = to_isomorphic(g2) - -These are not isomorphic:: - - >>> iso1 == iso2 - False - -Diff the two graphs:: - - >>> in_both, in_first, in_second = graph_diff(iso1, iso2) - -Present in both:: - - >>> def dump_nt_sorted(g): - ... for l in sorted(g.serialize(format='nt').splitlines()): - ... if l: print(l.decode('ascii')) - - >>> dump_nt_sorted(in_both) #doctest: +SKIP - . - _:cbcaabaaba17fecbc304a64f8edee4335e . - _:cbcaabaaba17fecbc304a64f8edee4335e "Same" . - -Only in first:: - - >>> dump_nt_sorted(in_first) #doctest: +SKIP - . - _:cb124e4c6da0579f810c0ffe4eff485bd9 . - _:cb124e4c6da0579f810c0ffe4eff485bd9 "A" . - -Only in second:: - - >>> dump_nt_sorted(in_second) #doctest: +SKIP - . - _:cb558f30e21ddfc05ca53108348338ade8 . - _:cb558f30e21ddfc05ca53108348338ade8 "B" . -""" -else: - __doc__ = "" - -# ====================================================================== -# FAIL: Doctest: rdflib.compare -# ---------------------------------------------------------------------- -# Traceback (most recent call last): -# File "/usr/lib/python2.7/doctest.py", line 2166, in runTest -# raise self.failureException(self.format_failure(new.getvalue())) -# AssertionError: Failed doctest test for rdflib.compare -# File "...rdflib/rdflib/compare.py", line 1, in compare -# -# ---------------------------------------------------------------------- -# File "...rdflib/rdflib/compare.py", line 48, in rdflib.compare -# Failed example: -# dump_nt_sorted(in_both) #doctest +SKIP -# Expected: -# . -# _:cbcaabaaba17fecbc304a64f8edee4335e . -# _:cbcaabaaba17fecbc304a64f8edee4335e "Same" . -# Got: -# . -# ---------------------------------------------------------------------- -# File "...rdflib/rdflib/compare.py", line 55, in rdflib.compare -# Failed example: -# dump_nt_sorted(in_first) #doctest +SKIP -# Expected: -# . -# _:cb124e4c6da0579f810c0ffe4eff485bd9 . -# _:cb124e4c6da0579f810c0ffe4eff485bd9 "A" . -# Got: -# . -# _:cb189fca567334c3d20481a6d4035592bc . -# _:cbd80360ccf6ce9f9aa20dd0a4e90027e4 . -# _:cb65019af46ad8af18df6cbce90af81a02 "Same" . -# _:cba6f22538a1d3cf645d95dcc441170f24 "A" . -# ---------------------------------------------------------------------- -# File "...rdflib/rdflib/compare.py", line 62, in rdflib.compare -# Failed example: -# dump_nt_sorted(in_second) #doctest +SKIP -# Expected: -# . -# _:cb558f30e21ddfc05ca53108348338ade8 . -# _:cb558f30e21ddfc05ca53108348338ade8 "B" . -# Got: -# . -# _:cbd4f503467ab75a1056349c4eb47ac6ea . -# _:cbd6fd45be5f6f1a929dca5f11f72ccae2 . -# _:cb8a1b89fb2a3e9e99143f2dffbc5e0bf4 "B" . -# _:cbc9878f3250eee5de9cb6212906a0972f "Same" . -# -# -------------------- >> begin captured logging << -------------------- -# rdflib: INFO: version: 3.3.0-dev -# --------------------- >> end captured logging << --------------------- -# -# ---------------------------------------------------------------------- -# Ran 2 tests in 0.200s -# -# FAILED (failures=1) - - -# TODO: -# - Doesn't handle quads. -# - Add warning and/or safety mechanism before working on large graphs? -# - use this in existing Graph.isomorphic? - -__all__ = ['IsomorphicGraph', 'to_isomorphic', 'isomorphic', 'to_canonical_graph', 'graph_diff', 'similar'] - -from rdflib.graph import Graph, ConjunctiveGraph, ReadOnlyGraphAggregate -from rdflib.term import BNode -try: - import hashlib - md = hashlib.md5() -except ImportError: - # for Python << 2.5 - import md5 - md = md5.new() - -class IsomorphicGraph(ConjunctiveGraph): - """ - Ported from - (Sean B Palmer's RDF Graph Isomorphism Tester). - """ - - def __init__(self, **kwargs): - super(IsomorphicGraph, self).__init__(**kwargs) - - def __eq__(self, other): - """Graph isomorphism testing.""" - if not isinstance(other, IsomorphicGraph): - return False - elif len(self) != len(other): - return False - elif list(self) == list(other): - return True # TODO: really generally cheaper? - return self.internal_hash() == other.internal_hash() - - def __ne__(self, other): - """Negative graph isomorphism testing.""" - return not self.__eq__(other) - - def internal_hash(self): - """ - This is defined instead of __hash__ to avoid a circular recursion - scenario with the Memory store for rdflib which requires a hash lookup - in order to return a generator of triples. - """ - return _TripleCanonicalizer(self).to_hash() - - -class _TripleCanonicalizer(object): - - def __init__(self, graph, hashfunc=hash): - self.graph = graph - self.hashfunc = hashfunc - - def to_hash(self): - return self.hashfunc(tuple(sorted( - map(self.hashfunc, self.canonical_triples()) ))) - - def canonical_triples(self): - for triple in self.graph: - yield tuple(self._canonicalize_bnodes(triple)) - - def _canonicalize_bnodes(self, triple): - for term in triple: - if isinstance(term, BNode): - yield BNode(value="cb%s"%self._canonicalize(term)) - else: - yield term - - def _canonicalize(self, term, done=False): - return self.hashfunc(tuple(sorted(self._vhashtriples(term, done), - key=_hetero_tuple_key))) - - def _vhashtriples(self, term, done): - for triple in self.graph: - if term in triple: - yield tuple(self._vhashtriple(triple, term, done)) - - def _vhashtriple(self, triple, target_term, done): - for i, term in enumerate(triple): - if not isinstance(term, BNode): - yield term - elif done or (term == target_term): - yield i - else: - yield self._canonicalize(term, done=True) - -def _hetero_tuple_key(x): - "Sort like Python 2 - by name of type, then by value. Expects tuples." - return tuple((type(a).__name__, a) for a in x) - - -def to_isomorphic(graph): - if isinstance(graph, IsomorphicGraph): - return graph - return IsomorphicGraph(store=graph.store) - - -def isomorphic(graph1, graph2): - """ - Compare graph for equality. Uses an algorithm to compute unique hashes - which takes bnodes into account. - - Examples:: - - >>> g1 = Graph().parse(format='n3', data=''' - ... @prefix : . - ... :rel . - ... :rel . - ... :rel [ :label "A bnode." ] . - ... ''') - >>> g2 = Graph().parse(format='n3', data=''' - ... @prefix ns: . - ... ns:rel [ ns:label "A bnode." ] . - ... ns:rel , - ... . - ... ''') - >>> isomorphic(g1, g2) - True - - >>> g3 = Graph().parse(format='n3', data=''' - ... @prefix : . - ... :rel . - ... :rel . - ... :rel . - ... ''') - >>> isomorphic(g1, g3) - False - """ - return _TripleCanonicalizer(graph1).to_hash() == _TripleCanonicalizer(graph2).to_hash() - - -def to_canonical_graph(g1): - """ - Creates a canonical, read-only graph where all bnode id:s are based on - deterministical MD5 checksums, correlated with the graph contents. - """ - graph = Graph() - graph += _TripleCanonicalizer(g1, _md5_hash).canonical_triples() - return ReadOnlyGraphAggregate([graph]) - - -def graph_diff(g1, g2): - """ - Returns three sets of triples: "in both", "in first" and "in second". - """ - # bnodes have deterministic values in canonical graphs: - cg1 = to_canonical_graph(g1) - cg2 = to_canonical_graph(g2) - in_both = cg1*cg2 - in_first = cg1-cg2 - in_second = cg2-cg1 - return (in_both, in_first, in_second) - - -def _md5_hash(t): - h = md - for i in t: - if isinstance(i, tuple): - h.update(_md5_hash(i).encode('ascii')) - else: - h.update(unicode(i).encode("utf8")) - return h.hexdigest() - - -_MOCK_BNODE = BNode() - -def similar(g1, g2): - """ - Checks if the two graphs are "similar", by comparing sorted triples where - all bnodes have been replaced by a singular mock bnode (the - ``_MOCK_BNODE``). - - This is a much cheaper, but less reliable, alternative to the comparison - algorithm in ``isomorphic``. - """ - return all(t1 == t2 for (t1, t2) in _squashed_graphs_triples(g1, g2)) - -def _squashed_graphs_triples(g1, g2): - for (t1, t2) in zip(sorted(_squash_graph(g1)), sorted(_squash_graph(g2))): - yield t1, t2 - -def _squash_graph(graph): - return (_squash_bnodes(triple) for triple in graph) - -def _squash_bnodes(triple): - return tuple((isinstance(t, BNode) and _MOCK_BNODE) or t for t in triple) - - diff --git a/doc/rdflib3/compat.py b/doc/rdflib3/compat.py deleted file mode 100644 index 659224a..0000000 --- a/doc/rdflib3/compat.py +++ /dev/null @@ -1,45 +0,0 @@ -# -# code to simplify supporting 2.4 -# - - -# From -# http://code.activestate.com/recipes/523034-emulate-collectionsdefaultdict/ - -try: - from collections import defaultdict -except: - class defaultdict(dict): - def __init__(self, default_factory=None, *a, **kw): - if (default_factory is not None and - not hasattr(default_factory, '__call__')): - raise TypeError('first argument must be callable') - dict.__init__(self, *a, **kw) - self.default_factory = default_factory - def __getitem__(self, key): - try: - return dict.__getitem__(self, key) - except KeyError: - return self.__missing__(key) - def __missing__(self, key): - if self.default_factory is None: - raise KeyError(key) - self[key] = value = self.default_factory() - return value - def __reduce__(self): - if self.default_factory is None: - args = tuple() - else: - args = self.default_factory, - return type(self), args, None, None, self.items() - def copy(self): - return self.__copy__() - def __copy__(self): - return type(self)(self.default_factory, self) - def __deepcopy__(self, memo): - import copy - return type(self)(self.default_factory, - copy.deepcopy(self.items())) - def __repr__(self): - return 'defaultdict(%s, %s)' % (self.default_factory, - dict.__repr__(self)) diff --git a/doc/rdflib3/events.py b/doc/rdflib3/events.py deleted file mode 100644 index 4280018..0000000 --- a/doc/rdflib3/events.py +++ /dev/null @@ -1,92 +0,0 @@ - -__doc__ = """ -Dirt Simple Events - -A Dispatcher (or a subclass of Dispatcher) stores event handlers that -are 'fired' simple event objects when interesting things happen. - -Create a dispatcher: - - >>> d = Dispatcher() - -Now create a handler for the event and subscribe it to the dispatcher -to handle Event events. A handler is a simple function or method that -accepts the event as an argument: - - >>> def handler1(event): print(repr(event)) - >>> d.subscribe(Event, handler1) - -Now dispatch a new event into the dispatcher, and see handler1 get -fired: - - >>> d.dispatch(Event(foo='bar', data='yours', used_by='the event handlers')) - -""" - -__all__ = ['Event', 'Dispatcher'] - -class Event(object): - """ - An event is a container for attributes. The source of an event - creates this object, or a subclass, gives it any kind of data that - the events handlers need to handle the event, and then calls - notify(event). - - The target of an event registers a function to handle the event it - is interested with subscribe(). When a sources calls - notify(event), each subscriber to that even will be called i no - particular order. - """ - - def __init__(self, **kw): - self.__dict__.update(kw) - - def __repr__(self): - attrs = self.__dict__.keys() - attrs.sort() - return '' % ([a for a in attrs],) - - -class Dispatcher(object): - """ - An object that can dispatch events to a privately managed group of - subscribers. - """ - - _dispatch_map = None - - def set_map(self, amap): - self._dispatch_map = amap - - def get_map(self): - return self._dispatch_map - - def subscribe(self, event_type, handler): - """ Subscribe the given handler to an event_type. Handlers - are called in the order they are subscribed. - """ - if self._dispatch_map is None: - self.set_map({}) - lst = self._dispatch_map.get(event_type, None) - if lst is None: - lst = [handler] - else: - lst.append(handler) - self._dispatch_map[event_type] = lst - - def dispatch(self, event): - """ Dispatch the given event to the subscribed handlers for - the event's type""" - if self._dispatch_map is not None: - lst = self._dispatch_map.get(type(event), None) - if lst is None: - raise ValueError("unknown event type: %s" % type(event)) - for l in lst: - l(event) - -def test(): - import doctest - doctest.testmod() - -if __name__ == '__main__': - test() diff --git a/doc/rdflib3/events.pyc b/doc/rdflib3/events.pyc deleted file mode 100644 index d857823..0000000 Binary files a/doc/rdflib3/events.pyc and /dev/null differ diff --git a/doc/rdflib3/exceptions.py b/doc/rdflib3/exceptions.py deleted file mode 100644 index 92e5f03..0000000 --- a/doc/rdflib3/exceptions.py +++ /dev/null @@ -1,71 +0,0 @@ -""" -TODO: -""" - -__all__ = ['Error', 'TypeCheckError', 'SubjectTypeError', 'PredicateTypeError', 'ObjectTypeError', 'ContextTypeError', 'ParserError'] - -class Error(Exception): - """Base class for rdflib exceptions.""" - def __init__(self, msg=None): - Exception.__init__(self, msg) - self.msg = msg - - -class TypeCheckError(Error): - """Parts of assertions are subject to type checks.""" - - def __init__(self, node): - Error.__init__(self, node) - self.type = type(node) - self.node = node - - -class SubjectTypeError(TypeCheckError): - """Subject of an assertion must be an instance of URIRef.""" - def __init__(self, node): - TypeCheckError.__init__(self, node) - self.msg = "Subject must be instance of URIRef or BNode: %s(%s)" \ - % (self.node, self.type) - - -class PredicateTypeError(TypeCheckError): - """Predicate of an assertion must be an instance of URIRef.""" - def __init__(self, node): - TypeCheckError.__init__(self, node) - self.msg = "Predicate must be a URIRef instance: %s(%s)" \ - % (self.node, self.type) - - -class ObjectTypeError(TypeCheckError): - """Object of an assertion must be an instance of URIRef, Literal, - or BNode.""" - def __init__(self, node): - TypeCheckError.__init__(self, node) - self.msg = "\ -Object must be instance of URIRef, Literal, or BNode: %s(%s)" % \ - (self.node, self.type) - -class ContextTypeError(TypeCheckError): - """Context of an assertion must be an instance of URIRef.""" - def __init__(self, node): - TypeCheckError.__init__(self, node) - self.msg = "Context must be instance of URIRef or BNode: %s(%s)" \ - % (self.node, self.type) - -class ParserError(Error): - """RDF Parser error.""" - def __init__(self, msg): - Error.__init__(self, msg) - self.msg = msg - - def __str__(self): - return self.msg - - -class UniquenessError(Error) : - """A uniqueness assumption was made in the context, and that is not true""" - def __init__(self, values): - Error.__init__(self, "\ -Uniqueness assumption is not fulfilled. Multiple values are: %s" % values) - - diff --git a/doc/rdflib3/exceptions.pyc b/doc/rdflib3/exceptions.pyc deleted file mode 100644 index c319fd6..0000000 Binary files a/doc/rdflib3/exceptions.pyc and /dev/null differ diff --git a/doc/rdflib3/graph.py b/doc/rdflib3/graph.py deleted file mode 100644 index 9011c13..0000000 --- a/doc/rdflib3/graph.py +++ /dev/null @@ -1,1540 +0,0 @@ -from __future__ import generators -from rdflib.py3compat import format_doctest_out -__doc__ = format_doctest_out("""\ -Instantiating Graphs with default store (IOMemory) and default identifier -(a BNode): - - >>> g = Graph() - >>> g.store.__class__ - - >>> g.identifier.__class__ - - -Instantiating Graphs with a specific kind of store (IOMemory) and a default -identifier (a BNode): - -Other store kinds: Sleepycat, MySQL, SQLite - - >>> store = plugin.get('IOMemory', Store)() - >>> store.__class__.__name__ - 'IOMemory' - >>> graph = Graph(store) - >>> graph.store.__class__ - - -Instantiating Graphs with Sleepycat store and an identifier - -: - - >>> g = Graph('IOMemory', URIRef("http://rdflib.net")) - >>> g.identifier - rdflib.term.URIRef(%(u)s'http://rdflib.net') - >>> str(g) - " a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'IOMemory']." - -Creating a ConjunctiveGraph - The top level container for all named Graphs -in a 'database': - - >>> g = ConjunctiveGraph() - >>> str(g.default_context) - "[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label 'IOMemory']]." - -Adding / removing reified triples to Graph and iterating over it directly or -via triple pattern: - - >>> g = Graph('IOMemory') - >>> statementId = BNode() - >>> print(len(g)) - 0 - >>> g.add((statementId, RDF.type, RDF.Statement)) - >>> g.add((statementId, RDF.subject, URIRef(%(u)s'http://rdflib.net/store/ConjunctiveGraph'))) - >>> g.add((statementId, RDF.predicate, RDFS.label)) - >>> g.add((statementId, RDF.object, Literal("Conjunctive Graph"))) - >>> print(len(g)) - 4 - >>> for s, p, o in g: - ... print(type(s)) - ... - - - - - - >>> for s, p, o in g.triples((None, RDF.object, None)): - ... print(o) - ... - Conjunctive Graph - >>> g.remove((statementId, RDF.type, RDF.Statement)) - >>> print(len(g)) - 3 - -``None`` terms in calls to :meth:`~rdflib.graph.Graph.triples` can be thought of as "open variables". - -Graph support set-theoretic operators, you can add/subtract graphs, as well as intersection (with multiplication operator g1*g2) and xor (g1 ^ g2). -Note that BNode IDs are kept when doing set-theoretic operations, this may or may not be what you want. Two named graphs within the same application probably want share BNode IDs, two graphs with data from different sources probably not. If your BNode IDs are all generated by RDFLib they are UUIDs and unique. - - >>> g1 = Graph() - >>> g2 = Graph() - >>> u = URIRef(%(u)s'http://example.com/foo') - >>> g1.add([u, RDFS.label, Literal('foo')]) - >>> g1.add([u, RDFS.label, Literal('bar')]) - >>> g2.add([u, RDFS.label, Literal('foo')]) - >>> g2.add([u, RDFS.label, Literal('bing')]) - >>> len(g1+g2) # adds bing as label - 3 - >>> len(g1-g2) # removes foo - 1 - >>> len(g1*g2) # only foo - 1 - >>> g1+=g2 # now g1 contains everything - - -Graph Aggregation - ConjunctiveGraphs and ReadOnlyGraphAggregate within -the same store: - - >>> store = plugin.get('IOMemory', Store)() - >>> g1 = Graph(store) - >>> g2 = Graph(store) - >>> g3 = Graph(store) - >>> stmt1 = BNode() - >>> stmt2 = BNode() - >>> stmt3 = BNode() - >>> g1.add((stmt1, RDF.type, RDF.Statement)) - >>> g1.add((stmt1, RDF.subject, URIRef(%(u)s'http://rdflib.net/store/ConjunctiveGraph'))) - >>> g1.add((stmt1, RDF.predicate, RDFS.label)) - >>> g1.add((stmt1, RDF.object, Literal("Conjunctive Graph"))) - >>> g2.add((stmt2, RDF.type, RDF.Statement)) - >>> g2.add((stmt2, RDF.subject, URIRef(%(u)s'http://rdflib.net/store/ConjunctiveGraph'))) - >>> g2.add((stmt2, RDF.predicate, RDF.type)) - >>> g2.add((stmt2, RDF.object, RDFS.Class)) - >>> g3.add((stmt3, RDF.type, RDF.Statement)) - >>> g3.add((stmt3, RDF.subject, URIRef(%(u)s'http://rdflib.net/store/ConjunctiveGraph'))) - >>> g3.add((stmt3, RDF.predicate, RDFS.comment)) - >>> g3.add((stmt3, RDF.object, Literal("The top-level aggregate graph - The sum of all named graphs within a Store"))) - >>> len(list(ConjunctiveGraph(store).subjects(RDF.type, RDF.Statement))) - 3 - >>> len(list(ReadOnlyGraphAggregate([g1,g2]).subjects(RDF.type, RDF.Statement))) - 2 - -ConjunctiveGraphs have a :meth:`~rdflib.graph.ConjunctiveGraph.quads` method which returns quads instead of -triples, where the fourth item is the Graph (or subclass thereof) instance -in which the triple was asserted: - - >>> uniqueGraphNames = set([graph.identifier for s, p, o, graph in ConjunctiveGraph(store).quads((None, RDF.predicate, None))]) - >>> len(uniqueGraphNames) - 3 - >>> unionGraph = ReadOnlyGraphAggregate([g1, g2]) - >>> uniqueGraphNames = set([graph.identifier for s, p, o, graph in unionGraph.quads((None, RDF.predicate, None))]) - >>> len(uniqueGraphNames) - 2 - -Parsing N3 from a string - - >>> g2 = Graph() - >>> src = ''' - ... @prefix rdf: . - ... @prefix rdfs: . - ... [ a rdf:Statement ; - ... rdf:subject ; - ... rdf:predicate rdfs:label; - ... rdf:object "Conjunctive Graph" ] . - ... ''' - >>> g2 = g2.parse(data=src, format='n3') - >>> print(len(g2)) - 4 - -Using Namespace class: - - >>> RDFLib = Namespace('http://rdflib.net') - >>> RDFLib.ConjunctiveGraph - rdflib.term.URIRef(%(u)s'http://rdflib.netConjunctiveGraph') - >>> RDFLib['Graph'] - rdflib.term.URIRef(%(u)s'http://rdflib.netGraph') - -""") - -import logging -_logger = logging.getLogger(__name__) - -#import md5 -import random -import warnings - -try: - from hashlib import md5 -except ImportError: - from md5 import md5 - -try: - from io import BytesIO -except ImportError: - try: - from cStringIO import StringIO as BytesIO - except ImportError: - from StringIO import StringIO as BytesIO - -# # Can't use this approach any longer, this function will raise an ImportError -# # because the sparql module has been moved to the RDFExtras package. - -# def describe(terms,bindings,graph): -# """ -# Default DESCRIBE returns all incomming and outgoing statements about the given terms -# """ -# from rdflib.sparql.sparqlOperators import getValue -# g=Graph() -# terms=[getValue(i)(bindings) for i in terms] -# for s,p,o in graph.triples_choices((terms,None,None)): -# g.add((s,p,o)) -# for s,p,o in graph.triples_choices((None,None,terms)): -# g.add((s,p,o)) -# return g - -from rdflib.namespace import RDF, RDFS, SKOS - -from rdflib import plugin, exceptions, query -#, sparql - -from rdflib.term import Node -from rdflib.term import URIRef -from rdflib.term import BNode -from rdflib.term import Literal # required for doctests -from rdflib.namespace import Namespace # required for doctests -from rdflib.store import Store -from rdflib.serializer import Serializer -from rdflib.parser import Parser -from rdflib.parser import create_input_source -from rdflib.namespace import NamespaceManager -from rdflib.resource import Resource -from rdflib import py3compat -b = py3compat.b - -import tempfile, shutil, os -from urlparse import urlparse - -__all__ = ['Graph', 'ConjunctiveGraph', 'QuotedGraph', 'GraphValue', 'Seq', 'BackwardCompatGraph', 'ModificationException', 'UnSupportedAggregateOperation', 'ReadOnlyGraphAggregate'] - -class Graph(Node): - """An RDF Graph - - The constructor accepts one argument, the 'store' - that will be used to store the graph data (see the 'store' - package for stores currently shipped with rdflib). - - Stores can be context-aware or unaware. Unaware stores take up - (some) less space but cannot support features that require - context, such as true merging/demerging of sub-graphs and - provenance. - - The Graph constructor can take an identifier which identifies the Graph - by name. If none is given, the graph is assigned a BNode for its - identifier. - For more on named graphs, see: http://www.w3.org/2004/03/trix/ - - Ontology for __str__ provenance terms: - - .. code-block:: n3 - - @prefix rdf: . - @prefix rdfs: . - @prefix : . - @prefix rdfg: . - @prefix owl: . - @prefix log: . - @prefix xsd: . - - :Store a owl:Class; - rdfs:subClassOf ; - rdfs:subClassOf - [a owl:Restriction; - owl:onProperty rdfs:label; - owl:allValuesFrom [a owl:DataRange; - owl:oneOf ("IOMemory" - "Sleepcat" - "MySQL" - "Redland" - "REGEXMatching" - "ZODB" - "AuditableStorage" - "Memory")] - ]. - - :ConjunctiveGraph a owl:Class; - rdfs:subClassOf rdfg:Graph; - rdfs:label "The top-level graph within the store - the union of all the Graphs within." - rdfs:seeAlso . - - :DefaultGraph a owl:Class; - rdfs:subClassOf rdfg:Graph; - rdfs:label "The 'default' subgraph of a conjunctive graph". - - - :identifier a owl:Datatypeproperty; - rdfs:label "The store-associated identifier of the formula. ". - rdfs:domain log:Formula - rdfs:range xsd:anyURI; - - :storage a owl:ObjectProperty; - rdfs:domain [ - a owl:Class; - owl:unionOf (log:Formula rdfg:Graph :ConjunctiveGraph) - ]; - rdfs:range :Store. - - :default_context a owl:FunctionalProperty; - rdfs:label "The default context for a conjunctive graph"; - rdfs:domain :ConjunctiveGraph; - rdfs:range :DefaultGraph. - - - {?cg a :ConjunctiveGraph;:storage ?store} - => {?cg owl:sameAs ?store}. - - {?subGraph rdfg:subGraphOf ?cg;a :DefaultGraph} - => {?cg a :ConjunctiveGraph;:default_context ?subGraphOf} . - - """ - - def __init__(self, store='default', identifier=None, - namespace_manager=None): - super(Graph, self).__init__() - self.__identifier = identifier or BNode() - if not isinstance(store, Store): - # TODO: error handling - self.__store = store = plugin.get(store, Store)() - else: - self.__store = store - self.__namespace_manager = namespace_manager - self.context_aware = False - self.formula_aware = False - - def __get_store(self): - return self.__store - store = property(__get_store) # read-only attr - - def __get_identifier(self): - return self.__identifier - identifier = property(__get_identifier) # read-only attr - - def _get_namespace_manager(self): - if self.__namespace_manager is None: - self.__namespace_manager = NamespaceManager(self) - return self.__namespace_manager - - def _set_namespace_manager(self, nm): - self.__namespace_manager = nm - namespace_manager = property(_get_namespace_manager, _set_namespace_manager) - - def __repr__(self): - return "" % (self.identifier, type(self)) - - def __str__(self): - if isinstance(self.identifier, URIRef): - return "%s a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label '%s']." % ( - self.identifier.n3(),self.store.__class__.__name__) - else: - return "[a rdfg:Graph;rdflib:storage [a rdflib:Store;rdfs:label '%s']]." % ( - self.store.__class__.__name__) - - def toPython(self): - return self - - def destroy(self, configuration): - """Destroy the store identified by `configuration` if supported""" - self.__store.destroy(configuration) - - #Transactional interfaces (optional) - def commit(self): - """Commits active transactions""" - self.__store.commit() - - def rollback(self): - """Rollback active transactions""" - self.__store.rollback() - - def open(self, configuration, create=False): - """Open the graph store - - Might be necessary for stores that require opening a connection to a - database or acquiring some resource. - """ - return self.__store.open(configuration, create) - - def close(self, commit_pending_transaction=False): - """Close the graph store - - Might be necessary for stores that require closing a connection to a - database or releasing some resource. - """ - self.__store.close(commit_pending_transaction=commit_pending_transaction) - - def add(self, (s, p, o)): - """Add a triple with self as context""" - assert isinstance(s,Node), "Subject %s must be an rdflib term" % (s,) - assert isinstance(p,Node), "Predicate %s must be an rdflib term" % (p,) - assert isinstance(o,Node), "Object %s must be an rdflib term" % (o,) - self.__store.add((s, p, o), self, quoted=False) - - def addN(self, quads): - """Add a sequence of triple with context""" - - def assertnode(t): - assert isinstance(t,Node), 'Term %s but be an rdflib term' % (t,) - return True - - self.__store.addN((s, p, o, c) for s, p, o, c in quads - if isinstance(c, Graph) - and c.identifier is self.identifier - and assertnode(s) - and assertnode(p) - and assertnode(o)) - - def remove(self, (s, p, o)): - """Remove a triple from the graph - - If the triple does not provide a context attribute, removes the triple - from all contexts. - """ - self.__store.remove((s, p, o), context=self) - - def triples(self, (s, p, o)): - """Generator over the triple store - - Returns triples that match the given triple pattern. If triple pattern - does not provide a context, all contexts will be searched. - """ - for (s, p, o), cg in self.__store.triples((s, p, o), context=self): - yield (s, p, o) - - def __len__(self): - """Returns the number of triples in the graph - - If context is specified then the number of triples in the context is - returned instead. - """ - return self.__store.__len__(context=self) - - def __iter__(self): - """Iterates over all triples in the store""" - return self.triples((None, None, None)) - - def __contains__(self, triple): - """Support for 'triple in graph' syntax""" - for triple in self.triples(triple): - return True - return False - - def __hash__(self): - return hash(self.identifier) - - def md5_term_hash(self): - d = md5(str(self.identifier)) - d.update("G") - return d.hexdigest() - - def __cmp__(self, other): - if other is None: - return -1 - elif isinstance(other, Graph): - return cmp(self.identifier, other.identifier) - else: - #Note if None is considered equivalent to owl:Nothing - #Then perhaps a graph with length 0 should be considered - #equivalent to None (if compared to it)? - return 1 - - def __eq__(self, other): - return isinstance(other, Graph) and self.identifier == other.identifier - - def __lt__(self, other): - return (other is None) or (isinstance(other, Graph) and \ - self.identifier < other.identifier) - def __le__(self, other): return self < other or self == other - - def __gt__(self, other): - return (isinstance(other, Graph) and self.identifier > other.identifier) \ - or (other is not None) - def __ge__(self, other): return self > other or self == other - - def __iadd__(self, other): - """Add all triples in Graph other to Graph. - BNode IDs are not changed.""" - self.addN((s,p,o,self) for s,p,o in other) - return self - - def __isub__(self, other): - """Subtract all triples in Graph other from Graph. - BNode IDs are not changed.""" - for triple in other: - self.remove(triple) - return self - - def __add__(self, other) : - """Set-theoretic union - BNode IDs are not changed.""" - retval = Graph() - for (prefix, uri) in set( - list(self.namespaces()) + list(other.namespaces())): - retval.bind(prefix, uri) - for x in self: - retval.add(x) - for y in other: - retval.add(y) - return retval - - def __mul__(self, other) : - """Set-theoretic intersection. - BNode IDs are not changed.""" - retval = Graph() - for x in other: - if x in self: - retval.add(x) - return retval - - def __sub__(self, other) : - """Set-theoretic difference. - BNode IDs are not changed.""" - retval = Graph() - for x in self: - if not x in other : - retval.add(x) - return retval - - def __xor__(self, other): - """Set-theoretic XOR. - BNode IDs are not changed.""" - return (self - other) + (other - self) - - __or__ = __add__ - __and__ = __mul__ - - # Conv. methods - - def set(self, (subject, predicate, object)): - """Convenience method to update the value of object - - Remove any existing triples for subject and predicate before adding - (subject, predicate, object). - """ - self.remove((subject, predicate, None)) - self.add((subject, predicate, object)) - - def subjects(self, predicate=None, object=None): - """A generator of subjects with the given predicate and object""" - for s, p, o in self.triples((None, predicate, object)): - yield s - - def predicates(self, subject=None, object=None): - """A generator of predicates with the given subject and object""" - for s, p, o in self.triples((subject, None, object)): - yield p - - def objects(self, subject=None, predicate=None): - """A generator of objects with the given subject and predicate""" - for s, p, o in self.triples((subject, predicate, None)): - yield o - - def subject_predicates(self, object=None): - """A generator of (subject, predicate) tuples for the given object""" - for s, p, o in self.triples((None, None, object)): - yield s, p - - def subject_objects(self, predicate=None): - """A generator of (subject, object) tuples for the given predicate""" - for s, p, o in self.triples((None, predicate, None)): - yield s, o - - def predicate_objects(self, subject=None): - """A generator of (predicate, object) tuples for the given subject""" - for s, p, o in self.triples((subject, None, None)): - yield p, o - - def triples_choices(self, (subject, predicate, object_), context=None): - for (s, p, o), cg in self.store.triples_choices( - (subject, predicate, object_), context=self): - yield (s, p, o) - - def value(self, subject=None, predicate=RDF.value, object=None, - default=None, any=True): - """Get a value for a pair of two criteria - - Exactly one of subject, predicate, object must be None. Useful if one - knows that there may only be one value. - - It is one of those situations that occur a lot, hence this - 'macro' like utility - - Parameters: - subject, predicate, object -- exactly one must be None - default -- value to be returned if no values found - any -- if True, return any value in the case there is more than one, - else, raise UniquenessError - """ - retval = default - - if (subject is None and predicate is None) or \ - (subject is None and object is None) or \ - (predicate is None and object is None): - return None - - if object is None: - values = self.objects(subject, predicate) - if subject is None: - values = self.subjects(predicate, object) - if predicate is None: - values = self.predicates(subject, object) - - try: - retval = values.next() - except StopIteration, e: - retval = default - else: - if any is False: - try: - values.next() - msg = ("While trying to find a value for (%s, %s, %s) the" - " following multiple values where found:\n" % - (subject, predicate, object)) - triples = self.store.triples( - (subject, predicate, object), None) - for (s, p, o), contexts in triples: - msg += "(%s, %s, %s)\n (contexts: %s)\n" % ( - s, p, o, list(contexts)) - raise exceptions.UniquenessError(msg) - except StopIteration, e: - pass - return retval - - def label(self, subject, default=''): - """Query for the RDFS.label of the subject - - Return default if no label exists - """ - if subject is None: - return default - return self.value(subject, RDFS.label, default=default, any=True) - - @py3compat.format_doctest_out - def preferredLabel(self, subject, lang=None, default=[], - labelProperties=(SKOS.prefLabel, RDFS.label)): - """ Find the preferred label for subject. - - By default prefers skos:prefLabels over rdfs:labels. In case at least - one prefLabel is found returns those, else returns labels. In case a - language string (e.g., 'en', 'de' or even '' for no lang-tagged - literals) is given, only such labels will be considered. - - Return a list of (labelProp, label) pairs, where labelProp is either - skos:prefLabel or rdfs:label. - - >>> g = ConjunctiveGraph() - >>> u = URIRef(%(u)s'http://example.com/foo') - >>> g.add([u, RDFS.label, Literal('foo')]) - >>> g.add([u, RDFS.label, Literal('bar')]) - >>> sorted(g.preferredLabel(u)) #doctest: +NORMALIZE_WHITESPACE - [(rdflib.term.URIRef(%(u)s'http://www.w3.org/2000/01/rdf-schema#label'), - rdflib.term.Literal(%(u)s'bar')), - (rdflib.term.URIRef(%(u)s'http://www.w3.org/2000/01/rdf-schema#label'), - rdflib.term.Literal(%(u)s'foo'))] - >>> g.add([u, SKOS.prefLabel, Literal('bla')]) - >>> g.preferredLabel(u) #doctest: +NORMALIZE_WHITESPACE - [(rdflib.term.URIRef(%(u)s'http://www.w3.org/2004/02/skos/core#prefLabel'), - rdflib.term.Literal(%(u)s'bla'))] - >>> g.add([u, SKOS.prefLabel, Literal('blubb', lang='en')]) - >>> sorted(g.preferredLabel(u)) #doctest: +NORMALIZE_WHITESPACE - [(rdflib.term.URIRef(%(u)s'http://www.w3.org/2004/02/skos/core#prefLabel'), - rdflib.term.Literal(%(u)s'blubb', lang='en')), - (rdflib.term.URIRef(%(u)s'http://www.w3.org/2004/02/skos/core#prefLabel'), - rdflib.term.Literal(%(u)s'bla'))] - >>> g.preferredLabel(u, lang='') #doctest: +NORMALIZE_WHITESPACE - [(rdflib.term.URIRef(%(u)s'http://www.w3.org/2004/02/skos/core#prefLabel'), - rdflib.term.Literal(%(u)s'bla'))] - >>> g.preferredLabel(u, lang='en') #doctest: +NORMALIZE_WHITESPACE - [(rdflib.term.URIRef(%(u)s'http://www.w3.org/2004/02/skos/core#prefLabel'), - rdflib.term.Literal(%(u)s'blubb', lang='en'))] - """ - - # setup the language filtering - if lang != None: - if lang == '': # we only want not language-tagged literals - langfilter = lambda l: l.language == None - else: - langfilter = lambda l: l.language == lang - else: # we don't care about language tags - langfilter = lambda l: True - - for labelProp in labelProperties: - labels = filter(langfilter, self.objects(subject, labelProp)) - if len(labels) == 0: - continue - else: - return [(labelProp, l) for l in labels] - return default - - def comment(self, subject, default=''): - """Query for the RDFS.comment of the subject - - Return default if no comment exists - """ - if subject is None: - return default - return self.value(subject, RDFS.comment, default=default, any=True) - - def items(self, list): - """Generator over all items in the resource specified by list - - list is an RDF collection. - """ - while list: - item = self.value(list, RDF.first) - if item: - yield item - list = self.value(list, RDF.rest) - - def transitiveClosure(self,func,arg): - """ - Generates transitive closure of a user-defined - function against the graph - - >>> from rdflib.collection import Collection - >>> g=Graph() - >>> a=BNode('foo') - >>> b=BNode('bar') - >>> c=BNode('baz') - >>> g.add((a,RDF.first,RDF.type)) - >>> g.add((a,RDF.rest,b)) - >>> g.add((b,RDF.first,RDFS.label)) - >>> g.add((b,RDF.rest,c)) - >>> g.add((c,RDF.first,RDFS.comment)) - >>> g.add((c,RDF.rest,RDF.nil)) - >>> def topList(node,g): - ... for s in g.subjects(RDF.rest,node): - ... yield s - >>> def reverseList(node,g): - ... for f in g.objects(node,RDF.first): - ... print(f) - ... for s in g.subjects(RDF.rest,node): - ... yield s - - >>> [rt for rt in g.transitiveClosure(topList,RDF.nil)] - [rdflib.term.BNode('baz'), rdflib.term.BNode('bar'), rdflib.term.BNode('foo')] - - >>> [rt for rt in g.transitiveClosure(reverseList,RDF.nil)] - http://www.w3.org/2000/01/rdf-schema#comment - http://www.w3.org/2000/01/rdf-schema#label - http://www.w3.org/1999/02/22-rdf-syntax-ns#type - [rdflib.term.BNode('baz'), rdflib.term.BNode('bar'), rdflib.term.BNode('foo')] - - """ - for rt in func(arg,self): - yield rt - for rt_2 in self.transitiveClosure(func, rt): - yield rt_2 - - def transitive_objects(self, subject, property, remember=None): - """Transitively generate objects for the ``property`` relationship - - Generated objects belong to the depth first transitive closure of the - ``property`` relationship starting at ``subject``. - """ - if remember is None: - remember = {} - if subject in remember: - return - remember[subject] = 1 - yield subject - for object in self.objects(subject, property): - for o in self.transitive_objects(object, property, remember): - yield o - - def transitive_subjects(self, predicate, object, remember=None): - """Transitively generate objects for the ``property`` relationship - - Generated objects belong to the depth first transitive closure of the - ``property`` relationship starting at ``subject``. - """ - if remember is None: - remember = {} - if object in remember: - return - remember[object] = 1 - yield object - for subject in self.subjects(predicate, object): - for s in self.transitive_subjects(predicate, subject, remember): - yield s - - def seq(self, subject): - """Check if subject is an rdf:Seq - - If yes, it returns a Seq class instance, None otherwise. - """ - if (subject, RDF.type, RDF.Seq) in self: - return Seq(self, subject) - else: - return None - - def qname(self, uri): - return self.namespace_manager.qname(uri) - - def compute_qname(self, uri, generate=True): - return self.namespace_manager.compute_qname(uri, generate) - - def bind(self, prefix, namespace, override=True): - """Bind prefix to namespace - - If override is True will bind namespace to given prefix if namespace - was already bound to a different prefix. - """ - return self.namespace_manager.bind( - prefix, namespace, override=override) - - def namespaces(self): - """Generator over all the prefix, namespace tuples""" - for prefix, namespace in self.namespace_manager.namespaces(): - yield prefix, namespace - - def absolutize(self, uri, defrag=1): - """Turn uri into an absolute URI if it's not one already""" - return self.namespace_manager.absolutize(uri, defrag) - - def serialize( - self, destination=None, format="xml", - base=None, encoding=None, **args): - """Serialize the Graph to destination - - If destination is None serialize method returns the serialization as a - string. Format defaults to xml (AKA rdf/xml). - - Format support can be extended with plugins, - but 'xml', 'n3', 'turtle', 'nt', 'pretty-xml', trix' are built in. - """ - serializer = plugin.get(format, Serializer)(self) - if destination is None: - stream = BytesIO() - serializer.serialize(stream, base=base, encoding=encoding, **args) - return stream.getvalue() - if hasattr(destination, "write"): - stream = destination - serializer.serialize(stream, base=base, encoding=encoding, **args) - else: - location = destination - scheme, netloc, path, params, query, fragment = urlparse(location) - if netloc != "": - print("WARNING: not saving as location" + \ - "is not a local file reference") - return - fd, name = tempfile.mkstemp() - stream = os.fdopen(fd,"w") - serializer.serialize(stream, base=base, encoding=encoding, **args) - stream.close() - if hasattr(shutil,"move"): - shutil.move(name, path) - else: - shutil.copy(name, path) - os.remove(name) - - def parse(self, source=None, publicID=None, format=None, - location=None, file=None, data=None, **args): - """ - Parse source adding the resulting triples to the Graph. - - The source is specified using one of source, location, file or - data. - - :Parameters: - - - `source`: An InputSource, file-like object, or string. In the case - of a string the string is the location of the source. - - `location`: A string indicating the relative or absolute URL of the - source. Graph's absolutize method is used if a relative location - is specified. - - `file`: A file-like object. - - `data`: A string containing the data to be parsed. - - `format`: Used if format can not be determined from source. - Defaults to rdf/xml. Format support can be extended with plugins, - but 'xml', 'n3', 'nt', 'trix', 'rdfa' are built in. - - `publicID`: the logical URI to use as the document base. If None - specified the document location is used (at least in the case where - there is a document location). - - :Returns: - - - self, the graph instance. - - Examples: - - >>> my_data = ''' - ... - ... - ... Example - ... This is really just an example. - ... - ... - ... ''' - >>> import tempfile - >>> fd, file_name = tempfile.mkstemp() - >>> f = os.fdopen(fd, 'w') - >>> dummy = f.write(my_data) # Returns num bytes written on py3 - >>> f.close() - - >>> g = Graph() - >>> result = g.parse(data=my_data, format="application/rdf+xml") - >>> len(g) - 2 - - >>> g = Graph() - >>> result = g.parse(location=file_name, format="application/rdf+xml") - >>> len(g) - 2 - - >>> g = Graph() - >>> result = g.parse(file=open(file_name, "r"), format="application/rdf+xml") - >>> len(g) - 2 - - >>> os.remove(file_name) - - """ - - if format == "xml": - # warn... backward compat. - format = "application/rdf+xml" - source = create_input_source(source=source, publicID=publicID, - location=location, file=file, - data=data, format=format) - if format is None: - format = source.content_type - if format is None: - #raise Exception("Could not determine format for %r. You can" + \ - # "expicitly specify one with the format argument." % source) - format = "application/rdf+xml" - parser = plugin.get(format, Parser)() - parser.parse(source, self, **args) - return self - - def load(self, source, publicID=None, format="xml"): - self.parse(source, publicID, format) - - def query(self, query_object, processor='sparql', result='sparql', initNs={}, initBindings={}, use_store_provided=True, **kwargs): - """ - """ - if hasattr(self.store, "query") and use_store_provided: - res = self.store.query(query_object, initNs, initBindings, **kwargs) - if res == NotImplemented: - pass # store has no own implementation - else: - return res - if not isinstance(result, query.Result): - result = plugin.get(result, query.Result) - if not isinstance(processor, query.Processor): - processorinst = plugin.get(processor, query.Processor)(self) - - return result(processorinst.query(query_object, initBindings, initNs, **kwargs)) - - - def n3(self): - """return an n3 identifier for the Graph""" - return "[%s]" % self.identifier.n3() - - def __reduce__(self): - return (Graph, (self.store, self.identifier,)) - - def isomorphic(self, other): - """ - does a very basic check if these graphs are the same - If no BNodes are involved, this is accurate. - - See rdflib.compare for a correct implementation of isomorphism checks - """ - # TODO: this is only an approximation. - if len(self) != len(other): - return False - for s, p, o in self: - if not isinstance(s, BNode) and not isinstance(o, BNode): - if not (s, p, o) in other: - return False - for s, p, o in other: - if not isinstance(s, BNode) and not isinstance(o, BNode): - if not (s, p, o) in self: - return False - # TODO: very well could be a false positive at this point yet. - return True - - def connected(self): - """Check if the Graph is connected - - The Graph is considered undirectional. - - Performs a search on the Graph, starting from a random node. Then - iteratively goes depth-first through the triplets where the node is - subject and object. Return True if all nodes have been visited and - False if it cannot continue and there are still unvisited nodes left. - """ - all_nodes = list(self.all_nodes()) - discovered = [] - - # take a random one, could also always take the first one, doesn't - # really matter. - if not all_nodes: - return False - - visiting = [all_nodes[random.randrange(len(all_nodes))]] - while visiting: - x = visiting.pop() - if x not in discovered: - discovered.append(x) - for new_x in self.objects(subject=x): - if new_x not in discovered and new_x not in visiting: - visiting.append(new_x) - for new_x in self.subjects(object=x): - if new_x not in discovered and new_x not in visiting: - visiting.append(new_x) - - # optimisation by only considering length, since no new objects can - # be introduced anywhere. - if len(all_nodes) == len(discovered): - return True - else: - return False - - def all_nodes(self): - obj = set(self.objects()) - allNodes = obj.union(set(self.subjects())) - return allNodes - - def resource(self, identifier): - """Create a new ``Resource`` instance. - - Parameters: - - - ``identifier``: a URIRef or BNode instance. - - Example:: - - >>> graph = Graph() - >>> uri = URIRef("http://example.org/resource") - >>> resource = graph.resource(uri) - >>> assert isinstance(resource, Resource) - >>> assert resource.identifier is uri - >>> assert resource.graph is graph - - """ - return Resource(self, identifier) - - -class ConjunctiveGraph(Graph): - """ - A ConjunctiveGraph is an (unamed) aggregation of all the named graphs - within the Store. It has a ``default`` graph, whose name is associated - with the ConjunctiveGraph throughout its life. All methods work against - this default graph. Its constructor can take an identifier to use as the - name of this default graph or it will assign a BNode. - - In practice, it is typical to instantiate a ConjunctiveGraph if you want - to add triples to the Store but don't care to mint a URI for the graph. - Any triples in the graph can still be addressed. - """ - - def __init__(self, store='default', identifier=None): - super(ConjunctiveGraph, self).__init__(store, identifier=identifier) - assert self.store.context_aware, ("ConjunctiveGraph must be backed by" - " a context aware store.") - self.context_aware = True - self.default_context = Graph(store=self.store, - identifier=identifier or BNode()) - - def __str__(self): - pattern = ("[a rdflib:ConjunctiveGraph;rdflib:storage " - "[a rdflib:Store;rdfs:label '%s']]") - return pattern % self.store.__class__.__name__ - - def __contains__(self, triple_or_quad): - """Support for 'triple/quad in graph' syntax""" - context = None - if len(triple_or_quad) == 4: - context = triple_or_quad[3] - for t in self.triples(triple_or_quad[:3], context=context): - return True - return False - - def add(self, (s, p, o)): - """Add the triple to the default context""" - self.store.add((s, p, o), context=self.default_context, quoted=False) - - def addN(self, quads): - """Add a sequence of triples with context""" - self.store.addN(quads) - - def remove(self, (s, p, o)): - """Removes from all its contexts""" - self.store.remove((s, p, o), context=None) - - def triples(self, (s, p, o), context=None): - """Iterate over all the triples in the entire conjunctive graph""" - for (s, p, o), cg in self.store.triples((s, p, o), context=context): - yield s, p, o - - def quads(self, (s,p,o)): - """Iterate over all the quads in the entire conjunctive graph""" - for (s, p, o), cg in self.store.triples((s, p, o), context=None): - for ctx in cg: - yield s, p, o, ctx - - def triples_choices(self, (s, p, o)): - """Iterate over all the triples in the entire conjunctive graph""" - for (s1, p1, o1), cg in self.store.triples_choices((s, p, o), - context=None): - yield (s1, p1, o1) - - def __len__(self): - """Number of triples in the entire conjunctive graph""" - return self.store.__len__() - - def contexts(self, triple=None): - """Iterate over all contexts in the graph - - If triple is specified, iterate over all contexts the triple is in. - """ - for context in self.store.contexts(triple): - if isinstance(context, Graph): - yield context - else: - yield self.get_context(context) - - def get_context(self, identifier, quoted=False): - """Return a context graph for the given identifier - - identifier must be a URIRef or BNode. - """ - return Graph(store=self.store, identifier=identifier, - namespace_manager=self) - - def remove_context(self, context): - """Removes the given context from the graph""" - self.store.remove((None, None, None), context) - - def context_id(self, uri, context_id=None): - """URI#context""" - uri = uri.split("#", 1)[0] - if context_id is None: - context_id = "#context" - return URIRef(context_id, base=uri) - - def parse(self, source=None, publicID=None, format="xml", - location=None, file=None, data=None, **args): - """ - Parse source adding the resulting triples to its own context - (sub graph of this graph). - - See :meth:`rdflib.graph.Graph.parse` for documentation on arguments. - - :Returns: - - The graph into which the source was parsed. In the case of n3 - it returns the root context. - """ - - source = create_input_source(source=source, publicID=publicID, - location=location, file=file, data=data, format=format) - - #id = self.context_id(self.absolutize(source.getPublicId())) - context = Graph(store=self.store, identifier= - publicID and URIRef(publicID) or source.getPublicId()) - context.remove((None, None, None)) - context.parse(source, publicID=publicID, format=format, - location=location, file=file, data=data, **args) - return context - - def __reduce__(self): - return (ConjunctiveGraph, (self.store, self.identifier)) - - -class QuotedGraph(Graph): - """ - Quoted Graphs are intended to implement Notation 3 formulae. They are - associated with a required identifier that the N3 parser *must* provide - in order to maintain consistent formulae identification for scenarios - such as implication and other such processing. - """ - def __init__(self, store, identifier): - super(QuotedGraph, self).__init__(store, identifier) - - def add(self, triple): - """Add a triple with self as context""" - self.store.add(triple, self, quoted=True) - - def addN(self, quads): - """Add a sequence of triple with context""" - self.store.addN((s,p,o,c) for s, p, o, c in quads - if isinstance(c, QuotedGraph) - and c.identifier is self.identifier) - - def n3(self): - """Return an n3 identifier for the Graph""" - return "{%s}" % self.identifier.n3() - - def __str__(self): - identifier = self.identifier.n3() - label = self.store.__class__.__name__ - pattern = ("{this rdflib.identifier %s;rdflib:storage " - "[a rdflib:Store;rdfs:label '%s']}") - return pattern % (identifier, label) - - def __reduce__(self): - return (QuotedGraph, (self.store, self.identifier)) - - -class GraphValue(QuotedGraph): - def __init__(self, store, identifier=None, graph=None): - if graph is not None: - assert identifier is None - np = store.node_pickler - identifier = md5() - s = list(graph.triples((None, None, None))) - s.sort() - for t in s: - identifier.update(b("^").join((np.dumps(i) for i in t))) - identifier = URIRef("data:%s" % identifier.hexdigest()) - super(GraphValue, self).__init__(store, identifier) - for t in graph: - store.add(t, context=self) - else: - super(GraphValue, self).__init__(store, identifier) - - - def add(self, triple): - raise Exception("not mutable") - - def remove(self, triple): - raise Exception("not mutable") - - def __reduce__(self): - return (GraphValue, (self.store, self.identifier,)) - - -class Seq(object): - """Wrapper around an RDF Seq resource - - It implements a container type in Python with the order of the items - returned corresponding to the Seq content. It is based on the natural - ordering of the predicate names _1, _2, _3, etc, which is the - 'implementation' of a sequence in RDF terms. - """ - - def __init__(self, graph, subject): - """Parameters: - - - graph: - the graph containing the Seq - - - subject: - the subject of a Seq. Note that the init does not - check whether this is a Seq, this is done in whoever - creates this instance! - """ - - _list = self._list = list() - LI_INDEX = URIRef(str(RDF) + "_") - for (p, o) in graph.predicate_objects(subject): - if p.startswith(LI_INDEX): #!= RDF.Seq: # - i = int(p.replace(LI_INDEX, '')) - _list.append((i, o)) - - # here is the trick: the predicates are _1, _2, _3, etc. Ie, - # by sorting the keys (by integer) we have what we want! - _list.sort() - - def toPython(self): - return self - - def __iter__(self): - """Generator over the items in the Seq""" - for _, item in self._list: - yield item - - def __len__(self): - """Length of the Seq""" - return len(self._list) - - def __getitem__(self, index): - """Item given by index from the Seq""" - index, item = self._list.__getitem__(index) - return item - - -class BackwardCompatGraph(ConjunctiveGraph): - - def __init__(self, backend='default'): - warnings.warn("Use ConjunctiveGraph instead. " - "( from rdflib.graph import ConjunctiveGraph )", - DeprecationWarning, stacklevel=2) - super(BackwardCompatGraph, self).__init__(store=backend) - - def __get_backend(self): - return self.store - backend = property(__get_backend) - - def open(self, configuration, create=True): - return ConjunctiveGraph.open(self, configuration, create) - - def add(self, (s, p, o), context=None): - """Add to to the given context or to the default context""" - if context is not None: - c = self.get_context(context) - assert c.identifier == context, "%s != %s" % \ - (c.identifier, context) - else: - c = self.default_context - self.store.add((s, p, o), context=c, quoted=False) - - def remove(self, (s, p, o), context=None): - """Remove from the given context or from the default context""" - if context is not None: - context = self.get_context(context) - self.store.remove((s, p, o), context) - - def triples(self, (s, p, o), context=None): - """Iterate over all the triples in the entire graph""" - if context is not None: - c = self.get_context(context) - assert c.identifier == context - else: - c = None - for (s, p, o), cg in self.store.triples((s, p, o), c): - yield (s, p, o) - - def __len__(self, context=None): - """Number of triples in the entire graph""" - if context is not None: - context = self.get_context(context) - return self.store.__len__(context) - - def get_context(self, identifier, quoted=False): - """Return a context graph for the given identifier - - identifier must be a URIRef or BNode. - """ - assert isinstance(identifier, URIRef) or \ - isinstance(identifier, BNode), type(identifier) - if quoted: - assert False - return QuotedGraph(self.store, identifier) - #return QuotedGraph(self.store, Graph(store=self.store, - # identifier=identifier)) - else: - return Graph(store=self.store, identifier=identifier, - namespace_manager=self) - #return Graph(self.store, Graph(store=self.store, - # identifier=identifier)) - - def remove_context(self, context): - """Remove the given context from the graph""" - self.store.remove((None, None, None), self.get_context(context)) - - def contexts(self, triple=None): - """Iterate over all contexts in the graph - - If triple is specified, iterate over all contexts the triple is in. - """ - for context in self.store.contexts(triple): - yield context.identifier - - def subjects(self, predicate=None, object=None, context=None): - """Generate subjects with the given predicate and object""" - for s, p, o in self.triples((None, predicate, object), context): - yield s - - def predicates(self, subject=None, object=None, context=None): - """Generate predicates with the given subject and object""" - for s, p, o in self.triples((subject, None, object), context): - yield p - - def objects(self, subject=None, predicate=None, context=None): - """Generate objects with the given subject and predicate""" - for s, p, o in self.triples((subject, predicate, None), context): - yield o - - def subject_predicates(self, object=None, context=None): - """Generate (subject, predicate) tuples for the given object""" - for s, p, o in self.triples((None, None, object), context): - yield s, p - - def subject_objects(self, predicate=None, context=None): - """Generate (subject, object) tuples for the given predicate""" - for s, p, o in self.triples((None, predicate, None), context): - yield s, o - - def predicate_objects(self, subject=None, context=None): - """Generate (predicate, object) tuples for the given subject""" - for s, p, o in self.triples((subject, None, None), context): - yield p, o - - def __reduce__(self): - return (BackwardCompatGraph, (self.store, self.identifier)) - - def save(self, destination, format="xml", base=None, encoding=None): - warnings.warn("Use serialize method instead. ", - DeprecationWarning, stacklevel=2) - self.serialize(destination=destination, format=format, base=base, - encoding=encoding) - -class ModificationException(Exception): - - def __init__(self): - pass - - def __str__(self): - return ("Modifications and transactional operations not allowed on " - "ReadOnlyGraphAggregate instances") - -class UnSupportedAggregateOperation(Exception): - - def __init__(self): - pass - - def __str__(self): - return ("This operation is not supported by ReadOnlyGraphAggregate " - "instances") - -class ReadOnlyGraphAggregate(ConjunctiveGraph): - """Utility class for treating a set of graphs as a single graph - - Only read operations are supported (hence the name). Essentially a - ConjunctiveGraph over an explicit subset of the entire store. - """ - - def __init__(self, graphs, store='default'): - if store is not None: - super(ReadOnlyGraphAggregate, self).__init__(store) - Graph.__init__(self, store) - self.__namespace_manager = None - - assert isinstance(graphs, list) and graphs\ - and [g for g in graphs if isinstance(g, Graph)],\ - "graphs argument must be a list of Graphs!!" - self.graphs = graphs - - def __repr__(self): - return "" % len(self.graphs) - - def destroy(self, configuration): - raise ModificationException() - - #Transactional interfaces (optional) - def commit(self): - raise ModificationException() - - def rollback(self): - raise ModificationException() - - def open(self, configuration, create=False): - # TODO: is there a use case for this method? - for graph in self.graphs: - graph.open(self, configuration, create) - - def close(self): - for graph in self.graphs: - graph.close() - - def add(self, (s, p, o)): - raise ModificationException() - - def addN(self, quads): - raise ModificationException() - - def remove(self, (s, p, o)): - raise ModificationException() - - def triples(self, (s, p, o)): - for graph in self.graphs: - for s1, p1, o1 in graph.triples((s, p, o)): - yield (s1, p1, o1) - - def __contains__(self, triple_or_quad): - context = None - if len(triple_or_quad) == 4: - context = triple_or_quad[3] - for graph in self.graphs: - if context is None or graph.identifier == context.identifier: - if triple_or_quad[:3] in graph: - return True - return False - - def quads(self, (s,p,o)): - """Iterate over all the quads in the entire aggregate graph""" - for graph in self.graphs: - for s1, p1, o1 in graph.triples((s, p, o)): - yield (s1, p1, o1, graph) - - def __len__(self): - return sum(len(g) for g in self.graphs) - - def __hash__(self): - raise UnSupportedAggregateOperation() - - def __cmp__(self, other): - if other is None: - return -1 - elif isinstance(other, Graph): - return -1 - elif isinstance(other, ReadOnlyGraphAggregate): - return cmp(self.graphs, other.graphs) - else: - return -1 - - def __iadd__(self, other): - raise ModificationException() - - def __isub__(self, other): - raise ModificationException() - - # Conv. methods - - def triples_choices(self, (subject, predicate, object_), context=None): - for graph in self.graphs: - choices = graph.triples_choices((subject, predicate, object_)) - for (s, p, o) in choices: - yield (s, p, o) - - def qname(self, uri): - if hasattr(self,'namespace_manager') and self.namespace_manager: - return self.namespace_manager.qname(uri) - raise UnSupportedAggregateOperation() - - def compute_qname(self, uri, generate=True): - if hasattr(self,'namespace_manager') and self.namespace_manager: - return self.namespace_manager.compute_qname(uri, generate) - raise UnSupportedAggregateOperation() - - def bind(self, prefix, namespace, override=True): - raise UnSupportedAggregateOperation() - - def namespaces(self): - if hasattr(self,'namespace_manager'): - for prefix, namespace in self.namespace_manager.namespaces(): - yield prefix, namespace - else: - for graph in self.graphs: - for prefix, namespace in graph.namespaces(): - yield prefix, namespace - - def absolutize(self, uri, defrag=1): - raise UnSupportedAggregateOperation() - - def parse(self, source, publicID=None, format="xml", **args): - raise ModificationException() - - def n3(self): - raise UnSupportedAggregateOperation() - - def __reduce__(self): - raise UnSupportedAggregateOperation() - - -def test(): - import doctest - doctest.testmod() - -if __name__ == '__main__': - test() diff --git a/doc/rdflib3/graph.pyc b/doc/rdflib3/graph.pyc deleted file mode 100644 index b40d06f..0000000 Binary files a/doc/rdflib3/graph.pyc and /dev/null differ diff --git a/doc/rdflib3/namespace.py b/doc/rdflib3/namespace.py deleted file mode 100644 index e9848ad..0000000 --- a/doc/rdflib3/namespace.py +++ /dev/null @@ -1,406 +0,0 @@ -from __future__ import generators -from rdflib.py3compat import format_doctest_out - -__doc__ = format_doctest_out(""" -=================== -Namespace Utilities -=================== - -RDFLib provides mechanisms for managing Namespaces. - -In particular, there is a :class:`~rdflib.namespace.Namespace` class that takes as its argument the base URI of the namespace. - -.. code-block:: pycon - - >>> from rdflib.namespace import Namespace - >>> fuxi = Namespace('http://metacognition.info/ontologies/FuXi.n3#') - -Fully qualified URIs in the namespace can be constructed either by attribute or by dictionary access on Namespace instances: - -.. code-block:: pycon - - >>> fuxi.ruleBase - rdflib.term.URIRef(%(u)s'http://metacognition.info/ontologies/FuXi.n3#ruleBase') - >>> fuxi['ruleBase'] - rdflib.term.URIRef(%(u)s'http://metacognition.info/ontologies/FuXi.n3#ruleBase') - -Automatic handling of unknown predicates ------------------------------------------ - -As a programming convenience, a namespace binding is automatically -created when :class:`rdflib.term.URIRef` predicates are added to the graph. - -Importable namespaces ------------------------ - -The following namespaces are available by directly importing from rdflib: - -* RDF -* RDFS -* OWL -* XSD - -.. code-block:: pycon - - >>> from rdflib import OWL - >>> OWL.seeAlso - rdflib.term.URIRef(%(u)s'http://www.w3.org/2002/07/owl#seeAlso') - -""") - -import logging - -_logger = logging.getLogger(__name__) - -import os - -from urlparse import urljoin, urldefrag -from urllib import pathname2url - -from rdflib.term import URIRef, Variable, _XSD_PFX - -__all__ = ['is_ncname', 'split_uri', 'Namespace', 'NamespaceDict', 'ClosedNamespace', 'NamespaceManager', 'XMLNS', 'RDF', 'RDFS', 'XSD', 'OWL', 'SKOS'] - -class Namespace(URIRef): - - @property - def title(self): - return URIRef(self + 'title') - - def term(self, name): - return URIRef(self + name) - - def __getitem__(self, key, default=None): - return self.term(key) - - def __getattr__(self, name): - if name.startswith("__"): # ignore any special Python names! - raise AttributeError - else: - return self.term(name) - - -class NamespaceDict(dict): - - def __new__(cls, uri=None, context=None): - inst = dict.__new__(cls) - inst.uri = uri # TODO: do we need to set these both here and in __init__ ?? - inst.__context = context - return inst - - def __init__(self, uri, context=None): - self.uri = uri - self.__context = context - - def term(self, name): - uri = self.get(name) - if uri is None: - uri = URIRef(self.uri + name) - if self.__context and (uri, None, None) not in self.__context: - _logger.warning("%s not defined" % uri) - self[name] = uri - return uri - - def __getattr__(self, name): - return self.term(name) - - def __getitem__(self, key, default=None): - return self.term(key) or default - - def __str__(self): - return self.uri - - def __repr__(self): - return """rdflib.namespace.NamespaceDict('%s')""" % str(self.uri) - - -class ClosedNamespace(object): - """ - - """ - - def __init__(self, uri, terms): - self.uri = uri - self.__uris = {} - for t in terms: - self.__uris[t] = URIRef(self.uri + t) - - def term(self, name): - uri = self.__uris.get(name) - if uri is None: - raise Exception("term '%s' not in namespace '%s'" % (name, self.uri)) - else: - return uri - - def __getitem__(self, key, default=None): - return self.term(key) - - def __getattr__(self, name): - if name.startswith("__"): # ignore any special Python names! - raise AttributeError - else: - return self.term(name) - - def __str__(self): - return str(self.uri) - - def __repr__(self): - return """rdf.namespace.ClosedNamespace('%s')""" % str(self.uri) - - -class _RDFNamespace(ClosedNamespace): - def __init__(self): - super(_RDFNamespace, self).__init__( - URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#"), - terms=[ - # Syntax Names - "RDF", "Description", "ID", "about", "parseType", "resource", "li", "nodeID", "datatype", - - # RDF Classes - "Seq", "Bag", "Alt", "Statement", "Property", "XMLLiteral", "List", "PlainLiteral", - - # RDF Properties - "subject", "predicate", "object", "type", "value", "first", "rest", - # and _n where n is a non-negative integer - - # RDF Resources - "nil"] - ) - - def term(self, name): - try: - i = int(name) - return URIRef("%s_%s" % (self.uri, i)) - except ValueError, e: - return super(_RDFNamespace, self).term(name) - -RDF = _RDFNamespace() - -RDFS = ClosedNamespace( - uri = URIRef("http://www.w3.org/2000/01/rdf-schema#"), - terms = [ - "Resource", "Class", "subClassOf", "subPropertyOf", "comment", "label", - "domain", "range", "seeAlso", "isDefinedBy", "Literal", "Container", - "ContainerMembershipProperty", "member", "Datatype"] - ) - -OWL = Namespace('http://www.w3.org/2002/07/owl#') - -XSD = Namespace(_XSD_PFX) - -SKOS = Namespace('http://www.w3.org/2004/02/skos/core#') - -class NamespaceManager(object): - """ - - Sample usage from FuXi ... - - .. code-block:: python - - ruleStore = N3RuleStore(additionalBuiltins=additionalBuiltins) - nsMgr = NamespaceManager(Graph(ruleStore)) - ruleGraph = Graph(ruleStore,namespace_manager=nsMgr) - - - and ... - - .. code-block:: pycon - - >>> from rdflib import Graph, OWL - >>> exNs = Namespace('http://example.com/') - >>> namespace_manager = NamespaceManager(Graph()) - >>> namespace_manager.bind('ex', exNs, override=False) - >>> namespace_manager.bind('owl', OWL, override=False) - >>> g = Graph() - >>> g.namespace_manager = namespace_manager - - """ - def __init__(self, graph): - self.graph = graph - self.__cache = {} - self.__log = None - self.bind("xml", u"http://www.w3.org/XML/1998/namespace") - self.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#") - self.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#") - - def reset(self): - self.__cache = {} - - def __get_store(self): - return self.graph.store - store = property(__get_store) - - def qname(self, uri): - prefix, namespace, name = self.compute_qname(uri) - if prefix=="": - return name - else: - return ":".join((prefix, name)) - - def normalizeUri(self,rdfTerm): - """ - Takes an RDF Term and 'normalizes' it into a QName (using the registered prefix) - or (unlike compute_qname) the Notation 3 form for URIs: <...URI...> - """ - try: - namespace, name = split_uri(rdfTerm) - namespace = URIRef(namespace) - except: - if isinstance(rdfTerm,Variable): - return "?%s"%rdfTerm - else: - return "<%s>"%rdfTerm - prefix = self.store.prefix(namespace) - if prefix is None and isinstance(rdfTerm,Variable): - return "?%s"%rdfTerm - elif prefix is None: - return "<%s>"%rdfTerm - else: - qNameParts = self.compute_qname(rdfTerm) - return ':'.join([qNameParts[0],qNameParts[-1]]) - - def compute_qname(self, uri, generate=True): - if not uri in self.__cache: - namespace, name = split_uri(uri) - namespace = URIRef(namespace) - prefix = self.store.prefix(namespace) - if prefix is None: - if not generate: - raise Exception("No known prefix for %s and generate=False") - num = 1 - while 1: - prefix = "ns%s" % num - if not self.store.namespace(prefix): - break - num += 1 - self.bind(prefix, namespace) - self.__cache[uri] = (prefix, namespace, name) - return self.__cache[uri] - - def bind(self, prefix, namespace, override=True): - namespace = URIRef(namespace) - # When documenting explain that override only applies in what cases - if prefix is None: - prefix = '' - bound_namespace = self.store.namespace(prefix) - if bound_namespace and bound_namespace!=namespace: - # prefix already in use for different namespace - # - # append number to end of prefix until we find one - # that's not in use. - if not prefix: - prefix = "default" - num = 1 - while 1: - new_prefix = "%s%s" % (prefix, num) - if not self.store.namespace(new_prefix): - break - num +=1 - self.store.bind(new_prefix, namespace) - else: - bound_prefix = self.store.prefix(namespace) - if bound_prefix is None: - self.store.bind(prefix, namespace) - elif bound_prefix == prefix: - pass # already bound - else: - if override or bound_prefix.startswith("_"): # or a generated prefix - self.store.bind(prefix, namespace) - - def namespaces(self): - for prefix, namespace in self.store.namespaces(): - namespace = URIRef(namespace) - yield prefix, namespace - - def absolutize(self, uri, defrag=1): - base = urljoin("file:", pathname2url(os.getcwd())) - result = urljoin("%s/" % base, uri, allow_fragments=not defrag) - if defrag: - result = urldefrag(result)[0] - if not defrag: - if uri and uri[-1]=="#" and result[-1]!="#": - result = "%s#" % result - return URIRef(result) - -# From: http://www.w3.org/TR/REC-xml#NT-CombiningChar -# -# * Name start characters must have one of the categories Ll, Lu, Lo, -# Lt, Nl. -# -# * Name characters other than Name-start characters must have one of -# the categories Mc, Me, Mn, Lm, or Nd. -# -# * Characters in the compatibility area (i.e. with character code -# greater than #xF900 and less than #xFFFE) are not allowed in XML -# names. -# -# * Characters which have a font or compatibility decomposition -# (i.e. those with a "compatibility formatting tag" in field 5 of the -# database -- marked by field 5 beginning with a "<") are not allowed. -# -# * The following characters are treated as name-start characters rather -# than name characters, because the property file classifies them as -# Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6. -# -# * Characters #x20DD-#x20E0 are excluded (in accordance with Unicode -# 2.0, section 5.14). -# -# * Character #x00B7 is classified as an extender, because the property -# list so identifies it. -# -# * Character #x0387 is added as a name character, because #x00B7 is its -# canonical equivalent. -# -# * Characters ':' and '_' are allowed as name-start characters. -# -# * Characters '-' and '.' are allowed as name characters. - -from unicodedata import category, decomposition - -NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"] -NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"] -ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_"] - -# http://www.w3.org/TR/REC-xml-names/#NT-NCName -# [4] NCName ::= (Letter | '_') (NCNameChar)* /* An XML Name, minus -# the ":" */ -# [5] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar -# | Extender - -def is_ncname(name): - first = name[0] - if first=="_" or category(first) in NAME_START_CATEGORIES: - for i in xrange(1, len(name)): - c = name[i] - if not category(c) in NAME_CATEGORIES: - if c in ALLOWED_NAME_CHARS: - continue - return 0 - #if in compatibility area - #if decomposition(c)!='': - # return 0 - - return 1 - else: - return 0 - -XMLNS = "http://www.w3.org/XML/1998/namespace" - -def split_uri(uri): - if uri.startswith(XMLNS): - return (XMLNS, uri.split(XMLNS)[1]) - length = len(uri) - for i in xrange(0, length): - c = uri[-i-1] - if not category(c) in NAME_CATEGORIES: - if c in ALLOWED_NAME_CHARS: - continue - for j in xrange(-1-i, length): - if category(uri[j]) in NAME_START_CATEGORIES or uri[j]=="_": - ns = uri[:j] - if not ns: - break - ln = uri[j:] - return (ns, ln) - break - raise Exception("Can't split '%s'" % uri) diff --git a/doc/rdflib3/namespace.pyc b/doc/rdflib3/namespace.pyc deleted file mode 100644 index f77a1ba..0000000 Binary files a/doc/rdflib3/namespace.pyc and /dev/null differ diff --git a/doc/rdflib3/parser.py b/doc/rdflib3/parser.py deleted file mode 100644 index 0f443a5..0000000 --- a/doc/rdflib3/parser.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Parser plugin interface. - -This module defines the parser plugin interface and contains other -related parser support code. - -The module is mainly useful for those wanting to write a parser that -can plugin to rdflib. If you are wanting to invoke a parser you likely -want to do so through the Graph class parse method. - -""" - -import os -import __builtin__ -import warnings -from urllib import pathname2url, url2pathname -from urllib2 import urlopen, Request, HTTPError -from urlparse import urljoin -from rdflib.py3compat import PY3 -if PY3: - from io import BytesIO -else: - from StringIO import StringIO as BytesIO -from xml.sax import xmlreader -from xml.sax.saxutils import prepare_input_source -import types - -from rdflib import __version__ -from rdflib.term import URIRef -from rdflib.namespace import Namespace - -__all__ = ['Parser', 'InputSource', 'StringInputSource', 'URLInputSource', 'FileInputSource'] - -class Parser(object): - - def __init__(self): - pass - - def parse(self, source, sink): - pass - - -class InputSource(xmlreader.InputSource, object): - """ - TODO: - """ - - def __init__(self, system_id=None): - xmlreader.InputSource.__init__(self, system_id=system_id) - self.content_type = None - - -class StringInputSource(InputSource): - """ - TODO: - """ - - def __init__(self, value, system_id=None): - super(StringInputSource, self).__init__(system_id) - stream = BytesIO(value) - self.setByteStream(stream) - # TODO: - # encoding = value.encoding - # self.setEncoding(encoding) - - -headers = { - 'User-agent': 'rdflib-%s (http://rdflib.net/; eikeon@eikeon.com)' % __version__ - } - - -class URLInputSource(InputSource): - """ - TODO: - """ - - def __init__(self, system_id=None, format=None): - super(URLInputSource, self).__init__(system_id) - self.url = system_id - - # copy headers to change - myheaders=dict(headers) - if format=='application/rdf+xml': - myheaders['Accept']='application/rdf+xml, */*;q=0.1' - elif format=='n3': - myheaders['Accept']='text/n3, */*;q=0.1' - elif format=='nt': - myheaders['Accept']='text/plain, */*;q=0.1' - else: - myheaders['Accept']='application/rdf+xml,text/rdf+n3;q=0.9,application/xhtml+xml;q=0.5, */*;q=0.1' - - req = Request(system_id, None, myheaders) - file = urlopen(req) - # Fix for issue 130 https://github.com/RDFLib/rdflib/issues/130 - self.url = file.geturl() # in case redirections took place - self.setPublicId(self.url) - self.content_type = file.info().get('content-type') - self.content_type = self.content_type.split(";", 1)[0] - self.setByteStream(file) - # TODO: self.setEncoding(encoding) - - def __repr__(self): - return self.url - - -class FileInputSource(InputSource): - """ - TODO: - """ - - def __init__(self, file): - base = urljoin("file:", pathname2url(os.getcwd())) - system_id = URIRef(file.name, base=base) - super(FileInputSource, self).__init__(system_id) - self.file = file - self.setByteStream(file) - # TODO: self.setEncoding(encoding) - - def __repr__(self): - return `self.file` - - -def create_input_source(source=None, publicID=None, - location=None, file=None, data=None, format=None): - """ - Return an appropriate InputSource instance for the given - parameters. - """ - - # TODO: test that exactly one of source, location, file, and data - # is not None. - - input_source = None - - if source is not None: - if isinstance(source, InputSource): - input_source = source - else: - if isinstance(source, basestring): - location = source - elif hasattr(source, "read") and not isinstance(source, Namespace): - f = source - input_source = InputSource() - input_source.setByteStream(f) - if hasattr(f, "name"): - input_source.setSystemId(f.name) - else: - raise Exception("Unexpected type '%s' for source '%s'" % (type(source), source)) - - absolute_location = None # Further to fix for issue 130 - - if location is not None: - # Fix for Windows problem https://github.com/RDFLib/rdflib/issues/145 - if os.path.exists(location): - location = pathname2url(location) - base = urljoin("file:", "%s/" % pathname2url(os.getcwd())) - absolute_location = URIRef(location, base=base).defrag() - if absolute_location.startswith("file:///"): - filename = url2pathname(absolute_location.replace("file:///", "/")) - file = open(filename, "rb") - else: - input_source = URLInputSource(absolute_location, format) - # publicID = publicID or absolute_location # Further to fix for issue 130 - - if file is not None: - input_source = FileInputSource(file) - - if data is not None: - if isinstance(data, unicode): - data = data.encode('utf-8') - input_source = StringInputSource(data) - - if input_source is None: - raise Exception("could not create InputSource") - else: - if publicID is not None: # Further to fix for issue 130 - input_source.setPublicId(publicID) - # Further to fix for issue 130 - elif input_source.getPublicId() is None: - input_source.setPublicId(absolute_location or "") - return input_source - - diff --git a/doc/rdflib3/parser.pyc b/doc/rdflib3/parser.pyc deleted file mode 100644 index 8719d48..0000000 Binary files a/doc/rdflib3/parser.pyc and /dev/null differ diff --git a/doc/rdflib3/plugin.py b/doc/rdflib3/plugin.py deleted file mode 100644 index 7a012e9..0000000 --- a/doc/rdflib3/plugin.py +++ /dev/null @@ -1,167 +0,0 @@ -""" -Plugin support for rdf. - -There are a number of plugin points for rdf: parser, serializer, -store, query processor, and query result. Plugins can be registered -either through setuptools entry_points or by calling -rdf.plugin.register directly. - -If you have a package that uses a setuptools based setup.py you can add the -following to your setup:: - - entry_points = { - 'rdf.plugins.parser': [ - 'nt = rdf.plugins.parsers.nt:NTParser', - ], - 'rdf.plugins.serializer': [ - 'nt = rdf.plugins.serializers.NTSerializer:NTSerializer', - ], - } - -See the `setuptools dynamic discovery of services and plugins`__ for more -information. - -.. __: http://peak.telecommunity.com/DevCenter/setuptools#dynamic-discovery-of-services-and-plugins - -""" - -from rdflib.store import Store -from rdflib.parser import Parser -from rdflib.serializer import Serializer -from rdflib.query import ResultParser, ResultSerializer, Processor, Result -from rdflib.exceptions import Error - -__all__ = ['register', 'get', 'plugins', 'PluginException', 'Plugin', 'PKGPlugin'] - -entry_points = {'rdf.plugins.store': Store, - 'rdf.plugins.serializer': Serializer, - 'rdf.plugins.parser': Parser, - 'rdf.plugins.resultparser': ResultParser, - 'rdf.plugins.resultserializer': ResultSerializer, - 'rdf.plugins.queryprocessor': Processor, - 'rdf.plugins.queryresult': Result - } - -_plugins = {} - - -class PluginException(Error): - pass - - -class Plugin(object): - - def __init__(self, name, kind, module_path, class_name): - self.name = name - self.kind = kind - self.module_path = module_path - self.class_name = class_name - self._class = None - - def getClass(self): - if self._class is None: - module = __import__(self.module_path, globals(), locals(), [""]) - self._class = getattr(module, self.class_name) - return self._class - - -class PKGPlugin(Plugin): - - def __init__(self, name, kind, ep): - self.name = name - self.kind = kind - self.ep = ep - self._class = None - - def getClass(self): - if self._class is None: - self._class = self.ep.load() - return self._class - - -def register(name, kind, module_path, class_name): - """ - Register the plugin for (name, kind). The module_path and - class_name should be the path to a plugin class. - """ - p = Plugin(name, kind, module_path, class_name) - _plugins[(name, kind)] = p - - -def get(name, kind): - """ - Return the class for the specified (name, kind). Raises a - PluginException if unable to do so. - """ - try: - p = _plugins[(name, kind)] - except KeyError, e: - raise PluginException("No plugin registered for (%s, %s)" % (name, kind)) - return p.getClass() - - -try: - from pkg_resources import iter_entry_points -except ImportError: - pass # TODO: log a message -else: - # add the plugins specified via pkg_resources' EntryPoints. - for entry_point, kind in entry_points.iteritems(): - for ep in iter_entry_points(entry_point): - _plugins[(ep.name, kind)] = PKGPlugin(ep.name, kind, ep) - - -def plugins(name=None, kind=None): - """ - A generator of the plugins. - - Pass in name and kind to filter... else leave None to match all. - """ - for p in _plugins.values(): - if (name is None or name == p.name) and (kind is None or kind == p.kind): - yield p - -register('default', Store, - 'rdflib.plugins.memory', 'IOMemory') -register('IOMemory', Store, - 'rdflib.plugins.memory', 'IOMemory') -register('Sleepycat', Store, - 'rdflib.plugins.sleepycat', 'Sleepycat') - -register('xml', Serializer, - 'rdflib.plugins.serializers.rdfxml', 'XMLSerializer') -register('n3', Serializer, - 'rdflib.plugins.serializers.n3', 'N3Serializer') -register('turtle', Serializer, - 'rdflib.plugins.serializers.turtle', 'TurtleSerializer') -register('trig', Serializer, - 'rdflib.plugins.serializers.trig', 'TrigSerializer') -register('nt', Serializer, - 'rdflib.plugins.serializers.nt', 'NTSerializer') -register('pretty-xml', Serializer, - 'rdflib.plugins.serializers.rdfxml', 'PrettyXMLSerializer') -register('trix', Serializer, - 'rdflib.plugins.serializers.trix', 'TriXSerializer') -register("nquads", Serializer, - 'rdflib.plugins.serializers.nquads', 'NQuadsSerializer') - -register('application/rdf+xml', Parser, - 'rdflib.plugins.parsers.rdfxml', 'RDFXMLParser') -register('text/html', Parser, - 'rdflib.plugins.parsers.rdfa', 'RDFaParser') -register('application/xhtml+xml', Parser, - 'rdflib.plugins.parsers.rdfa', 'RDFaParser') -register('xml', Parser, - 'rdflib.plugins.parsers.rdfxml', 'RDFXMLParser') -register('n3', Parser, - 'rdflib.plugins.parsers.notation3', 'N3Parser') -register('turtle', Parser, - 'rdflib.plugins.parsers.notation3', 'TurtleParser') -register('nt', Parser, - 'rdflib.plugins.parsers.nt', 'NTParser') -register('nquads', Parser, - 'rdflib.plugins.parsers.nquads', 'NQuadsParser') -register('trix', Parser, - 'rdflib.plugins.parsers.trix', 'TriXParser') -register('rdfa', Parser, - 'rdflib.plugins.parsers.rdfa', 'RDFaParser') diff --git a/doc/rdflib3/plugin.pyc b/doc/rdflib3/plugin.pyc deleted file mode 100644 index 9c55abb..0000000 Binary files a/doc/rdflib3/plugin.pyc and /dev/null differ diff --git a/doc/rdflib3/plugins/__init__.py b/doc/rdflib3/plugins/__init__.py deleted file mode 100644 index 4622bb0..0000000 --- a/doc/rdflib3/plugins/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Default plugins for rdflib. - -This is a namespace package and contains the default plugins for -rdflib. - -""" diff --git a/doc/rdflib3/plugins/__init__.pyc b/doc/rdflib3/plugins/__init__.pyc deleted file mode 100644 index ca95f2d..0000000 Binary files a/doc/rdflib3/plugins/__init__.pyc and /dev/null differ diff --git a/doc/rdflib3/plugins/memory.py b/doc/rdflib3/plugins/memory.py deleted file mode 100644 index 7006fab..0000000 --- a/doc/rdflib3/plugins/memory.py +++ /dev/null @@ -1,570 +0,0 @@ -from __future__ import generators -from rdflib.term import BNode -from rdflib.store import Store, NO_STORE, VALID_STORE - -__all__ = ['Memory', 'IOMemory'] - -ANY = Any = None - -class Memory(Store): - """\ - An in memory implementation of a triple store. - - This triple store uses nested dictionaries to store triples. Each - triple is stored in two such indices as follows spo[s][p][o] = 1 and - pos[p][o][s] = 1. - - Authors: Michel Pelletier, Daniel Krech, Stefan Niederhauser - """ - def __init__(self, configuration=None, identifier=None): - super(Memory, self).__init__(configuration) - self.identifier = identifier - - # indexed by [subject][predicate][object] - self.__spo = {} - - # indexed by [predicate][object][subject] - self.__pos = {} - - # indexed by [predicate][object][subject] - self.__osp = {} - - self.__namespace = {} - self.__prefix = {} - - def add(self, (subject, predicate, object), context, quoted=False): - """\ - Add a triple to the store of triples. - """ - # add dictionary entries for spo[s][p][p] = 1 and pos[p][o][s] - # = 1, creating the nested dictionaries where they do not yet - # exits. - spo = self.__spo - try: - po = spo[subject] - except: - po = spo[subject] = {} - try: - o = po[predicate] - except: - o = po[predicate] = {} - o[object] = 1 - - pos = self.__pos - try: - os = pos[predicate] - except: - os = pos[predicate] = {} - try: - s = os[object] - except: - s = os[object] = {} - s[subject] = 1 - - osp = self.__osp - try: - sp = osp[object] - except: - sp = osp[object] = {} - try: - p = sp[subject] - except: - p = sp[subject] = {} - p[predicate] = 1 - - def remove(self, (subject, predicate, object), context=None): - for (subject, predicate, object), c in self.triples( - (subject, predicate, object)): - del self.__spo[subject][predicate][object] - del self.__pos[predicate][object][subject] - del self.__osp[object][subject][predicate] - - def triples(self, (subject, predicate, object), context=None): - """A generator over all the triples matching """ - if subject!=ANY: # subject is given - spo = self.__spo - if subject in spo: - subjectDictionary = spo[subject] - if predicate!=ANY: # subject+predicate is given - if predicate in subjectDictionary: - if object!=ANY: # subject+predicate+object is given - if object in subjectDictionary[predicate]: - yield (subject, predicate, object), \ - self.__contexts() - else: # given object not found - pass - else: # subject+predicate is given, object unbound - for o in subjectDictionary[predicate].keys(): - yield (subject, predicate, o), \ - self.__contexts() - else: # given predicate not found - pass - else: # subject given, predicate unbound - for p in subjectDictionary.keys(): - if object!=ANY: # object is given - if object in subjectDictionary[p]: - yield (subject, p, object), self.__contexts() - else: # given object not found - pass - else: # object unbound - for o in subjectDictionary[p].keys(): - yield (subject, p, o), self.__contexts() - else: # given subject not found - pass - elif predicate!=ANY: # predicate is given, subject unbound - pos = self.__pos - if predicate in pos: - predicateDictionary = pos[predicate] - if object!=ANY: # predicate+object is given, subject unbound - if object in predicateDictionary: - for s in predicateDictionary[object].keys(): - yield (s, predicate, object), self.__contexts() - else: # given object not found - pass - else: # predicate is given, object+subject unbound - for o in predicateDictionary.keys(): - for s in predicateDictionary[o].keys(): - yield (s, predicate, o), self.__contexts() - elif object!=ANY: # object is given, subject+predicate unbound - osp = self.__osp - if object in osp: - objectDictionary = osp[object] - for s in objectDictionary.keys(): - for p in objectDictionary[s].keys(): - yield (s, p, object), self.__contexts() - else: # subject+predicate+object unbound - spo = self.__spo - for s in spo.keys(): - subjectDictionary = spo[s] - for p in subjectDictionary.keys(): - for o in subjectDictionary[p].keys(): - yield (s, p, o), self.__contexts() - - def __len__(self, context=None): - #@@ optimize - i = 0 - for triple in self.triples((None, None, None)): - i += 1 - return i - - def bind(self, prefix, namespace): - self.__prefix[namespace] = prefix - self.__namespace[prefix] = namespace - - def namespace(self, prefix): - return self.__namespace.get(prefix, None) - - def prefix(self, namespace): - return self.__prefix.get(namespace, None) - - def namespaces(self): - for prefix, namespace in self.__namespace.iteritems(): - yield prefix, namespace - - def __contexts(self): - return (c for c in []) # TODO: best way to return empty generator - -class IOMemory(Store): - """\ - An integer-key-optimized-context-aware-in-memory store. - - Uses nested dictionaries to store triples and context. Each triple - is stored in six such indices as follows cspo[c][s][p][o] = 1 - and cpos[c][p][o][s] = 1 and cosp[c][o][s][p] = 1 as well as - spo[s][p][o] = [c] and pos[p][o][s] = [c] and pos[o][s][p] = [c] - - Context information is used to track the 'source' of the triple - data for merging, unmerging, remerging purposes. context aware - store stores consume more memory size than non context stores. - - """ - - context_aware = True - formula_aware = True - - def __init__(self, configuration=None, identifier=None): - super(IOMemory, self).__init__() - - # indexed by [context][subject][predicate][object] = 1 - self.cspo = self.createIndex() - - # indexed by [context][predicate][object][subject] = 1 - self.cpos = self.createIndex() - - # indexed by [context][object][subject][predicate] = 1 - self.cosp = self.createIndex() - - # indexed by [subject][predicate][object] = [context] - self.spo = self.createIndex() - - # indexed by [predicate][object][subject] = [context] - self.pos = self.createIndex() - - # indexed by [object][subject][predicate] = [context] - self.osp = self.createIndex() - - # indexes integer keys to identifiers - self.forward = self.createForward() - - # reverse index of forward - self.reverse = self.createReverse() - - self.identifier = identifier or BNode() - - self.__namespace = self.createPrefixMap() - self.__prefix = self.createPrefixMap() - - def open(self, configuration, create=False): - if not create: - # An IOMemory Store never exists. - return NO_STORE - else: - return VALID_STORE - - def bind(self, prefix, namespace): - self.__prefix[namespace] = prefix - self.__namespace[prefix] = namespace - - def namespace(self, prefix): - return self.__namespace.get(prefix, None) - - def prefix(self, namespace): - return self.__prefix.get(namespace, None) - - def namespaces(self): - for prefix, namespace in self.__namespace.iteritems(): - yield prefix, namespace - - def defaultContext(self): - return self.default_context - - def addContext(self, context): - """ Add context w/o adding statement. Dan you can remove this if you want """ - - if not self.reverse.has_key(context): - ci=randid() - while not self.forward.insert(ci, context): - ci=randid() - self.reverse[context] = ci - - def intToIdentifier(self, (si, pi, oi)): - """ Resolve an integer triple into identifers. """ - return (self.forward[si], self.forward[pi], self.forward[oi]) - - def identifierToInt(self, (s, p, o)): - """ Resolve an identifier triple into integers. """ - return (self.reverse[s], self.reverse[p], self.reverse[o]) - - def uniqueSubjects(self, context=None): - if context is None: - index = self.spo - else: - index = self.cspo[context] - for si in index.keys(): - yield self.forward[si] - - def uniquePredicates(self, context=None): - if context is None: - index = self.pos - else: - index = self.cpos[context] - for pi in index.keys(): - yield self.forward[pi] - - def uniqueObjects(self, context=None): - if context is None: - index = self.osp - else: - index = self.cosp[context] - for oi in index.keys(): - yield self.forward[oi] - - def createForward(self): - return {} - - def createReverse(self): - return {} - - def createIndex(self): - return {} - - def createPrefixMap(self): - return {} - - def add(self, triple, context, quoted=False): - """\ - Add a triple to the store. - """ - Store.add(self, triple, context, quoted) - for triple, cg in self.triples(triple, context): - #triple is already in the store. - return - - subject, predicate, object = triple - - f = self.forward - r = self.reverse - - # assign keys for new identifiers - - if not r.has_key(subject): - si=randid() - while f.has_key(si): - si=randid() - f[si] = subject - r[subject] = si - else: - si = r[subject] - - if not r.has_key(predicate): - pi=randid() - while f.has_key(pi): - pi=randid() - f[pi] = predicate - r[predicate] = pi - else: - pi = r[predicate] - - if not r.has_key(object): - oi=randid() - while f.has_key(oi): - oi=randid() - f[oi] = object - r[object] = oi - else: - oi = r[object] - - if not r.has_key(context): - ci=randid() - while f.has_key(ci): - ci=randid() - f[ci] = context - r[context] = ci - else: - ci = r[context] - - # add dictionary entries for cspo[c][s][p][o] = 1, - # cpos[c][p][o][s] = 1, and cosp[c][o][s][p] = 1, creating the - # nested {} where they do not yet exits. - self._setNestedIndex(self.cspo, ci, si, pi, oi) - self._setNestedIndex(self.cpos, ci, pi, oi, si) - self._setNestedIndex(self.cosp, ci, oi, si, pi) - - if not quoted: - self._setNestedIndex(self.spo, si, pi, oi, ci) - self._setNestedIndex(self.pos, pi, oi, si, ci) - self._setNestedIndex(self.osp, oi, si, pi, ci) - - def _setNestedIndex(self, index, *keys): - for key in keys[:-1]: - if not index.has_key(key): - index[key] = self.createIndex() - index = index[key] - index[keys[-1]] = 1 - - - def _removeNestedIndex(self, index, *keys): - """ Remove context from the list of contexts in a nested index. - - Afterwards, recursively remove nested indexes when they became empty. - """ - parents = [] - for key in keys[:-1]: - parents.append(index) - index = index[key] - del index[keys[-1]] - - n = len(parents) - for i in xrange(n): - index = parents[n-1-i] - key = keys[n-1-i] - if len(index[key]) == 0: - del index[key] - - def remove(self, triple, context=None): - Store.remove(self, triple, context) - if context is not None: - if context == self: - context = None - - f = self.forward - r = self.reverse - if context is None: - for triple, cg in self.triples(triple): - subject, predicate, object = triple - si, pi, oi = self.identifierToInt((subject, predicate, object)) - contexts = list(self.contexts(triple)) - for context in contexts: - ci = r[context] - del self.cspo[ci][si][pi][oi] - del self.cpos[ci][pi][oi][si] - del self.cosp[ci][oi][si][pi] - - self._removeNestedIndex(self.spo, si, pi, oi, ci) - self._removeNestedIndex(self.pos, pi, oi, si, ci) - self._removeNestedIndex(self.osp, oi, si, pi, ci) - # grr!! hafta ref-count these before you can collect them dumbass! - #del f[si], f[pi], f[oi] - #del r[subject], r[predicate], r[object] - else: - subject, predicate, object = triple - ci = r.get(context, None) - if ci: - for triple, cg in self.triples(triple, context): - si, pi, oi = self.identifierToInt(triple) - del self.cspo[ci][si][pi][oi] - del self.cpos[ci][pi][oi][si] - del self.cosp[ci][oi][si][pi] - - try: - self._removeNestedIndex(self.spo, si, pi, oi, ci) - self._removeNestedIndex(self.pos, pi, oi, si, ci) - self._removeNestedIndex(self.osp, oi, si, pi, ci) - except KeyError: - # the context may be a quoted one in which - # there will not be a triple in spo, pos or - # osp. So ignore any KeyErrors - pass - # TODO delete references to resources in self.forward/self.reverse - # that are not in use anymore... - - if subject is None and predicate is None and object is None: - # remove context - try: - ci = self.reverse[context] - del self.cspo[ci], self.cpos[ci], self.cosp[ci] - except KeyError: - # TODO: no exception when removing non-existant context? - pass - - - def triples(self, triple, context=None): - """A generator over all the triples matching """ - - if context is not None: - if context == self: - context = None - - subject, predicate, object = triple - ci = si = pi = oi = Any - - if context is None: - spo = self.spo - pos = self.pos - osp = self.osp - else: - try: - ci = self.reverse[context] # TODO: Really ignore keyerror here - spo = self.cspo[ci] - pos = self.cpos[ci] - osp = self.cosp[ci] - except KeyError: - return - try: - if subject is not Any: - si = self.reverse[subject] # throws keyerror if subject doesn't exist ;( - if predicate is not Any: - pi = self.reverse[predicate] - if object is not Any: - oi = self.reverse[object] - except KeyError, e: - return #raise StopIteration - - if si != Any: # subject is given - if spo.has_key(si): - subjectDictionary = spo[si] - if pi != Any: # subject+predicate is given - if subjectDictionary.has_key(pi): - if oi!= Any: # subject+predicate+object is given - if subjectDictionary[pi].has_key(oi): - ss, pp, oo = self.intToIdentifier((si, pi, oi)) - yield (ss, pp, oo), (c for c in self.__icontexts((si, pi, oi))) - else: # given object not found - pass - else: # subject+predicate is given, object unbound - for o in subjectDictionary[pi].keys(): - ss, pp, oo = self.intToIdentifier((si, pi, o)) - yield (ss, pp, oo), (c for c in self.__icontexts((si, pi, o))) - else: # given predicate not found - pass - else: # subject given, predicate unbound - for p in subjectDictionary.keys(): - if oi != Any: # object is given - if subjectDictionary[p].has_key(oi): - ss, pp, oo = self.intToIdentifier((si, p, oi)) - yield (ss, pp, oo), (c for c in self.__icontexts((si, p, oi))) - else: # given object not found - pass - else: # object unbound - for o in subjectDictionary[p].keys(): - ss, pp, oo = self.intToIdentifier((si, p, o)) - yield (ss, pp, oo), (c for c in self.__icontexts((si, p, o))) - else: # given subject not found - pass - elif pi != Any: # predicate is given, subject unbound - if pos.has_key(pi): - predicateDictionary = pos[pi] - if oi != Any: # predicate+object is given, subject unbound - if predicateDictionary.has_key(oi): - for s in predicateDictionary[oi].keys(): - ss, pp, oo = self.intToIdentifier((s, pi, oi)) - yield (ss, pp, oo), (c for c in self.__icontexts((s, pi, oi))) - else: # given object not found - pass - else: # predicate is given, object+subject unbound - for o in predicateDictionary.keys(): - for s in predicateDictionary[o].keys(): - ss, pp, oo = self.intToIdentifier((s, pi, o)) - yield (ss, pp, oo), (c for c in self.__icontexts((s, pi, o))) - elif oi != Any: # object is given, subject+predicate unbound - if osp.has_key(oi): - objectDictionary = osp[oi] - for s in objectDictionary.keys(): - for p in objectDictionary[s].keys(): - ss, pp, oo = self.intToIdentifier((s, p, oi)) - yield (ss, pp, oo), (c for c in self.__icontexts((s, p, oi))) - else: # subject+predicate+object unbound - for s in spo.keys(): - subjectDictionary = spo[s] - for p in subjectDictionary.keys(): - for o in subjectDictionary[p].keys(): - ss, pp, oo = self.intToIdentifier((s, p, o)) - yield (ss, pp, oo), (c for c in self.__icontexts((s, p, o))) - - def __len__(self, context=None): - - if context is not None: - if context == self: - context = None - - # TODO: for eff. implementation - count = 0 - for triple, cg in self.triples((Any, Any, Any), context): - count += 1 - return count - - - - def contexts(self, triple=None): - if triple: - si, pi, oi = self.identifierToInt(triple) - for ci in self.spo[si][pi][oi]: - yield self.forward[ci] - else: - for ci in self.cspo.keys(): - yield self.forward[ci] - - - def __icontexts(self, triple): - si, pi, oi = triple - for ci in self.spo[si][pi][oi]: - yield self.forward[ci] - - - -import random - -def randid(randint=random.randint, choice=random.choice, signs=(-1,1)): - return choice(signs)*randint(1,2000000000) - -del random diff --git a/doc/rdflib3/plugins/memory.pyc b/doc/rdflib3/plugins/memory.pyc deleted file mode 100644 index d75c01a..0000000 Binary files a/doc/rdflib3/plugins/memory.pyc and /dev/null differ diff --git a/doc/rdflib3/plugins/parsers/__init__.py b/doc/rdflib3/plugins/parsers/__init__.py deleted file mode 100644 index 8062daa..0000000 --- a/doc/rdflib3/plugins/parsers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" - -""" diff --git a/doc/rdflib3/plugins/parsers/notation3.py b/doc/rdflib3/plugins/parsers/notation3.py deleted file mode 100644 index 01dc5ff..0000000 --- a/doc/rdflib3/plugins/parsers/notation3.py +++ /dev/null @@ -1,2427 +0,0 @@ -#!/usr/bin/env python -u""" -notation3.py - Standalone Notation3 Parser -Derived from CWM, the Closed World Machine - -Authors of the original suite: - -* Dan Connolly <@@> -* Tim Berners-Lee <@@> -* Yosi Scharf <@@> -* Joseph M. Reagle Jr. -* Rich Salz - -http://www.w3.org/2000/10/swap/notation3.py - -Copyright 2000-2007, World Wide Web Consortium. -Copyright 2001, MIT. -Copyright 2001, Zolera Systems Inc. - -License: W3C Software License -http://www.w3.org/Consortium/Legal/copyright-software - -Modified by Sean B. Palmer -Copyright 2007, Sean B. Palmer. \u32E1 - -Modified to work with rdflib by Gunnar Aastrand Grimnes -Copyright 2010, Gunnar A. Grimnes - -""" - -# Python standard libraries -import types -import sys -import os -import re -import StringIO -import codecs - -from binascii import a2b_hex -from decimal import Decimal - -from rdflib.term import URIRef, BNode, Literal, Variable, _XSD_PFX, _unique_id -from rdflib.graph import QuotedGraph, ConjunctiveGraph -from rdflib import py3compat -b = py3compat.b - -__all__ = [ - 'URISyntaxError', 'BadSyntax', 'N3Parser', "verbosity", "setVerbosity", - "progress", "splitFrag", "splitFragP", "join", "refTo", "base", - "canonical", "runNamespace", "uniqueURI", "Canonicalize", "stripCR", - "dummyWrite", "toBool", "stringToN3", "backslashUify", "hexify"] - -from rdflib.parser import Parser - - -# Incestuous.. would be nice to separate N3 and XML -# from sax2rdf import XMLtoDOM -def XMLtoDOM(*args, **kargs): - # print >> sys.stderr, args, kargs - pass - - -# SWAP http://www.w3.org/2000/10/swap -# from diag import verbosity, setVerbosity, progress -def verbosity(*args, **kargs): - # print >> sys.stderr, args, kargs - pass - - -def setVerbosity(*args, **kargs): - # print >> sys.stderr, args, kargs - pass - - -def progress(*args, **kargs): - # print >> sys.stderr, args, kargs - pass - - -def splitFrag(uriref): - """split a URI reference between the fragment and the rest. - - Punctuation is thrown away. - - e.g. - - >>> splitFrag("abc#def") - ('abc', 'def') - - >>> splitFrag("abcdef") - ('abcdef', None) - - """ - - i = uriref.rfind("#") - if i >= 0: - return uriref[:i], uriref[i + 1:] - else: - return uriref, None - - -def splitFragP(uriref, punct=0): - """split a URI reference before the fragment - - Punctuation is kept. - - e.g. - - >>> splitFragP("abc#def") - ('abc', '#def') - - >>> splitFragP("abcdef") - ('abcdef', '') - - """ - - i = uriref.rfind("#") - if i >= 0: - return uriref[:i], uriref[i:] - else: - return uriref, '' - - -@py3compat.format_doctest_out -def join(here, there): - """join an absolute URI and URI reference - (non-ascii characters are supported/doctested; - haven't checked the details of the IRI spec though) - - here is assumed to be absolute. - there is URI reference. - - >>> join('http://example/x/y/z', '../abc') - 'http://example/x/abc' - - Raise ValueError if there uses relative path - syntax but here has no hierarchical path. - - >>> join('mid:foo@example', '../foo') - Traceback (most recent call last): - raise ValueError, here - ValueError: Base has no slash after colon - with relative '../foo'. - - >>> join('http://example/x/y/z', '') - 'http://example/x/y/z' - - >>> join('mid:foo@example', '#foo') - 'mid:foo@example#foo' - - We grok IRIs - - >>> len(%(u)s'Andr\\xe9') - 5 - - >>> join('http://example.org/', %(u)s'#Andr\\xe9') - %(u)s'http://example.org/#Andr\\xe9' - """ - - assert(here.find("#") < 0), "Base may not contain hash: '%s'" % here # caller must splitFrag (why?) - - slashl = there.find('/') - colonl = there.find(':') - - # join(base, 'foo:/') -- absolute - if colonl >= 0 and (slashl < 0 or colonl < slashl): - return there - - bcolonl = here.find(':') - assert(bcolonl >= 0), "Base uri '%s' is not absolute" % here # else it's not absolute - - path, frag = splitFragP(there) - if not path: - return here + frag - - # join('mid:foo@example', '../foo') bzzt - if here[bcolonl + 1:bcolonl + 2] != '/': - raise ValueError("Base <%s> has no slash after colon - with relative '%s'." % (here, there)) - - if here[bcolonl + 1:bcolonl + 3] == '//': - bpath = here.find('/', bcolonl + 3) - else: - bpath = bcolonl + 1 - - # join('http://xyz', 'foo') - if bpath < 0: - bpath = len(here) - here = here + '/' - - # join('http://xyz/', '//abc') => 'http://abc' - if there[:2] == '//': - return here[:bcolonl + 1] + there - - # join('http://xyz/', '/abc') => 'http://xyz/abc' - if there[:1] == '/': - return here[:bpath] + there - - slashr = here.rfind('/') - - while 1: - if path[:2] == './': - path = path[2:] - if path == '.': - path = '' - elif path[:3] == '../' or path == '..': - path = path[3:] - i = here.rfind('/', bpath, slashr) - if i >= 0: - here = here[:i + 1] - slashr = i - else: - break - - return here[:slashr + 1] + path + frag - - -commonHost = re.compile(r'^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$') - - -def refTo(base, uri): - """figure out a relative URI reference from base to uri - - >>> refTo('http://example/x/y/z', 'http://example/x/abc') - '../abc' - - >>> refTo('file:/ex/x/y', 'file:/ex/x/q/r#s') - 'q/r#s' - - >>> refTo(None, 'http://ex/x/y') - 'http://ex/x/y' - - >>> refTo('http://ex/x/y', 'http://ex/x/y') - '' - - Note the relationship between refTo and join: - join(x, refTo(x, y)) == y - which points out certain strings which cannot be URIs. e.g. - >>> x='http://ex/x/y';y='http://ex/x/q:r';join(x, refTo(x, y)) == y - 0 - - So 'http://ex/x/q:r' is not a URI. Use 'http://ex/x/q%3ar' instead: - >>> x='http://ex/x/y';y='http://ex/x/q%3ar';join(x, refTo(x, y)) == y - 1 - - This one checks that it uses a root-realtive one where that is - all they share. Now uses root-relative where no path is shared. - This is a matter of taste but tends to give more resilience IMHO - -- and shorter paths - - Note that base may be None, meaning no base. In some situations, there - just ain't a base. Slife. In these cases, relTo returns the absolute value. - The axiom abs(,rel(b,x))=x still holds. - This saves people having to set the base to "bogus:". - - >>> refTo('http://ex/x/y/z', 'http://ex/r') - '/r' - - """ - - # assert base # don't mask bugs -danc # not a bug. -tim - if not base: - return uri - if base == uri: - return "" - - # Find how many path segments in common - i = 0 - while i < len(uri) and i < len(base): - if uri[i] == base[i]: - i = i + 1 - else: - break - # print "# relative", base, uri, " same up to ", i - # i point to end of shortest one or first difference - - m = commonHost.match(base[:i]) - if m: - k = uri.find("//") - if k < 0: - k = -2 # no host - l = uri.find("/", k + 2) - if uri[l + 1:l + 2] != "/" and base[l + 1:l + 2] != "/" and uri[:l] == base[:l]: - return uri[l:] - - if uri[i:i + 1] == "#" and len(base) == i: - return uri[i:] # fragment of base - - while i > 0 and uri[i - 1] != '/': - i = i - 1 # scan for slash - - if i < 3: - return uri # No way. - if base.find("//", i - 2) > 0 or uri.find("//", i - 2) > 0: - return uri # An unshared "//" - if base.find(":", i) > 0: - return uri # An unshared ":" - n = base.count("/", i) - if n == 0 and i < len(uri) and uri[i] == '#': - return "./" + uri[i:] - elif n == 0 and i == len(uri): - return "./" - else: - return ("../" * n) + uri[i:] - - -def base(): - """The base URI for this process - the Web equiv of cwd - - Relative or abolute unix-standard filenames parsed relative to - this yeild the URI of the file. - If we had a reliable way of getting a computer name, - we should put it in the hostname just to prevent ambiguity - - """ - # return "file://" + hostname + os.getcwd() + "/" - return "file://" + _fixslash(os.getcwd()) + "/" - - -def _fixslash(argstr): - """ Fix windowslike filename to unixlike - (#ifdef WINDOWS)""" - s = argstr - for i in range(len(s)): - if s[i] == "\\": - s = s[:i] + "/" + s[i + 1:] - if s[0] != "/" and s[1] == ":": - s = s[2:] # @@@ Hack when drive letter present - return s - -URI_unreserved = b("ABCDEFGHIJJLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~") - # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - - -@py3compat.format_doctest_out -def canonical(str_in): - """Convert equivalent URIs (or parts) to the same string - - There are many differenet levels of URI canonicalization - which are possible. See http://www.ietf.org/rfc/rfc3986.txt - Done: - - Converfting unicode IRI to utf-8 - - Escaping all non-ASCII - - De-escaping, if escaped, ALPHA (%%41-%%5A and %%61-%%7A), DIGIT (%%30-%%39), - hyphen (%%2D), period (%%2E), underscore (%%5F), or tilde (%%7E) (Sect 2.4) - - Making all escapes uppercase hexadecimal - - Not done: - - Making URI scheme lowercase - - changing /./ or /foo/../ to / with care not to change host part - - - >>> canonical("foo bar") - %(b)s'foo%%20bar' - - >>> canonical(%(u)s'http:') - %(b)s'http:' - - >>> canonical('fran%%c3%%83%%c2%%a7ois') - %(b)s'fran%%C3%%83%%C2%%A7ois' - - >>> canonical('a') - %(b)s'a' - - >>> canonical('%%4e') - %(b)s'N' - - >>> canonical('%%9d') - %(b)s'%%9D' - - >>> canonical('%%2f') - %(b)s'%%2F' - - >>> canonical('%%2F') - %(b)s'%%2F' - - """ - if type(str_in) == type(u''): - s8 = str_in.encode('utf-8') - else: - s8 = str_in - s = b('') - i = 0 - while i < len(s8): - if py3compat.PY3: - n = s8[i] - ch = bytes([n]) - else: - ch = s8[i] - n = ord(ch) - if (n > 126) or (n < 33): # %-encode controls, SP, DEL, and utf-8 - s += b("%%%02X" % ord(ch)) - elif ch == b('%') and i + 2 < len(s8): - ch2 = a2b_hex(s8[i + 1:i + 3]) - if ch2 in URI_unreserved: - s += ch2 - else: - s += b("%%%02X" % ord(ch2)) - i = i + 3 - continue - else: - s += ch - i = i + 1 - return s - - -CONTEXT = 0 -PRED = 1 -SUBJ = 2 -OBJ = 3 - -PARTS = PRED, SUBJ, OBJ -ALL4 = CONTEXT, PRED, SUBJ, OBJ - -SYMBOL = 0 -FORMULA = 1 -LITERAL = 2 -LITERAL_DT = 21 -LITERAL_LANG = 22 -ANONYMOUS = 3 -XMLLITERAL = 25 - -Logic_NS = "http://www.w3.org/2000/10/swap/log#" -NODE_MERGE_URI = Logic_NS + "is" # Pseudo-property indicating node merging -forSomeSym = Logic_NS + "forSome" -forAllSym = Logic_NS + "forAll" - -RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" -RDF_NS_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" -OWL_NS = "http://www.w3.org/2002/07/owl#" -DAML_sameAs_URI = OWL_NS + "sameAs" -parsesTo_URI = Logic_NS + "parsesTo" -RDF_spec = "http://www.w3.org/TR/REC-rdf-syntax/" - -List_NS = RDF_NS_URI # From 20030808 -_Old_Logic_NS = "http://www.w3.org/2000/10/swap/log.n3#" - -N3_first = (SYMBOL, List_NS + "first") -N3_rest = (SYMBOL, List_NS + "rest") -N3_li = (SYMBOL, List_NS + "li") -N3_nil = (SYMBOL, List_NS + "nil") -N3_List = (SYMBOL, List_NS + "List") -N3_Empty = (SYMBOL, List_NS + "Empty") - - -runNamespaceValue = None - - -def runNamespace(): - "Return a URI suitable as a namespace for run-local objects" - # @@@ include hostname (privacy?) (hash it?) - global runNamespaceValue - if runNamespaceValue == None: - runNamespaceValue = join(base(), _unique_id()) + '#' - return runNamespaceValue - -nextu = 0 - - -def uniqueURI(): - "A unique URI" - global nextu - nextu += 1 - # return runNamespace() + "u_" + `nextu` - return runNamespace() + "u_" + str(nextu) - - -class URISyntaxError(ValueError): - """A parameter is passed to a routine that requires a URI reference""" - pass - - -tracking = False -chatty_flag = 50 - - -from xml.dom import Node -try: - from xml.ns import XMLNS -except: - class XMLNS: - BASE = "http://www.w3.org/2000/xmlns/" - XML = "http://www.w3.org/XML/1998/namespace" - - -_attrs = lambda E: (E.attributes and E.attributes.values()) or [] -_children = lambda E: E.childNodes or [] -_IN_XML_NS = lambda n: n.namespaceURI == XMLNS.XML -_inclusive = lambda n: n.unsuppressedPrefixes == None - -# Does a document/PI has lesser/greater document order than the -# first element? -_LesserElement, _Element, _GreaterElement = range(3) - - -def _sorter(n1, n2): - '''_sorter(n1, n2) -> int - Sorting predicate for non-NS attributes.''' - - i = cmp(n1.namespaceURI, n2.namespaceURI) - if i: - return i - return cmp(n1.localName, n2.localName) - - -def _sorter_ns(n1, n2): - '''_sorter_ns((n,v),(n,v)) -> int - "(an empty namespace URI is lexicographically least)."''' - - if n1[0] == 'xmlns': - return -1 - if n2[0] == 'xmlns': - return 1 - return cmp(n1[0], n2[0]) - - -def _utilized(n, node, other_attrs, unsuppressedPrefixes): - '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean - Return true if that nodespace is utilized within the node''' - - if n.startswith('xmlns:'): - n = n[6:] - elif n.startswith('xmlns'): - n = n[5:] - if (n == "" and node.prefix in ["#default", None]) or \ - n == node.prefix or n in unsuppressedPrefixes: - return 1 - for attr in other_attrs: - if n == attr.prefix: - return 1 - return 0 - - -#_in_subset = lambda subset, node: not subset or node in subset -_in_subset = lambda subset, node: subset is None or node in subset # rich's tweak - - -class _implementation: - '''Implementation class for C14N. This accompanies a node during it's - processing and includes the parameters and processing state.''' - - # Handler for each node type; populated during module instantiation. - handlers = {} - - def __init__(self, node, write, **kw): - '''Create and run the implementation.''' - self.write = write - self.subset = kw.get('subset') - self.comments = kw.get('comments', 0) - self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes') - nsdict = kw.get('nsdict', {'xml': XMLNS.XML, 'xmlns': XMLNS.BASE}) - - # Processing state. - self.state = (nsdict, {'xml': ''}, {}) # 0422 - - if node.nodeType == Node.DOCUMENT_NODE: - self._do_document(node) - elif node.nodeType == Node.ELEMENT_NODE: - self.documentOrder = _Element # At document element - if not _inclusive(self): - self._do_element(node) - else: - inherited = self._inherit_context(node) - self._do_element(node, inherited) - elif node.nodeType == Node.DOCUMENT_TYPE_NODE: - pass - elif node.nodeType == Node.TEXT_NODE: - self._do_text(node) - else: - raise TypeError(str(node)) - - def _inherit_context(self, node): - '''_inherit_context(self, node) -> list - Scan ancestors of attribute and namespace context. Used only - for single element node canonicalization, not for subset - canonicalization.''' - - # Collect the initial list of xml:foo attributes. - xmlattrs = filter(_IN_XML_NS, _attrs(node)) - - # Walk up and get all xml:XXX attributes we inherit. - inherited, parent = [], node.parentNode - while parent and parent.nodeType == Node.ELEMENT_NODE: - for a in filter(_IN_XML_NS, _attrs(parent)): - n = a.localName - if n not in xmlattrs: - xmlattrs.append(n) - inherited.append(a) - parent = parent.parentNode - return inherited - - def _do_document(self, node): - '''_do_document(self, node) -> None - Process a document node. documentOrder holds whether the document - element has been encountered such that PIs/comments can be written - as specified.''' - - self.documentOrder = _LesserElement - for child in node.childNodes: - if child.nodeType == Node.ELEMENT_NODE: - self.documentOrder = _Element # At document element - self._do_element(child) - self.documentOrder = _GreaterElement # After document element - elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE: - self._do_pi(child) - elif child.nodeType == Node.COMMENT_NODE: - self._do_comment(child) - elif child.nodeType == Node.DOCUMENT_TYPE_NODE: - pass - else: - raise TypeError(str(child)) - handlers[Node.DOCUMENT_NODE] = _do_document - - def _do_text(self, node): - '''_do_text(self, node) -> None - Process a text or CDATA node. Render various special characters - as their C14N entity representations.''' - if not _in_subset(self.subset, node): - return - s = node.data.replace("&", "&") - s = s.replace("<", "<") - s = s.replace(">", ">") - s = s.replace("\015", " ") - if s: - self.write(s) - handlers[Node.TEXT_NODE] = _do_text - handlers[Node.CDATA_SECTION_NODE] = _do_text - - def _do_pi(self, node): - '''_do_pi(self, node) -> None - Process a PI node. Render a leading or trailing # xA if the - document order of the PI is greater or lesser (respectively) - than the document element. - ''' - if not _in_subset(self.subset, node): - return - W = self.write - if self.documentOrder == _GreaterElement: - W('\n') - W('') - if self.documentOrder == _LesserElement: - W('\n') - handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi - - def _do_comment(self, node): - '''_do_comment(self, node) -> None - Process a comment node. Render a leading or trailing # xA if the - document order of the comment is greater or lesser (respectively) - than the document element. - ''' - if not _in_subset(self.subset, node): - return - if self.comments: - W = self.write - if self.documentOrder == _GreaterElement: - W('\n') - W('') - if self.documentOrder == _LesserElement: - W('\n') - handlers[Node.COMMENT_NODE] = _do_comment - - def _do_attr(self, n, value): - ''''_do_attr(self, node) -> None - Process an attribute.''' - - W = self.write - W(' ') - W(n) - W('="') - s = value.replace(value, "&", "&") - s = s.replace("<", "<") - s = s.replace('"', '"') - s = s.replace('\011', ' ') - s = s.replace('\012', ' ') - s = s.replace('\015', ' ') - W(s) - W('"') - - def _do_element(self, node, initial_other_attrs=[]): - '''_do_element(self, node, initial_other_attrs = []) -> None - Process an element (and its children).''' - - # Get state (from the stack) make local copies. - # ns_parent -- NS declarations in parent - # ns_rendered -- NS nodes rendered by ancestors - # ns_local -- NS declarations relevant to this element - # xml_attrs -- Attributes in XML namespace from parent - # xml_attrs_local -- Local attributes in XML namespace. - ns_parent, ns_rendered, xml_attrs = \ - self.state[0], self.state[1].copy(), self.state[2].copy() # 0422 - ns_local = ns_parent.copy() - xml_attrs_local = {} - - # progress("_do_element node.nodeName=", node.nodeName) - # progress("_do_element node.namespaceURI", node.namespaceURI) - # progress("_do_element node.tocml()", node.toxml()) - # Divide attributes into NS, XML, and others. - other_attrs = initial_other_attrs[:] - in_subset = _in_subset(self.subset, node) - for a in _attrs(node): - # progress("\t_do_element a.nodeName=", a.nodeName) - if a.namespaceURI == XMLNS.BASE: - n = a.nodeName - if n == "xmlns:": - n = "xmlns" # DOM bug workaround - ns_local[n] = a.nodeValue - elif a.namespaceURI == XMLNS.XML: - if _inclusive(self) or in_subset: - xml_attrs_local[a.nodeName] = a # 0426 - else: - other_attrs.append(a) - # add local xml:foo attributes to ancestor's xml:foo attributes - xml_attrs.update(xml_attrs_local) - - # Render the node - W, name = self.write, None - if in_subset: - name = node.nodeName - W('<') - W(name) - - # Create list of NS attributes to render. - ns_to_render = [] - for n, v in ns_local.items(): - - # If default namespace is XMLNS.BASE or empty, - # and if an ancestor was the same - if n == "xmlns" and v in [XMLNS.BASE, ''] \ - and ns_rendered.get('xmlns') in [XMLNS.BASE, '', None]: - continue - - # "omit namespace node with local name xml, which defines - # the xml prefix, if its string value is - # http://www.w3.org/XML/1998/namespace." - if n in ["xmlns:xml", "xml"] \ - and v in ['http://www.w3.org/XML/1998/namespace']: - continue - - # If not previously rendered - # and it's inclusive or utilized - if (n, v) not in ns_rendered.items() \ - and (_inclusive(self) or \ - _utilized(n, node, other_attrs, self.unsuppressedPrefixes)): - ns_to_render.append((n, v)) - - # Sort and render the ns, marking what was rendered. - ns_to_render.sort(_sorter_ns) - for n, v in ns_to_render: - self._do_attr(n, v) - ns_rendered[n] = v # 0417 - - # If exclusive or the parent is in the subset, add the local xml attributes - # Else, add all local and ancestor xml attributes - # Sort and render the attributes. - if not _inclusive(self) or _in_subset(self.subset, node.parentNode): # 0426 - other_attrs.extend(xml_attrs_local.values()) - else: - other_attrs.extend(xml_attrs.values()) - other_attrs.sort(_sorter) - for a in other_attrs: - self._do_attr(a.nodeName, a.value) - W('>') - - # Push state, recurse, pop state. - state, self.state = self.state, (ns_local, ns_rendered, xml_attrs) - for c in _children(node): - _implementation.handlers[c.nodeType](self, c) - self.state = state - - if name: - W('' % name) - handlers[Node.ELEMENT_NODE] = _do_element - - -def Canonicalize(node, output=None, **kw): - '''Canonicalize(node, output=None, **kw) -> UTF-8 - - Canonicalize a DOM document/element node and all descendents. - Return the text; if output is specified then output.write will - be called to output the text and None will be returned - Keyword parameters: - nsdict -- a dictionary of prefix:uri namespace entries - assumed to exist in the surrounding context - comments -- keep comments if non-zero (default is 0) - subset -- Canonical XML subsetting resulting from XPath (default is []) - unsuppressedPrefixes -- do exclusive C14N, and this specifies the - prefixes that should be inherited. - ''' - if output: - apply(_implementation, (node, output.write), kw) - else: - s = StringIO.StringIO() - apply(_implementation, (node, s.write), kw) - return s.getvalue() - -# end of xmlC14n.py - - -# from why import BecauseOfData, becauseSubexpression -def BecauseOfData(*args, **kargs): - # print args, kargs - pass - - -def becauseSubexpression(*args, **kargs): - # print args, kargs - pass - -N3_forSome_URI = forSomeSym -N3_forAll_URI = forAllSym - -# Magic resources we know about - -ADDED_HASH = "#" # Stop where we use this in case we want to remove it! -# This is the hash on namespace URIs - -RDF_type = (SYMBOL, RDF_type_URI) -DAML_sameAs = (SYMBOL, DAML_sameAs_URI) - -LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies" - -BOOLEAN_DATATYPE = _XSD_PFX + "boolean" -DECIMAL_DATATYPE = _XSD_PFX + "decimal" -DOUBLE_DATATYPE = _XSD_PFX + "double" -FLOAT_DATATYPE = _XSD_PFX + "float" -INTEGER_DATATYPE = _XSD_PFX + "integer" - -option_noregen = 0 # If set, do not regenerate genids on output - -# @@ I18n - the notname chars need extending for well known unicode non-text -# characters. The XML spec switched to assuming unknown things were name -# characaters. -# _namechars = string.lowercase + string.uppercase + string.digits + '_-' -_notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~" # else valid qname :-/ -_notNameChars = _notQNameChars + ":" # Assume anything else valid name :-/ -_rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' - - -N3CommentCharacter = "#" # For unix script # ! compatabilty - -########################################## Parse string to sink -# -# Regular expressions: -eol = re.compile(r'[ \t]*(#[^\n]*)?\r?\n') # end of line, poss. w/comment -eof = re.compile(r'[ \t]*(#[^\n]*)?$') # end of file, poss. w/comment -ws = re.compile(r'[ \t]*') # Whitespace not including NL -signed_integer = re.compile(r'[-+]?[0-9]+') # integer -number_syntax = re.compile(r'(?P[-+]?[0-9]+)(?P\.[0-9]+)?(?P(?:e|E)[-+]?[0-9]+)?') -digitstring = re.compile(r'[0-9]+') # Unsigned integer -interesting = re.compile(r'[\\\r\n\"]') -langcode = re.compile(r'[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?') - - -class SinkParser: - def __init__(self, store, openFormula=None, thisDoc="", baseURI=None, - genPrefix="", flags="", why=None): - """ note: namespace names should *not* end in # ; - the # will get added during qname processing """ - - self._bindings = {} - self._flags = flags - if thisDoc != "": - assert ':' in thisDoc, "Document URI not absolute: <%s>" % thisDoc - self._bindings[""] = thisDoc + "#" # default - - self._store = store - if genPrefix: - store.setGenPrefix(genPrefix) # pass it on - - self._thisDoc = thisDoc - self.lines = 0 # for error handling - self.startOfLine = 0 # For calculating character number - self._genPrefix = genPrefix - self.keywords = ['a', 'this', 'bind', 'has', 'is', 'of', 'true', 'false'] - self.keywordsSet = 0 # Then only can others be considerd qnames - self._anonymousNodes = {} # Dict of anon nodes already declared ln: Term - self._variables = {} - self._parentVariables = {} - self._reason = why # Why the parser was asked to parse this - - self._reason2 = None # Why these triples - # was: diag.tracking - if tracking: - self._reason2 = BecauseOfData( - store.newSymbol(thisDoc), because=self._reason) - - if baseURI: - self._baseURI = baseURI - else: - if thisDoc: - self._baseURI = thisDoc - else: - self._baseURI = None - - assert not self._baseURI or ':' in self._baseURI - - if not self._genPrefix: - if self._thisDoc: - self._genPrefix = self._thisDoc + "#_g" - else: - self._genPrefix = uniqueURI() - - if openFormula == None: - if self._thisDoc: - self._formula = store.newFormula(thisDoc + "#_formula") - else: - self._formula = store.newFormula() - else: - self._formula = openFormula - - self._context = self._formula - self._parentContext = None - - def here(self, i): - """String generated from position in file - - This is for repeatability when refering people to bnodes in a document. - This has diagnostic uses less formally, as it should point one to which - bnode the arbitrary identifier actually is. It gives the - line and character number of the '[' charcacter or path character - which introduced the blank node. The first blank node is boringly _L1C1. - It used to be used only for tracking, but for tests in general - it makes the canonical ordering of bnodes repeatable.""" - - return "%s_L%iC%i" % (self._genPrefix, self.lines, - i - self.startOfLine + 1) - - def formula(self): - return self._formula - - def loadStream(self, stream): - return self.loadBuf(stream.read()) # Not ideal - - def loadBuf(self, buf): - """Parses a buffer and returns its top level formula""" - self.startDoc() - - self.feed(buf) - return self.endDoc() # self._formula - - def feed(self, octets): - """Feed an octet stream tothe parser - - if BadSyntax is raised, the string - passed in the exception object is the - remainder after any statements have been parsed. - So if there is more data to feed to the - parser, it should be straightforward to recover.""" - - if not isinstance(octets, unicode): - s = octets.decode('utf-8') - # NB already decoded, so \ufeff - if len(s) > 0 and s[0] == codecs.BOM_UTF8.decode('utf-8'): - s = s[1:] - else: - s = octets - - i = 0 - while i >= 0: - j = self.skipSpace(s, i) - if j < 0: - return - - i = self.directiveOrStatement(s, j) - if i < 0: - print("# next char: %s" % s[j]) - raise BadSyntax(self._thisDoc, self.lines, s, j, - "expected directive or statement") - - def directiveOrStatement(self, argstr, h): - - i = self.skipSpace(argstr, h) - if i < 0: - return i # EOF - - j = self.directive(argstr, i) - if j >= 0: - return self.checkDot(argstr, j) - - j = self.statement(argstr, i) - if j >= 0: - return self.checkDot(argstr, j) - - return j - - # @@I18N - global _notNameChars - # _namechars = string.lowercase + string.uppercase + string.digits + '_-' - - def tok(self, tok, argstr, i): - """Check for keyword. Space must have been stripped on entry and - we must not be at end of file.""" - - assert tok[0] not in _notNameChars # not for punctuation - if argstr[i:i + 1] == "@": - i = i + 1 - else: - if tok not in self.keywords: - return -1 # No, this has neither keywords declaration nor "@" - - if (argstr[i:i + len(tok)] == tok - and (argstr[i + len(tok)] in _notQNameChars)): - i = i + len(tok) - return i - else: - return -1 - - def directive(self, argstr, i): - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - res = [] - - j = self.tok('bind', argstr, i) # implied "#". Obsolete. - if j > 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "keyword bind is obsolete: use @prefix") - - j = self.tok('keywords', argstr, i) - if j > 0: - i = self.commaSeparatedList(argstr, j, res, self.bareWord) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "'@keywords' needs comma separated list of words") - self.setKeywords(res[:]) - # was: diag.chatty_flag - if chatty_flag > 80: - progress("Keywords ", self.keywords) - return i - - j = self.tok('forAll', argstr, i) - if j > 0: - i = self.commaSeparatedList(argstr, j, res, self.uri_ref2) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "Bad variable list after @forAll") - for x in res: - # self._context.declareUniversal(x) - if x not in self._variables or x in self._parentVariables: - self._variables[x] = self._context.newUniversal(x) - return i - - j = self.tok('forSome', argstr, i) - if j > 0: - i = self. commaSeparatedList(argstr, j, res, self.uri_ref2) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "Bad variable list after @forSome") - for x in res: - self._context.declareExistential(x) - return i - - j = self.tok('prefix', argstr, i) # no implied "#" - if j >= 0: - t = [] - i = self.qname(argstr, j, t) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "expected qname after @prefix") - j = self.uri_ref2(argstr, i, t) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "expected after @prefix _qname_") - ns = self.uriOf(t[1]) - - if self._baseURI: - ns = join(self._baseURI, ns) - elif ":" not in ns: - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "With no base URI, cannot use relative URI in @prefix <" + ns + ">") - assert ':' in ns # must be absolute - self._bindings[t[0][0]] = ns - self.bind(t[0][0], hexify(ns)) - return j - - j = self.tok('base', argstr, i) # Added 2007/7/7 - if j >= 0: - t = [] - i = self.uri_ref2(argstr, j, t) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "expected after @base ") - ns = self.uriOf(t[0]) - - if self._baseURI: - ns = join(self._baseURI, ns) - else: - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "With no previous base URI, cannot use relative URI in @base <" + ns + ">") - assert ':' in ns # must be absolute - self._baseURI = ns - return i - - return -1 # Not a directive, could be something else. - - def bind(self, qn, uri): - assert isinstance(uri, - types.StringType), "Any unicode must be %x-encoded already" - if qn == "": - self._store.setDefaultNamespace(uri) - else: - self._store.bind(qn, uri) - - def setKeywords(self, k): - "Takes a list of strings" - if k == None: - self.keywordsSet = 0 - else: - self.keywords = k - self.keywordsSet = 1 - - def startDoc(self): - # was: self._store.startDoc() - self._store.startDoc(self._formula) - - def endDoc(self): - """Signal end of document and stop parsing. returns formula""" - self._store.endDoc(self._formula) # don't canonicalize yet - return self._formula - - def makeStatement(self, quadruple): - # $$$$$$$$$$$$$$$$$$$$$ - # print "# Parser output: ", `quadruple` - self._store.makeStatement(quadruple, why=self._reason2) - - def statement(self, argstr, i): - r = [] - - i = self.object(argstr, i, r) # Allow literal for subject - extends RDF - if i < 0: - return i - - j = self.property_list(argstr, i, r[0]) - - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, - argstr, i, "expected propertylist") - return j - - def subject(self, argstr, i, res): - return self.item(argstr, i, res) - - def verb(self, argstr, i, res): - """ has _prop_ - is _prop_ of - a - = - _prop_ - >- prop -> - <- prop -< - _operator_""" - - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - - r = [] - - j = self.tok('has', argstr, i) - if j >= 0: - i = self.prop(argstr, j, r) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, - argstr, j, "expected property after 'has'") - res.append(('->', r[0])) - return i - - j = self.tok('is', argstr, i) - if j >= 0: - i = self.prop(argstr, j, r) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "expected after 'is'") - j = self.skipSpace(argstr, i) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "End of file found, expected property after 'is'") - return j # eof - i = j - j = self.tok('of', argstr, i) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "expected 'of' after 'is' ") - res.append(('<-', r[0])) - return j - - j = self.tok('a', argstr, i) - if j >= 0: - res.append(('->', RDF_type)) - return j - - if argstr[i:i + 2] == "<=": - res.append(('<-', self._store.newSymbol(Logic_NS + "implies"))) - return i + 2 - - if argstr[i:i + 1] == "=": - if argstr[i + 1:i + 2] == ">": - res.append(('->', self._store.newSymbol(Logic_NS + "implies"))) - return i + 2 - res.append(('->', DAML_sameAs)) - return i + 1 - - if argstr[i:i + 2] == ":=": - # patch file relates two formulae, uses this @@ really? - res.append(('->', Logic_NS + "becomes")) - return i + 2 - - j = self.prop(argstr, i, r) - if j >= 0: - res.append(('->', r[0])) - return j - - if argstr[i:i + 2] == ">-" or argstr[i:i + 2] == "<-": - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - ">- ... -> syntax is obsolete.") - - return -1 - - def prop(self, argstr, i, res): - return self.item(argstr, i, res) - - def item(self, argstr, i, res): - return self.path(argstr, i, res) - - def blankNode(self, uri=None): - if "B" not in self._flags: - return self._context.newBlankNode(uri, why=self._reason2) - x = self._context.newSymbol(uri) - self._context.declareExistential(x) - return x - - def path(self, argstr, i, res): - """Parse the path production. - """ - j = self.nodeOrLiteral(argstr, i, res) - if j < 0: - return j # nope - - while argstr[j:j + 1] in "!^.": # no spaces, must follow exactly (?) - ch = argstr[j:j + 1] # @@ Allow "." followed IMMEDIATELY by a node. - if ch == ".": - ahead = argstr[j + 1:j + 2] - if not ahead or (ahead in _notNameChars - and ahead not in ":?<[{("): - break - subj = res.pop() - obj = self.blankNode(uri=self.here(j)) - j = self.node(argstr, j + 1, res) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "EOF found in middle of path syntax") - pred = res.pop() - if ch == "^": # Reverse traverse - self.makeStatement((self._context, pred, obj, subj)) - else: - self.makeStatement((self._context, pred, subj, obj)) - res.append(obj) - return j - - def anonymousNode(self, ln): - """Remember or generate a term for one of these _: anonymous nodes""" - term = self._anonymousNodes.get(ln, None) - if term != None: - return term - term = self._store.newBlankNode(self._context, why=self._reason2) - self._anonymousNodes[ln] = term - return term - - def node(self, argstr, i, res, subjectAlready=None): - """Parse the production. - Space is now skipped once at the beginning - instead of in multipe calls to self.skipSpace(). - """ - subj = subjectAlready - - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - i = j - ch = argstr[i:i + 1] # Quick 1-character checks first: - - if ch == "[": - bnodeID = self.here(i) - j = self.skipSpace(argstr, i + 1) - if j < 0: - raise BadSyntax(self._thisDoc, - self.lines, argstr, i, "EOF after '['") - if argstr[j:j + 1] == "=": # Hack for "is" binding name to anon node - i = j + 1 - objs = [] - j = self.objectList(argstr, i, objs) - if j >= 0: - subj = objs[0] - if len(objs) > 1: - for obj in objs: - self.makeStatement((self._context, - DAML_sameAs, subj, obj)) - j = self.skipSpace(argstr, j) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "EOF when objectList expected after [ = ") - if argstr[j:j + 1] == ";": - j = j + 1 - else: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "objectList expected after [= ") - - if subj is None: - subj = self.blankNode(uri=bnodeID) - - i = self.property_list(argstr, j, subj) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "property_list expected") - - j = self.skipSpace(argstr, i) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "EOF when ']' expected after [ ") - if argstr[j:j + 1] != "]": - raise BadSyntax(self._thisDoc, - self.lines, argstr, j, "']' expected") - res.append(subj) - return j + 1 - - if ch == "{": - ch2 = argstr[i + 1:i + 2] - if ch2 == '$': - i += 1 - j = i + 1 - List = [] - first_run = True - while 1: - i = self.skipSpace(argstr, j) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "needed '$}', found end.") - if argstr[i:i + 2] == '$}': - j = i + 2 - break - - if not first_run: - if argstr[i:i + 1] == ',': - i += 1 - else: - raise BadSyntax(self._thisDoc, self.lines, - argstr, i, "expected: ','") - else: - first_run = False - - item = [] - j = self.item(argstr, i, item) # @@@@@ should be path, was object - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "expected item in set or '$}'") - List.append(self._store.intern(item[0])) - res.append(self._store.newSet(List, self._context)) - return j - else: - j = i + 1 - oldParentContext = self._parentContext - self._parentContext = self._context - parentAnonymousNodes = self._anonymousNodes - grandParentVariables = self._parentVariables - self._parentVariables = self._variables - self._anonymousNodes = {} - self._variables = self._variables.copy() - reason2 = self._reason2 - self._reason2 = becauseSubexpression - if subj is None: - subj = self._store.newFormula() - self._context = subj - - while 1: - i = self.skipSpace(argstr, j) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, - argstr, i, "needed '}', found end.") - - if argstr[i:i + 1] == "}": - j = i + 1 - break - - j = self.directiveOrStatement(argstr, i) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, - argstr, i, "expected statement or '}'") - - self._anonymousNodes = parentAnonymousNodes - self._variables = self._parentVariables - self._parentVariables = grandParentVariables - self._context = self._parentContext - self._reason2 = reason2 - self._parentContext = oldParentContext - res.append(subj.close()) # No use until closed - return j - - if ch == "(": - thing_type = self._store.newList - ch2 = argstr[i + 1:i + 2] - if ch2 == '$': - thing_type = self._store.newSet - i += 1 - j = i + 1 - - List = [] - while 1: - i = self.skipSpace(argstr, j) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, - argstr, i, "needed ')', found end.") - if argstr[i:i + 1] == ')': - j = i + 1 - break - - item = [] - j = self.item(argstr, i, item) # @@@@@ should be path, was object - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "expected item in list or ')'") - List.append(self._store.intern(item[0])) - res.append(thing_type(List, self._context)) - return j - - j = self.tok('this', argstr, i) # This context - if j >= 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords.") - res.append(self._context) - return j - - # booleans - j = self.tok('true', argstr, i) - if j >= 0: - res.append(True) - return j - j = self.tok('false', argstr, i) - if j >= 0: - res.append(False) - return j - - if subj is None: # If this can be a named node, then check for a name. - j = self.uri_ref2(argstr, i, res) - if j >= 0: - return j - - return -1 - - def property_list(self, argstr, i, subj): - """Parse property list - Leaves the terminating punctuation in the buffer - """ - while 1: - j = self.skipSpace(argstr, i) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "EOF found when expected verb in property list") - return j # eof - - if argstr[j:j + 2] == ":-": - i = j + 2 - res = [] - j = self.node(argstr, i, res, subj) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "bad {} or () or [] node after :- ") - i = j - continue - i = j - v = [] - j = self.verb(argstr, i, v) - if j <= 0: - return i # void but valid - - objs = [] - i = self.objectList(argstr, j, objs) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "objectList expected") - for obj in objs: - dira, sym = v[0] - if dira == '->': - self.makeStatement((self._context, sym, subj, obj)) - else: - self.makeStatement((self._context, sym, obj, subj)) - - j = self.skipSpace(argstr, i) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "EOF found in list of objects") - return j # eof - if argstr[i:i + 1] != ";": - return i - i = i + 1 # skip semicolon and continue - - def commaSeparatedList(self, argstr, j, res, what): - """return value: -1 bad syntax; >1 new position in argstr - res has things found appended - """ - i = self.skipSpace(argstr, j) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "EOF found expecting comma sep list") - return i - if argstr[i] == ".": - return j # empty list is OK - i = what(argstr, i, res) - if i < 0: - return -1 - - while 1: - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - ch = argstr[j:j + 1] - if ch != ",": - if ch != ".": - return -1 - return j # Found but not swallowed "." - i = what(argstr, j + 1, res) - if i < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "bad list content") - return i - - def objectList(self, argstr, i, res): - i = self.object(argstr, i, res) - if i < 0: - return -1 - while 1: - j = self.skipSpace(argstr, i) - if j < 0: - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "EOF found after object") - return j # eof - if argstr[j:j + 1] != ",": - return j # Found something else! - i = self.object(argstr, j + 1, res) - if i < 0: - return i - - def checkDot(self, argstr, i): - j = self.skipSpace(argstr, i) - if j < 0: - return j # eof - if argstr[j:j + 1] == ".": - return j + 1 # skip - if argstr[j:j + 1] == "}": - return j # don't skip it - if argstr[j:j + 1] == "]": - return j - raise BadSyntax(self._thisDoc, self.lines, - argstr, j, "expected '.' or '}' or ']' at end of statement") - return i - - def uri_ref2(self, argstr, i, res): - """Generate uri from n3 representation. - - Note that the RDF convention of directly concatenating - NS and local name is now used though I prefer inserting a '#' - to make the namesapces look more like what XML folks expect. - """ - qn = [] - j = self.qname(argstr, i, qn) - if j >= 0: - pfx, ln = qn[0] - if pfx is None: - assert 0, "not used?" - ns = self._baseURI + ADDED_HASH - else: - try: - ns = self._bindings[pfx] - except KeyError: - if pfx == "_": # Magic prefix 2001/05/30, can be overridden - res.append(self.anonymousNode(ln)) - return j - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "Prefix \"%s:\" not bound" % (pfx)) - symb = self._store.newSymbol(ns + ln) - if symb in self._variables: - res.append(self._variables[symb]) - else: - res.append(symb) # @@@ "#" CONVENTION - if not ns.find("#"): - progress("Warning: no # on namespace %s," % ns) - return j - - i = self.skipSpace(argstr, i) - if i < 0: - return -1 - - if argstr[i] == "?": - v = [] - j = self.variable(argstr, i, v) - if j > 0: # Forget varibles as a class, only in context. - res.append(v[0]) - return j - return -1 - - elif argstr[i] == "<": - i = i + 1 - st = i - while i < len(argstr): - if argstr[i] == ">": - uref = argstr[st:i] # the join should dealt with "": - if self._baseURI: - uref = join(self._baseURI, uref) # was: uripath.join - else: - assert ":" in uref, \ - "With no base URI, cannot deal with relative URIs" - if argstr[i - 1:i] == "#" and not uref[-1:] == "#": - uref = uref + "#" # She meant it! Weirdness in urlparse? - symb = self._store.newSymbol(uref) - if symb in self._variables: - res.append(self._variables[symb]) - else: - res.append(symb) - return i + 1 - i = i + 1 - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "unterminated URI reference") - - elif self.keywordsSet: - v = [] - j = self.bareWord(argstr, i, v) - if j < 0: - return -1 # Forget varibles as a class, only in context. - if v[0] in self.keywords: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - 'Keyword "%s" not allowed here.' % v[0]) - res.append(self._store.newSymbol(self._bindings[""] + v[0])) - return j - else: - return -1 - - def skipSpace(self, argstr, i): - """Skip white space, newlines and comments. - return -1 if EOF, else position of first non-ws character""" - while 1: - m = eol.match(argstr, i) - if m == None: - break - self.lines = self.lines + 1 - i = m.end() # Point to first character unmatched - self.startOfLine = i - m = ws.match(argstr, i) - if m != None: - i = m.end() - m = eof.match(argstr, i) - if m != None: - return -1 - return i - - def variable(self, argstr, i, res): - """ ?abc -> variable(:abc) - """ - - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - - if argstr[j:j + 1] != "?": - return -1 - j = j + 1 - i = j - if argstr[j] in "0123456789-": - raise BadSyntax(self._thisDoc, self.lines, argstr, j, - "Varible name can't start with '%s'" % argstr[j]) - return -1 - while i < len(argstr) and argstr[i] not in _notNameChars: - i = i + 1 - if self._parentContext == None: - varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) - if varURI not in self._variables: - self._variables[varURI] = self._context.newUniversal( - varURI, why=self._reason2) - res.append(self._variables[varURI]) - return i - # @@ was: - # raise BadSyntax(self._thisDoc, self.lines, argstr, j, - # "Can't use ?xxx syntax for variable in outermost level: %s" - # % argstr[j-1:i]) - varURI = self._store.newSymbol(self._baseURI + "#" + argstr[j:i]) - if varURI not in self._parentVariables: - self._parentVariables[varURI] = self._parentContext.newUniversal( - varURI, why=self._reason2) - res.append(self._parentVariables[varURI]) - return i - - def bareWord(self, argstr, i, res): - """ abc -> :abc - """ - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - - if argstr[j] in "0123456789-" or argstr[j] in _notNameChars: - return -1 - i = j - while i < len(argstr) and argstr[i] not in _notNameChars: - i = i + 1 - res.append(argstr[j:i]) - return i - - def qname(self, argstr, i, res): - """ - xyz:def -> ('xyz', 'def') - If not in keywords and keywordsSet: def -> ('', 'def') - :def -> ('', 'def') - """ - - i = self.skipSpace(argstr, i) - if i < 0: - return -1 - - c = argstr[i] - if c in "0123456789-+.": - return -1 - if c not in _notNameChars: - ln = c - i = i + 1 - while i < len(argstr): - c = argstr[i] - if c == "." or c not in _notNameChars: - ln = ln + c - i = i + 1 - else: - break - if argstr[i - 1] == ".": # qname cannot end with "." - return -1 - - else: # First character is non-alpha - ln = '' # Was: None - TBL (why? useful?) - - if i < len(argstr) and argstr[i] == ':': - pfx = ln - i = i + 1 - ln = '' - while i < len(argstr): - c = argstr[i] - if c not in _notNameChars: - ln = ln + c - i = i + 1 - else: - break - - res.append((pfx, ln)) - return i - - else: # delimiter was not ":" - if ln and self.keywordsSet and ln not in self.keywords: - res.append(('', ln)) - return i - return -1 - - def object(self, argstr, i, res): - j = self.subject(argstr, i, res) - if j >= 0: - return j - else: - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - else: - i = j - - if argstr[i] == '"': - if argstr[i:i + 3] == '"""': - delim = '"""' - else: - delim = '"' - i = i + len(delim) - - j, s = self.strconst(argstr, i, delim) - - res.append(self._store.newLiteral(s)) - progress("New string const ", s, j) - return j - else: - return -1 - - def nodeOrLiteral(self, argstr, i, res): - j = self.node(argstr, i, res) - startline = self.lines # Remember where for error messages - if j >= 0: - return j - else: - j = self.skipSpace(argstr, i) - if j < 0: - return -1 - else: - i = j - - ch = argstr[i] - if ch in "-+0987654321": - m = number_syntax.match(argstr, i) - if m == None: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "Bad number syntax") - j = m.end() - if m.group('exponent') != None: # includes decimal exponent - res.append(float(argstr[i:j])) - # res.append(self._store.newLiteral(argstr[i:j], - # self._store.newSymbol(FLOAT_DATATYPE))) - elif m.group('decimal') != None: - res.append(Decimal(argstr[i:j])) - else: - res.append(long(argstr[i:j])) - # res.append(self._store.newLiteral(argstr[i:j], - # self._store.newSymbol(INTEGER_DATATYPE))) - return j - - if argstr[i] == '"': - if argstr[i:i + 3] == '"""': - delim = '"""' - else: - delim = '"' - i = i + len(delim) - - dt = None - j, s = self.strconst(argstr, i, delim) - lang = None - if argstr[j:j + 1] == "@": # Language? - m = langcode.match(argstr, j + 1) - if m == None: - raise BadSyntax(self._thisDoc, startline, argstr, i, - "Bad language code syntax on string literal, after @") - i = m.end() - lang = argstr[j + 1:i] - j = i - if argstr[j:j + 2] == "^^": - res2 = [] - j = self.uri_ref2(argstr, j + 2, res2) # Read datatype URI - dt = res2[0] - # if dt.uriref() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral": - if dt == "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral": - try: - dom = XMLtoDOM('' - + s - + '').firstChild - except: - raise ValueError('s="%s"' % s) - res.append(self._store.newXMLLiteral(dom)) - return j - res.append(self._store.newLiteral(s, dt, lang)) - return j - else: - return -1 - - def uriOf(self, sym): - if isinstance(sym, types.TupleType): - return sym[1] # old system for --pipe - # return sym.uriref() # cwm api - return sym - - def strconst(self, argstr, i, delim): - """parse an N3 string constant delimited by delim. - return index, val - """ - - j = i - ustr = u"" # Empty unicode string - startline = self.lines # Remember where for error messages - while j < len(argstr): - if argstr[j] == '"': - if delim == '"': # done when delim is " - i = j + 1 - return i, ustr - if delim == '"""': # done when delim is """ and ... - if argstr[j:j + 5] == '"""""': # ... we have "" before - i = j + 5 - ustr = ustr + '""' - return i, ustr - if argstr[j:j + 4] == '""""': # ... we have " before - i = j + 4 - ustr = ustr + '"' - return i, ustr - if argstr[j:j + 3] == '"""': # ... current " is part of delim - i = j + 3 - return i, ustr - - # we are inside of the string and current char is " - j = j + 1 - ustr = ustr + '"' - continue - - m = interesting.search(argstr, j) # was argstr[j:]. - # Note for pos param to work, MUST be compiled ... re bug? - assert m, "Quote expected in string at ^ in %s^%s" % ( - argstr[j - 20:j], argstr[j:j + 20]) # we at least have to find a quote - - i = m.start() - try: - ustr = ustr + argstr[j:i] - except UnicodeError: - err = "" - for c in argstr[j:i]: - err = err + (" %02x" % ord(c)) - streason = sys.exc_info()[1].__str__() - raise BadSyntax(self._thisDoc, startline, argstr, j, - "Unicode error appending characters %s to string, because\n\t%s" - % (err, streason)) - - # print "@@@ i = ",i, " j=",j, "m.end=", m.end() - - ch = argstr[i] - if ch == '"': - j = i - continue - elif ch == "\r": # Strip carriage returns - j = i + 1 - continue - elif ch == "\n": - if delim == '"': - raise BadSyntax(self._thisDoc, startline, argstr, i, - "newline found in string literal") - self.lines = self.lines + 1 - ustr = ustr + ch - j = i + 1 - self.startOfLine = j - - elif ch == "\\": - j = i + 1 - ch = argstr[j:j + 1] # Will be empty if string ends - if not ch: - raise BadSyntax(self._thisDoc, startline, argstr, i, - "unterminated string literal (2)") - k = 'abfrtvn\\"'.find(ch) - if k >= 0: - uch = '\a\b\f\r\t\v\n\\"'[k] - ustr = ustr + uch - j = j + 1 - elif ch == "u": - j, ch = self.uEscape(argstr, j + 1, startline) - ustr = ustr + ch - elif ch == "U": - j, ch = self.UEscape(argstr, j + 1, startline) - ustr = ustr + ch - else: - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "bad escape") - - raise BadSyntax(self._thisDoc, self.lines, argstr, i, - "unterminated string literal") - - def uEscape(self, argstr, i, startline): - j = i - count = 0 - value = 0 - while count < 4: # Get 4 more characters - ch = argstr[j:j + 1].lower() - # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05 - j = j + 1 - if ch == "": - raise BadSyntax(self._thisDoc, startline, argstr, i, - "unterminated string literal(3)") - k = "0123456789abcdef".find(ch) - if k < 0: - raise BadSyntax(self._thisDoc, startline, argstr, i, - "bad string literal hex escape") - value = value * 16 + k - count = count + 1 - uch = unichr(value) - return j, uch - - def UEscape(self, argstr, i, startline): - stringType = type('') - j = i - count = 0 - value = '\\U' - while count < 8: # Get 8 more characters - ch = argstr[j:j + 1].lower() - # sbp http://ilrt.org/discovery/chatlogs/rdfig/2002-07-05 - j = j + 1 - if ch == "": - raise BadSyntax(self._thisDoc, startline, argstr, i, - "unterminated string literal(3)") - k = "0123456789abcdef".find(ch) - if k < 0: - raise BadSyntax(self._thisDoc, startline, argstr, i, - "bad string literal hex escape") - value = value + ch - count = count + 1 - - uch = stringType(value).decode('unicode-escape') - return j, uch - - -wide_build = True -try: - unichr(0x10000) -except ValueError: - wide_build = False - -# If we are going to do operators then they should generate -# [ is operator:plus of ( \1 \2 ) ] - - -class BadSyntax(SyntaxError): - def __init__(self, uri, lines, argstr, i, why): - self._str = argstr.encode('utf-8') # Better go back to strings for errors - self._i = i - self._why = why - self.lines = lines - self._uri = uri - - def __str__(self): - argstr = self._str - i = self._i - st = 0 - if i > 60: - pre = "..." - st = i - 60 - else: - pre = "" - if len(argstr) - i > 60: - post = "..." - else: - post = "" - - return 'at line %i of <%s>:\nBad syntax (%s) at ^ in:\n"%s%s^%s%s"' \ - % (self.lines + 1, self._uri, self._why, pre, - argstr[st:i], argstr[i:i + 60], post) - - -def stripCR(argstr): - res = "" - for ch in argstr: - if ch != "\r": - res = res + ch - return res - - -def dummyWrite(x): - pass - -################################################################################ - - -def toBool(s): - if s == 'true' or s == 'True' or s == '1': - return True - if s == 'false' or s == 'False' or s == '0': - return False - raise ValueError(s) - - -class Formula(object): - number = 0 - - def __init__(self, parent): - self.counter = 0 - Formula.number += 1 - self.number = Formula.number - self.existentials = {} - self.universals = {} - - self.quotedgraph = QuotedGraph( - store=parent.store, identifier=self.id()) - - def __str__(self): - return '_:Formula%s' % self.number - - def id(self): - return BNode('_:Formula%s' % self.number) - - def newBlankNode(self, uri=None, why=None): - if uri is None: - self.counter += 1 - bn = BNode('f%sb%s' % (id(self), self.counter)) - else: - bn = BNode(uri.split('#').pop().replace('_', 'b')) - return bn - - def newUniversal(self, uri, why=None): - return Variable(uri.split('#').pop()) - - def declareExistential(self, x): - self.existentials[x] = self.newBlankNode() - - def close(self): - - return self.quotedgraph - - -r_hibyte = re.compile(r'([\x80-\xff])') - - -def iri(uri): - return uri.decode('utf-8') - # return unicode(r_hibyte.sub(lambda m: '%%%02X' % ord(m.group(1)), uri)) - - -class RDFSink(object): - def __init__(self, graph): - self.rootFormula = None - self.counter = 0 - self.graph = graph - - def newFormula(self): - assert self.graph.store.formula_aware - f = Formula(self.graph) - return f - - def newSymbol(self, *args): - uri = args[0].encode('utf-8') - return URIRef(iri(uri)) - - def newBlankNode(self, arg=None, **kargs): - if isinstance(arg, Formula): - return arg.newBlankNode() - elif arg is None: - self.counter += 1 - bn = BNode('n' + str(self.counter)) - else: - bn = BNode(str(arg[0]).split('#').pop().replace('_', 'b')) - return bn - - def newLiteral(self, s, dt, lang): - if dt: - return Literal(s, datatype=dt) - else: - return Literal(s, lang=lang) - - def newList(self, n, f): - if not n: - return self.newSymbol( - 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil' - ) - - a = self.newBlankNode(f) - first = self.newSymbol( - 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first' - ) - rest = self.newSymbol('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest') - self.makeStatement((f, first, a, n[0])) - self.makeStatement((f, rest, a, self.newList(n[1:], f))) - return a - - def newSet(self, *args): - return set(args) - - def setDefaultNamespace(self, *args): - return ':'.join(repr(n) for n in args) - - def makeStatement(self, quadruple, why=None): - f, p, s, o = quadruple - - if hasattr(p, 'formula'): - raise Exception("Formula used as predicate") - - s = self.normalise(f, s) - p = self.normalise(f, p) - o = self.normalise(f, o) - - if f == self.rootFormula: - # print s, p, o, '.' - self.graph.add((s, p, o)) - else: - f.quotedgraph.add((s, p, o)) - - # return str(quadruple) - - def normalise(self, f, n): - if isinstance(n, tuple): - return URIRef(unicode(n[1])) - - # if isinstance(n, list): - # rdflist, f = n - # name = self.newBlankNode() - # if f == self.rootFormula: - # sublist = name - # for i in xrange(0, len(rdflist) - 1): - # print sublist, 'first', rdflist[i] - # rest = self.newBlankNode() - # print sublist, 'rest', rest - # sublist = rest - # print sublist, 'first', rdflist[-1] - # print sublist, 'rest', 'nil' - # return name - - if isinstance(n, bool): - s = Literal(str(n).lower(), datatype=BOOLEAN_DATATYPE) - return s - - if isinstance(n, int) or isinstance(n, long): - s = Literal(unicode(n), datatype=INTEGER_DATATYPE) - return s - - if isinstance(n, Decimal): - value = str(n.normalize()) - if value == '-0': - value = '0' - s = Literal(value, datatype=DECIMAL_DATATYPE) - return s - - if isinstance(n, float): - s = Literal(str(n), datatype=DOUBLE_DATATYPE) - return s - - if n in f.existentials: - return f.existentials[n] - - # if isinstance(n, Var): - # if f.universals.has_key(n): - # return f.universals[n] - # f.universals[n] = f.newBlankNode() - # return f.universals[n] - - return n - - def intern(self, something): - return something - - def bind(self, pfx, uri): - pass # print pfx, ':', uri - - def startDoc(self, formula): - self.rootFormula = formula - - def endDoc(self, formula): - pass - - -################################################### -# -# Utilities -# - -Escapes = {'a': '\a', - 'b': '\b', - 'f': '\f', - 'r': '\r', - 't': '\t', - 'v': '\v', - 'n': '\n', - '\\': '\\', - '"': '"'} - -forbidden1 = re.compile(ur'[\\\"\a\b\f\r\v\u0080-\U0000ffff]') -forbidden2 = re.compile(ur'[\\\"\a\b\f\r\v\t\n\u0080-\U0000ffff]') - - -def stringToN3(argstr, singleLine=0, flags=""): - res = '' - if (len(argstr) > 20 and argstr[-1] != '"' \ - and not singleLine and (argstr.find("\n") >= 0 \ - or argstr.find('"') >= 0)): - delim = '"""' - forbidden = forbidden1 # (allow tabs too now) - else: - delim = '"' - forbidden = forbidden2 - - i = 0 - - while i < len(argstr): - m = forbidden.search(argstr, i) - if not m: - break - - j = m.start() - res = res + argstr[i:j] - ch = m.group(0) - if ch == '"' and delim == '"""' and argstr[j:j + 3] != '"""': # " - res = res + ch - else: - k = '\a\b\f\r\t\v\n\\"'.find(ch) - if k >= 0: - res = res + "\\" + 'abfrtvn\\"'[k] - else: - if 'e' in flags: - # res = res + ('\\u%04x' % ord(ch)) - res = res + ('\\u%04X' % ord(ch)) - # http://www.w3.org/TR/rdf-testcases/#ntriples - else: - res = res + ch - i = j + 1 - - # The following code fixes things for really high range Unicode - newstr = "" - for ch in res + argstr[i:]: - if ord(ch) > 65535: - newstr = newstr + ('\\U%08X' % ord(ch)) - # http://www.w3.org/TR/rdf-testcases/#ntriples - else: - newstr = newstr + ch - - return delim + newstr + delim - - -def backslashUify(ustr): - """Use URL encoding to return an ASCII string corresponding - to the given unicode""" - # progress("String is "+`ustr`) - # s1=ustr.encode('utf-8') - s = "" - for ch in ustr: # .encode('utf-8'): - if ord(ch) > 65535: - ch = "\\U%08X" % ord(ch) - elif ord(ch) > 126: - ch = "\\u%04X" % ord(ch) - else: - ch = "%c" % ord(ch) - s = s + ch - return b(s) - - -@py3compat.format_doctest_out -def hexify(ustr): - """Use URL encoding to return an ASCII string - corresponding to the given UTF8 string - - >>> hexify("http://example/a b") - %(b)s'http://example/a%%20b' - - """ - # progress("String is "+`ustr`) - # s1=ustr.encode('utf-8') - s = "" - for ch in ustr: # .encode('utf-8'): - if ord(ch) > 126 or ord(ch) < 33: - ch = "%%%02X" % ord(ch) - else: - ch = "%c" % ord(ch) - s = s + ch - return b(s) - -# # Unused, dysfunctional. -# def dummy(): -# res = "" -# if len(argstr) > 20 and (argstr.find("\n") >=0 or argstr.find('"') >=0): -# delim= '"""' -# forbidden = "\\\"\a\b\f\r\v" # (allow tabs too now) -# else: -# delim = '"' -# forbidden = "\\\"\a\b\f\r\v\t\n" -# for i in range(len(argstr)): -# ch = argstr[i] -# j = forbidden.find(ch) -# if ch == '"' and delim == '"""' \ -# and i+1 < len(argstr) and argstr[i+1] != '"': -# j=-1 # Single quotes don't need escaping in long format -# if j >= 0: -# ch = "\\" + '\\"abfrvtn'[j] -# elif ch not in "\n\t" and (ch < " " or ch > "}"): -# ch = "[[" + `ch` + "]]" # [2:-1] # Use python -# res = res + ch -# return delim + res + delim - - - -class TurtleParser(Parser): - - def __init__(self): - pass - - def parse(self, source, graph, encoding="utf-8"): - - if encoding not in [None, "utf-8"]: - raise Exception("N3/Turtle files are always utf-8 encoded, I was passed: %s" % encoding) - - sink = RDFSink(graph) - - baseURI = graph.absolutize(source.getPublicId() or source.getSystemId() or "") - p = SinkParser(sink, baseURI=baseURI) - - p.loadStream(source.getByteStream()) - - for prefix, namespace in p._bindings.items(): - graph.bind(prefix, namespace) - -class N3Parser(TurtleParser): - - def __init__(self): - pass - - def parse(self, source, graph, encoding="utf-8"): - # we're currently being handed a Graph, not a ConjunctiveGraph - assert graph.store.context_aware # is this implied by formula_aware - assert graph.store.formula_aware - - conj_graph = ConjunctiveGraph(store=graph.store) - conj_graph.default_context = graph # TODO: CG __init__ should have a default_context arg - # TODO: update N3Processor so that it can use conj_graph as the sink - conj_graph.namespace_manager = graph.namespace_manager - - TurtleParser.parse(self,source,conj_graph,encoding) - - -def _test(): - import doctest - doctest.testmod() - - -# if __name__ == '__main__': -# _test() - -def main(): - g = ConjunctiveGraph() - - sink = RDFSink(g) - base_uri = 'file://' + os.path.join(os.getcwd(), sys.argv[1]) - - p = SinkParser(sink, baseURI=base_uri) - p._bindings[''] = p._baseURI + '#' - p.startDoc() - - f = open(sys.argv[1], 'rb') - rdbytes = f.read() - f.close() - - p.feed(rdbytes) - p.endDoc() - for t in g.quads((None, None, None)): - - print t - -if __name__ == '__main__': - main() - -#ends diff --git a/doc/rdflib3/plugins/parsers/nquads.py b/doc/rdflib3/plugins/parsers/nquads.py deleted file mode 100644 index b75491d..0000000 --- a/doc/rdflib3/plugins/parsers/nquads.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -This is a rdflib plugin for parsing NQuad files into Conjunctive -graphs that can be used and queried. The store that backs the graph -*must* be able to handle contexts. - ->>> from rdflib import ConjunctiveGraph, URIRef, Namespace ->>> g = ConjunctiveGraph() ->>> data = open("test/nquads/example.nquads", "rb") ->>> g.parse(data, format="nquads") # doctest:+ELLIPSIS -)> ->>> assert len(g.store) == 449 ->>> # There should be 16 separate contexts ->>> assert len([x for x in g.store.contexts()]) == 16 ->>> # is the name of entity E10009 "Arco Publications"? (in graph http://bibliographica.org/entity/E10009) ->>> # Looking for: ->>> # "Arco Publications" ->>> s = URIRef("http://bibliographica.org/entity/E10009") ->>> FOAF = Namespace("http://xmlns.com/foaf/0.1/") ->>> assert(g.value(s, FOAF.name) == "Arco Publications") -""" - -from rdflib.py3compat import b - -# Build up from the NTriples parser: -from rdflib.plugins.parsers.ntriples import NTriplesParser -from rdflib.plugins.parsers.ntriples import ParseError -from rdflib.plugins.parsers.ntriples import r_tail -from rdflib.plugins.parsers.ntriples import r_wspace -from rdflib.plugins.parsers.ntriples import r_wspaces - -__all__ = ['NQuadsParser'] - - -class NQuadsParser(NTriplesParser): - - def parse(self, inputsource, sink, **kwargs): - """Parse f as an N-Triples file.""" - assert sink.store.context_aware, ("NQuadsParser must be given" - " a context aware store.") - self.sink = sink - - source = inputsource.getByteStream() - - if not hasattr(source, 'read'): - raise ParseError("Item to parse must be a file-like object.") - - self.file = source - self.buffer = '' - while True: - self.line = __line = self.readline() - if self.line is None: break - try: self.parseline() - except ParseError, msg: - raise ParseError("Invalid line (%s):\n%r" % (msg, __line)) - - return self.sink - - def parseline(self): - self.eat(r_wspace) - if (not self.line) or self.line.startswith(b('#')): - return # The line is empty or a comment - - subject = self.subject() - self.eat(r_wspaces) - - predicate = self.predicate() - self.eat(r_wspaces) - - obj = self.object() - self.eat(r_wspaces) - - context = self.uriref() - self.eat(r_tail) - - if self.line: - raise ParseError("Trailing garbage") - # Must have a context aware store - add on a normal Graph - # discards anything where the ctx != graph.identifier - self.sink.store.add((subject, predicate, obj), context) - diff --git a/doc/rdflib3/plugins/parsers/nt.py b/doc/rdflib3/plugins/parsers/nt.py deleted file mode 100644 index 1ec2282..0000000 --- a/doc/rdflib3/plugins/parsers/nt.py +++ /dev/null @@ -1,28 +0,0 @@ -from rdflib.parser import Parser -from rdflib.plugins.parsers.ntriples import NTriplesParser - -__all__ = ['NTSink', 'NTParser'] - -class NTSink(object): - def __init__(self, graph): - self.graph = graph - - def triple(self, s, p, o): - self.graph.add((s, p, o)) - - -class NTParser(Parser): - """parser for the ntriples format, often stored with the .nt extension - - See http://www.w3.org/TR/rdf-testcases/#ntriples""" - - def __init__(self): - super(NTParser, self).__init__() - - def parse(self, source, sink, baseURI=None): - f = source.getByteStream() # TODO getCharacterStream? - parser = NTriplesParser(NTSink(sink)) - parser.parse(f) - f.close() - - diff --git a/doc/rdflib3/plugins/parsers/ntriples.py b/doc/rdflib3/plugins/parsers/ntriples.py deleted file mode 100644 index 36a4a23..0000000 --- a/doc/rdflib3/plugins/parsers/ntriples.py +++ /dev/null @@ -1,256 +0,0 @@ -#!/usr/bin/env python -__doc__=""" -N-Triples Parser -License: GPL 2, W3C, BSD, or MIT -Author: Sean B. Palmer, inamidst.com -""" - -import re -from rdflib.term import URIRef as URI -from rdflib.term import BNode as bNode -from rdflib.term import Literal - -from rdflib.py3compat import b, cast_bytes - -__all__ = ['unquote', 'uriquote', 'Sink', 'NTriplesParser'] - -uriref = b(r'<([^:]+:[^\s"<>]+)>') -literal = b(r'"([^"\\]*(?:\\.[^"\\]*)*)"') -litinfo = b(r'(?:@([a-z]+(?:-[a-z0-9]+)*)|\^\^') + uriref + b(r')?') - -r_line = re.compile(b(r'([^\r\n]*)(?:\r\n|\r|\n)')) -r_wspace = re.compile(b(r'[ \t]*')) -r_wspaces = re.compile(b(r'[ \t]+')) -r_tail = re.compile(b(r'[ \t]*\.[ \t]*')) -r_uriref = re.compile(uriref) -r_nodeid = re.compile(b(r'_:([A-Za-z][A-Za-z0-9]*)')) -r_literal = re.compile(literal + litinfo) - -bufsiz = 2048 -validate = False - -class Node(unicode): pass - -class ParseError(Exception): pass - -class Sink(object): - def __init__(self): - self.length = 0 - - def triple(self, s, p, o): - self.length += 1 - print (s, p, o) - -quot = {b('t'): u'\t', b('n'): u'\n', b('r'): u'\r', b('"'): u'"', b('\\'): u'\\'} -r_safe = re.compile(b(r'([\x20\x21\x23-\x5B\x5D-\x7E]+)')) -r_quot = re.compile(b(r'\\(t|n|r|"|\\)')) -r_uniquot = re.compile(b(r'\\u([0-9A-F]{4})|\\U([0-9A-F]{8})')) - -def unquote(s): - """Unquote an N-Triples string.""" - if not validate: - return s.decode('unicode-escape') - else: - result = [] - while s: - m = r_safe.match(s) - if m: - s = s[m.end():] - result.append(m.group(1).decode('ascii')) - continue - - m = r_quot.match(s) - if m: - s = s[2:] - result.append(quot[m.group(1)]) - continue - - m = r_uniquot.match(s) - if m: - s = s[m.end():] - u, U = m.groups() - codepoint = int(u or U, 16) - if codepoint > 0x10FFFF: - raise ParseError("Disallowed codepoint: %08X" % codepoint) - result.append(unichr(codepoint)) - elif s.startswith(b('\\')): - raise ParseError("Illegal escape at: %s..." % s[:10]) - else: raise ParseError("Illegal literal character: %r" % s[0]) - return u''.join(result) - -r_hibyte = re.compile(ur'([\x80-\xFF])') - -def uriquote(uri): - if not validate: - return uri - else: - return r_hibyte.sub( - lambda m: '%%%02X' % ord(m.group(1)), uri) - -class NTriplesParser(object): - """An N-Triples Parser. - - Usage:: - - p = NTriplesParser(sink=MySink()) - sink = p.parse(f) # file; use parsestring for a string - """ - - _bnode_ids = {} - - def __init__(self, sink=None): - if sink is not None: - self.sink = sink - else: self.sink = Sink() - - def parse(self, f): - """Parse f as an N-Triples file.""" - if not hasattr(f, 'read'): - raise ParseError("Item to parse must be a file-like object.") - - self.file = f - self.buffer = '' - while True: - self.line = self.readline() - if self.line is None: break - try: self.parseline() - except ParseError: - raise ParseError("Invalid line: %r" % self.line) - return self.sink - - def parsestring(self, s): - """Parse s as an N-Triples string.""" - if not isinstance(s, basestring): - raise ParseError("Item to parse must be a string instance.") - try: - from io import BytesIO - except ImportError: - from cStringIO import StringIO as BytesIO - f = BytesIO() - f.write(cast_bytes(s)) - f.seek(0) - self.parse(f) - - def readline(self): - """Read an N-Triples line from buffered input.""" - # N-Triples lines end in either CRLF, CR, or LF - # Therefore, we can't just use f.readline() - if not self.buffer: - buffer = self.file.read(bufsiz) - if not buffer: return None - self.buffer = buffer - - while True: - m = r_line.match(self.buffer) - if m: # the more likely prospect - self.buffer = self.buffer[m.end():] - return m.group(1) - else: - buffer = self.file.read(bufsiz) - if not buffer and not self.buffer.isspace(): - raise ParseError("EOF in line") - elif not buffer: - return None - self.buffer += buffer - - def parseline(self): - self.eat(r_wspace) - if (not self.line) or self.line.startswith(b('#')): - return # The line is empty or a comment - - subject = self.subject() - self.eat(r_wspaces) - - predicate = self.predicate() - self.eat(r_wspaces) - - object = self.object() - self.eat(r_tail) - - if self.line: - raise ParseError("Trailing garbage") - self.sink.triple(subject, predicate, object) - - def peek(self, token): - return self.line.startswith(token) - - def eat(self, pattern): - m = pattern.match(self.line) - if not m: # @@ Why can't we get the original pattern? - # print(dir(pattern)) - # print repr(self.line), type(self.line) - raise ParseError("Failed to eat %s" % pattern) - self.line = self.line[m.end():] - return m - - def subject(self): - # @@ Consider using dictionary cases - subj = self.uriref() or self.nodeid() - if not subj: - raise ParseError("Subject must be uriref or nodeID") - return subj - - def predicate(self): - pred = self.uriref() - if not pred: - raise ParseError("Predicate must be uriref") - return pred - - def object(self): - objt = self.uriref() or self.nodeid() or self.literal() - if objt is False: - raise ParseError("Unrecognised object type") - return objt - - def uriref(self): - if self.peek(b('<')): - uri = self.eat(r_uriref).group(1) - uri = unquote(uri) - uri = uriquote(uri) - return URI(uri) - return False - - def nodeid(self): - if self.peek(b('_')): - # Fix for https://github.com/RDFLib/rdflib/issues/204 - bnode_id = self.eat(r_nodeid).group(1).decode() - new_id = self._bnode_ids.get(bnode_id, None) - if new_id is not None: - # Re-map to id specfic to this doc - return bNode(new_id) - else: - # Replace with freshly-generated document-specific BNode id - bnode = bNode() - # Store the mapping - self._bnode_ids[bnode_id] = bnode - return bnode - return False - - def literal(self): - if self.peek(b('"')): - lit, lang, dtype = self.eat(r_literal).groups() - if lang: - lang = lang.decode() - else: - lang = None - if dtype: - dtype = dtype.decode() - else: - dtype = None - if lang and dtype: - raise ParseError("Can't have both a language and a datatype") - lit = unquote(lit) - return Literal(lit, lang, dtype) - return False - -# # Obsolete, unused -# def parseURI(uri): -# import urllib -# parser = NTriplesParser() -# u = urllib.urlopen(uri) -# sink = parser.parse(u) -# u.close() -# # for triple in sink: -# # print triple -# print 'Length of input:', sink.length - diff --git a/doc/rdflib3/plugins/parsers/rdfa/__init__.py b/doc/rdflib3/plugins/parsers/rdfa/__init__.py deleted file mode 100644 index d208f8d..0000000 --- a/doc/rdflib3/plugins/parsers/rdfa/__init__.py +++ /dev/null @@ -1,192 +0,0 @@ -""" -From a Python file, expecting an RDF/XML pretty printed output:: - - import rdflib.graph as g - graph = g.Graph() - graph.parse('filename.html', format='rdfa') - print graph.serialize(format='pretty-xml') - -For details on RDFa, the reader should consult the `RDFa syntax document`__. - -This is an adapted version of pyRdfa (`W3C RDFa Distiller page`__) by Ivan Herman - -.. __: http://www.w3.org/TR/rdfa-syntax -.. __: http://www.w3.org/2007/08/pyRdfa/ - -Note: By default pyRdfa uses the xml.dom.minidom parser which is referenced -in a `stackoverflow answer `_ thus: "... a -"non-external-entity-reading XML parser. That means it doesn't even look at the DTD -... A further consequence of this is that minidom won't know about the HTML-specific -entities like é that are defined in the XHTML doctype, so you may lose text -that way". In essence, this means that:: - -

Examplé

- -will be returned as "Exampl". It is unfortunate that this does not result in an -Exception because an Exception would have caused the pyRdfa parser to switch to -the html5lib parser which *does* correctly handle HTML entities. - -One workaround is to "unescape" the entities using Python's htmlentities module -before feeding the markup to RDFaParser.parse():: - - import re - from htmlentitydefs import name2codepoint - def htmlentitydecode(s): - return re.sub('&(%s);' % '|'.join(name2codepoint), - lambda m: unichr(name2codepoint[m.group(1)]), s) - -(taken from http://wiki.python.org/moin/EscapingHtml) - -""" - - -import sys -import urllib -import xml.dom.minidom - -from rdflib.term import URIRef -from rdflib.parser import Parser -from rdflib.plugins.parsers.rdfa.state import ExecutionContext -from rdflib.plugins.parsers.rdfa.parse import parse_one_node -from rdflib.plugins.parsers.rdfa.options import (Options, _add_to_comment_graph, - DIST_NS, ERROR, GENERIC_XML, XHTML_RDFA, HTML5_RDFA) - -from rdflib.plugins.parsers.rdfa.transform.headabout import head_about_transform - -__all__ = ['RDFaParser'] - -# These are part of the RDFa spec. -BUILT_IN_TRANSFORMERS = [ - head_about_transform -] - -# Exception handling. Essentially, all the different exceptions are re-packaged -# into separate exception class, to allow for an easier management on the user -# level -class RDFaError(Exception) : - """Just a wrapper around the local exceptions. It does not add any new - functionality to the Exception class.""" - pass - -# For some doctype and element name combinations an automatic switch to an -# input mode is done -_HOST_LANG = { - ("http://www.w3.org/1999/xhtml", "html"): XHTML_RDFA, - ("http://www.w3.org/2000/svg", "svg"): GENERIC_XML -} - - -class RDFaParser(Parser): - - def parse(self, source, sink, - warnings=False, space_preserve=True, - transformers=None, xhtml=True, lax=True, html5=False, encoding=None): - if transformers is None: - transformers = [] - options = Options(warnings, space_preserve, transformers, xhtml, lax) - baseURI = source.getPublicId() - stream = source.getByteStream() - if html5: - dom = _process_html5_source(stream, options, encoding) - else: - dom = _try_process_source(stream, options, encoding) - _process_DOM(dom, baseURI, sink, options) - - -def _process_DOM(dom, base, graph, options=None): - """ - Core processing. The transformers ("pre-processing") is done on the DOM - tree, the state is initialized, and the "real" RDFa parsing is done. - The result is put into the provided Graph. - - The real work is done in the parser function ``parse_one_node()``. - - Params: - dom -- XML DOM Tree node (for the top level) - base -- URI for the default "base" value (usually the URI of the file to be processed) - - Options: - obj -- `Options` for the distiller - raise RDFaError -- when called via CGI, this encapsulates the possible - exceptions raised by the RDFLib serializer or the processing itself - """ - html = dom.documentElement - # Perform the built-in and external transformations on the HTML tree. This is, - # in simulated form, the hGRDDL approach of Ben Adida. - for trans in options.transformers + BUILT_IN_TRANSFORMERS: - trans(html, options) - # Collect the initial state. This takes care of things - # like base, top level namespace settings, etc. - # Ensure the proper initialization. - state = ExecutionContext(html, graph, base=base, options=options) - # The top level subject starts with the current document; this - # is used by the recursion - subject = URIRef(state.base) - # Parse the whole thing recursively and fill the graph. - parse_one_node(html, graph, subject, state, []) - if options.comment_graph.graph != None: - # Add the content of the comment graph to the output. - graph.bind("dist", DIST_NS) - for t in options.comment_graph.graph: - graph.add(t) - -def _try_process_source(stream, options, encoding): - """ - Tries to parse input as xhtml, xml (e.g. svg) or html(5), modifying options - while figuring out input.. - - Returns a DOM tree. - """ - parse = xml.dom.minidom.parse - try: - dom = parse(stream) - # Try to second-guess the input type - # This is _not_ really kosher, but the minidom is not really namespace aware... - # In practice the goal is to have the system recognize svg content automatically - # First see if there is a default namespace defined for the document: - top = dom.documentElement - if top.hasAttribute("xmlns"): - key = (top.getAttribute("xmlns"), top.nodeName) - if key in _HOST_LANG: - options.host_language = _HOST_LANG[key] - return dom - except: - # XML Parsing error in the input - type, value, traceback = sys.exc_info() - if options.host_language == GENERIC_XML or options.lax == False: - raise RDFaError('Parsing error in input file: "%s"' % value) - - # XML Parsing error in the input - msg = "XHTML Parsing error in input file: %s. Falling back on the HTML5 parser" % value - if options != None and options.warnings: - options.comment_graph.add_warning(msg) - - # in Ivan's original code he reopened the stream if it was from urllib - if isinstance(stream, urllib.addinfourl): - stream = urllib.urlopen(stream.url) - - return _process_html5_source(stream, options, encoding) - - -def _process_html5_source(stream, options, encoding): - # Now try to see if and HTML5 parser is an alternative... - try: - from html5lib import HTMLParser, treebuilders - except ImportError: - # no alternative to the XHTML error, because HTML5 parser not available... - msg2 = 'XHTML Parsing error in input file: %s. Though parsing is lax, HTML5 parser not available. Try installing html5lib ' - raise RDFaError(msg2) - - parser = HTMLParser(tree=treebuilders.getTreeBuilder("dom")) - parse = parser.parse - try: - dom = parse(stream, encoding) - # The host language has changed - options.host_language = HTML5_RDFA - except: - # Well, even the HTML5 parser could not do anything with this... - (type, value, traceback) = sys.exc_info() - msg2 = 'Parsing error in input file as HTML5: "%s"' % value - raise RDFaError, msg2 - - return dom diff --git a/doc/rdflib3/plugins/parsers/rdfa/embeddedrdf.py b/doc/rdflib3/plugins/parsers/rdfa/embeddedrdf.py deleted file mode 100644 index 4a9b015..0000000 --- a/doc/rdflib3/plugins/parsers/rdfa/embeddedrdf.py +++ /dev/null @@ -1,36 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Extracting possible embedded RDF/XML content from the file and parse it separately into the Graph. This is used, for example -by U{SVG 1.2 Tiny}. - -@author: U{Ivan Herman} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE} -@contact: Ivan Herman, ivan@w3.org -""" - -from StringIO import StringIO - -__all__ = ['handle_embeddedRDF'] - -def handle_embeddedRDF(node, graph, state): - """ - Check if the node is the top level rdf element for RDF/XML. If so, the content is parsed and added to the target graph. Note that if an separate - base is defined in the state, the C{xml:base} attribute will be added to the C{rdf} node before parsing. - @param node: a DOM node for the top level xml element - @param graph: target rdf graph - @type graph: RDFLib's Graph object instance - @param state: the inherited state (namespaces, lang, etc) - @type state: L{State.ExecutionContext} - @return: whether an RDF/XML content has been detected or not. If TRUE, the RDFa processing should not occur on the node and its descendents. - @rtype: Boolean - - """ - if node.localName == "RDF" and node.namespaceURI == "http://www.w3.org/1999/02/22-rdf-syntax-ns#": - node.setAttribute("xml:base",state.base) - rdf = StringIO(node.toxml()) - graph.parse(rdf) - return True - else: - return False - diff --git a/doc/rdflib3/plugins/parsers/rdfa/literal.py b/doc/rdflib3/plugins/parsers/rdfa/literal.py deleted file mode 100644 index 2ab9b44..0000000 --- a/doc/rdflib3/plugins/parsers/rdfa/literal.py +++ /dev/null @@ -1,180 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Implementation of the Literal handling. Details of the algorithm are described on -U{RDFa Task Force's wiki page}. - -@summary: RDFa Literal generation -@requires: U{RDFLib package} -@organization: U{World Wide Web Consortium} -@author: U{Ivan Herman} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE} -""" - -import re -from rdflib.namespace import RDF -from rdflib.term import Literal - -__all__ = ['generate_literal'] - -XMLLiteral = RDF.XMLLiteral - - -def __putBackEntities(str): - """Put 'back' entities for the '&', '<', and '>' characters, to produce kosher XML string. - Used by XML Literal - @param str: string to be converted - @return: string with entities - @rtype: string - """ - return str.replace('&', '&').replace('<', '<').replace('>', '>') - -#### The real meat... -def generate_literal(node, graph, subject, state): - """Generate the literal the C{@property}, taking into account datatype, etc. - Note: this method is called only if the C{@property} is indeed present, no need to check. - - This method is an encoding of the algorithm documented - U{task force's wiki page}. - - The method returns a value whether the literal is a 'normal' literal (regardless of its datatype) - or an XML Literal. The return value is True or False, respectively. This value is used to control whether - the parser should stop recursion. This also means that that if the literal is generated from @content, - the return value is False, regardless of the possible @datatype value. - - @param node: DOM element node - @param graph: the (RDF) graph to add the properies to - @param subject: the RDFLib URIRef serving as a subject for the generated triples - @param state: the current state to be used for the CURIE-s - @type state: L{State.ExecutionContext} - @return: whether the literal is a 'normal' or an XML Literal (return value is True or False, respectively). Note that if the literal is generated from @content, the return value is False, regardless of the possible @datatype value. - @rtype: Boolean - """ - def _get_literal(Pnode): - """ - Get (recursively) the full text from a DOM Node. - - @param Pnode: DOM Node - @return: string - """ - rc = "" - for node in Pnode.childNodes: - if node.nodeType == node.TEXT_NODE: - rc = rc + node.data - elif node.nodeType == node.ELEMENT_NODE: - rc = rc + _get_literal(node) - - # The decision of the group in February 2008 is not to normalize the result by default. - # This is reflected in the default value of the option - if state.options.space_preserve: - return rc - else: - return re.sub(r'(\r| |\n|\t)+', " ", rc).strip() - # end getLiteral - - def _get_XML_literal(Pnode): - """ - Get (recursively) the XML Literal content of a DOM Node. (Most of the processing is done - via a C{node.toxml} call of the xml minidom implementation.) - - @param Pnode: DOM Node - @return: string - """ - def collectPrefixes(prefixes, node): - def addPf(prefx, string): - pf = string.split(':')[0] - if pf != string and pf not in prefx : prefx.append(pf) - # edn addPf - - # first the local name of the node - addPf(prefixes, node.tagName) - # get all the attributes and children - for child in node.childNodes: - if child.nodeType == node.ELEMENT_NODE: - collectPrefixes(prefixes, child) - elif child.nodeType == node.ATTRIBUTE_NODE: - addPf(prefixes, node.child.name) - # end collectPrefixes - - rc = "" - prefixes = [] - for node in Pnode.childNodes: - if node.nodeType == node.ELEMENT_NODE: - collectPrefixes(prefixes, node) - - for node in Pnode.childNodes: - if node.nodeType == node.TEXT_NODE: - rc = rc + __putBackEntities(node.data) - elif node.nodeType == node.ELEMENT_NODE: - # Decorate the element with namespaces and lang values - for prefix in prefixes: - if prefix in state.ns and not node.hasAttribute("xmlns:%s" % prefix): - node.setAttribute("xmlns:%s" % prefix, "%s" % state.ns[prefix]) - # Set the default namespace, if not done (and is available) - if not node.getAttribute("xmlns") and state.defaultNS != None: - node.setAttribute("xmlns", state.defaultNS) - # Get the lang, if necessary - if not node.getAttribute("xml:lang") and state.lang != None: - node.setAttribute("xml:lang", state.lang) - rc = rc + node.toxml() - return rc - # If XML Literals must be canonicalized for space, then this is the return line: - #return re.sub(r'(\r| |\n|\t)+', " ", rc).strip() - # end getXMLLiteral - - # Most of the times the literal is a 'normal' one, ie, not an XML Literal - retval = True - - # Get the Property URI-s - props = state.get_resources(node.getAttribute("property"), prop=True) - - # Get, if exists, the value of @datatype, and figure out the language - datatype = None - dtset = False - lang = state.lang - if node.hasAttribute("datatype"): - dtset = True - dt = node.getAttribute("datatype") - if dt != "": - datatype = state.get_resource(dt) - lang = None - - # The simple case: separate @content attribute - if node.hasAttribute("content"): - val = node.getAttribute("content") - object = Literal(node.getAttribute("content"), datatype=datatype, lang=lang) - # The value of datatype has been set, and the keyword paramaters take care of the rest - else: - # see if there *is* a datatype (even if it is empty!) - if dtset: - # yep. The Literal content is the pure text part of the current element: - # We have to check whether the specified datatype is, in fact, and - # explicit XML Literal - if datatype == XMLLiteral: - object = Literal(_get_XML_literal(node), datatype=XMLLiteral) - retval = False - else: - object = Literal(_get_literal(node), datatype=datatype, lang=lang) - else: - # no controlling @datatype. We have to see if there is markup in the contained - # element - if True in [ n.nodeType == node.ELEMENT_NODE for n in node.childNodes ]: - # yep, and XML Literal should be generated - object = Literal(_get_XML_literal(node), datatype=XMLLiteral) - retval = False - else: - val = _get_literal(node) - # At this point, there might be entities in the string that are returned as real characters by the dom - # implementation. That should be turned back - object = Literal(_get_literal(node), lang=lang) - - # NOTE: rdflib<2.5 didn't equal Literal with lang="", hence this check - # proably always passed? - # All tests pass with this check removed; going with that.. - ## The object may be empty, for example in an ill-defined element... - if True:#object != "": - for prop in props: - graph.add((subject, prop, object)) - - return retval - diff --git a/doc/rdflib3/plugins/parsers/rdfa/options.py b/doc/rdflib3/plugins/parsers/rdfa/options.py deleted file mode 100644 index 0329969..0000000 --- a/doc/rdflib3/plugins/parsers/rdfa/options.py +++ /dev/null @@ -1,173 +0,0 @@ -# -*- coding: utf-8 -*- -""" - -Options class: collect the possible options that govern the parsing possibilities. It also includes a reference and -handling of the extra Graph for warnings, informations, errors. - - -@summary: RDFa parser (distiller) -@requires: U{RDFLib} -@requires: U{html5lib} for the HTML5 parsing; note possible dependecies on Python's version on the project's web site -@organization: U{World Wide Web Consortium} -@author: U{Ivan Herman} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE} - -""" - -import sys -from rdflib.graph import Graph -from rdflib.term import BNode, Literal, URIRef -from rdflib.namespace import Namespace - -__all__ = ['CommentGraph', 'Options'] - -DIST_URI = "http://www.w3.org/2007/08/pyRdfa/distiller" -DIST_NS = DIST_URI + '#' - -ns_errors = Namespace(DIST_NS) -distillerURI = URIRef(DIST_URI) - -WARNING = 'warning' -ERROR = 'error' -INFO = 'info' -DEBUG = 'debug' - -_message_properties = { - WARNING: ns_errors["warning"], - ERROR: ns_errors["error"], - INFO: ns_errors["information"], - DEBUG: ns_errors["debug"] -} - -def _add_to_comment_graph(graph, msg, prop, uri): - """ - Add a distiller message to the graph. - - @param graph: RDFLib Graph - @param msg: message of an exception - @type msg: RDFLIb Literal - @param prop: the property to be used - @type prop: string, must be one of 'warning', 'error', 'info', 'debug' - @param uri: the top URI used to invoke the distiller - @type uri: URIRef - """ - bnode = BNode() - graph.add((distillerURI, _message_properties[prop], bnode)) - graph.add((bnode, ns_errors["onURI"], uri)) - graph.add((bnode, ns_errors["message"], msg)) - - -class CommentGraph(object): - """Class to handle the 'comment graph', ie, the (RDF) Graph containing the warnings, - error messages, and informational messages. - """ - def __init__(self, warnings = False): - """ - @param warnings: whether a graph should effectively be set up, or whether this - should just be an empty shell for the various calls to work (without effect) - """ - if warnings: - self.graph = Graph() - else: - self.graph = None - self.accumulated_literals = [] - self.baseURI = None - - def _add_triple(self, msg, prop): - obj = Literal(msg) - if self.baseURI == None: - self.accumulated_literals.append((obj,prop)) - elif self.graph != None: - _add_to_comment_graph(self.graph, obj, prop, self.baseURI) - - def set_base_URI(self, URI): - """Set the base URI for the comment triples. - - Note that this method I{must} be called at some point to complete the triples. Without it the triples - added via L{add_warning}, L{add_info}, etc, will not be added to the final graph. - - @param URI: URIRef for the subject of the comments - """ - self.baseURI = URI - if self.graph != None: - for obj, prop in self.accumulated_literals: - _add_to_comment_graph(self.graph, obj, prop, self.baseURI) - self.accumulated_literals = [] - - def add_warning(self, txt): - """Add a warning. A comment triplet is added to the separate "warning" graph. - @param txt: the warning text. It will be preceded by the string "==== pyRdfa Warning ==== " - """ - self._add_triple(txt, WARNING) - - def add_info(self, txt): - """Add an informational comment. A comment triplet is added to the separate "warning" graph. - @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== " - """ - self._add_triple(txt, INFO) - - def add_error(self, txt): - """Add an error comment. A comment triplet is added to the separate "warning" graph. - @param txt: the information text. It will be preceded by the string "==== pyRdfa information ==== " - """ - self._add_triple(txt, ERROR) - - def _add_debug(self, txt): - self._add_triple(txt, DEBUG) - - -GENERIC_XML = 0 -XHTML_RDFA = 1 -HTML5_RDFA = 2 - -class Options(object): - """Settable options. An instance of this class is stored in - the L{execution context} of the parser. - - @ivar space_preserve: whether plain literals should preserve spaces at output or not - @type space_preserve: Boolean - @ivar comment_graph: Graph for the storage of warnings - @type comment_graph: L{CommentGraph} - @ivar warnings: whether warnings should be generated or not - @type warnings: Boolean - @ivar transformers: extra transformers - @type transformers: list - @type host_language: the host language for the RDFa attributes. Default is XHTML_RDFA, but it can be GENERIC_XML and HTML5_RDFA - @ivar host_language: integer (logically: an enumeration) - @ivar lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time - @type lax: Boolean - """ - def __init__(self, warnings=False, space_preserve=True, transformers=[], xhtml=True, lax=False): - """ - @param space_preserve: whether plain literals should preserve spaces at output or not - @type space_preserve: Boolean - @param warnings: whether warnings should be generated or not - @type warnings: Boolean - @param transformers: extra transformers - @type transformers: list - @param xhtml: initial value for the host language. If True, the value is set to XHTML_RDFA. Note that run-time the class variable might be set ot HTML5_RDFA, depending on the value of the lax flag and the result of parsing. - @type xhtml: Booelan - @param lax: whether a 'lax' parsing of XHTML (ie, HTML5) is allowed. This means that the value of the host language might change run time - @type lax: Boolean - """ - self.space_preserve = space_preserve - self.transformers = transformers - self.comment_graph = CommentGraph(warnings) - self.warnings = warnings - self.lax = lax - if xhtml: - self.host_language = XHTML_RDFA - else: - self.host_language = GENERIC_XML - - def __str__(self): - retval = """Current options: - space_preserve : %s - warnings : %s - lax parsing : %s - host language : %s - """ - return retval % (self.space_preserve, self.warnings, self.lax, self.host_language) - - diff --git a/doc/rdflib3/plugins/parsers/rdfa/parse.py b/doc/rdflib3/plugins/parsers/rdfa/parse.py deleted file mode 100644 index d5b411f..0000000 --- a/doc/rdflib3/plugins/parsers/rdfa/parse.py +++ /dev/null @@ -1,200 +0,0 @@ -# -*- coding: utf-8 -*- -""" -The core parsing function of RDFa. Some details are -put into other modules to make it clearer to update/modify (eg, generation of literals, or managing the current state). - -@summary: RDFa core parser processing step -@requires: U{RDFLib package} -@organization: U{World Wide Web Consortium} -@author: U{Ivan Herman} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE} -""" - -from rdflib.term import BNode, URIRef -from rdflib.namespace import RDF - -from rdflib.plugins.parsers.rdfa.state import ExecutionContext -from rdflib.plugins.parsers.rdfa.literal import generate_literal -from rdflib.plugins.parsers.rdfa.embeddedrdf import handle_embeddedRDF -from rdflib.plugins.parsers.rdfa.options import GENERIC_XML, XHTML_RDFA, HTML5_RDFA - -__all__ = ['parse_one_node'] - -def parse_one_node(node, graph, parent_object, incoming_state, parent_incomplete_triples): - """The (recursive) step of handling a single node. See the - U{RDFa syntax document} for further details. - - @param node: the DOM node to handle - @param graph: the RDF graph - @type graph: RDFLib's Graph object instance - @param parent_object: the parent's object, as an RDFLib URIRef - @param incoming_state: the inherited state (namespaces, lang, etc) - @type incoming_state: L{State.ExecutionContext} - @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not) - by the current node. - @return: whether the caller has to complete it's parent's incomplete triples - @rtype: Boolean - """ - def _get_resources_for_attr(attr): - """Get a series of resources encoded via CURIE-s for an attribute on a specific node. - @param attr: the name of the attribute - @return: a list of RDFLib URIRef instances - """ - if not node.hasAttribute(attr): - return [] - else: - rel = (attr == "rel") or (attr == "rev") - prop = (attr == "property") - return state.get_resources(node.getAttribute(attr), rel, prop) - - # Update the state. This means, for example, the possible local settings of - # namespaces and lang - state = ExecutionContext(node, graph, inherited_state=incoming_state) - - #--------------------------------------------------------------------------------- - # Handle the special case for embedded RDF, eg, in SVG1.2. - # This may add some triples to the target graph that does not originate from RDFa parsing - # If the function return TRUE, that means that an rdf:RDF has been found. No - # RDFa parsing should be done on that subtree, so we simply return... - if state.options.host_language == GENERIC_XML and node.nodeType == node.ELEMENT_NODE and handle_embeddedRDF(node, graph, state): - return - - #--------------------------------------------------------------------------------- - # First, let us check whether there is anything to do at all. Ie, - # whether there is any relevant RDFa specific attribute on the element - # - if not _has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src"): - # nop, there is nothing to do here, just go down the tree and return... - for n in node.childNodes: - if n.nodeType == node.ELEMENT_NODE : parse_one_node(n, graph, parent_object, state, parent_incomplete_triples) - return - - - #----------------------------------------------------------------- - # The goal is to establish the subject and object for local processing - # The behaviour is slightly different depending on the presense or not - # of the @rel/@rev attributes - current_subject = None - current_object = None - - if _has_one_of_attributes(node, "rel", "rev"): - # in this case there is the notion of 'left' and 'right' of @rel/@rev - # in establishing the new Subject and the objectResource - - # set first the subject - if node.hasAttribute("about"): - current_subject = state.get_Curie_ref(node.getAttribute("about")) - elif node.hasAttribute("src"): - current_subject = state.get_URI_ref(node.getAttribute("src")) - elif node.hasAttribute("typeof"): - current_subject = BNode() - - # get_URI_ref may return None in case of an illegal Curie, so - # we have to be careful here, not use only an 'else' - if current_subject == None: - current_subject = parent_object - - # set the object resource - if node.hasAttribute("resource"): - current_object = state.get_Curie_ref(node.getAttribute("resource")) - elif node.hasAttribute("href"): - current_object = state.get_URI_ref(node.getAttribute("href")) - else: - # in this case all the various 'resource' setting attributes - # behave identically, except that their value might be different - # in terms of CURIE-s and they also have their own priority, of course - if node.hasAttribute("about"): - current_subject = state.get_Curie_ref(node.getAttribute("about")) - elif node.hasAttribute("src"): - current_subject = state.get_URI_ref(node.getAttribute("src")) - elif node.hasAttribute("resource"): - current_subject = state.get_Curie_ref(node.getAttribute("resource")) - elif node.hasAttribute("href"): - current_subject = state.get_URI_ref(node.getAttribute("href")) - elif node.hasAttribute("typeof"): - current_subject = BNode() - - # get_URI_ref may return None in case of an illegal Curie, so - # we have to be careful here, not use only an 'else' - if current_subject == None: - current_subject = parent_object - - # in this case no non-literal triples will be generated, so the - # only role of the current_objectResource is to be transferred to - # the children node - current_object = current_subject - - # --------------------------------------------------------------------- - # The possible typeof indicates a number of type statements on the newSubject - for defined_type in _get_resources_for_attr("typeof"): - graph.add((current_subject, RDF.type, defined_type)) - - # --------------------------------------------------------------------- - # In case of @rel/@rev, either triples or incomplete triples are generated - # the (possible) incomplete triples are collected, to be forwarded to the children - incomplete_triples = [] - for prop in _get_resources_for_attr("rel"): - theTriple = (current_subject, prop, current_object) - if current_object != None: - graph.add(theTriple) - else: - incomplete_triples.append(theTriple) - for prop in _get_resources_for_attr("rev"): - theTriple = (current_object, prop, current_subject) - if current_object != None: - graph.add(theTriple) - else: - incomplete_triples.append(theTriple) - - # ---------------------------------------------------------------------- - # Generation of the literal values. The newSubject is the subject - # A particularity of property is that it stops the parsing down the DOM tree if an XML Literal is generated, - # because everything down there is part of the generated literal. For this purpose the recurse flag is set (and used later - # in the parsing process). - if node.hasAttribute("property"): - # Generate the literal. It has been put it into a separate module to make it more managable - # the overall return value should be set to true if any valid triple has been generated - recurse = generate_literal(node, graph, current_subject, state) - else: - recurse = True - - # ---------------------------------------------------------------------- - # Setting the current object to a bnode is setting up a possible resource - # for the incomplete triples downwards - if current_object == None: - object_to_children = BNode() - else: - object_to_children = current_object - - #----------------------------------------------------------------------- - # Here is the recursion step for all the children - if recurse: - for n in node.childNodes: - if n.nodeType == node.ELEMENT_NODE: - parse_one_node(n, graph, object_to_children, state, incomplete_triples) - - # --------------------------------------------------------------------- - # At this point, the parent's incomplete triples may be completed - for s, p, o in parent_incomplete_triples: - if s == None: s = current_subject - if o == None: o = current_subject - graph.add((s, p, o)) - - # ------------------------------------------------------------------- - # This should be it... - # ------------------------------------------------------------------- - return - - -def _has_one_of_attributes(node, *args): - """ - Check whether one of the listed attributes is present on a (DOM) node. - @param node: DOM element node - @param args: possible attribute names - @return: True or False - @rtype: Boolean - """ - return True in [ node.hasAttribute(attr) for attr in args ] - - diff --git a/doc/rdflib3/plugins/parsers/rdfa/state.py b/doc/rdflib3/plugins/parsers/rdfa/state.py deleted file mode 100644 index 31caf41..0000000 --- a/doc/rdflib3/plugins/parsers/rdfa/state.py +++ /dev/null @@ -1,434 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Parser's execution context (a.k.a. state) object and handling. The state includes: - - - dictionary for namespaces. Keys are the namespace prefixes, values are RDFLib Namespace instances - - language, retrieved from C{@xml:lang} - - URI base, determined by (or set explicitly). This is a little bit superfluous, because the current RDFa syntax does not make use of C{@xml:base}; ie, this could be a global value. But the structure is prepared to add C{@xml:base} easily, if needed. - - options, in the form of an L{Options} instance - -The execution context object is also used to turn relative URI-s and CURIES into real URI references. - -@summary: RDFa core parser processing step -@requires: U{RDFLib package} -@organization: U{World Wide Web Consortium} -@author: U{Ivan Herman} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE} - -@var XHTML_PREFIX: prefix for the XHTML vocabulary namespace -@var XHTML_URI: URI prefix of the XHTML vocabulary -@var RDFa_PROFILE: the official RDFa profile URI -@var RDFa_VERSION: the official version string of RDFa -@var usual_protocols: list of "usual" protocols (used to generate warnings when CURIES are not protected) -@var _predefined_rel: list of predefined C{@rev} and C{@rel} values that should be mapped onto the XHTML vocabulary URI-s. -@var _predefined_property: list of predefined C{@property} values that should be mapped onto the XHTML vocabulary URI-s. (At present, this list is empty, but this has been an ongoing question in the group, so the I{mechanism} of checking is still there.) -@var __bnodes: dictionary of blank node names to real blank node -@var __empty_bnode: I{The} Bnode to be associated with the CURIE of the form "C{_:}". -""" - -from rdflib.namespace import Namespace, RDF, RDFS -from rdflib.term import BNode, URIRef -from rdflib.plugins.parsers.rdfa.options import Options, GENERIC_XML, XHTML_RDFA, HTML5_RDFA - -import re -import random -import urlparse - -__all__ = ['ExecutionContext'] - -RDFa_PROFILE = "http://www.w3.org/1999/xhtml/vocab" -RDFa_VERSION = "XHTML+RDFa 1.0" -RDFa_PublicID = "-//W3C//DTD XHTML+RDFa 1.0//EN" -RDFa_SystemID = "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd" - -usual_protocols = ["http", "https", "mailto", "ftp", "urn", "gopher", "tel", "ldap", "doi", "news"] - -####Predefined @rel/@rev/@property values -# predefined values for the @rel and @rev values. These are considered to be part of a specific -# namespace, defined by the RDFa document. -# At the moment, there are no predefined @property values, but the code is there in case -# some will be defined -XHTML_PREFIX = "xhv" -XHTML_URI = "http://www.w3.org/1999/xhtml/vocab#" - -_predefined_rel = ['alternate', 'appendix', 'cite', 'bookmark', 'chapter', 'contents', -'copyright', 'glossary', 'help', 'icon', 'index', 'meta', 'next', 'p3pv1', 'prev', -'role', 'section', 'subsection', 'start', 'license', 'up', 'last', 'stylesheet', 'first', 'top'] - -_predefined_property = [] - -#### Managing blank nodes for CURIE-s -__bnodes = {} -__empty_bnode = BNode() -def _get_bnode_from_Curie(var): - """ - 'Var' gives the string after the coloumn in a CURIE of the form C{_:XXX}. If this variable has been used - before, then the corresponding BNode is returned; otherwise a new BNode is created and - associated to that value. - @param var: CURIE BNode identifier - @return: BNode - """ - if len(var) == 0: - return __empty_bnode - if var in __bnodes: - return __bnodes[var] - else: - retval = BNode() - __bnodes[var] = retval - return retval - -#### Quote URI-s -import urllib -# 'safe' characters for the URI quoting, ie, characters that can safely stay as they are. Other -# special characters are converted to their %.. equivalents for namespace prefixes -_unquotedChars = ':/\?=#' -_warnChars = [' ', '\n', '\r', '\t'] -def _quote(uri, options): - """ - 'quote' a URI, ie, exchange special characters for their '%..' equivalents. Some of the characters - may stay as they are (listed in L{_unquotedChars}. If one of the characters listed in L{_warnChars} - is also in the uri, an extra warning is also generated. - @param uri: URI - @param options: - @type options: L{Options} - """ - suri = uri.strip() - for c in _warnChars: - if suri.find(c) != -1: - if options != None: - options.comment_graph.add_warning('Unusual character in uri:%s; possible error?' % suri) - break - return urllib.quote(suri, _unquotedChars) - - -#### Core Class definition -class ExecutionContext(object): - """State at a specific node, including the current set - of namespaces in the RDFLib sense, the - current language, and the base. The class is also used to interpret URI-s and CURIE-s to produce - URI references for RDFLib. - - @ivar options: reference to the overall options - @type ivar: L{Options.Options} - @ivar base: the 'base' URI - @ivar defaultNS: default namespace - @ivar lang: language tag (possibly None) - @ivar ns: dictionary of namespaces - @type ns: dictionary, each value is an RDFLib Namespace object - - """ - def __init__(self, node, graph, inherited_state=None, base="", options=None): - """ - @param node: the current DOM Node - @param graph: the RDFLib Graph - @keyword inherited_state: the state as inherited - from upper layers. This inherited_state is mixed with the state information - retrieved from the current node. - @type inherited_state: L{State.ExecutionContext} - @keyword base: string denoting the base URI for the specific node. This overrides the possible - base inherited from the upper layers. The - current XHTML+RDFa syntax does not allow the usage of C{@xml:base}, but SVG1.2 does, so this is - necessary for SVG (and other possible XML dialects that accept C{@xml:base}) - @keyword options: invocation option - @type options: L{Options} - """ - #----------------------------------------------------------------- - # settling the base - # note that, strictly speaking, it is not necessary to add the base to the - # context, because there is only one place to set it ( element of the
). - # It is done because it is prepared for a possible future change in direction of - # accepting xml:base on each element. - # At the moment, it is invoked with a 'None' at the top level of parsing, that is - # when the element is looked for. - if inherited_state: - self.base = inherited_state.base - self.options = inherited_state.options - # for generic XML versions the xml:base attribute should be handled - if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"): - self.base = node.getAttribute("xml:base") - else: - # this is the branch called from the very top - self.base = "" - for bases in node.getElementsByTagName("base"): - if bases.hasAttribute("href"): - self.base = bases.getAttribute("href") - continue - if self.base == "": - self.base = base - - # this is just to play safe. I believe this branch should actually not happen... - if options == None: - from pyRdfa import Options - self.options = Options() - else: - self.options = options - - # xml:base is not part of XHTML+RDFa, but it is a valid setting for, say, SVG1.2 - if self.options.host_language == GENERIC_XML and node.hasAttribute("xml:base"): - self.base = node.getAttribute("xml:base") - - self.options.comment_graph.set_base_URI(URIRef(_quote(base, self.options))) - - # check the the presense of the @profile and or @version attribute for the RDFa profile... - # This whole branch is, however, irrelevant if the host language is a generic XML one (eg, SVG) - if self.options.host_language != GENERIC_XML: - doctype = None - try: - # I am not 100% sure the HTML5 minidom implementation has this, so let us just be - # cautious here... - doctype = node.ownerDocument.doctype - except: - pass - if doctype == None or not( doctype.publicId == RDFa_PublicID and doctype.systemId == RDFa_SystemID ): - # next level: check the version - html = node.ownerDocument.documentElement - if not( html.hasAttribute("version") and RDFa_VERSION == html.getAttribute("version") ): - # see if least the profile has been set - # Find the element - head = None - for index in range(0, html.childNodes.length-1): - if html.childNodes.item(index).nodeName == "head": - head = html.childNodes.item(index) - break - if not( head != None and head.hasAttribute("profile") and RDFa_PROFILE in head.getAttribute("profile").strip().split() ): - if self.options.host_language == HTML5_RDFA: - self.options.comment_graph.add_info("RDFa profile or RFDa version has not been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless. Note that in the case of HTML5, the DOCTYPE setting may not work...") - else: - self.options.comment_graph.add_info("None of the RDFa DOCTYPE, RDFa profile, or RFDa version has been set (for a correct identification of RDFa). This is not a requirement for RDFa, but it is advised to use one of those nevertheless.") - - #----------------------------------------------------------------- - # Stripping the fragment ID from the base URI, as demanded by RFC 3986 - self.base = urlparse.urldefrag(self.base)[0] - - #----------------------------------------------------------------- - # Settling the language tags - # check first the lang or xml:lang attribute - # RDFa does not allow the lang attribute. HTML5 relies :-( on @lang; - # I just want to be prepared here... - if options != None and options.host_language == HTML5_RDFA and node.hasAttribute("lang"): - self.lang = node.getAttribute("lang") - if len(self.lang) == 0 : self.lang = None - elif node.hasAttribute("xml:lang"): - self.lang = node.getAttribute("xml:lang") - if len(self.lang) == 0 : self.lang = None - elif inherited_state: - self.lang = inherited_state.lang - else: - self.lang = None - - #----------------------------------------------------------------- - # Handling namespaces - # First get the local xmlns declarations/namespaces stuff. - dict = {} - for i in range(0, node.attributes.length): - attr = node.attributes.item(i) - if attr.name.find('xmlns:') == 0 : - # yep, there is a namespace setting - key = attr.localName - if key != "" : # exclude the top level xmlns setting... - if key == "_": - if warning: self.options.comment_graph.add_error("The '_' local CURIE prefix is reserved for blank nodes, and cannot be changed" ) - elif key.find(':') != -1: - if warning: self.options.comment_graph.add_error("The character ':' is not valid in a CURIE Prefix" ) - else : - # quote the URI, ie, convert special characters into %.. This is - # true, for example, for spaces - uri = _quote(attr.value, self.options) - # 1. create a new Namespace entry - ns = Namespace(uri) - # 2. 'bind' it in the current graph to - # get a nicer output - graph.bind(key, uri) - # 3. Add an entry to the dictionary - dict[key] = ns - - # See if anything has been collected at all. - # If not, the namespaces of the incoming state is - # taken over - self.ns = {} - if len(dict) == 0 and inherited_state: - self.ns = inherited_state.ns - else: - if inherited_state: - for k in inherited_state.ns : self.ns[k] = inherited_state.ns[k] - # copying the newly found namespace, possibly overwriting - # incoming values - for k in dict : self.ns[k] = dict[k] - else: - self.ns = dict - - # see if the xhtml core vocabulary has been set - self.xhtml_prefix = None - for key in self.ns.keys(): - if XHTML_URI == str(self.ns[key]): - self.xhtml_prefix = key - break - if self.xhtml_prefix == None: - if XHTML_PREFIX not in self.ns: - self.ns[XHTML_PREFIX] = Namespace(XHTML_URI) - self.xhtml_prefix = XHTML_PREFIX - else: - # the most disagreeable thing, the user has used - # the prefix for something else... - self.xhtml_prefix = XHTML_PREFIX + '_' + ("%d" % random.randint(1, 1000)) - self.ns[self.xhtml_prefix] = Namespace(XHTML_URI) - graph.bind(self.xhtml_prefix, XHTML_URI) - - # extra tricks for unusual usages... - # if the 'rdf' prefix is not used, it is artificially added... - if "rdf" not in self.ns: - self.ns["rdf"] = RDF - if "rdfs" not in self.ns: - self.ns["rdfs"] = RDFS - - # Final touch: setting the default namespace... - if node.hasAttribute("xmlns"): - self.defaultNS = node.getAttribute("xmlns") - elif inherited_state and inherited_state.defaultNS != None: - self.defaultNS = inherited_state.defaultNS - else: - self.defaultNS = None - - def _get_predefined_rels(self, val, warning): - """Get the predefined URI value for the C{@rel/@rev} attribute. - @param val: attribute name - @param warning: whether a warning should be generated or not - @type warning: boolean - @return: URIRef for the predefined URI (or None) - """ - vv = val.strip().lower() - if vv in _predefined_rel: - return self.ns[self.xhtml_prefix][vv] - else: - if warning: self.options.comment_graph.add_warning("invalid @rel/@rev value: '%s'" % val) - return None - - def _get_predefined_properties(self, val, warning): - """Get the predefined value for the C{@property} attribute. - @param val: attribute name - @param warning: whether a warning should be generated or not - @type warning: boolean - @return: URIRef for the predefined URI (or None) - """ - vv = val.strip().lower() - if vv in _predefined_property: - return self.ns[self.xhtml_prefix][vv] - else: - if warning: self.options.comment_graph.add_warning("invalid @property value: '%s'" % val) - return None - - def get_resource(self, val, rel=False, prop=False, warning=True): - """Get a resource for a CURIE. - The input argument is a CURIE; this is interpreted - via the current namespaces and the corresponding URI Reference is returned - @param val: string of the form "prefix:lname" - @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted - @keyword prop: whether the predefined C{@property} values should also be interpreted - @return: an RDFLib URIRef instance (or None) - """ - if val == "": - return None - elif val.find(":") != -1: - key = val.split(":", 1)[0] - lname = val.split(":", 1)[1] - if key == "_": - # A possible error: this method is invoked for property URI-s, which - # should not refer to a blank node. This case is checked and a possible - # error condition is handled - self.options.comment_graph.add_error("Blank node CURIE cannot be used in property position: _:%s" % lname) - return None - if key == "": - # This is the ":blabla" case - key = self.xhtml_prefix - else: - # if the resources correspond to a @rel or @rev or @property, then there - # may be one more possibility here, namely that it is one of the - # predefined values - if rel: - return self._get_predefined_rels(val, warning) - elif prop: - return self._get_predefined_properties(val, warning) - else: - self.options.comment_graph.add_warning("Invalid CURIE (without prefix): '%s'" % val) - return None - - if key not in self.ns: - self.options.comment_graph.add_error("CURIE used with non declared prefix: %s" % key) - return None - else: - if lname == "": - return URIRef(str(self.ns[key])) - else: - return self.ns[key][lname] - - def get_resources(self, val, rel=False, prop=False): - """Get a series of resources encoded in CURIE-s. - The input argument is a list of CURIE-s; these are interpreted - via the current namespaces and the corresponding URI References are returned. - @param val: strings of the form prefix':'lname, separated by space - @keyword rel: whether the predefined C{@rel/@rev} values should also be interpreted - @keyword prop: whether the predefined C{@property} values should also be interpreted - @return: a list of RDFLib URIRef instances (possibly empty) - """ - val.strip() - resources = [ self.get_resource(v, rel, prop) for v in val.split() if v != None ] - return [ r for r in resources if r != None ] - - def get_URI_ref(self, val): - """Create a URI RDFLib resource for a URI. - The input argument is a URI. It is checked whether it is a local - reference with a '#' or not. If yes, a URIRef combined with the - stored base value is returned. In both cases a URIRef for a full URI is created - and returned - @param val: URI string - @return: an RDFLib URIRef instance - """ - if val == "": - return URIRef(self.base) - elif val[0] == '[' and val[-1] == ']': - self.options.comment_graph.add_error("Illegal usage of CURIE: %s" % val) - return None - else: - return URIRef(urlparse.urljoin(self.base, val)) - - def get_Curie_ref(self, val): - """Create a URI RDFLib resource for a CURIE. - The input argument is a CURIE. This means that it is: - - either of the form [a:b] where a:b should be resolved as an - 'unprotected' CURIE, or - - it is a traditional URI (relative or absolute) - - If the second case the URI value is also compared to 'usual' URI - protocols ('http', 'https', 'ftp', etc) (see L{usual_protocols}). - If there is no match, a warning is generated (indeed, a frequent - mistake in authoring RDFa is to forget the '[' and ']' characters to - "protect" CURIE-s.) - - @param val: CURIE string - @return: an RDFLib URIRef instance - """ - if len(val) == 0: - return URIRef(self.base) - elif val[0] == "[": - if val[-1] == "]": - curie = val[1:-1] - # A possible Blank node reference should be separated here: - if len(curie) >= 2 and curie[0] == "_" and curie[1] == ":": - return _get_bnode_from_Curie(curie[2:]) - else: - return self.get_resource(val[1:-1]) - else: - # illegal CURIE... - self.options.comment_graph.add_error("Illegal CURIE: %s" % val) - return None - else: - # check the value, to see if an error may have been made... - # Usual protocol values in the URI - v = val.strip().lower() - protocol = urlparse.urlparse(val)[0] - if protocol != "" and protocol not in usual_protocols: - err = "Possible URI error with '%s'; the intention may have been to use a protected CURIE" % val - self.options.comment_graph.add_warning(err) - return self.get_URI_ref(val) - diff --git a/doc/rdflib3/plugins/parsers/rdfa/transform/__init__.py b/doc/rdflib3/plugins/parsers/rdfa/transform/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/doc/rdflib3/plugins/parsers/rdfa/transform/headabout.py b/doc/rdflib3/plugins/parsers/rdfa/transform/headabout.py deleted file mode 100644 index 0cf8f7a..0000000 --- a/doc/rdflib3/plugins/parsers/rdfa/transform/headabout.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Simple transfomer: the C{@about=""} is added to the C{} and C{} elements (unless something is already there). -Note that this transformer is always invoked by the parser because this behaviour is mandated by the RDFa syntax. - -@summary: Add a top "about" to and -@requires: U{RDFLib package} -@organization: U{World Wide Web Consortium} -@author: U{Ivan Herman} -@license: This software is available for use under the -U{W3C® SOFTWARE NOTICE AND LICENSE} -@contact: Ivan Herman, ivan@w3.org -""" - -def head_about_transform(html, options): - """ - @param html: a DOM node for the top level html element - @param options: invocation options - @type options: L{Options} - """ - for top in html.getElementsByTagName("head"): - if not top.hasAttribute("about"): - top.setAttribute("about", "") - for top in html.getElementsByTagName("body"): - if not top.hasAttribute("about"): - top.setAttribute("about", "") - diff --git a/doc/rdflib3/plugins/parsers/rdfxml.py b/doc/rdflib3/plugins/parsers/rdfxml.py deleted file mode 100644 index 00e8d6a..0000000 --- a/doc/rdflib3/plugins/parsers/rdfxml.py +++ /dev/null @@ -1,579 +0,0 @@ -# Copyright (c) 2002, Daniel Krech, http://eikeon.com/ -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# -# * Neither the name of Daniel Krech nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -""" -from xml.sax import make_parser -from xml.sax.handler import ErrorHandler -from xml.sax.saxutils import handler, quoteattr, escape -from urlparse import urljoin, urldefrag - -from rdflib.namespace import RDF, is_ncname -from rdflib.term import URIRef -from rdflib.term import BNode -from rdflib.term import Literal -from rdflib.exceptions import ParserError, Error -from rdflib.parser import Parser - -__all__ = ['create_parser', 'BagID', 'ElementHandler', 'RDFXMLHandler', 'RDFXMLParser'] - -RDFNS = RDF - -# http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI -# A mapping from unqualified terms to there qualified version. -UNQUALIFIED = {"about" : RDF.about, - "ID" : RDF.ID, - "type" : RDF.type, - "resource": RDF.resource, - "parseType": RDF.parseType} - -# http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms -CORE_SYNTAX_TERMS = [RDF.RDF, RDF.ID, RDF.about, RDF.parseType, RDF.resource, RDF.nodeID, RDF.datatype] - -# http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms -SYNTAX_TERMS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] - -# http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms -OLD_TERMS = [ - URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEach"), - URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#aboutEachPrefix"), - URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#bagID")] - -NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.li,] + OLD_TERMS -NODE_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.nodeID, RDF.about] - -PROPERTY_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description,] + OLD_TERMS -PROPERTY_ATTRIBUTE_EXCEPTIONS = CORE_SYNTAX_TERMS + [RDF.Description, RDF.li] + OLD_TERMS -PROPERTY_ELEMENT_ATTRIBUTES = [RDF.ID, RDF.resource, RDF.nodeID] - -XMLNS = "http://www.w3.org/XML/1998/namespace" -BASE = (XMLNS, "base") -LANG = (XMLNS, "lang") - - -class BagID(URIRef): - __slots__ = ['li'] - def __init__(self, val): - super(URIRef, self).__init__(val) - self.li = 0 - - def next_li(self): - self.li += 1 - return RDFNS[self.li] - - -class ElementHandler(object): - __slots__ = ['start', 'char', 'end', 'li', 'id', - 'base', 'subject', 'predicate', 'object', - 'list', 'language', 'datatype', 'declared', 'data'] - def __init__(self): - self.start = None - self.char = None - self.end = None - self.li = 0 - self.id = None - self.base = None - self.subject = None - self.object = None - self.list = None - self.language = None - self.datatype = None - self.declared = None - self.data = None - - def next_li(self): - self.li += 1 - return RDFNS[self.li] - - -class RDFXMLHandler(handler.ContentHandler): - - def __init__(self, store): - self.store = store - self.preserve_bnode_ids = False - self.reset() - - def reset(self): - document_element = ElementHandler() - document_element.start = self.document_element_start - document_element.end = lambda name, qname: None - self.stack = [None, document_element,] - self.ids = {} # remember IDs we have already seen - self.bnode = {} - self._ns_contexts = [{}] # contains uri -> prefix dicts - self._current_context = self._ns_contexts[-1] - - # ContentHandler methods - - def setDocumentLocator(self, locator): - self.locator = locator - - def startDocument(self): - pass - - def startPrefixMapping(self, prefix, namespace): - self._ns_contexts.append(self._current_context.copy()) - self._current_context[namespace] = prefix - self.store.bind(prefix, URIRef(namespace), override=False) - - def endPrefixMapping(self, prefix): - self._current_context = self._ns_contexts[-1] - del self._ns_contexts[-1] - - def startElementNS(self, name, qname, attrs): - stack = self.stack - stack.append(ElementHandler()) - current = self.current - parent = self.parent - base = attrs.get(BASE, None) - if base is not None: - base, frag = urldefrag(base) - if parent and parent.base: - base = urljoin(parent.base, base) - else: - systemId = self.locator.getPublicId() or self.locator.getSystemId() - if systemId: - base = urljoin(systemId, base) - else: - if parent: - base = parent.base - if base is None: - systemId = self.locator.getPublicId() or self.locator.getSystemId() - if systemId: - base, frag = urldefrag(systemId) - current.base = base - language = attrs.get(LANG, None) - if language is None: - if parent: - language = parent.language - current.language = language - current.start(name, qname, attrs) - - def endElementNS(self, name, qname): - self.current.end(name, qname) - self.stack.pop() - - def characters(self, content): - char = self.current.char - if char: - char(content) - - def ignorableWhitespace(self, content): - pass - - def processingInstruction(self, target, data): - pass - - def add_reified(self, sid, (s, p, o)): - self.store.add((sid, RDF.type, RDF.Statement)) - self.store.add((sid, RDF.subject, s)) - self.store.add((sid, RDF.predicate, p)) - self.store.add((sid, RDF.object, o)) - - def error(self, message): - locator = self.locator - info = "%s:%s:%s: " % (locator.getSystemId(), - locator.getLineNumber(), locator.getColumnNumber()) - raise ParserError(info + message) - - def get_current(self): - return self.stack[-2] - # Create a read only property called current so that self.current - # give the current element handler. - current = property(get_current) - - def get_next(self): - return self.stack[-1] - # Create a read only property that gives the element handler to be - # used for the next element. - next = property(get_next) - - def get_parent(self): - return self.stack[-3] - # Create a read only property that gives the current parent - # element handler - parent = property(get_parent) - - def absolutize(self, uri): - result = urljoin(self.current.base, uri, allow_fragments=1) - if uri and uri[-1]=="#" and result[-1]!="#": - result = "%s#" % result - return URIRef(result) - - def convert(self, name, qname, attrs): - if name[0] is None: - name = URIRef(name[1]) - else: - name = URIRef("".join(name)) - atts = {} - for (n, v) in attrs.items(): #attrs._attrs.iteritems(): # - if n[0] is None: - att = URIRef(n[1]) - else: - att = URIRef("".join(n)) - if att.startswith(XMLNS) or att[0:3].lower()=="xml": - pass - elif att in UNQUALIFIED: - #if not RDFNS[att] in atts: - atts[RDFNS[att]] = v - else: - atts[URIRef(att)] = v - return name, atts - - def document_element_start(self, name, qname, attrs): - if name[0] and URIRef("".join(name)) == RDF.RDF: - # Cheap hack so 2to3 doesn't turn it into __next__ - next = getattr(self, 'next') - next.start = self.node_element_start - next.end = self.node_element_end - else: - self.node_element_start(name, qname, attrs) - #self.current.end = self.node_element_end - # TODO... set end to something that sets start such that - # another element will cause error - - - def node_element_start(self, name, qname, attrs): - name, atts = self.convert(name, qname, attrs) - current = self.current - absolutize = self.absolutize - - # Cheap hack so 2to3 doesn't turn it into __next__ - next = getattr(self, 'next') - next.start = self.property_element_start - next.end = self.property_element_end - - if name in NODE_ELEMENT_EXCEPTIONS: - self.error("Invalid node element URI: %s" % name) - - if RDF.ID in atts: - if RDF.about in atts or RDF.nodeID in atts: - self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") - - id = atts[RDF.ID] - if not is_ncname(id): - self.error("rdf:ID value is not a valid NCName: %s" % id) - subject = absolutize("#%s" % id) - if subject in self.ids: - self.error("two elements cannot use the same ID: '%s'" % subject) - self.ids[subject] = 1 # IDs can only appear once within a document - elif RDF.nodeID in atts: - if RDF.ID in atts or RDF.about in atts: - self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") - nodeID = atts[RDF.nodeID] - if not is_ncname(nodeID): - self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) - if self.preserve_bnode_ids is False: - if nodeID in self.bnode: - subject = self.bnode[nodeID] - else: - subject = BNode() - self.bnode[nodeID] = subject - else: - subject = BNode(nodeID) - elif RDF.about in atts: - if RDF.ID in atts or RDF.nodeID in atts: - self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") - subject = absolutize(atts[RDF.about]) - else: - subject = BNode() - - if name!=RDF.Description: # S1 - self.store.add((subject, RDF.type, absolutize(name))) - - language = current.language - for att in atts: - if not att.startswith(str(RDFNS)): - predicate = absolutize(att) - try: - object = Literal(atts[att], language) - except Error, e: - self.error(e.msg) - elif att==RDF.type: #S2 - predicate = RDF.type - object = absolutize(atts[RDF.type]) - elif att in NODE_ELEMENT_ATTRIBUTES: - continue - elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3 - self.error("Invalid property attribute URI: %s" % att) - continue # for when error does not throw an exception - else: - predicate = absolutize(att) - try: - object = Literal(atts[att], language) - except Error, e: - self.error(e.msg) - self.store.add((subject, predicate, object)) - - current.subject = subject - - - def node_element_end(self, name, qname): - self.parent.object = self.current.subject - - def property_element_start(self, name, qname, attrs): - name, atts = self.convert(name, qname, attrs) - current = self.current - absolutize = self.absolutize - - # Cheap hack so 2to3 doesn't turn it into __next__ - next = getattr(self, 'next') - object = None - current.data = None - current.list = None - - if not name.startswith(str(RDFNS)): - current.predicate = absolutize(name) - elif name==RDF.li: - current.predicate = current.next_li() - elif name in PROPERTY_ELEMENT_EXCEPTIONS: - self.error("Invalid property element URI: %s" % name) - else: - current.predicate = absolutize(name) - - id = atts.get(RDF.ID, None) - if id is not None: - if not is_ncname(id): - self.error("rdf:ID value is not a value NCName: %s" % id) - current.id = absolutize("#%s" % id) - else: - current.id = None - - resource = atts.get(RDF.resource, None) - nodeID = atts.get(RDF.nodeID, None) - parse_type = atts.get(RDF.parseType, None) - if resource is not None and nodeID is not None: - self.error("Property element cannot have both rdf:nodeID and rdf:resource") - if resource is not None: - object = absolutize(resource) - next.start = self.node_element_start - next.end = self.node_element_end - elif nodeID is not None: - if not is_ncname(nodeID): - self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) - if self.preserve_bnode_ids is False: - if nodeID in self.bnode: - object = self.bnode[nodeID] - else: - subject = BNode() - self.bnode[nodeID] = subject - object = subject - else: - object = subject = BNode(nodeID) - next.start = self.node_element_start - next.end = self.node_element_end - else: - if parse_type is not None: - for att in atts: - if att!=RDF.parseType and att!=RDF.ID: - self.error("Property attr '%s' now allowed here" % att) - if parse_type=="Resource": - current.subject = object = BNode() - current.char = self.property_element_char - next.start = self.property_element_start - next.end = self.property_element_end - elif parse_type=="Collection": - current.char = None - object = current.list = RDF.nil #BNode()#self.parent.subject - next.start = self.node_element_start - next.end = self.list_node_element_end - else: #if parse_type=="Literal": - # All other values are treated as Literal - # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt - object = Literal("", datatype=RDF.XMLLiteral) - current.char = self.literal_element_char - current.declared = {} - next.start = self.literal_element_start - next.char = self.literal_element_char - next.end = self.literal_element_end - current.object = object - return - else: - object = None - current.char = self.property_element_char - next.start = self.node_element_start - next.end = self.node_element_end - - datatype = current.datatype = atts.get(RDF.datatype, None) - language = current.language - if datatype is not None: - # TODO: check that there are no atts other than datatype and id - datatype = absolutize(datatype) - else: - for att in atts: - if not att.startswith(str(RDFNS)): - predicate = absolutize(att) - elif att in PROPERTY_ELEMENT_ATTRIBUTES: - continue - elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: - self.error("""Invalid property attribute URI: %s""" % att) - else: - predicate = absolutize(att) - - if att==RDF.type: - o = URIRef(atts[att]) - else: - if datatype is not None: - language = None - o = Literal(atts[att], language, datatype) - - if object is None: - object = BNode() - self.store.add((object, predicate, o)) - if object is None: - current.data = "" - current.object = None - else: - current.data = None - current.object = object - - def property_element_char(self, data): - current = self.current - if current.data is not None: - current.data += data - - def property_element_end(self, name, qname): - current = self.current - if current.data is not None and current.object is None: - literalLang = current.language - if current.datatype is not None: - literalLang = None - current.object = Literal(current.data, literalLang, current.datatype) - current.data = None - if self.next.end==self.list_node_element_end: - if current.object!=RDF.nil: - self.store.add((current.list, RDF.rest, RDF.nil)) - if current.object is not None: - self.store.add((self.parent.subject, current.predicate, current.object)) - if current.id is not None: - self.add_reified(current.id, (self.parent.subject, - current.predicate, current.object)) - current.subject = None - - def list_node_element_end(self, name, qname): - current = self.current - if self.parent.list==RDF.nil: - list = BNode() - # Removed between 20030123 and 20030905 - #self.store.add((list, RDF.type, LIST)) - self.parent.list = list - self.store.add((self.parent.list, RDF.first, current.subject)) - self.parent.object = list - self.parent.char = None - else: - list = BNode() - # Removed between 20030123 and 20030905 - #self.store.add((list, RDF.type, LIST)) - self.store.add((self.parent.list, RDF.rest, list)) - self.store.add((list, RDF.first, current.subject)) - self.parent.list = list - - def literal_element_start(self, name, qname, attrs): - current = self.current - self.next.start = self.literal_element_start - self.next.char = self.literal_element_char - self.next.end = self.literal_element_end - current.declared = self.parent.declared.copy() - if name[0]: - prefix = self._current_context[name[0]] - if prefix: - current.object = "<%s:%s" % (prefix, name[1]) - else: - current.object = "<%s" % name[1] - if not name[0] in current.declared: - current.declared[name[0]] = prefix - if prefix: - current.object += (' xmlns:%s="%s"' % (prefix, name[0])) - else: - current.object += (' xmlns="%s"' % name[0]) - else: - current.object = "<%s" % name[1] - - for (name, value) in attrs.items(): - if name[0]: - if not name[0] in current.declared: - current.declared[name[0]] = self._current_context[name[0]] - name = current.declared[name[0]] + ":" + name[1] - else: - name = name[1] - current.object += (' %s=%s' % (name, quoteattr(value))) - current.object += ">" - - def literal_element_char(self, data): - self.current.object += escape(data) - - def literal_element_end(self, name, qname): - if name[0]: - prefix = self._current_context[name[0]] - if prefix: - end = u"" % (prefix, name[1]) - else: - end = u"" % name[1] - else: - end = u"" % name[1] - self.parent.object += self.current.object + end - - -def create_parser(target, store): - parser = make_parser() - try: - # Workaround for bug in expatreader.py. Needed when - # expatreader is trying to guess a prefix. - parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace") - except AttributeError: - pass # Not present in Jython (at least) - parser.setFeature(handler.feature_namespaces, 1) - rdfxml = RDFXMLHandler(store) - rdfxml.setDocumentLocator(target) - #rdfxml.setDocumentLocator(_Locator(self.url, self.parser)) - parser.setContentHandler(rdfxml) - parser.setErrorHandler(ErrorHandler()) - return parser - - -class RDFXMLParser(Parser): - - def __init__(self): - pass - - def parse(self, source, sink, **args): - self._parser = create_parser(source, sink) - content_handler = self._parser.getContentHandler() - preserve_bnode_ids = args.get("preserve_bnode_ids", None) - if preserve_bnode_ids is not None: - content_handler.preserve_bnode_ids = preserve_bnode_ids - # We're only using it once now - #content_handler.reset() - #self._parser.reset() - self._parser.parse(source) - - - diff --git a/doc/rdflib3/plugins/parsers/trix.py b/doc/rdflib3/plugins/parsers/trix.py deleted file mode 100644 index 48f4fc4..0000000 --- a/doc/rdflib3/plugins/parsers/trix.py +++ /dev/null @@ -1,289 +0,0 @@ -# Copyright (c) 2002, Daniel Krech, http://eikeon.com/ -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# -# * Neither the name of Daniel Krech nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -""" -""" -from rdflib.namespace import Namespace -from rdflib.term import URIRef -from rdflib.term import BNode -from rdflib.term import Literal -from rdflib.graph import Graph, ConjunctiveGraph -from rdflib.exceptions import ParserError -from rdflib.parser import Parser - -from xml.sax.saxutils import handler -from xml.sax import make_parser -from xml.sax.handler import ErrorHandler - -__all__ = ['create_parser', 'TriXHandler', 'TriXParser'] - - -TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/") -XMLNS=Namespace("http://www.w3.org/XML/1998/namespace") - -class TriXHandler(handler.ContentHandler): - """An Sax Handler for TriX. See http://sw.nokia.com/trix/""" - - def __init__(self, store): - self.store = store - self.preserve_bnode_ids = False - self.reset() - - def reset(self): - self.bnode = {} - self.graph=self.store - self.triple=None - self.state=0 - self.lang=None - self.datatype=None - - # ContentHandler methods - - def setDocumentLocator(self, locator): - self.locator = locator - - def startDocument(self): - pass - - def startPrefixMapping(self, prefix, namespace): - pass - - def endPrefixMapping(self, prefix): - pass - - def startElementNS(self, name, qname, attrs): - - if name[0]!=str(TRIXNS): - self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0],TRIXNS)) - - if name[1]=="TriX": - if self.state==0: - self.state=1 - else: - self.error("Unexpected TriX element") - - elif name[1]=="graph": - if self.state==1: - self.state=2 - else: - self.error("Unexpected graph element") - - elif name[1]=="uri": - if self.state==2: - # the context uri - self.state=3 - elif self.state==4: - # part of a triple - pass - else: - self.error("Unexpected uri element") - - elif name[1]=="triple": - if self.state==2: - if self.graph==None: - # anonymous graph, create one with random bnode id - self.graph=Graph(store=self.store.store) - # start of a triple - self.triple=[] - self.state=4 - else: - self.error("Unexpected triple element") - - elif name[1]=="typedLiteral": - if self.state==4: - # part of triple - self.lang=None - self.datatype=None - - try: - self.lang=attrs.getValue((unicode(XMLNS), u"lang")) - except: - # language not required - ignore - pass - try: - self.datatype=attrs.getValueByQName(u"datatype") - except KeyError: - self.error("No required attribute 'datatype'") - else: - self.error("Unexpected typedLiteral element") - - elif name[1]=="plainLiteral": - if self.state==4: - # part of triple - self.lang=None - self.datatype=None - try: - self.lang=attrs.getValue((unicode(XMLNS), u"lang")) - except: - # language not required - ignore - pass - - else: - self.error("Unexpected plainLiteral element") - - elif name[1]=="id": - if self.state==2: - # the context uri - self.state=3 - - elif self.state==4: - # part of triple - pass - else: - self.error("Unexpected id element") - - else: - self.error("Unknown element %s in TriX namespace"%name[1]) - - self.chars="" - - - def endElementNS(self, name, qname): - if name[0]!=str(TRIXNS): - self.error("Only elements in the TriX namespace are allowed. %s!=%s"%(name[0], TRIXNS)) - - if name[1]=="uri": - if self.state==3: - self.graph=Graph(store=self.store.store, identifier=URIRef(self.chars.strip())) - self.state=2 - elif self.state==4: - self.triple+=[URIRef(self.chars.strip())] - else: - self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness") - - elif name[1]=="id": - if self.state==3: - self.graph=Graph(self.store.store,identifier=self.get_bnode(self.chars.strip())) - self.state=2 - elif self.state==4: - self.triple+=[self.get_bnode(self.chars.strip())] - else: - self.error("Illegal internal self.state - This should never happen if the SAX parser ensures XML syntax correctness") - - elif name[1]=="plainLiteral" or name[1]=="typedLiteral": - if self.state==4: - self.triple+=[Literal(self.chars, lang=self.lang, datatype=self.datatype)] - else: - self.error("This should never happen if the SAX parser ensures XML syntax correctness") - - elif name[1]=="triple": - if self.state==4: - if len(self.triple)!=3: - self.error("Triple has wrong length, got %d elements: %s"%(len(self.triple),self.triple)) - - self.graph.add(self.triple) - #self.store.store.add(self.triple,context=self.graph) - #self.store.addN([self.triple+[self.graph]]) - self.state=2 - else: - self.error("This should never happen if the SAX parser ensures XML syntax correctness") - - elif name[1]=="graph": - self.graph=None - self.state=1 - - elif name[1]=="TriX": - self.state=0 - - else: - self.error("Unexpected close element") - - - def get_bnode(self,label): - if self.preserve_bnode_ids: - bn=BNode(label) - else: - if label in self.bnode: - bn=self.bnode[label] - else: - bn=BNode(label) - self.bnode[label]=bn - return bn - - - def characters(self, content): - self.chars+=content - - - def ignorableWhitespace(self, content): - pass - - def processingInstruction(self, target, data): - pass - - - def error(self, message): - locator = self.locator - info = "%s:%s:%s: " % (locator.getSystemId(), - locator.getLineNumber(), locator.getColumnNumber()) - raise ParserError(info + message) - - - -def create_parser(store): - parser = make_parser() - try: - # Workaround for bug in expatreader.py. Needed when - # expatreader is trying to guess a prefix. - parser.start_namespace_decl("xml", "http://www.w3.org/XML/1998/namespace") - except AttributeError: - pass # Not present in Jython (at least) - parser.setFeature(handler.feature_namespaces, 1) - trix = TriXHandler(store) - parser.setContentHandler(trix) - parser.setErrorHandler(ErrorHandler()) - return parser - - -class TriXParser(Parser): - """A parser for TriX. See http://sw.nokia.com/trix/""" - - def __init__(self): - pass - - def parse(self, source, sink, **args): - assert sink.store.context_aware, ("TriXParser must be given" - " a context aware store.") - - g=ConjunctiveGraph(store=sink.store) - - self._parser = create_parser(g) - content_handler = self._parser.getContentHandler() - preserve_bnode_ids = args.get("preserve_bnode_ids", None) - if preserve_bnode_ids is not None: - content_handler.preserve_bnode_ids = preserve_bnode_ids - # We're only using it once now - #content_handler.reset() - #self._parser.reset() - self._parser.parse(source) - - - diff --git a/doc/rdflib3/plugins/serializers/__init__.py b/doc/rdflib3/plugins/serializers/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/doc/rdflib3/plugins/serializers/__init__.pyc b/doc/rdflib3/plugins/serializers/__init__.pyc deleted file mode 100644 index 66ea8e6..0000000 Binary files a/doc/rdflib3/plugins/serializers/__init__.pyc and /dev/null differ diff --git a/doc/rdflib3/plugins/serializers/n3.py b/doc/rdflib3/plugins/serializers/n3.py deleted file mode 100644 index 63faf9d..0000000 --- a/doc/rdflib3/plugins/serializers/n3.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -Notation 3 (N3) RDF graph serializer for RDFLib. -""" -from rdflib.graph import Graph -from rdflib.namespace import Namespace, OWL -from rdflib.plugins.serializers.turtle import (TurtleSerializer, - SUBJECT, VERB, OBJECT) - -__all__ = ['N3Serializer'] - -SWAP_LOG = Namespace("http://www.w3.org/2000/10/swap/log#") - - -class N3Serializer(TurtleSerializer): - - short_name = "n3" - - def __init__(self, store, parent=None): - super(N3Serializer, self).__init__(store) - self.keywords.update({ - OWL.sameAs: '=', - SWAP_LOG.implies: '=>' - }) - self.parent = parent - - def reset(self): - super(N3Serializer, self).reset() - self._stores = {} - - def subjectDone(self, subject): - super(N3Serializer, self).subjectDone(subject) - if self.parent: - self.parent.subjectDone(subject) - - def isDone(self, subject): - return (super(N3Serializer, self).isDone(subject) - and (not self.parent or self.parent.isDone(subject))) - - def startDocument(self): - super(N3Serializer, self).startDocument() - #if not isinstance(self.store, N3Store): - # return - # - #all_list = [self.label(var) for var in - # self.store.get_universals(recurse=False)] - #all_list.sort() - #some_list = [self.label(var) for var in - # self.store.get_existentials(recurse=False)] - #some_list.sort() - # - #for var in all_list: - # self.write('\n'+self.indent()+'@forAll %s. '%var) - #for var in some_list: - # self.write('\n'+self.indent()+'@forSome %s. '%var) - # - #if (len(all_list) + len(some_list)) > 0: - # self.write('\n') - - def endDocument(self): - if not self.parent: - super(N3Serializer, self).endDocument() - - def indent(self, modifier=0): - indent = super(N3Serializer, self).indent(modifier) - if self.parent is not None: - indent += self.parent.indent()#modifier) - return indent - - def preprocessTriple(self, triple): - super(N3Serializer, self).preprocessTriple(triple) - if isinstance(triple[0], Graph): - for t in triple[0]: - self.preprocessTriple(t) - if isinstance(triple[2], Graph): - for t in triple[2]: - self.preprocessTriple(t) - - def getQName(self, uri, gen_prefix=True): - qname = None - if self.parent is not None: - qname = self.parent.getQName(uri, gen_prefix) - if qname is None: - qname = super(N3Serializer, self).getQName(uri, gen_prefix) - return qname - - def statement(self, subject): - self.subjectDone(subject) - properties = self.buildPredicateHash(subject) - if len(properties) == 0: - return False - return (self.s_clause(subject) - or super(N3Serializer, self).statement(subject)) - - def path(self, node, position, newline=False): - if not self.p_clause(node, position): - super(N3Serializer, self).path(node, position, newline) - - def s_clause(self, subject): - if isinstance(subject, Graph): - self.write('\n'+self.indent()) - self.p_clause(subject, SUBJECT) - self.predicateList(subject) - self.write(' .') - return True - else: - return False - - def p_clause(self, node, position): - if isinstance(node, Graph): - self.subjectDone(node) - if position is OBJECT: - self.write(' ') - self.write('{') - self.depth += 1 - serializer = N3Serializer(node, parent=self) - serializer.serialize(self.stream) - self.depth -= 1 - self.write(self.indent()+'}') - return True - else: - return False - - diff --git a/doc/rdflib3/plugins/serializers/nquads.py b/doc/rdflib3/plugins/serializers/nquads.py deleted file mode 100644 index 9c71cfd..0000000 --- a/doc/rdflib3/plugins/serializers/nquads.py +++ /dev/null @@ -1,42 +0,0 @@ -import warnings - -from rdflib.term import Literal -from rdflib.serializer import Serializer -from rdflib.py3compat import b - -from rdflib.plugins.serializers.nt import _xmlcharref_encode, _quoteLiteral - -__all__ = ['NQuadsSerializer'] - -class NQuadsSerializer(Serializer): - - def __init__(self, store): - if not store.context_aware: - raise Exception("NQuads serialization only makes sense for context-aware stores!") - - super(NQuadsSerializer, self).__init__(store) - - def serialize(self, stream, base=None, encoding=None, **args): - if base is not None: - warnings.warn("NQuadsSerializer does not support base.") - if encoding is not None: - warnings.warn("NQuadsSerializer does not use custom encoding.") - encoding = self.encoding - for context in self.store.contexts(): - for triple in context: - stream.write(_nq_row(triple, context.identifier).encode(encoding, "replace")) - stream.write(b("\n")) - -def _nq_row(triple,context): - if isinstance(triple[2], Literal): - return u"%s %s %s %s .\n" % (triple[0].n3(), - triple[1].n3(), - _xmlcharref_encode(_quoteLiteral(triple[2])), - context.n3()) - else: - return u"%s %s %s %s .\n" % (triple[0].n3(), - triple[1].n3(), - _xmlcharref_encode(triple[2].n3()), - context.n3()) - - diff --git a/doc/rdflib3/plugins/serializers/nt.py b/doc/rdflib3/plugins/serializers/nt.py deleted file mode 100644 index 891a42f..0000000 --- a/doc/rdflib3/plugins/serializers/nt.py +++ /dev/null @@ -1,81 +0,0 @@ -""" -N-Triples RDF graph serializer for RDFLib. -See for details about the -format. -""" -from rdflib.term import Literal -from rdflib.serializer import Serializer -from rdflib.py3compat import b -import warnings - -__all__ = ['NTSerializer'] - -class NTSerializer(Serializer): - """ - Serializes RDF graphs to NTriples format. - """ - - def serialize(self, stream, base=None, encoding=None, **args): - if base is not None: - warnings.warn("NTSerializer does not support base.") - if encoding is not None: - warnings.warn("NTSerializer does not use custom encoding.") - encoding = self.encoding - for triple in self.store: - stream.write(_nt_row(triple).encode(encoding, "replace")) - stream.write(b("\n")) - - -def _nt_row(triple): - if isinstance(triple[2], Literal): - return u"%s %s %s .\n" % (triple[0].n3(), - triple[1].n3(), - _xmlcharref_encode(_quoteLiteral(triple[2]))) - else: - return u"%s %s %s .\n" % (triple[0].n3(), - triple[1].n3(), - _xmlcharref_encode(triple[2].n3())) - -def _quoteLiteral(l): - ''' - a simpler version of term.Literal.n3() - ''' - - encoded = _quote_encode(l) - - if l.language: - if l.datatype: - raise Exception("Literal has datatype AND language!") - return '%s@%s' % (encoded, l.language) - elif l.datatype: - return '%s^^<%s>' % (encoded, l.datatype) - else: - return '%s' % encoded - -def _quote_encode(l): - return '"%s"' % l.replace('\\', '\\\\')\ - .replace('\n','\\n')\ - .replace('"', '\\"')\ - .replace('\r','\\r') - - -# from -def _xmlcharref_encode(unicode_data, encoding="ascii"): - """Emulate Python 2.3's 'xmlcharrefreplace' encoding error handler.""" - res="" - - # Step through the unicode_data string one character at a time in - # order to catch unencodable characters: - for char in unicode_data: - try: - char.encode(encoding, 'strict') - except UnicodeError: - if ord(char) <= 0xFFFF: - res+='\\u%04X' % ord(char) - else: - res+='\\U%08X' % ord(char) - else: - res+=char - - return res - diff --git a/doc/rdflib3/plugins/serializers/rdfxml.py b/doc/rdflib3/plugins/serializers/rdfxml.py deleted file mode 100644 index 70ff787..0000000 --- a/doc/rdflib3/plugins/serializers/rdfxml.py +++ /dev/null @@ -1,335 +0,0 @@ -from __future__ import generators - -from rdflib.plugins.serializers.xmlwriter import XMLWriter - -from rdflib.namespace import Namespace, RDF, RDFS #, split_uri - -from rdflib.term import URIRef, Literal, BNode -from rdflib.util import first, more_than -from rdflib.collection import Collection -from rdflib.serializer import Serializer - -# from rdflib.exceptions import Error - -from rdflib.py3compat import b - -from xml.sax.saxutils import quoteattr, escape - -from xmlwriter import ESCAPE_ENTITIES - -__all__ = ['fix', 'XMLSerializer', 'PrettyXMLSerializer'] - -class XMLSerializer(Serializer): - - def __init__(self, store): - super(XMLSerializer, self).__init__(store) - - def __bindings(self): - store = self.store - nm = store.namespace_manager - bindings = {} - - for predicate in set(store.predicates()): - prefix, namespace, name = nm.compute_qname(predicate) - bindings[prefix] = URIRef(namespace) - - RDFNS = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#") - - if "rdf" in bindings: - assert bindings["rdf"]==RDFNS - else: - bindings["rdf"] = RDFNS - - for prefix, namespace in bindings.iteritems(): - yield prefix, namespace - - - def serialize(self, stream, base=None, encoding=None, **args): - self.base = base - self.__stream = stream - self.__serialized = {} - encoding = self.encoding - self.write = write = lambda uni: stream.write(uni.encode(encoding, 'replace')) - - # startDocument - write('\n' % self.encoding) - - # startRDF - write('\n') - - # write out triples by subject - for subject in self.store.subjects(): - self.subject(subject, 1) - - # endRDF - write( "\n" ) - - # Set to None so that the memory can get garbage collected. - #self.__serialized = None - del self.__serialized - - - def subject(self, subject, depth=1): - if not subject in self.__serialized: - self.__serialized[subject] = 1 - - if isinstance(subject, (BNode,URIRef)): - write = self.write - indent = " " * depth - element_name = "rdf:Description" - - if isinstance(subject, BNode): - write( '%s<%s rdf:nodeID="%s"' % - (indent, element_name, subject)) - else: - uri = quoteattr(self.relativize(subject)) - write( "%s<%s rdf:about=%s" % (indent, element_name, uri)) - - if (subject, None, None) in self.store: - write( ">\n" ) - - for predicate, object in self.store.predicate_objects(subject): - self.predicate(predicate, object, depth+1) - write( "%s\n" % (indent, element_name)) - - else: - write( "/>\n" ) - - def predicate(self, predicate, object, depth=1): - write = self.write - indent = " " * depth - qname = self.store.namespace_manager.qname(predicate) - - if isinstance(object, Literal): - attributes = "" - - if object.language: - attributes += ' xml:lang="%s"'%object.language - - if object.datatype: - attributes += ' rdf:datatype="%s"'%object.datatype - - write("%s<%s%s>%s\n" % - (indent, qname, attributes, - escape(object, ESCAPE_ENTITIES), qname) ) - else: - - if isinstance(object, BNode): - write('%s<%s rdf:nodeID="%s"/>\n' % - (indent, qname, object)) - else: - write("%s<%s rdf:resource=%s/>\n" % - (indent, qname, quoteattr(self.relativize(object)))) - - - -XMLLANG = "http://www.w3.org/XML/1998/namespacelang" -XMLBASE = "http://www.w3.org/XML/1998/namespacebase" -OWL_NS = Namespace('http://www.w3.org/2002/07/owl#') - -# TODO: -def fix(val): - "strip off _: from nodeIDs... as they are not valid NCNames" - if val.startswith("_:"): - return val[2:] - else: - return val - - -class PrettyXMLSerializer(Serializer): - - def __init__(self, store, max_depth=3): - super(PrettyXMLSerializer, self).__init__(store) - self.forceRDFAbout=set() - - def serialize(self, stream, base=None, encoding=None, **args): - self.__serialized = {} - store = self.store - self.base = base - self.max_depth = args.get("max_depth", 3) - assert self.max_depth>0, "max_depth must be greater than 0" - - self.nm = nm = store.namespace_manager - self.writer = writer = XMLWriter(stream, nm, encoding) - namespaces = {} - - possible = set( - store.predicates()).union(store.objects(None, RDF.type)) - - for predicate in possible: - prefix, namespace, local = nm.compute_qname(predicate) - namespaces[prefix] = namespace - - namespaces["rdf"] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" - - writer.push(RDF.RDF) - - if "xml_base" in args: - writer.attribute(XMLBASE, args["xml_base"]) - - writer.namespaces(namespaces.iteritems()) - - # Write out subjects that can not be inline - for subject in store.subjects(): - if (None, None, subject) in store: - if (subject, None, subject) in store: - self.subject(subject, 1) - else: - self.subject(subject, 1) - - # write out anything that has not yet been reached - # write out BNodes last (to ensure they can be inlined where possible) - bnodes=set() - - for subject in store.subjects(): - if isinstance(subject, BNode): - bnodes.add(subject) - continue - self.subject(subject, 1) - - #now serialize only those BNodes that have not been serialized yet - for bnode in bnodes: - if bnode not in self.__serialized: - self.subject(subject, 1) - - writer.pop(RDF.RDF) - stream.write(b("\n")) - - # Set to None so that the memory can get garbage collected. - self.__serialized = None - - - def subject(self, subject, depth=1): - store = self.store - writer = self.writer - - if subject in self.forceRDFAbout: - writer.push(RDF.Description) - writer.attribute(RDF.about, self.relativize(subject)) - writer.pop(RDF.Description) - self.forceRDFAbout.remove(subject) - - elif not subject in self.__serialized: - self.__serialized[subject] = 1 - type = first(store.objects(subject, RDF.type)) - - try: - self.nm.qname(type) - except: - type = None - - element = type or RDF.Description - writer.push(element) - - if isinstance(subject, BNode): - def subj_as_obj_more_than(ceil): - return True - # more_than(store.triples((None, None, subject)), ceil) - - #here we only include BNode labels if they are referenced - #more than once (this reduces the use of redundant BNode identifiers) - if subj_as_obj_more_than(1): - writer.attribute(RDF.nodeID, fix(subject)) - - else: - writer.attribute(RDF.about, self.relativize(subject)) - - if (subject, None, None) in store: - for predicate, object in store.predicate_objects(subject): - if not (predicate == RDF.type and object == type): - self.predicate(predicate, object, depth+1) - - writer.pop(element) - - elif subject in self.forceRDFAbout: - writer.push(RDF.Description) - writer.attribute(RDF.about, self.relativize(subject)) - writer.pop(RDF.Description) - self.forceRDFAbout.remove(subject) - - def predicate(self, predicate, object, depth=1): - writer = self.writer - store = self.store - writer.push(predicate) - - if isinstance(object, Literal): - if object.language: - writer.attribute(XMLLANG, object.language) - - if object.datatype: - writer.attribute(RDF.datatype, object.datatype) - - writer.text(object) - - elif object in self.__serialized or not (object, None, None) in store: - - if isinstance(object, BNode): - if more_than(store.triples((None, None, object)), 0): - writer.attribute(RDF.nodeID, fix(object)) - else: - writer.attribute(RDF.resource, self.relativize(object)) - - else: - if first(store.objects(object, RDF.first)): # may not have type RDF.List - - self.__serialized[object] = 1 - - # Warn that any assertions on object other than - # RDF.first and RDF.rest are ignored... including RDF.List - import warnings - warnings.warn( - "Assertions on %s other than RDF.first " % repr(object) + \ - "and RDF.rest are ignored ... including RDF.List", - UserWarning, stacklevel=2) - writer.attribute(RDF.parseType, "Collection") - - col = Collection(store, object) - - for item in col: - - if isinstance(item,URIRef): - self.forceRDFAbout.add(item) - self.subject(item) - - if not isinstance(item,URIRef): - self.__serialized[item] = 1 - else: - if first(store.triples_choices((object, - RDF.type, - [OWL_NS.Class,RDFS.Class]))) \ - and isinstance(object, URIRef): - writer.attribute(RDF.resource, self.relativize(object)) - - elif depth <= self.max_depth: - self.subject(object, depth+1) - - elif isinstance(object, BNode): - - if not object in self.__serialized \ - and (object, None, None) in store \ - and len(list(store.subjects(object=object))) == 1: - #inline blank nodes if they haven't been serialized yet and are - #only referenced once (regardless of depth) - self.subject(object, depth+1) - else: - writer.attribute(RDF.nodeID, fix(object)) - - else: - writer.attribute(RDF.resource, self.relativize(object)) - - writer.pop(predicate) - diff --git a/doc/rdflib3/plugins/serializers/rdfxml.pyc b/doc/rdflib3/plugins/serializers/rdfxml.pyc deleted file mode 100644 index f7e15a0..0000000 Binary files a/doc/rdflib3/plugins/serializers/rdfxml.pyc and /dev/null differ diff --git a/doc/rdflib3/plugins/serializers/trig.py b/doc/rdflib3/plugins/serializers/trig.py deleted file mode 100644 index a899aa0..0000000 --- a/doc/rdflib3/plugins/serializers/trig.py +++ /dev/null @@ -1,87 +0,0 @@ -""" -Trig RDF graph serializer for RDFLib. -See for syntax specification. -""" - -from rdflib.plugins.serializers.turtle import TurtleSerializer -from rdflib.plugins.serializers.turtle import _GEN_QNAME_FOR_DT -from rdflib.plugins.serializers.turtle import VERB - -from rdflib.term import BNode, Literal - - -from rdflib.compat import defaultdict - - -__all__ = ['TrigSerializer'] - - -class TrigSerializer(TurtleSerializer): - - short_name = "trig" - indentString = 4 * u' ' - - def __init__(self, store): - if store.context_aware: - self.contexts = store.contexts() - else: - self.contexts = [store] - - super(TrigSerializer, self).__init__(store) - - def preprocess(self): - for context in self.contexts: - for triple in context: - self.preprocessTriple(triple, context.identifier) - - def preprocessTriple(self, triple, identifier): - s, p, o = triple - references = self.refCount(o) + 1 - self._references[o] = references - self._subjects[s] = True - self._contexts[identifier].add(s) - for i, node in enumerate(triple): - if node in self.keywords: - continue - # Don't use generated prefixes for subjects and objects - self.getQName(node, gen_prefix=(i == VERB)) - if isinstance(node, Literal) and node.datatype: - self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT) - p = triple[1] - if isinstance(p, BNode): - self._references[p] = self.refCount(p) + 1 - - def reset(self): - super(TrigSerializer, self).reset() - self._contexts = defaultdict(set) - - def serialize(self, stream, base=None, encoding=None, spacious=None, **args): - self.reset() - self.stream = stream - self.base = base - - if spacious is not None: - self._spacious = spacious - - self.preprocess() - # @@FIXME: Unused code ... - # subjects_list = self.orderSubjects() - - self.startDocument() - - firstTime = True - for identifier, subjects in self._contexts.items(): - self.write(self.indent() + '\n<%s> = {' % identifier) - self.depth += 1 - for subject in subjects: - if self.isDone(subject): - continue - if firstTime: - firstTime = False - if self.statement(subject) and not firstTime: - self.write('\n') - self.depth -= 1 - self.write('}\n') - - self.endDocument() - stream.write(u"\n".encode('ascii')) diff --git a/doc/rdflib3/plugins/serializers/trix.py b/doc/rdflib3/plugins/serializers/trix.py deleted file mode 100644 index b757a00..0000000 --- a/doc/rdflib3/plugins/serializers/trix.py +++ /dev/null @@ -1,75 +0,0 @@ -from rdflib.serializer import Serializer -from rdflib.plugins.serializers.xmlwriter import XMLWriter - -from rdflib.term import URIRef, Literal, BNode -from rdflib.namespace import Namespace - -from rdflib.graph import Graph, ConjunctiveGraph - -from rdflib.py3compat import b - -__all__ = ['TriXSerializer'] - -## TODO: MOve this somewhere central -TRIXNS=Namespace("http://www.w3.org/2004/03/trix/trix-1/") -XMLNS=Namespace("http://www.w3.org/XML/1998/namespace") - -class TriXSerializer(Serializer): - def __init__(self, store): - super(TriXSerializer, self).__init__(store) - if not store.context_aware: - raise Exception("TriX serialization only makes sense for context-aware stores!") - - - def serialize(self, stream, base=None, encoding=None, **args): - - nm=self.store.namespace_manager - - self.writer=XMLWriter(stream, nm, encoding, extra_ns={"": TRIXNS}) - - self.writer.push(TRIXNS[u"TriX"]) - self.writer.namespaces() - - if isinstance(self.store, ConjunctiveGraph): - for subgraph in self.store.contexts(): - self._writeGraph(subgraph) - elif isinstance(self.store, Graph): - self._writeGraph(self.store) - else: - raise Exception("Unknown graph type: "+type(self.store)) - - self.writer.pop() - stream.write(b("\n")) - - - def _writeGraph(self, graph): - self.writer.push(TRIXNS[u"graph"]) - if isinstance(graph.identifier, URIRef): - self.writer.element(TRIXNS[u"uri"], content=unicode(graph.identifier)) - - for triple in graph.triples((None,None,None)): - self._writeTriple(triple) - self.writer.pop() - - def _writeTriple(self, triple): - self.writer.push(TRIXNS[u"triple"]) - for component in triple: - if isinstance(component, URIRef): - self.writer.element(TRIXNS[u"uri"], - content=unicode(component)) - elif isinstance(component, BNode): - self.writer.element(TRIXNS[u"id"], - content=unicode(component)) - elif isinstance(component, Literal): - if component.datatype: - self.writer.element(TRIXNS[u"typedLiteral"], - content=unicode(component), - attributes={ TRIXNS[u"datatype"]: unicode(component.datatype) }) - elif component.language: - self.writer.element(TRIXNS[u"plainLiteral"], - content=unicode(component), - attributes={ XMLNS[u"lang"]: unicode(component.language) }) - else: - self.writer.element(TRIXNS[u"plainLiteral"], - content=unicode(component)) - self.writer.pop() diff --git a/doc/rdflib3/plugins/serializers/turtle.py b/doc/rdflib3/plugins/serializers/turtle.py deleted file mode 100644 index c9472ab..0000000 --- a/doc/rdflib3/plugins/serializers/turtle.py +++ /dev/null @@ -1,390 +0,0 @@ -""" -Turtle RDF graph serializer for RDFLib. -See for syntax specification. -""" -from rdflib.term import BNode, Literal, URIRef - -from rdflib.exceptions import Error - -from rdflib.serializer import Serializer - -from rdflib.namespace import RDF, RDFS - -__all__ = ['RecursiveSerializer', 'TurtleSerializer'] - -class RecursiveSerializer(Serializer): - - topClasses = [RDFS.Class] - predicateOrder = [RDF.type, RDFS.label] - maxDepth = 10 - indentString = u" " - - def __init__(self, store): - - super(RecursiveSerializer, self).__init__(store) - self.stream = None - self.reset() - - def addNamespace(self, prefix, uri): - self.namespaces[prefix] = uri - - def checkSubject(self, subject): - """Check to see if the subject should be serialized yet""" - if ((self.isDone(subject)) - or (subject not in self._subjects) - or ((subject in self._topLevels) and (self.depth > 1)) - or (isinstance(subject, URIRef) and (self.depth >= self.maxDepth)) - ): - return False - return True - - def isDone(self, subject): - """Return true if subject is serialized""" - return subject in self._serialized - - def orderSubjects(self): - seen = {} - subjects = [] - - for classURI in self.topClasses: - members = list(self.store.subjects(RDF.type, classURI)) - members.sort() - - for member in members: - subjects.append(member) - self._topLevels[member] = True - seen[member] = True - - recursable = [(isinstance(subject,BNode), self.refCount(subject), subject) for subject in self._subjects - if subject not in seen] - - recursable.sort() - subjects.extend([subject for (isbnode, refs, subject) in recursable]) - - return subjects - - def preprocess(self): - for triple in self.store.triples((None,None,None)): - self.preprocessTriple(triple) - - def preprocessTriple(self, (s,p,o)): - references = self.refCount(o) + 1 - self._references[o] = references - self._subjects[s] = True - - def refCount(self, node): - """Return the number of times this node has been referenced in the object position""" - return self._references.get(node, 0) - - def reset(self): - self.depth = 0 - self.lists = {} - self.namespaces = {} - self._references = {} - self._serialized = {} - self._subjects = {} - self._topLevels = {} - - def buildPredicateHash(self, subject): - """Build a hash key by predicate to a list of objects for the given subject""" - properties = {} - for s,p,o in self.store.triples((subject, None, None)): - oList = properties.get(p, []) - oList.append(o) - properties[p] = oList - return properties - - def sortProperties(self, properties): - """Take a hash from predicate uris to lists of values. - Sort the lists of values. Return a sorted list of properties.""" - # Sort object lists - for prop, objects in properties.items(): - objects.sort() - - # Make sorted list of properties - propList = [] - seen = {} - for prop in self.predicateOrder: - if (prop in properties) and (prop not in seen): - propList.append(prop) - seen[prop] = True - props = properties.keys() - props.sort() - for prop in props: - if prop not in seen: - propList.append(prop) - seen[prop] = True - return propList - - def subjectDone(self, subject): - """Mark a subject as done.""" - self._serialized[subject] = True - - def indent(self, modifier=0): - """Returns indent string multiplied by the depth""" - return (self.depth+modifier)*self.indentString - - def write(self, text): - """Write text in given encoding.""" - self.stream.write(text.encode(self.encoding, 'replace')) - - -SUBJECT = 0 -VERB = 1 -OBJECT = 2 - -_GEN_QNAME_FOR_DT = False -_SPACIOUS_OUTPUT = False - - -class TurtleSerializer(RecursiveSerializer): - - short_name = "turtle" - indentString = ' ' - - def __init__(self, store): - super(TurtleSerializer, self).__init__(store) - self.keywords = { - RDF.type: 'a' - } - self.reset() - self.stream = None - self._spacious = _SPACIOUS_OUTPUT - self._ns_rewrite={} - - def addNamespace(self, prefix, namespace): - # Turtle does not support prefix that start with _ - # if they occur in the graph, rewrite to p_blah - # this is more complicated since we need to make sure p_blah - # does not already exist. And we register namespaces as we go, i.e. - # we may first see a triple with prefix _9 - rewrite it to p_9 - # and then later find a triple with a "real" p_9 prefix - - # so we need to keep track of ns rewrites we made so far. - - if (prefix > '' and prefix[0] == '_') \ - or self.namespaces.get(prefix, namespace) != namespace: - - if prefix not in self._ns_rewrite: - p="p"+prefix - while p in self.namespaces: - p="p"+p - self._ns_rewrite[prefix]=p - - prefix=self._ns_rewrite.get(prefix,prefix) - super(TurtleSerializer, self).addNamespace(prefix, namespace) - return prefix - - def reset(self): - super(TurtleSerializer, self).reset() - self._shortNames = {} - self._started = False - self._ns_rewrite={} - - def serialize(self, stream, base=None, encoding=None, spacious=None, **args): - self.reset() - self.stream = stream - self.base = base - - if spacious is not None: - self._spacious = spacious - # In newer rdflibs these are always in the namespace manager - #self.store.prefix_mapping('rdf', RDFNS) - #self.store.prefix_mapping('rdfs', RDFSNS) - - self.preprocess() - subjects_list = self.orderSubjects() - - self.startDocument() - - firstTime = True - for subject in subjects_list: - if self.isDone(subject): - continue - if firstTime: - firstTime = False - if self.statement(subject) and not firstTime: - self.write('\n') - - self.endDocument() - stream.write(u"\n".encode('ascii')) - - def preprocessTriple(self, triple): - super(TurtleSerializer, self).preprocessTriple(triple) - for i, node in enumerate(triple): - if node in self.keywords: - continue - # Don't use generated prefixes for subjects and objects - self.getQName(node, gen_prefix=(i==VERB)) - if isinstance(node, Literal) and node.datatype: - self.getQName(node.datatype, gen_prefix=_GEN_QNAME_FOR_DT) - p = triple[1] - if isinstance(p, BNode): - self._references[p] = self.refCount(p) + 1 - - def getQName(self, uri, gen_prefix=True): - if not isinstance(uri, URIRef): - return None - - parts=None - - try: - parts = self.store.compute_qname(uri, generate=gen_prefix) - except: - - # is the uri a namespace in itself? - pfx = self.store.store.prefix(uri) - - if pfx is not None: - parts = (pfx, uri, '') - else: - # nothing worked - return None - - prefix, namespace, local = parts - # Local parts with '.' will mess up serialization - if '.' in local: - return None - prefix=self.addNamespace(prefix, namespace) - - return u'%s:%s' % (prefix, local) - - def startDocument(self): - self._started = True - ns_list = sorted(self.namespaces.items()) - for prefix, uri in ns_list: - self.write(self.indent()+'@prefix %s: <%s> .\n' % (prefix, uri)) - if ns_list and self._spacious: - self.write('\n') - - def endDocument(self): - if self._spacious: - self.write('\n') - - def statement(self, subject): - self.subjectDone(subject) - return self.s_squared(subject) or self.s_default(subject) - - def s_default(self, subject): - self.write('\n'+self.indent()) - self.path(subject, SUBJECT) - self.predicateList(subject) - self.write(' .') - return True - - def s_squared(self, subject): - if (self.refCount(subject) > 0) or not isinstance(subject, BNode): - return False - self.write('\n'+self.indent()+'[]') - #self.depth+=1 - self.predicateList(subject) - #self.depth-=1 - self.write(' .') - return True - - def path(self, node, position, newline=False): - if not (self.p_squared(node, position, newline) - or self.p_default(node, position, newline)): - raise Error("Cannot serialize node '%s'"%(node, )) - - def p_default(self, node, position, newline=False): - if position != SUBJECT and not newline: - self.write(' ') - self.write(self.label(node, position)) - return True - - def label(self, node, position): - if node == RDF.nil: - return '()' - if position is VERB and node in self.keywords: - return self.keywords[node] - if isinstance(node, Literal): - return node._literal_n3(use_plain=True, - qname_callback=lambda dt: - self.getQName(dt, _GEN_QNAME_FOR_DT)) - else: - return self.getQName(node, position==VERB) or node.n3() - - def p_squared(self, node, position, newline=False): - if (not isinstance(node, BNode) - or node in self._serialized - or self.refCount(node) > 1 - or position == SUBJECT): - return False - - if not newline: - self.write(' ') - - if self.isValidList(node): - # this is a list - self.write('(') - self.depth += 1#2 - self.doList(node) - self.depth -= 1#2 - self.write(' )') - else: - self.subjectDone(node) - self.depth += 2 - #self.write('[\n' + self.indent()) - self.write('[') - self.depth -= 1 - #self.predicateList(node, newline=True) - self.predicateList(node, newline=False) - #self.write('\n' + self.indent() + ']') - self.write(' ]') - self.depth -= 1 - - return True - - def isValidList(self, l): - """ - Checks if l is a valid RDF list, i.e. no nodes have other properties. - """ - try: - if not self.store.value(l, RDF.first): - return False - except: - return False - while l: - if l != RDF.nil and len( - list(self.store.predicate_objects(l))) != 2: - return False - l = self.store.value(l, RDF.rest) - return True - - def doList(self,l): - while l: - item = self.store.value(l, RDF.first) - if item: - self.path(item, OBJECT) - self.subjectDone(l) - l = self.store.value(l, RDF.rest) - - def predicateList(self, subject, newline=False): - properties = self.buildPredicateHash(subject) - propList = self.sortProperties(properties) - if len(propList) == 0: - return - self.verb(propList[0], newline=newline) - self.objectList(properties[propList[0]]) - for predicate in propList[1:]: - self.write(';\n' + self.indent(1)) - self.verb(predicate, newline=True) - self.objectList(properties[predicate]) - - def verb(self, node, newline=False): - self.path(node, VERB, newline) - - def objectList(self, objects): - count = len(objects) - if count == 0: - return - depthmod = (count == 1) and 0 or 1 - self.depth += depthmod - self.path(objects[0], OBJECT) - for obj in objects[1:]: - self.write(',\n' + self.indent(1)) - self.path(obj, OBJECT, newline=True) - self.depth -= depthmod - - diff --git a/doc/rdflib3/plugins/serializers/xmlwriter.py b/doc/rdflib3/plugins/serializers/xmlwriter.py deleted file mode 100644 index b9a10e2..0000000 --- a/doc/rdflib3/plugins/serializers/xmlwriter.py +++ /dev/null @@ -1,107 +0,0 @@ -import codecs -from xml.sax.saxutils import quoteattr, escape - -__all__ = ['XMLWriter'] - -ESCAPE_ENTITIES={ - '\r': ' ' -} - -class XMLWriter(object): - def __init__(self, stream, namespace_manager, encoding=None, decl=1, extra_ns={}): - encoding = encoding or 'utf-8' - encoder, decoder, stream_reader, stream_writer = codecs.lookup(encoding) - self.stream = stream = stream_writer(stream) - if decl: - stream.write('' % encoding) - self.element_stack = [] - self.nm = namespace_manager - self.extra_ns=extra_ns - self.closed = True - - def __get_indent(self): - return " " * len(self.element_stack) - indent = property(__get_indent) - - def __close_start_tag(self): - if not self.closed: # TODO: - self.closed = True - self.stream.write(">") - - def push(self, uri): - self.__close_start_tag() - write = self.stream.write - write("\n") - write(self.indent) - write("<%s" % self.qname(uri)) - self.element_stack.append(uri) - self.closed = False - self.parent = False - - def pop(self, uri=None): - top = self.element_stack.pop() - if uri: - assert uri == top - write = self.stream.write - if not self.closed: - self.closed = True - write("/>") - else: - if self.parent: - write("\n") - write(self.indent) - write("" % self.qname(top)) - self.parent = True - - def element(self, uri, content, attributes={}): - """Utility method for adding a complete simple element""" - self.push(uri) - for k, v in attributes.iteritems(): - self.attribute(k,v) - self.text(content) - self.pop() - - def namespaces(self, namespaces=None): - if not namespaces: - namespaces=self.nm.namespaces() - - write = self.stream.write - write("\n") - for prefix, namespace in namespaces: - if prefix: - write(' xmlns:%s="%s"\n' % (prefix, namespace)) - else: - write(' xmlns="%s"\n' % namespace) - - for prefix, namespace in self.extra_ns.items(): - if prefix: - write(' xmlns:%s="%s"\n' % (prefix, namespace)) - else: - write(' xmlns="%s"\n' % namespace) - - - def attribute(self, uri, value): - write = self.stream.write - write(" %s=%s" % (self.qname(uri), quoteattr(value))) - - def text(self, text): - self.__close_start_tag() - if "<" in text and ">" in text and not "]]>" in text: - self.stream.write("") - else: - self.stream.write(escape(text, ESCAPE_ENTITIES)) - - def qname(self,uri): - """Compute qname for a uri using our extra namespaces, - or the given namespace manager""" - - for pre,ns in self.extra_ns.items(): - if uri.startswith(ns): - if pre!="": - return ":".join(pre,uri[len(ns):]) - else: - return uri[len(ns):] - - return self.nm.qname(uri) diff --git a/doc/rdflib3/plugins/serializers/xmlwriter.pyc b/doc/rdflib3/plugins/serializers/xmlwriter.pyc deleted file mode 100644 index ab05d09..0000000 Binary files a/doc/rdflib3/plugins/serializers/xmlwriter.pyc and /dev/null differ diff --git a/doc/rdflib3/plugins/sleepycat.py b/doc/rdflib3/plugins/sleepycat.py deleted file mode 100644 index 4168c5a..0000000 --- a/doc/rdflib3/plugins/sleepycat.py +++ /dev/null @@ -1,531 +0,0 @@ -from rdflib.store import Store, VALID_STORE, CORRUPTED_STORE, NO_STORE, UNKNOWN -from rdflib.term import URIRef -from rdflib.py3compat import b -def bb(u): return u.encode('utf-8') - -try: - from bsddb import db - has_bsddb = True -except ImportError: - try: - from bsddb3 import db - has_bsddb = True - except ImportError: - has_bsddb = False -from os import mkdir -from os.path import exists, abspath -from urllib import pathname2url -from threading import Thread - -import logging -_logger = logging.getLogger(__name__) - -__all__ = ['Sleepycat'] - -class Sleepycat(Store): - context_aware = True - formula_aware = True - transaction_aware = False - db_env = None - - def __init__(self, configuration=None, identifier=None): - if not has_bsddb: raise ImportError("Unable to import bsddb/bsddb3, store is unusable.") - self.__open = False - self.__identifier = identifier - super(Sleepycat, self).__init__(configuration) - self._loads = self.node_pickler.loads - self._dumps = self.node_pickler.dumps - - def __get_identifier(self): - return self.__identifier - identifier = property(__get_identifier) - - def _init_db_environment(self, homeDir, create=True): - envsetflags = db.DB_CDB_ALLDB - envflags = db.DB_INIT_MPOOL | db.DB_INIT_CDB | db.DB_THREAD - if not exists(homeDir): - if create==True: - mkdir(homeDir) # TODO: implement create method and refactor this to it - self.create(homeDir) - else: - return NO_STORE - db_env = db.DBEnv() - db_env.set_cachesize(0, 1024*1024*50) # TODO - #db_env.set_lg_max(1024*1024) - db_env.set_flags(envsetflags, 1) - db_env.open(homeDir, envflags | db.DB_CREATE) - return db_env - - def is_open(self): - return self.__open - - def open(self, path, create=True): - if not has_bsddb: return NO_STORE - homeDir = path - - if self.__identifier is None: - self.__identifier = URIRef(pathname2url(abspath(homeDir))) - - db_env = self._init_db_environment(homeDir, create) - if db_env == NO_STORE: - return NO_STORE - self.db_env = db_env - self.__open = True - - dbname = None - dbtype = db.DB_BTREE - # auto-commit ensures that the open-call commits when transactions are enabled - dbopenflags = db.DB_THREAD - if self.transaction_aware == True: - dbopenflags |= db.DB_AUTO_COMMIT - - dbmode = 0660 - dbsetflags = 0 - - # create and open the DBs - self.__indicies = [None,] * 3 - self.__indicies_info = [None,] * 3 - for i in xrange(0, 3): - index_name = to_key_func(i)((b("s"), b("p"), b("o")), b("c")).decode() - index = db.DB(db_env) - index.set_flags(dbsetflags) - index.open(index_name, dbname, dbtype, dbopenflags|db.DB_CREATE, dbmode) - self.__indicies[i] = index - self.__indicies_info[i] = (index, to_key_func(i), from_key_func(i)) - - lookup = {} - for i in xrange(0, 8): - results = [] - for start in xrange(0, 3): - score = 1 - len = 0 - for j in xrange(start, start+3): - if i & (1<<(j%3)): - score = score << 1 - len += 1 - else: - break - tie_break = 2-start - results.append(((score, tie_break), start, len)) - - results.sort() - score, start, len = results[-1] - - def get_prefix_func(start, end): - def get_prefix(triple, context): - if context is None: - yield "" - else: - yield context - i = start - while i min_seconds or time()-t0 > max_seconds: - self.__needs_sync = False - _logger.debug("sync") - self.sync() - break - else: - sleep(1) - except Exception, e: - _logger.exception(e) - - def sync(self): - if self.__open: - for i in self.__indicies: - i.sync() - self.__contexts.sync() - self.__namespace.sync() - self.__prefix.sync() - self.__i2k.sync() - self.__k2i.sync() - - def close(self, commit_pending_transaction=False): - self.__open = False - self.__sync_thread.join() - for i in self.__indicies: - i.close() - self.__contexts.close() - self.__namespace.close() - self.__prefix.close() - self.__i2k.close() - self.__k2i.close() - self.db_env.close() - - def add(self, (subject, predicate, object), context, quoted=False, txn=None): - """\ - Add a triple to the store of triples. - """ - assert self.__open, "The Store must be open." - assert context!=self, "Can not add triple directly to store" - Store.add(self, (subject, predicate, object), context, quoted) - - _to_string = self._to_string - - s = _to_string(subject, txn=txn) - p = _to_string(predicate, txn=txn) - o = _to_string(object, txn=txn) - c = _to_string(context, txn=txn) - - cspo, cpos, cosp = self.__indicies - - value = cspo.get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn) - if value is None: - self.__contexts.put(bb(c), "", txn=txn) - - contexts_value = cspo.get(bb("%s^%s^%s^%s^" % ("", s, p, o)), txn=txn) or b("") - contexts = set(contexts_value.split(b("^"))) - contexts.add(bb(c)) - contexts_value = b("^").join(contexts) - assert contexts_value!=None - - cspo.put(bb("%s^%s^%s^%s^" % (c, s, p, o)), "", txn=txn) - cpos.put(bb("%s^%s^%s^%s^" % (c, p, o, s)), "", txn=txn) - cosp.put(bb("%s^%s^%s^%s^" % (c, o, s, p)), "", txn=txn) - if not quoted: - cspo.put(bb("%s^%s^%s^%s^" % ("", s, p, o)), contexts_value, txn=txn) - cpos.put(bb("%s^%s^%s^%s^" % ("", p, o, s)), contexts_value, txn=txn) - cosp.put(bb("%s^%s^%s^%s^" % ("", o, s, p)), contexts_value, txn=txn) - - self.__needs_sync = True - - def __remove(self, (s, p, o), c, quoted=False, txn=None): - cspo, cpos, cosp = self.__indicies - contexts_value = cspo.get(b("^").join([b(""), s, p, o, b("")]), txn=txn) or b("") - contexts = set(contexts_value.split(b("^"))) - contexts.discard(c) - contexts_value = b("^").join(contexts) - for i, _to_key, _from_key in self.__indicies_info: - i.delete(_to_key((s, p, o), c), txn=txn) - if not quoted: - if contexts_value: - for i, _to_key, _from_key in self.__indicies_info: - i.put(_to_key((s, p, o), b("")), contexts_value, txn=txn) - else: - for i, _to_key, _from_key in self.__indicies_info: - try: - i.delete(_to_key((s, p, o), b("")), txn=txn) - except db.DBNotFoundError, e: - pass # TODO: is it okay to ignore these? - - def remove(self, (subject, predicate, object), context, txn=None): - assert self.__open, "The Store must be open." - Store.remove(self, (subject, predicate, object), context) - _to_string = self._to_string - - if context is not None: - if context == self: - context = None - - if subject is not None and predicate is not None and object is not None and context is not None: - s = _to_string(subject, txn=txn) - p = _to_string(predicate, txn=txn) - o = _to_string(object, txn=txn) - c = _to_string(context, txn=txn) - value = self.__indicies[0].get(bb("%s^%s^%s^%s^" % (c, s, p, o)), txn=txn) - if value is not None: - self.__remove((bb(s), bb(p), bb(o)), bb(c), txn=txn) - self.__needs_sync = True - else: - cspo, cpos, cosp = self.__indicies - index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn) - - cursor = index.cursor(txn=txn) - try: - current = cursor.set_range(prefix) - needs_sync = True - except db.DBNotFoundError: - current = None - needs_sync = False - cursor.close() - while current: - key, value = current - cursor = index.cursor(txn=txn) - try: - cursor.set_range(key) - # Hack to stop 2to3 converting this to next(cursor) - current = getattr(cursor, 'next')() - except db.DBNotFoundError: - current = None - cursor.close() - if key.startswith(prefix): - c, s, p, o = from_key(key) - if context is None: - contexts_value = index.get(key, txn=txn) or b("") - contexts = set(contexts_value.split(b("^"))) # remove triple from all non quoted contexts - contexts.add(b("")) # and from the conjunctive index - for c in contexts: - for i, _to_key, _ in self.__indicies_info: - i.delete(_to_key((s, p, o), c), txn=txn) - else: - self.__remove((s, p, o), c, txn=txn) - else: - break - - if context is not None: - if subject is None and predicate is None and object is None: - # TODO: also if context becomes empty and not just on remove((None, None, None), c) - try: - self.__contexts.delete(bb(_to_string(context, txn=txn)), txn=txn) - except db.DBNotFoundError, e: - pass - - self.__needs_sync = needs_sync - - def triples(self, (subject, predicate, object), context=None, txn=None): - """A generator over all the triples matching """ - assert self.__open, "The Store must be open." - - if context is not None: - if context == self: - context = None - - _from_string = self._from_string - index, prefix, from_key, results_from_key = self.__lookup((subject, predicate, object), context, txn=txn) - - cursor = index.cursor(txn=txn) - try: - current = cursor.set_range(prefix) - except db.DBNotFoundError: - current = None - cursor.close() - while current: - key, value = current - cursor = index.cursor(txn=txn) - try: - cursor.set_range(key) - # Cheap hack so 2to3 doesn't convert to next(cursor) - current = getattr(cursor, 'next')() - except db.DBNotFoundError: - current = None - cursor.close() - if key and key.startswith(prefix): - contexts_value = index.get(key, txn=txn) - yield results_from_key(key, subject, predicate, object, contexts_value) - else: - break - - def __len__(self, context=None): - assert self.__open, "The Store must be open." - if context is not None: - if context == self: - context = None - - if context is None: - prefix = b("^") - else: - prefix = bb("%s^" % self._to_string(context)) - - index = self.__indicies[0] - cursor = index.cursor() - current = cursor.set_range(prefix) - count = 0 - while current: - key, value = current - if key.startswith(prefix): - count +=1 - # Hack to stop 2to3 converting this to next(cursor) - current = getattr(cursor, 'next')() - else: - break - cursor.close() - return count - - def bind(self, prefix, namespace): - prefix = prefix.encode("utf-8") - namespace = namespace.encode("utf-8") - bound_prefix = self.__prefix.get(namespace) - if bound_prefix: - self.__namespace.delete(bound_prefix) - self.__prefix[namespace] = prefix - self.__namespace[prefix] = namespace - - def namespace(self, prefix): - prefix = prefix.encode("utf-8") - ns = self.__namespace.get(prefix, None) - if ns is not None: - return URIRef(ns.decode('utf-8')) - return None - - def prefix(self, namespace): - namespace = namespace.encode("utf-8") - prefix = self.__prefix.get(namespace, None) - if prefix is not None: - return prefix.decode('utf-8') - return None - - def namespaces(self): - cursor = self.__namespace.cursor() - results = [] - current = cursor.first() - while current: - prefix, namespace = current - results.append((prefix.decode('utf-8'), namespace.decode('utf-8'))) - # Hack to stop 2to3 converting this to next(cursor) - current = getattr(cursor, 'next')() - cursor.close() - for prefix, namespace in results: - yield prefix, URIRef(namespace) - - def contexts(self, triple=None): - _from_string = self._from_string - _to_string = self._to_string - - if triple: - s, p, o = triple - s = _to_string(s) - p = _to_string(p) - o = _to_string(o) - contexts = self.__indicies[0].get(bb("%s^%s^%s^%s^" % ("", s, p, o))) - if contexts: - for c in contexts.split(b("^")): - if c: - yield _from_string(c) - else: - index = self.__contexts - cursor = index.cursor() - current = cursor.first() - cursor.close() - while current: - key, value = current - context = _from_string(key) - yield context - cursor = index.cursor() - try: - cursor.set_range(key) - # Hack to stop 2to3 converting this to next(cursor) - current = getattr(cursor, 'next')() - except db.DBNotFoundError: - current = None - cursor.close() - - def _from_string(self, i): - k = self.__i2k.get(int(i)) - return self._loads(k) - - def _to_string(self, term, txn=None): - k = self._dumps(term) - i = self.__k2i.get(k, txn=txn) - if i is None: - # weird behavoir from bsddb not taking a txn as a keyword argument - # for append - if self.transaction_aware: - i = "%s" % self.__i2k.append(k, txn) - else: - i = "%s" % self.__i2k.append(k) - - self.__k2i.put(k, i, txn=txn) - else: - i = i.decode() - return i - - def __lookup(self, (subject, predicate, object), context, txn=None): - _to_string = self._to_string - if context is not None: - context = _to_string(context, txn=txn) - i = 0 - if subject is not None: - i += 1 - subject = _to_string(subject, txn=txn) - if predicate is not None: - i += 2 - predicate = _to_string(predicate, txn=txn) - if object is not None: - i += 4 - object = _to_string(object, txn=txn) - index, prefix_func, from_key, results_from_key = self.__lookup_dict[i] - #print (subject, predicate, object), context, prefix_func, index #DEBUG - prefix = bb("^".join(prefix_func((subject, predicate, object), context))) - return index, prefix, from_key, results_from_key - - -def to_key_func(i): - def to_key(triple, context): - "Takes a string; returns key" - return b("^").join((context, triple[i%3], triple[(i+1)%3], triple[(i+2)%3], b(""))) # "" to tac on the trailing ^ - return to_key - -def from_key_func(i): - def from_key(key): - "Takes a key; returns string" - parts = key.split(b("^")) - return parts[0], parts[(3-i+0)%3+1], parts[(3-i+1)%3+1], parts[(3-i+2)%3+1] - return from_key - -def results_from_key_func(i, from_string): - def from_key(key, subject, predicate, object, contexts_value): - "Takes a key and subject, predicate, object; returns tuple for yield" - parts = key.split(b("^")) - if subject is None: - # TODO: i & 1: # dis assemble and/or measure to see which is faster - # subject is None or i & 1 - s = from_string(parts[(3-i+0)%3+1]) - else: - s = subject - if predicate is None:#i & 2: - p = from_string(parts[(3-i+1)%3+1]) - else: - p = predicate - if object is None:#i & 4: - o = from_string(parts[(3-i+2)%3+1]) - else: - o = object - return (s, p, o), (from_string(c) for c in contexts_value.split(b("^")) if c) - return from_key - -def readable_index(i): - s, p, o = "?" * 3 - if i & 1: s = "s" - if i & 2: p = "p" - if i & 4: o = "o" - return "%s,%s,%s" % (s, p, o) diff --git a/doc/rdflib3/py3compat.py b/doc/rdflib3/py3compat.py deleted file mode 100644 index d75eb7a..0000000 --- a/doc/rdflib3/py3compat.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -Utility functions and objects to ease Python 3 compatibility. -""" -import sys - -try: - from functools import wraps -except ImportError: - # No-op wraps decorator - def wraps(f): - def dec(newf): return newf - return dec - -def cast_bytes(s, enc='utf-8'): - if isinstance(s, unicode): - return s.encode(enc) - return s - -PY3 = (sys.version_info[0] >= 3) - -def _modify_str_or_docstring(str_change_func): - @wraps(str_change_func) - def wrapper(func_or_str): - if isinstance(func_or_str, str): - func = None - doc = func_or_str - else: - func = func_or_str - doc = func.__doc__ - - doc = str_change_func(doc) - - if func: - func.__doc__ = doc - return func - return doc - return wrapper - -if PY3: - # Python 3: - # --------- - def b(s): - return s.encode('ascii') - - bytestype = bytes - - # Abstract u'abc' syntax: - @_modify_str_or_docstring - def format_doctest_out(s): - """Python 2 version - "%(u)s'abc'" --> "'abc'" - "%(b)s'abc'" --> "b'abc'" - "55%(L)s" --> "55" - "unicode(x)" --> "str(x)" - - Accepts a string or a function, so it can be used as a decorator.""" - return s % {'u':'', 'b':'b', 'L':'', 'unicode': 'str'} - - def type_cmp(a, b): - """Python 2 style comparison based on type""" - ta, tb = type(a).__name__, type(b).__name__ - # Ugly hack: some tests rely on tuple sorting before unicode, and I - # don't know if that's important. Better retain it for now. - if ta == 'str': - ta = 'unicode' - if tb == 'str': - tb = 'unicode' - # return 1 if ta > tb else -1 if ta < tb else 0 - if ta > tb: - return 1 - elif ta < tb: - return -1 - else: - return 0 - -else: - # Python 2 - # -------- - def b(s): - return s - - bytestype = str - - # Abstract u'abc' syntax: - @_modify_str_or_docstring - def format_doctest_out(s): - """Python 2 version - "%(u)s'abc'" --> "u'abc'" - "%(b)s'abc'" --> "'abc'" - "55%(L)s" --> "55L" - - Accepts a string or a function, so it can be used as a decorator.""" - return s % {'u':'u', 'b':'', 'L':'L', 'unicode':'unicode'} - - def type_cmp(a, b): - # return 1 if a > b else -1 if a < b else 0 - if a > b: - return 1 - elif a < b: - return -1 - else: - return 0 - diff --git a/doc/rdflib3/py3compat.pyc b/doc/rdflib3/py3compat.pyc deleted file mode 100644 index ee947a9..0000000 Binary files a/doc/rdflib3/py3compat.pyc and /dev/null differ diff --git a/doc/rdflib3/query.py b/doc/rdflib3/query.py deleted file mode 100644 index 878a9c7..0000000 --- a/doc/rdflib3/query.py +++ /dev/null @@ -1,180 +0,0 @@ - -import os -import shutil -import tempfile -import warnings -from urlparse import urlparse -try: - from io import BytesIO -except: - from StringIO import StringIO as BytesIO - - -__all__ = ['Processor', 'Result', 'ResultParser', 'ResultSerializer', 'ResultException'] - - -""" -Query plugin interface. - -This module is useful for those wanting to write a query processor -that can plugin to rdf. If you are wanting to execute a query you -likely want to do so through the Graph class query method. - -""" - - -class Processor(object): - - def __init__(self, graph): - pass - - def query(self, strOrQuery, initBindings={}, initNs={}, DEBUG=False): - pass - -class ResultException(Exception): - pass - -class EncodeOnlyUnicode(object): - """ - This is a crappy work-around for - http://bugs.python.org/issue11649 - - - """ - - def __init__(self, stream): - self.__stream=stream - def write(self, arg): - if isinstance(arg, unicode): - self.__stream.write(arg.encode("utf-8")) - else: - self.__stream.write(arg) - def __getattr__(self, name): - return getattr(self.__stream, name) - - -class Result(object): - """ - A common class for representing query result. - This is backwards compatible with the old SPARQLResult objects - Like before there is a bit of magic that makes this appear like python objects, depending on the type of result. - - If the type is "SELECT", this is like a list of list of values - If the type is "ASK" this is like a list of a single bool - If the type is "CONSTRUCT" or "DESCRIBE" this is like a graph - - """ - def __init__(self, type_): - - if type_ not in ('CONSTRUCT', 'DESCRIBE', 'SELECT', 'ASK'): - raise ResultException('Unknown Result type: %s'%type_) - - self.type = type_ - self.vars=None - self.bindings=None - self.askAnswer=None - self.graph=None - - @staticmethod - def parse(source, format='xml'): - from rdflib import plugin - parser=plugin.get(format, ResultParser)() - return parser.parse(source) - - - def serialize(self, destination=None, encoding="utf-8", format='xml', **args): - - if self.type in ('CONSTRUCT', 'DESCRIBE'): - return self.graph.serialize(destination, encoding=encoding, format=format, **args) - - """stolen wholesale from graph.serialize""" - from rdflib import plugin - serializer=plugin.get(format, ResultSerializer)(self) - if destination is None: - stream = BytesIO() - stream2 = EncodeOnlyUnicode(stream) - serializer.serialize(stream2, encoding=encoding, **args) - return stream.getvalue() - if hasattr(destination, "write"): - stream = destination - serializer.serialize(stream, encoding=encoding, **args) - else: - location = destination - scheme, netloc, path, params, query, fragment = urlparse(location) - if netloc!="": - print("WARNING: not saving as location" + \ - "is not a local file reference") - return - fd, name = tempfile.mkstemp() - stream = os.fdopen(fd, 'wb') - serializer.serialize(stream, encoding=encoding, **args) - stream.close() - if hasattr(shutil,"move"): - shutil.move(name, path) - else: - shutil.copy(name, path) - os.remove(name) - - - def __len__(self): - if self.type=='ASK': return 1 - elif self.type=='SELECT': return len(self.bindings) - else: - return len(self.graph) - - def __iter__(self): - if self.type in ("CONSTRUCT", "DESCRIBE"): - for t in self.graph: yield t - elif self.type=='ASK': - yield self.askAnswer - elif self.type=='SELECT': - # To remain compatible with the old SPARQLResult behaviour - # this iterates over lists of variable bindings - for b in self.bindings: - yield tuple(b.get(v) for v in self.vars) - - - def __getattr__(self,name): - if self.type in ("CONSTRUCT", "DESCRIBE") and self.graph!=None: - return self.graph.__getattr__(self,name) - elif self.type == 'SELECT' and name =='result': - warnings.warn( - "accessing the 'result' attribute is deprecated." - " Iterate over the object instead.", - DeprecationWarning, stacklevel=2) - # copied from __iter__, above - return [(tuple(b[v] for v in self.vars)) for b in self.bindings] - else: - raise AttributeError("'%s' object has no attribute '%s'"%(self,name)) - - def __eq__(self, other): - try: - if self.type!=other.type: return False - if self.type=='ASK': - return self.askAnswer==other.askAnswer - elif self.type=='SELECT': - return self.vars==other.vars and self.bindings==other.bindings - else: - return self.graph==other.graph - - except: - return False - -class ResultParser(object): - - def __init__(self): - pass - - def parse(self, source): - """return a Result object""" - pass # abstract - -class ResultSerializer(object): - - def __init__(self, result): - self.result=result - - def serialize(self, stream, encoding="utf-8"): - """return a string properly serialized""" - pass # abstract - diff --git a/doc/rdflib3/query.pyc b/doc/rdflib3/query.pyc deleted file mode 100644 index 50d9b6c..0000000 Binary files a/doc/rdflib3/query.pyc and /dev/null differ diff --git a/doc/rdflib3/resource.py b/doc/rdflib3/resource.py deleted file mode 100644 index 6fb371b..0000000 --- a/doc/rdflib3/resource.py +++ /dev/null @@ -1,408 +0,0 @@ -# -*- coding: utf-8 -*- -from rdflib import py3compat - -__doc__ = py3compat.format_doctest_out(""" -The :class:`~rdflib.resource.Resource` class wraps a :class:`~rdflib.graph.Graph` -and a resource reference (i.e. a :class:`rdflib.term.URIRef` or -:class:`rdflib.term.BNode`) to support a resource-oriented way of working with a -graph. - -It contains methods directly corresponding to those methods of the Graph -interface that relate to reading and writing data. The difference is that a -Resource also binds a resource identifier, making it possible to work without -tracking both the graph and a current subject. This makes for a "resource -oriented" style, as compared to the triple orientation of the Graph API. - -Resulting generators are also wrapped so that any resource reference values -(:class:`rdflib.term.URIRef`s and :class:`rdflib.term.BNode`s) are in turn -wrapped as Resources. (Note that this behaviour differs from the corresponding -methods in :class:`~rdflib.graph.Graph`, where no such conversion takes place.) - - -Basic Usage Scenario --------------------- - -Start by importing things we need and define some namespaces:: - - >>> from rdflib import * - >>> FOAF = Namespace("http://xmlns.com/foaf/0.1/") - >>> CV = Namespace("http://purl.org/captsolo/resume-rdf/0.2/cv#") - -Load some RDF data:: - - >>> graph = Graph().parse(format='n3', data=''' - ... @prefix rdfs: . - ... @prefix xsd: . - ... @prefix foaf: . - ... @prefix cv: . - ... - ... @base . - ... - ... a foaf:Person; - ... rdfs:comment "Just a Python & RDF hacker."@en; - ... foaf:depiction ; - ... foaf:homepage ; - ... foaf:name "Some Body" . - ... - ... a foaf:Image; - ... rdfs:label "some 1"@en; - ... rdfs:comment "Just an image"@en; - ... foaf:thumbnail . - ... - ... a foaf:Image . - ... - ... [] a cv:CV; - ... cv:aboutPerson ; - ... cv:hasWorkHistory [ cv:employedIn ; - ... cv:startDate "2009-09-04"^^xsd:date ] . - ... ''') - -Create a Resource:: - - >>> person = Resource(graph, URIRef("http://example.org/person/some1#self")) - -Retrieve some basic facts:: - - >>> person.identifier - rdflib.term.URIRef(%(u)s'http://example.org/person/some1#self') - - >>> person.value(FOAF.name) - rdflib.term.Literal(%(u)s'Some Body') - - >>> person.value(RDFS.comment) - rdflib.term.Literal(%(u)s'Just a Python & RDF hacker.', lang=%(u)s'en') - -Resources as unicode are represented by their identifiers as unicode:: - - >>> %(unicode)s(person) - %(u)s'http://example.org/person/some1#self' - -Resource references are also Resources, so you can easily get e.g. a qname -for the type of a resource, like:: - - >>> person.value(RDF.type).qname() - %(u)s'foaf:Person' - -Or for the predicates of a resource:: - - >>> sorted(p.qname() for p in person.predicates()) - [%(u)s'foaf:depiction', %(u)s'foaf:homepage', %(u)s'foaf:name', %(u)s'rdf:type', %(u)s'rdfs:comment'] - -Follow relations and get more data from their Resources as well:: - - >>> for pic in person.objects(FOAF.depiction): - ... print(pic.identifier) - ... print(pic.value(RDF.type).qname()) - ... print(pic.label()) - ... print(pic.comment()) - ... print(pic.value(FOAF.thumbnail).identifier) - http://example.org/images/person/some1.jpg - foaf:Image - some 1 - Just an image - http://example.org/images/person/some1-thumb.jpg - - >>> for cv in person.subjects(CV.aboutPerson): - ... work = list(cv.objects(CV.hasWorkHistory))[0] - ... print(work.value(CV.employedIn).identifier) - ... print(work.value(CV.startDate)) - http://example.org/#company - 2009-09-04 - -It's just as easy to work with the predicates of a resource:: - - >>> for s, p in person.subject_predicates(): - ... print(s.value(RDF.type).qname()) - ... print(p.qname()) - ... for s, o in p.subject_objects(): - ... print(s.value(RDF.type).qname()) - ... print(o.value(RDF.type).qname()) - cv:CV - cv:aboutPerson - cv:CV - foaf:Person - -This is useful for e.g. inspection:: - - >>> thumb_ref = URIRef("http://example.org/images/person/some1-thumb.jpg") - >>> thumb = Resource(graph, thumb_ref) - >>> for p, o in thumb.predicate_objects(): - ... print(p.qname()) - ... print(o.qname()) - rdf:type - foaf:Image - -Similarly, adding, setting and removing data is easy:: - - >>> thumb.add(RDFS.label, Literal("thumb")) - >>> print(thumb.label()) - thumb - >>> thumb.set(RDFS.label, Literal("thumbnail")) - >>> print(thumb.label()) - thumbnail - >>> thumb.remove(RDFS.label) - >>> list(thumb.objects(RDFS.label)) - [] - - -Schema Example --------------- - -With this artificial schema data:: - - >>> graph = Graph().parse(format='n3', data=''' - ... @prefix rdf: . - ... @prefix rdfs: . - ... @prefix owl: . - ... @prefix v: . - ... - ... v:Artifact a owl:Class . - ... - ... v:Document a owl:Class; - ... rdfs:subClassOf v:Artifact . - ... - ... v:Paper a owl:Class; - ... rdfs:subClassOf v:Document . - ... - ... v:Choice owl:oneOf (v:One v:Other) . - ... - ... v:Stuff a rdf:Seq; rdf:_1 v:One; rdf:_2 v:Other . - ... - ... ''') - -From this class:: - - >>> artifact = Resource(graph, URIRef("http://example.org/def/v#Artifact")) - -we can get at subclasses:: - - >>> subclasses = list(artifact.transitive_subjects(RDFS.subClassOf)) - >>> [c.qname() for c in subclasses] - [%(u)s'v:Artifact', %(u)s'v:Document', %(u)s'v:Paper'] - -and superclasses from the last subclass:: - - >>> [c.qname() for c in subclasses[-1].transitive_objects(RDFS.subClassOf)] - [%(u)s'v:Paper', %(u)s'v:Document', %(u)s'v:Artifact'] - -Get items from the Choice:: - - >>> choice = Resource(graph, URIRef("http://example.org/def/v#Choice")) - >>> [it.qname() for it in choice.value(OWL.oneOf).items()] - [%(u)s'v:One', %(u)s'v:Other'] - -And the sequence of Stuff:: - - >>> stuff = Resource(graph, URIRef("http://example.org/def/v#Stuff")) - >>> [it.qname() for it in stuff.seq()] - [%(u)s'v:One', %(u)s'v:Other'] - - -Technical Details ------------------ - -Comparison is based on graph and identifier:: - - >>> g1 = Graph() - >>> t1 = Resource(g1, URIRef("http://example.org/thing")) - >>> t2 = Resource(g1, URIRef("http://example.org/thing")) - >>> t3 = Resource(g1, URIRef("http://example.org/other")) - >>> t4 = Resource(Graph(), URIRef("http://example.org/other")) - - >>> t1 is t2 - False - - >>> t1 == t2 - True - >>> t1 != t2 - False - - >>> t1 == t3 - False - >>> t1 != t3 - True - - >>> t3 != t4 - True - - >>> t3 < t1 and t1 > t3 - True - >>> t1 >= t1 and t1 >= t3 - True - >>> t1 <= t1 and t3 <= t1 - True - - >>> t1 < t1 or t1 < t3 or t3 > t1 or t3 > t3 - False - -Hash is computed from graph and identifier:: - - >>> g1 = Graph() - >>> t1 = Resource(g1, URIRef("http://example.org/thing")) - - >>> hash(t1) == hash(Resource(g1, URIRef("http://example.org/thing"))) - True - - >>> hash(t1) == hash(Resource(Graph(), t1.identifier)) - False - >>> hash(t1) == hash(Resource(Graph(), URIRef("http://example.org/thing"))) - False - -The Resource class is suitable as a base class for mapper toolkits. For -example, consider this utility for accessing RDF properties via qname-like -attributes:: - - >>> class Item(Resource): - ... - ... def __getattr__(self, p): - ... return list(self.objects(self._to_ref(*p.split('_', 1)))) - ... - ... def _to_ref(self, pfx, name): - ... return URIRef(self._graph.store.namespace(pfx) + name) - -It works as follows:: - - >>> graph = Graph().parse(format='n3', data=''' - ... @prefix rdfs: . - ... @prefix foaf: . - ... - ... @base . - ... - ... foaf:name "Some Body"; - ... foaf:depiction . - ... rdfs:comment "Just an image"@en . - ... ''') - - >>> person = Item(graph, URIRef("http://example.org/person/some1#self")) - - >>> print(person.foaf_name[0]) - Some Body - -The mechanism for wrapping references as resources cooperates with subclasses. -Therefore, accessing referenced resources automatically creates new ``Item`` -objects:: - - >>> isinstance(person.foaf_depiction[0], Item) - True - - >>> print(person.foaf_depiction[0].rdfs_comment[0]) - Just an image - -""") - -from rdflib.term import BNode, URIRef -from rdflib.namespace import RDF - -__all__ = ['Resource'] - -class Resource(object): - - def __init__(self, graph, subject): - self._graph = graph - self._identifier = subject - - graph = property(lambda self: self._graph) - - identifier = property(lambda self: self._identifier) - - def __hash__(self): - return hash(Resource) ^ hash(self._graph) ^ hash(self._identifier) - - def __eq__(self, other): - return (isinstance(other, Resource) and - self._graph == other._graph and - self._identifier == other._identifier) - - __ne__ = lambda self, other: not self == other - - def __lt__(self, other): - if isinstance(other, Resource): - return self._identifier < other._identifier - else: - return False - - __gt__ = lambda self, other: not (self < other or self == other) - __le__ = lambda self, other: self < other or self == other - __ge__ = lambda self, other: not self < other - - def __unicode__(self): - return unicode(self._identifier) - - if py3compat.PY3: - __str__ = __unicode__ - - def add(self, p, o): - self._graph.add((self._identifier, p, o)) - - def remove(self, p, o=None): - self._graph.remove((self._identifier, p, o)) - - def set(self, predicate, object): - self._graph.set((self._identifier, predicate, object)) - - def subjects(self, predicate=None): # rev - return self._resources(self._graph.subjects(predicate, self._identifier)) - - def predicates(self, object=None): - return self._resources(self._graph.predicates(self._identifier, object)) - - def objects(self, predicate=None): - return self._resources(self._graph.objects(self._identifier, predicate)) - - def subject_predicates(self): - return self._resource_pairs( - self._graph.subject_predicates(self._identifier)) - - def subject_objects(self): - return self._resource_pairs( - self._graph.subject_objects(self._identifier)) - - def predicate_objects(self): - return self._resource_pairs( - self._graph.predicate_objects(self._identifier)) - - def value(self, predicate=RDF.value, object=None, default=None, any=True): - return self._cast( - self._graph.value(self._identifier, predicate, object, default, any)) - - def label(self): - return self._graph.label(self._identifier) - - def comment(self): - return self._graph.comment(self._identifier) - - def items(self): - return self._resources(self._graph.items(self._identifier)) - - def transitive_objects(self, predicate, remember=None): - return self._resources(self._graph.transitive_objects( - self._identifier, predicate, remember)) - - def transitive_subjects(self, predicate, remember=None): - return self._resources(self._graph.transitive_subjects( - predicate, self._identifier, remember)) - - def seq(self): - return self._resources(self._graph.seq(self._identifier)) - - def qname(self): - return self._graph.qname(self._identifier) - - def _resource_pairs(self, pairs): - for s1, s2 in pairs: - yield self._cast(s1), self._cast(s2) - - def _resources(self, nodes): - for node in nodes: - yield self._cast(node) - - def _cast(self, node): - if isinstance(node, (BNode, URIRef)): - return self._new(node) - else: - return node - - def _new(self, subject): - return type(self)(self._graph, subject) - - diff --git a/doc/rdflib3/resource.pyc b/doc/rdflib3/resource.pyc deleted file mode 100644 index dc1fba6..0000000 Binary files a/doc/rdflib3/resource.pyc and /dev/null differ diff --git a/doc/rdflib3/serializer.py b/doc/rdflib3/serializer.py deleted file mode 100644 index 2ecc1d0..0000000 --- a/doc/rdflib3/serializer.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Serializer plugin interface. - -This module is useful for those wanting to write a serializer that can -plugin to rdflib. If you are wanting to invoke a serializer you likely -want to do so through the Graph class serialize method. - -TODO: info for how to write a serializer that can plugin to rdflib. See also rdflib.plugin - -""" - -from rdflib.term import URIRef - -__all__ = ['Serializer'] - -class Serializer(object): - - def __init__(self, store): - self.store = store - self.encoding = "UTF-8" - self.base = None - - def serialize(self, stream, base=None, encoding=None, **args): - """Abstract method""" - - def relativize(self, uri): - base = self.base - if base is not None and uri.startswith(base): - uri = URIRef(uri.replace(base, "", 1)) - return uri - diff --git a/doc/rdflib3/serializer.pyc b/doc/rdflib3/serializer.pyc deleted file mode 100644 index 950dee6..0000000 Binary files a/doc/rdflib3/serializer.pyc and /dev/null differ diff --git a/doc/rdflib3/store.py b/doc/rdflib3/store.py deleted file mode 100644 index 426a374..0000000 --- a/doc/rdflib3/store.py +++ /dev/null @@ -1,293 +0,0 @@ -""" -============ -rdflib.store -============ - -Types of store --------------- - -``Context-aware``: An RDF store capable of storing statements within contexts -is considered context-aware. Essentially, such a store is able to partition -the RDF model it represents into individual, named, and addressable -sub-graphs. - -Relevant Notation3 reference regarding formulae, quoted statements, and such: -http://www.w3.org/DesignIssues/Notation3.html - -``Formula-aware``: An RDF store capable of distinguishing between statements -that are asserted and statements that are quoted is considered formula-aware. - -``Transaction-capable``: capable of providing transactional integrity to the -RDF operations performed on it. - ------- -""" - -#Constants representing the state of a Store (returned by the open method) -VALID_STORE = 1 -CORRUPTED_STORE = 0 -NO_STORE = -1 -UNKNOWN = None - -from rdflib.events import Dispatcher, Event - -__all__ = ['StoreCreatedEvent', 'TripleAddedEvent', 'TripleRemovedEvent', 'NodePickler', 'Store'] - -class StoreCreatedEvent(Event): - """ - This event is fired when the Store is created, it has the following attribute: - - - ``configuration``: string used to create the store - - """ - -class TripleAddedEvent(Event): - """ - This event is fired when a triple is added, it has the following attributes: - - - the ``triple`` added to the graph - - the ``context`` of the triple, if any - - the ``graph`` to which the triple was added - """ - -class TripleRemovedEvent(Event): - """ - This event is fired when a triple is removed, it has the following attributes: - - - the ``triple`` removed from the graph - - the ``context`` of the triple, if any - - the ``graph`` from which the triple was removed - """ - -from cPickle import Pickler, Unpickler, UnpicklingError -try: - from io import BytesIO -except ImportError: - from cStringIO import StringIO as BytesIO - - -class NodePickler(object): - def __init__(self): - self._objects = {} - self._ids = {} - self._get_object = self._objects.__getitem__ - - def _get_ids(self, key): - try: - return self._ids.get(key) - except TypeError, e: - return None - - def register(self, object, id): - self._objects[id] = object - self._ids[object] = id - - def loads(self, s): - up = Unpickler(BytesIO(s)) - up.persistent_load = self._get_object - try: - return up.load() - except KeyError, e: - raise UnpicklingError, "Could not find Node class for %s" % e - - def dumps(self, obj, protocol=None, bin=None): - src = BytesIO() - p = Pickler(src) - p.persistent_id = self._get_ids - p.dump(obj) - return src.getvalue() - - -class Store(object): - #Properties - context_aware = False - formula_aware = False - transaction_aware = False - batch_unification = False - def __init__(self, configuration=None, identifier=None): - """ - identifier: URIRef of the Store. Defaults to CWD - configuration: string containing infomation open can use to - connect to datastore. - """ - self.__node_pickler = None - self.dispatcher = Dispatcher() - if configuration: - self.open(configuration) - - def __get_node_pickler(self): - if self.__node_pickler is None: - from rdflib.term import URIRef - from rdflib.term import BNode - from rdflib.term import Literal - from rdflib.graph import Graph, QuotedGraph, GraphValue - from rdflib.term import Variable - from rdflib.term import Statement - self.__node_pickler = np = NodePickler() - np.register(self, "S") - np.register(URIRef, "U") - np.register(BNode, "B") - np.register(Literal, "L") - np.register(Graph, "G") - np.register(QuotedGraph, "Q") - np.register(Variable, "V") - np.register(Statement, "s") - np.register(GraphValue, "v") - return self.__node_pickler - node_pickler = property(__get_node_pickler) - - #Database management methods - def create(self, configuration): - self.dispatcher.dispatch(StoreCreatedEvent(configuration=configuration)) - - def open(self, configuration, create=False): - """ - Opens the store specified by the configuration string. If - create is True a store will be created if it does not already - exist. If create is False and a store does not already exist - an exception is raised. An exception is also raised if a store - exists, but there is insufficient permissions to open the - store. This should return one of VALID_STORE,CORRUPTED_STORE,or NO_STORE - """ - return UNKNOWN - - def close(self, commit_pending_transaction=False): - """ - This closes the database connection. The commit_pending_transaction parameter specifies whether to - commit all pending transactions before closing (if the store is transactional). - """ - - def destroy(self, configuration): - """ - This destroys the instance of the store identified by the configuration string. - """ - - def gc(self): - """ - Allows the store to perform any needed garbage collection - """ - pass - - #RDF APIs - def add(self, (subject, predicate, object), context, quoted=False): - """ - Adds the given statement to a specific context or to the model. The quoted argument - is interpreted by formula-aware stores to indicate this statement is quoted/hypothetical - It should be an error to not specify a context and have the quoted argument be True. - It should also be an error for the quoted argument to be True when the store is not formula-aware. - """ - self.dispatcher.dispatch(TripleAddedEvent(triple=(subject, predicate, object), context=context)) - - def addN(self, quads): - """ - Adds each item in the list of statements to a specific context. The quoted argument - is interpreted by formula-aware stores to indicate this statement is quoted/hypothetical. - Note that the default implementation is a redirect to add - """ - for s,p,o,c in quads: - assert c is not None, "Context associated with %s %s %s is None!"%(s,p,o) - self.add( - (s,p,o), - c - ) - - def remove(self, (subject, predicate, object), context=None): - """ Remove the set of triples matching the pattern from the store """ - self.dispatcher.dispatch(TripleRemovedEvent(triple=(subject, predicate, object), context=context)) - - def triples_choices(self, (subject, predicate, object_),context=None): - """ - A variant of triples that can take a list of terms instead of a single - term in any slot. Stores can implement this to optimize the response time - from the default 'fallback' implementation, which will iterate - over each term in the list and dispatch to tripless - """ - if isinstance(object_,list): - assert not isinstance(subject,list), "object_ / subject are both lists" - assert not isinstance(predicate,list), "object_ / predicate are both lists" - if object_: - for obj in object_: - for (s1, p1, o1), cg in self.triples((subject,predicate,obj),context): - yield (s1, p1, o1), cg - else: - for (s1, p1, o1), cg in self.triples((subject,predicate,None),context): - yield (s1, p1, o1), cg - - elif isinstance(subject,list): - assert not isinstance(predicate,list), "subject / predicate are both lists" - if subject: - for subj in subject: - for (s1, p1, o1), cg in self.triples((subj,predicate,object_),context): - yield (s1, p1, o1), cg - else: - for (s1, p1, o1), cg in self.triples((None,predicate,object_),context): - yield (s1, p1, o1), cg - - elif isinstance(predicate,list): - assert not isinstance(subject,list), "predicate / subject are both lists" - if predicate: - for pred in predicate: - for (s1, p1, o1), cg in self.triples((subject,pred,object_),context): - yield (s1, p1, o1), cg - else: - for (s1, p1, o1), cg in self.triples((subject,None,object_),context): - yield (s1, p1, o1), cg - - def triples(self, triple_pattern, context=None): - """ - A generator over all the triples matching the pattern. Pattern can - include any objects for used for comparing against nodes in the store, for - example, REGEXTerm, URIRef, Literal, BNode, Variable, Graph, QuotedGraph, Date? DateRange? - - A conjunctive query can be indicated by either providing a value of None - for the context or the identifier associated with the Conjunctive Graph (if it's context aware). - """ - subject, predicate, object = triple_pattern - - # variants of triples will be done if / when optimization is needed - - def __len__(self, context=None): - """ - Number of statements in the store. This should only account for non-quoted (asserted) statements - if the context is not specified, otherwise it should return the number of statements in the formula or context given. - """ - - def contexts(self, triple=None): - """ - Generator over all contexts in the graph. If triple is specified, a generator over all - contexts the triple is in. - """ - - def query(self, query, initNs, initBindings, **kwargs): - """ - If stores provide their own SPARQL implementation, override this. - """ - - return NotImplemented - - # Optional Namespace methods - - def bind(self, prefix, namespace): - """ """ - - def prefix(self, namespace): - """ """ - - def namespace(self, prefix): - """ """ - - def namespaces(self): - """ """ - if False: - yield None - - # Optional Transactional methods - - def commit(self): - """ """ - - def rollback(self): - """ """ - - - diff --git a/doc/rdflib3/store.pyc b/doc/rdflib3/store.pyc deleted file mode 100644 index 2a85031..0000000 Binary files a/doc/rdflib3/store.pyc and /dev/null differ diff --git a/doc/rdflib3/term.py b/doc/rdflib3/term.py deleted file mode 100644 index 6150a9a..0000000 --- a/doc/rdflib3/term.py +++ /dev/null @@ -1,1042 +0,0 @@ -""" -This module defines the different types of terms. Terms are the kinds of -objects that can appear in a quoted/asserted triple. This includes those -that are core to RDF: - -* Blank Nodes -* URI References -* Literals (which consist of a literal value,datatype and language tag) - -Those that extend the RDF model into N3: - -* Formulae -* Universal Quantifications (Variables) - -And those that are primarily for matching against 'Nodes' in the underlying Graph: - -* REGEX Expressions -* Date Ranges -* Numerical Ranges - -""" - -__all__ = [ - 'bind', - - 'Node', - 'Identifier', - - 'URIRef', - 'BNode', - 'Literal', - - 'Variable', - 'Statement', - ] - -import logging - -_LOGGER = logging.getLogger(__name__) - -import base64 -import threading - -from urlparse import urlparse, urljoin, urldefrag -from string import ascii_letters -from random import choice -from itertools import islice -from datetime import date, time, datetime -from isodate import parse_time, parse_date, parse_datetime -from re import sub - -try: - from hashlib import md5 -except ImportError: - from md5 import md5 - -import py3compat -b = py3compat.b - -class Node(object): - """ - A Node in the Graph. - """ - - __slots__ = () - - -class Identifier(Node, unicode): # we allow Identifiers to be Nodes in our Graph - """ - See http://www.w3.org/2002/07/rdf-identifer-terminology/ - regarding choice of terminology. - """ - - __slots__ = () - - def __new__(cls, value): - return unicode.__new__(cls, value) - - -class URIRef(Identifier): - """ - RDF URI Reference: http://www.w3.org/TR/rdf-concepts/#section-Graph-URIref - """ - - __slots__ = () - - def __new__(cls, value, base=None): - if base is not None: - ends_in_hash = value.endswith("#") - value = urljoin(base, value, allow_fragments=1) - if ends_in_hash: - if not value.endswith("#"): - value += "#" - #if normalize and value and value != normalize("NFC", value): - # raise Error("value must be in NFC normalized form.") - try: - rt = unicode.__new__(cls, value) - except UnicodeDecodeError: - rt = unicode.__new__(cls, value, 'utf-8') - return rt - - def toPython(self): - return unicode(self) - - def n3(self): - return "<%s>" % self - - def concrete(self): - if "#" in self: - return URIRef("/".join(self.rsplit("#", 1))) - else: - return self - - def abstract(self): - if "#" not in self: - scheme, netloc, path, params, query, fragment = urlparse(self) - if path: - return URIRef("#".join(self.rsplit("/", 1))) - else: - if not self.endswith("#"): - return URIRef("%s#" % self) - else: - return self - else: - return self - - - def defrag(self): - if "#" in self: - url, frag = urldefrag(self) - return URIRef(url) - else: - return self - - def __reduce__(self): - return (URIRef, (unicode(self),)) - - def __getnewargs__(self): - return (unicode(self), ) - - - def __ne__(self, other): - return not self.__eq__(other) - - def __eq__(self, other): - if isinstance(other, URIRef): - return unicode(self)==unicode(other) - else: - return False - - def __hash__(self): - return hash(URIRef) ^ hash(unicode(self)) - - if not py3compat.PY3: - def __str__(self): - return self.encode() - - def __repr__(self): - if self.__class__ is URIRef: - clsName = "rdflib.term.URIRef" - else: - clsName = self.__class__.__name__ - - return """%s(%s)""" % (clsName, super(URIRef,self).__repr__()) - - - def md5_term_hash(self): - """a string of hex that will be the same for two URIRefs that - are the same. It is not a suitable unique id. - - Supported for backwards compatibility; new code should - probably just use __hash__ - """ - d = md5(self.encode()) - d.update(b("U")) - return d.hexdigest() - - -def _unique_id(): - # Used to read: """Create a (hopefully) unique prefix""" - # now retained merely to leave interal API unchanged. - # From BNode.__new__() below ... - # - # acceptable bnode value range for RDF/XML needs to be - # something that can be serialzed as a nodeID for N3 - # - # BNode identifiers must be valid NCNames" _:[A-Za-z][A-Za-z0-9]* - # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID - return "N" # ensure that id starts with a letter - - -# Adapted from http://icodesnip.com/snippet/python/simple-universally-unique-id-uuid-or-guid -def bnode_uuid(): - """ - Generates a uuid on behalf of Python 2.4 - """ - import os - import random - import socket - from time import time - from binascii import hexlify - - pid = [None] - - try: - ip = socket.gethostbyname(socket.gethostname()) - ip = long(ip.replace('.', '999').replace(':', '999')) - except: - # if we can't get a network address, just imagine one - ip = long(random.random() * 100000000000000000L) - - def _generator(): - if os.getpid() != pid[0]: - # Process might have been forked (issue 200), must reseed random: - try: - preseed = long(hexlify(os.urandom(16)), 16) - except NotImplementedError: - preseed = 0 - seed = long(str(preseed) + str(os.getpid()) - + str(long(time() * 1000000)) + str(ip)) - random.seed(seed) - pid[0] = os.getpid() - - t = long(time() * 1000.0) - r = long(random.random() * 100000000000000000L) - data = str(t) + ' ' + str(r) + ' ' + str(ip) - return md5(data).hexdigest() - - return _generator - - -def uuid4_ncname(): - """ - Generates UUID4-based but ncname-compliant identifiers. - """ - from uuid import uuid4 - - def _generator(): - return uuid4().hex - - return _generator - - -def _serial_number_generator(): - import sys - if sys.version_info[:2] < (2, 5): - return bnode_uuid() - else: - return uuid4_ncname() - - -class BNode(Identifier): - """ - Blank Node: http://www.w3.org/TR/rdf-concepts/#section-blank-nodes - - """ - __slots__ = () - - - def __new__(cls, value=None, - _sn_gen=_serial_number_generator(), _prefix=_unique_id()): - """ - # only store implementations should pass in a value - """ - if value==None: - # so that BNode values do not - # collide with ones created with a different instance of this module - # at some other time. - node_id = _sn_gen() - value = "%s%s" % (_prefix, node_id) - else: - # TODO: check that value falls within acceptable bnode value range - # for RDF/XML needs to be something that can be serialzed - # as a nodeID for N3 ?? Unless we require these - # constraints be enforced elsewhere? - pass # assert is_ncname(unicode(value)), "BNode identifiers - # must be valid NCNames" _:[A-Za-z][A-Za-z0-9]* - # http://www.w3.org/TR/2004/REC-rdf-testcases-20040210/#nodeID - return Identifier.__new__(cls, value) - - def toPython(self): - return unicode(self) - - def n3(self): - return "_:%s" % self - - def __getnewargs__(self): - return (unicode(self), ) - - def __reduce__(self): - return (BNode, (unicode(self),)) - - def __ne__(self, other): - return not self.__eq__(other) - - def __eq__(self, other): - """ - >>> BNode("foo")==None - False - >>> BNode("foo")==URIRef("foo") - False - >>> URIRef("foo")==BNode("foo") - False - >>> BNode("foo")!=URIRef("foo") - True - >>> URIRef("foo")!=BNode("foo") - True - """ - if isinstance(other, BNode): - return unicode(self)==unicode(other) - else: - return False - - def __hash__(self): - return hash(BNode) ^ hash(unicode(self)) - - if not py3compat.PY3: - def __str__(self): - return self.encode() - - def __repr__(self): - if self.__class__ is BNode: - clsName = "rdflib.term.BNode" - else: - clsName = self.__class__.__name__ - return """%s('%s')""" % (clsName, str(self)) - - def md5_term_hash(self): - """a string of hex that will be the same for two BNodes that - are the same. It is not a suitable unique id. - - Supported for backwards compatibility; new code should - probably just use __hash__ - """ - d = md5(self.encode()) - d.update(b("B")) - return d.hexdigest() - - -class Literal(Identifier): - doc = """ - RDF Literal: http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal - - >>> from rdflib import Literal, XSD - >>> Literal(1).toPython() - 1%(L)s - >>> Literal("adsf") > 1 - True - >>> from rdflib.namespace import XSD - >>> lit2006 = Literal('2006-01-01',datatype=XSD.date) - >>> lit2006.toPython() - datetime.date(2006, 1, 1) - >>> lit2006 < Literal('2007-01-01',datatype=XSD.date) - True - >>> Literal(datetime.utcnow()).datatype - rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#dateTime') - >>> oneInt = Literal(1) - >>> twoInt = Literal(2) - >>> twoInt < oneInt - False - >>> Literal('1') < Literal(1) - False - >>> Literal('1') < Literal('1') - False - >>> Literal(1) < Literal('1') - True - >>> Literal(1) < Literal(2.0) - True - >>> Literal(1) < URIRef('foo') - True - >>> Literal(1) < 2.0 - True - >>> Literal(1) < object - True - >>> lit2006 < "2007" - True - >>> "2005" < lit2006 - True - >>> x = Literal("2", datatype=XSD.integer) - >>> x - rdflib.term.Literal(%(u)s'2', datatype=rdflib.term.URIRef(%(u)s'http://www.w3.org/2001/XMLSchema#integer')) - >>> Literal(x) == x - True - >>> x = Literal("cake", lang="en") - >>> x - rdflib.term.Literal(%(u)s'cake', lang='en') - >>> Literal(x) == x - True - """ - __doc__ = py3compat.format_doctest_out(doc) - - __slots__ = ("language", "datatype", "_cmp_value") - - def __new__(cls, value, lang=None, datatype=None): - if lang is not None and datatype is not None: - raise TypeError("A Literal can only have one of lang or datatype, " - "per http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal") - - if isinstance(value, Literal): # create from another Literal instance - datatype=datatype or value.datatype - lang=lang or value.language - - if datatype: - lang = None - else: - value, datatype = _castPythonToLiteral(value) - if datatype: - lang = None - if datatype: - datatype = URIRef(datatype) - if py3compat.PY3 and isinstance(value, bytes): - value = value.decode('utf-8') - try: - inst = unicode.__new__(cls, value) - except UnicodeDecodeError: - inst = unicode.__new__(cls, value, 'utf-8') - inst.language = lang - inst.datatype = datatype - inst._cmp_value = inst._toCompareValue() - return inst - - def __reduce__(self): - return (Literal, (unicode(self), self.language, self.datatype),) - - def __getstate__(self): - return (None, dict(language=self.language, datatype=self.datatype)) - - def __setstate__(self, arg): - _, d = arg - self.language = d["language"] - self.datatype = d["datatype"] - - @py3compat.format_doctest_out - def __add__(self, val): - """ - >>> Literal(1) + 1 - 2%(L)s - >>> Literal("1") + "1" - rdflib.term.Literal(%(u)s'11') - """ - - py = self.toPython() - if isinstance(py, Literal): - s = super(Literal, self).__add__(val) - return Literal(s, self.language, self.datatype) - else: - return py + val - - @py3compat.format_doctest_out - def __neg__(self): - """ - >>> (- Literal(1)) - -1%(L)s - >>> (- Literal(10.5)) - -10.5 - >>> from rdflib.namespace import XSD - >>> (- Literal("1", datatype=XSD['integer'])) - -1%(L)s - - Not working: - #>>> (- Literal("1")) - #Traceback (most recent call last): - # File "", line 1, in - #TypeError: Not a number; rdflib.term.Literal(u'1') - >>> - """ - - py = self.toPython() - try: - return py.__neg__() - except Exception, e: - raise TypeError("Not a number; %s" % repr(self)) - - @py3compat.format_doctest_out - def __pos__(self): - """ - >>> (+ Literal(1)) - 1%(L)s - >>> (+ Literal(-1)) - -1%(L)s - >>> from rdflib.namespace import XSD - >>> (+ Literal("-1", datatype=XSD['integer'])) - -1%(L)s - - Not working in Python 3: - #>>> (+ Literal("1")) - #Traceback (most recent call last): - # File "", line 1, in - #TypeError: Not a number; rdflib.term.Literal(u'1') - """ - py = self.toPython() - try: - return py.__pos__() - except Exception, e: - raise TypeError("Not a number; %s" % repr(self)) - - @py3compat.format_doctest_out - def __abs__(self): - """ - >>> abs(Literal(-1)) - 1%(L)s - >>> from rdflib.namespace import XSD - >>> abs( Literal("-1", datatype=XSD['integer'])) - 1%(L)s - - Not working in Python 3: - #>>> abs(Literal("1")) - #Traceback (most recent call last): - # File "", line 1, in - #TypeError: Not a number; rdflib.term.Literal(u'1') - """ - py = self.toPython() - try: - return py.__abs__() - except Exception, e: - raise TypeError("Not a number; %s" % repr(self)) - - @py3compat.format_doctest_out - def __invert__(self): - """ - >>> ~(Literal(-1)) - 0%(L)s - >>> from rdflib.namespace import XSD - >>> ~( Literal("-1", datatype=XSD['integer'])) - 0%(L)s - - Not working: - #>>> ~(Literal("1")) - #Traceback (most recent call last): - # File "", line 1, in - #TypeError: Not a number; rdflib.term.Literal(u'1') - >>> - """ - py = self.toPython() - try: - return py.__invert__() - except Exception: - raise TypeError("Not a number; %s" % repr(self)) - - @py3compat.format_doctest_out - def __lt__(self, other): - """ - >>> from rdflib.namespace import XSD - >>> Literal("YXNkZg==", datatype=XSD['base64Binary']) < "foo" - True - >>> %(u)s"\xfe" < Literal(%(u)s"foo") - False - >>> Literal(base64.encodestring(%(u)s"\xfe".encode("utf-8")), datatype=URIRef("http://www.w3.org/2001/XMLSchema#base64Binary")) < %(u)s"foo" - False - """ - - if other is None: - return False # Nothing is less than None - try: - return self._cmp_value < other - except UnicodeDecodeError, ue: - if isinstance(self._cmp_value, py3compat.bytestype): - return self._cmp_value < other.encode("utf-8") - else: - raise ue - except TypeError: - try: - # On Python 3, comparing bytes/str is a TypeError, not a UnicodeError - if isinstance(self._cmp_value, py3compat.bytestype): - return self._cmp_value < other.encode("utf-8") - return unicode(self._cmp_value) < other - except (TypeError, AttributeError): - # Treat different types like Python 2 for now. - return py3compat.type_cmp(self._cmp_value, other) == -1 - - def __le__(self, other): - """ - >>> from rdflib.namespace import XSD - >>> Literal('2007-01-01T10:00:00', datatype=XSD.dateTime) <= Literal('2007-01-01T10:00:00', datatype=XSD.dateTime) - True - """ - if other is None: - return False - if self==other: - return True - else: - return self < other - - def __gt__(self, other): - if other is None: - return True # Everything is greater than None - try: - return self._cmp_value > other - except UnicodeDecodeError, ue: - if isinstance(self._cmp_value, py3compat.bytestype): - return self._cmp_value > other.encode("utf-8") - else: - raise ue - except TypeError: - try: - # On Python 3, comparing bytes/str is a TypeError, not a UnicodeError - if isinstance(self._cmp_value, py3compat.bytestype): - return self._cmp_value > other.encode("utf-8") - return unicode(self._cmp_value) > other - except (TypeError, AttributeError): - # Treat different types like Python 2 for now. - return py3compat.type_cmp(self._cmp_value, other) == 1 - - def __ge__(self, other): - if other is None: - return False - if self==other: - return True - else: - return self > other - - def __ne__(self, other): - """ - Overriden to ensure property result for comparisons with None via !=. - Routes all other such != and <> comparisons to __eq__ - - >>> Literal('') != None - True - >>> Literal('2') != Literal('2') - False - - """ - return not self.__eq__(other) - - def __hash__(self): - """ - >>> from rdflib.namespace import XSD - >>> a = {Literal('1', datatype=XSD.integer):'one'} - >>> Literal('1', datatype=XSD.double) in a - False - - - "Called for the key object for dictionary operations, - and by the built-in function hash(). Should return - a 32-bit integer usable as a hash value for - dictionary operations. The only required property - is that objects which compare equal have the same - hash value; it is advised to somehow mix together - (e.g., using exclusive or) the hash values for the - components of the object that also play a part in - comparison of objects." -- 3.4.1 Basic customization (Python) - - "Two literals are equal if and only if all of the following hold: - * The strings of the two lexical forms compare equal, character by character. - * Either both or neither have language tags. - * The language tags, if any, compare equal. - * Either both or neither have datatype URIs. - * The two datatype URIs, if any, compare equal, character by character." - -- 6.5.1 Literal Equality (RDF: Concepts and Abstract Syntax) - - """ - - return Identifier.__hash__(self) ^ hash(self.language) ^ hash(self.datatype) - - @py3compat.format_doctest_out - def __eq__(self, other): - """ - >>> f = URIRef("foo") - >>> f is None or f == '' - False - >>> Literal("1", datatype=URIRef("foo")) == Literal("1", datatype=URIRef("foo")) - True - >>> Literal("1", datatype=URIRef("foo")) == Literal("2", datatype=URIRef("foo")) - False - >>> Literal("1", datatype=URIRef("foo")) == "asdf" - False - >>> from rdflib.namespace import XSD - >>> Literal('2007-01-01', datatype=XSD.date) == Literal('2007-01-01', datatype=XSD.date) - True - >>> Literal('2007-01-01', datatype=XSD.date) == date(2007, 1, 1) - True - >>> oneInt = Literal(1) - >>> oneNoDtype = Literal('1') - >>> oneInt == oneNoDtype - False - >>> Literal("1", XSD['string']) == Literal("1", XSD['string']) - True - >>> Literal("one", lang="en") == Literal("one", lang="en") - True - >>> Literal("hast", lang='en') == Literal("hast", lang='de') - False - >>> oneInt == Literal(1) - True - >>> oneFloat = Literal(1.0) - >>> oneInt == oneFloat - True - >>> oneInt == 1 - True - """ - if other is None: - return False - if isinstance(other, Literal): - return self._cmp_value == other._cmp_value - elif isinstance(other, basestring): - return unicode(self) == other - else: - return self._cmp_value == other - - @py3compat.format_doctest_out - def n3(self): - r''' - Returns a representation in the N3 format. - - Examples:: - - >>> Literal("foo").n3() - %(u)s'"foo"' - - Strings with newlines or triple-quotes:: - - >>> Literal("foo\nbar").n3() - %(u)s'"""foo\nbar"""' - - >>> Literal("''\'").n3() - %(u)s'"\'\'\'"' - - >>> Literal('"""').n3() - %(u)s'"\\"\\"\\""' - - Language:: - - >>> Literal("hello", lang="en").n3() - %(u)s'"hello"@en' - - Datatypes:: - - >>> Literal(1).n3() - %(u)s'"1"^^' - - >>> Literal(1, lang="en").n3() - %(u)s'"1"^^' - - >>> Literal(1.0).n3() - %(u)s'"1.0"^^' - - Datatype and language isn't allowed (datatype takes precedence):: - - >>> Literal(True).n3() - %(u)s'"true"^^' - - Custom datatype:: - - >>> footype = URIRef("http://example.org/ns#foo") - >>> Literal("1", datatype=footype).n3() - %(u)s'"1"^^' - - ''' - return self._literal_n3() - - @py3compat.format_doctest_out - def _literal_n3(self, use_plain=False, qname_callback=None): - ''' - Using plain literal (shorthand) output:: - >>> from rdflib.namespace import XSD - - >>> Literal(1)._literal_n3(use_plain=True) - %(u)s'1' - - >>> Literal(1.0)._literal_n3(use_plain=True) - %(u)s'1e+00' - - >>> Literal(1.0, datatype=XSD.decimal)._literal_n3(use_plain=True) - %(u)s'1.0' - - >>> Literal(1.0, datatype=XSD.float)._literal_n3(use_plain=True) - %(u)s'"1.0"^^' - - >>> Literal("foo", datatype=XSD.string)._literal_n3( - ... use_plain=True) - %(u)s'"foo"^^' - - >>> Literal(True)._literal_n3(use_plain=True) - %(u)s'true' - - >>> Literal(False)._literal_n3(use_plain=True) - %(u)s'false' - - Using callback for datatype QNames:: - - >>> Literal(1)._literal_n3( - ... qname_callback=lambda uri: "xsd:integer") - %(u)s'"1"^^xsd:integer' - - ''' - if use_plain and self.datatype in _PLAIN_LITERAL_TYPES: - try: - self.toPython() # check validity - # this is a bit of a mess - - # in py >=2.6 the string.format function makes this easier - # we try to produce "pretty" output - if self.datatype == _XSD_DOUBLE: - return sub(".?0*e","e", u'%e' % float(self)) - elif self.datatype == _XSD_DECIMAL: - return sub("0*$","0",u'%f' % float(self)) - else: - return u'%s' % self - except ValueError: - pass # if it's in, we let it out? - - encoded = self._quote_encode() - - datatype = self.datatype - quoted_dt = None - if datatype: - if qname_callback: - quoted_dt = qname_callback(datatype) - if not quoted_dt: - quoted_dt = "<%s>" % datatype - - language = self.language - if language: - if datatype: - # TODO: this isn't valid RDF (it's datatype XOR language) - return '%s@%s^^%s' % (encoded, language, quoted_dt) - return '%s@%s' % (encoded, language) - elif datatype: - return '%s^^%s' % (encoded, quoted_dt) - else: - return '%s' % encoded - - def _quote_encode(self): - # This simpler encoding doesn't work; a newline gets encoded as "\\n", - # which is ok in sourcecode, but we want "\n". - #encoded = self.encode('unicode-escape').replace( - # '\\', '\\\\').replace('"','\\"') - #encoded = self.replace.replace('\\', '\\\\').replace('"','\\"') - - # NOTE: Could in theory chose quotes based on quotes appearing in the - # string, i.e. '"' and "'", but N3/turtle doesn't allow "'"(?). - - if "\n" in self: - # Triple quote this string. - encoded = self.replace('\\', '\\\\') - if '"""' in self: - # is this ok? - encoded = encoded.replace('"""','\\"\\"\\"') - return '"""%s"""' % encoded.replace('\r','\\r') - else: - return '"%s"' % self.replace('\n','\\n').replace('\\', '\\\\' - ).replace('"', '\\"').replace('\r','\\r') - - if not py3compat.PY3: - def __str__(self): - return self.encode() - - def __repr__(self): - args = [super(Literal, self).__repr__()] - if self.language is not None: - args.append("lang=%s" % repr(self.language)) - if self.datatype is not None: - args.append("datatype=%s" % repr(self.datatype)) - if self.__class__ == Literal: - clsName = "rdflib.term.Literal" - else: - clsName = self.__class__.__name__ - return """%s(%s)""" % (clsName, ", ".join(args)) - - def toPython(self): - """ - Returns an appropriate python datatype derived from this RDF Literal - """ - convFunc = _toPythonMapping.get(self.datatype, None) - - if convFunc: - rt = convFunc(self) - else: - rt = self - return rt - - def _toCompareValue(self): - try: - rt = self.toPython() - except Exception, e: - _LOGGER.warning("could not convert %s to a Python datatype" % - repr(self)) - rt = self - - if rt is self: - if self.language is None and self.datatype is None: - return unicode(rt) - else: - return (unicode(rt), rt.datatype, rt.language) - return rt - - def md5_term_hash(self): - """a string of hex that will be the same for two Literals that - are the same. It is not a suitable unique id. - - Supported for backwards compatibility; new code should - probably just use __hash__ - """ - d = md5(self.encode()) - d.update(b("L")) - return d.hexdigest() - - - -_XSD_PFX = 'http://www.w3.org/2001/XMLSchema#' - -_XSD_FLOAT = URIRef(_XSD_PFX+'float') -_XSD_DOUBLE = URIRef(_XSD_PFX+'double') -_XSD_DECIMAL = URIRef(_XSD_PFX+'decimal') - - -_PLAIN_LITERAL_TYPES = ( - URIRef(_XSD_PFX+'integer'), - URIRef(_XSD_PFX+'boolean'), - _XSD_DOUBLE, - _XSD_DECIMAL, -) - - -def _castPythonToLiteral(obj): - """ - Casts a python datatype to a tuple of the lexical value and a - datatype URI (or None) - """ - for pType,(castFunc,dType) in _PythonToXSD: - if isinstance(obj, pType): - if castFunc: - return castFunc(obj), dType - elif dType: - return obj, dType - else: - return obj, None - return obj, None # TODO: is this right for the fall through case? - -from decimal import Decimal - -# Mappings from Python types to XSD datatypes and back (burrowed from sparta) -# datetime instances are also instances of date... so we need to order these. - -# SPARQL/Turtle/N3 has shortcuts for int, double, decimal -# python has only float - to be in tune with sparql/n3/turtle -# we default to XSD.double for float literals - -_PythonToXSD = [ - (basestring, (None, None)), - (float , (None, URIRef(_XSD_PFX+'double'))), - (bool , (lambda i:str(i).lower(), URIRef(_XSD_PFX+'boolean'))), - (int , (None, URIRef(_XSD_PFX+'integer'))), - (long , (None, URIRef(_XSD_PFX+'integer'))), - (Decimal , (None, URIRef(_XSD_PFX+'decimal'))), - (datetime , (lambda i:i.isoformat(), URIRef(_XSD_PFX+'dateTime'))), - (date , (lambda i:i.isoformat(), URIRef(_XSD_PFX+'date'))), - (time , (lambda i:i.isoformat(), URIRef(_XSD_PFX+'time'))), -] - -XSDToPython = { - URIRef(_XSD_PFX+'time') : parse_time, - URIRef(_XSD_PFX+'date') : parse_date, - URIRef(_XSD_PFX+'dateTime') : parse_datetime, - URIRef(_XSD_PFX+'string') : None, - URIRef(_XSD_PFX+'normalizedString') : None, - URIRef(_XSD_PFX+'token') : None, - URIRef(_XSD_PFX+'language') : None, - URIRef(_XSD_PFX+'boolean') : lambda i:i.lower() in ['1','true'], - URIRef(_XSD_PFX+'decimal') : Decimal, - URIRef(_XSD_PFX+'integer') : long, - URIRef(_XSD_PFX+'nonPositiveInteger') : int, - URIRef(_XSD_PFX+'long') : long, - URIRef(_XSD_PFX+'nonNegativeInteger') : int, - URIRef(_XSD_PFX+'negativeInteger') : int, - URIRef(_XSD_PFX+'int') : long, - URIRef(_XSD_PFX+'unsignedLong') : long, - URIRef(_XSD_PFX+'positiveInteger') : int, - URIRef(_XSD_PFX+'short') : int, - URIRef(_XSD_PFX+'unsignedInt') : long, - URIRef(_XSD_PFX+'byte') : int, - URIRef(_XSD_PFX+'unsignedShort') : int, - URIRef(_XSD_PFX+'unsignedByte') : int, - URIRef(_XSD_PFX+'float') : float, - URIRef(_XSD_PFX+'double') : float, - URIRef(_XSD_PFX+'base64Binary') : lambda s: base64.b64decode(py3compat.b(s)), - URIRef(_XSD_PFX+'anyURI') : None, -} - -_toPythonMapping = {} -_toPythonMapping.update(XSDToPython) - -def bind(datatype, conversion_function): - """ - bind a datatype to a function for converting it into a Python - instance. - """ - if datatype in _toPythonMapping: - _LOGGER.warning("datatype '%s' was already bound. Rebinding." % - datatype) - _toPythonMapping[datatype] = conversion_function - - - -class Variable(Identifier): - """ - """ - __slots__ = () - def __new__(cls, value): - if len(value)==0: raise Exception("Attempted to create variable with empty string as name!") - if value[0]=='?': - value=value[1:] - return unicode.__new__(cls, value) - - def __repr__(self): - return self.n3() - - def toPython(self): - return "?%s" % self - - def n3(self): - return "?%s" % self - - def __reduce__(self): - return (Variable, (unicode(self),)) - - def md5_term_hash(self): - """a string of hex that will be the same for two Variables that - are the same. It is not a suitable unique id. - - Supported for backwards compatibility; new code should - probably just use __hash__ - """ - d = md5(self.encode()) - d.update(b("V")) - return d.hexdigest() - - -class Statement(Node, tuple): - - def __new__(cls, (subject, predicate, object), context): - return tuple.__new__(cls, ((subject, predicate, object), context)) - - def __reduce__(self): - return (Statement, (self[0], self[1])) - - def toPython(self): - return (self[0], self[1]) - - -if __name__ == '__main__': - import doctest - doctest.testmod() - diff --git a/doc/rdflib3/term.pyc b/doc/rdflib3/term.pyc deleted file mode 100644 index e3b6654..0000000 Binary files a/doc/rdflib3/term.pyc and /dev/null differ diff --git a/doc/rdflib3/util.py b/doc/rdflib3/util.py deleted file mode 100644 index 4fac9c1..0000000 --- a/doc/rdflib3/util.py +++ /dev/null @@ -1,317 +0,0 @@ -""" -Some utility functions. - -Miscellaneous utilities - -* list2set -* first -* uniq -* more_than - -Term characterisation and generation - -* to_term -* from_n3 - -Date/time utilities - -* date_time -* parse_date_time - -Statement and component type checkers - -* check_context -* check_subject -* check_predicate -* check_object -* check_statement -* check_pattern - -""" - -from calendar import timegm -from time import altzone -#from time import daylight -from time import gmtime -from time import localtime -from time import time -from time import timezone - -try: - cmp -except NameError: - def sign(n): - if n < 0: return -1 - if n > 0: return 1 - return 0 -else: - def sign(n): return cmp(n, 0) - -from rdflib.exceptions import ContextTypeError -from rdflib.exceptions import ObjectTypeError -from rdflib.exceptions import PredicateTypeError -from rdflib.exceptions import SubjectTypeError -from rdflib.graph import Graph -from rdflib.graph import QuotedGraph -from rdflib.term import BNode -from rdflib.term import Literal -from rdflib.term import URIRef - -__all__ = ['list2set', 'first', 'uniq', 'more_than', 'to_term', 'from_n3','date_time', 'parse_date_time', 'check_context', 'check_subject', 'check_predicate', 'check_object', 'check_statement', 'check_pattern'] - -def list2set(seq): - """ - Return a new list without duplicates. - Preserves the order, unlike set(seq) - """ - seen = set() - return [ x for x in seq if x not in seen and not seen.add(x)] - -def first(seq): - for result in seq: - return result - return None - -def uniq(sequence, strip=0): - """removes duplicate strings from the sequence.""" - if strip: - return set(s.strip() for s in sequence) - else: - return set(sequence) - -def more_than(sequence, number): - "Returns 1 if sequence has more items than number and 0 if not." - i = 0 - for item in sequence: - i += 1 - if i > number: - return 1 - return 0 - -def to_term(s, default=None): - """ - Creates and returns an Identifier of type corresponding - to the pattern of the given positional argument string ``s``: - - '' returns the ``default`` keyword argument value or ``None`` - - '' returns ``URIRef(s)`` (i.e. without angle brackets) - - '"s"' returns ``Literal(s)`` (i.e. without doublequotes) - - '_s' returns ``BNode(s)`` (i.e. without leading underscore) - - """ - if not s: - return default - elif s.startswith("<") and s.endswith(">"): - return URIRef(s[1:-1]) - elif s.startswith('"') and s.endswith('"'): - return Literal(s[1:-1]) - elif s.startswith("_"): - return BNode(s) - else: - msg = "Unrecognised term syntax: '%s'" % s - raise Exception(msg) - -def from_n3(s, default=None, backend=None): - r''' - Creates the Identifier corresponding to the given n3 string. - - >>> from_n3('') == URIRef('http://ex.com/foo') - True - >>> from_n3('"foo"@de') == Literal('foo', lang='de') - True - >>> from_n3('"""multi\nline\nstring"""@en') == Literal('multi\nline\nstring', lang='en') - True - >>> from_n3('42') == Literal(42) - True - - ''' - # TODO: should be able to handle prefixes given as opt. argument maybe: from_n3('rdfs:label') - if not s: - return default - if s.startswith('<'): - return URIRef(s[1:-1]) - elif s.startswith('"'): - if s.startswith('"""'): - quotes = '"""' - else: - quotes = '"' - value, rest = s.rsplit(quotes, 1) - value = value[len(quotes):] # strip leading quotes - datatype = None - language = None - - # as a given datatype overrules lang-tag check for it first - dtoffset = rest.rfind('^^') - if dtoffset >= 0: - # found a datatype - # datatype has to come after lang-tag so ignore everything before - # see: http://www.w3.org/TR/2011/WD-turtle-20110809/#prod-turtle2-RDFLiteral - datatype = rest[dtoffset+2:] - else: - if rest.startswith("@"): - language = rest[1:] # strip leading at sign - - value = value.replace(r'\"', '"').replace('\\\\', '\\') - # Hack: this should correctly handle strings with either native unicode - # characters, or \u1234 unicode escapes. - value = value.encode("raw-unicode-escape").decode("unicode-escape") - return Literal(value, language, datatype) - elif s == 'true' or s == 'false': - return Literal(s == 'true') - elif s.isdigit(): - return Literal(int(s)) - elif s.startswith('{'): - identifier = from_n3(s[1:-1]) - return QuotedGraph(backend, identifier) - elif s.startswith('['): - identifier = from_n3(s[1:-1]) - return Graph(backend, identifier) - else: - if s.startswith("_:"): - return BNode(s[2:]) - else: - return BNode(s) - -def check_context(c): - if not (isinstance(c, URIRef) or \ - isinstance(c, BNode)): - raise ContextTypeError("%s:%s" % (c, type(c))) - -def check_subject(s): - """ Test that s is a valid subject identifier.""" - if not (isinstance(s, URIRef) or isinstance(s, BNode)): - raise SubjectTypeError(s) - -def check_predicate(p): - """ Test that p is a valid predicate identifier.""" - if not isinstance(p, URIRef): - raise PredicateTypeError(p) - -def check_object(o): - """ Test that o is a valid object identifier.""" - if not (isinstance(o, URIRef) or \ - isinstance(o, Literal) or \ - isinstance(o, BNode)): - raise ObjectTypeError(o) - -def check_statement(triple): - (s, p, o) = triple - if not (isinstance(s, URIRef) or isinstance(s, BNode)): - raise SubjectTypeError(s) - - if not isinstance(p, URIRef): - raise PredicateTypeError(p) - - if not (isinstance(o, URIRef) or \ - isinstance(o, Literal) or \ - isinstance(o, BNode)): - raise ObjectTypeError(o) - -def check_pattern(triple): - (s, p, o) = triple - if s and not (isinstance(s, URIRef) or isinstance(s, BNode)): - raise SubjectTypeError(s) - - if p and not isinstance(p, URIRef): - raise PredicateTypeError(p) - - if o and not (isinstance(o, URIRef) or \ - isinstance(o, Literal) or \ - isinstance(o, BNode)): - raise ObjectTypeError(o) - -def date_time(t=None, local_time_zone=False): - """http://www.w3.org/TR/NOTE-datetime ex: 1997-07-16T19:20:30Z - - >>> date_time(1126482850) - '2005-09-11T23:54:10Z' - - @@ this will change depending on where it is run - #>>> date_time(1126482850, local_time_zone=True) - #'2005-09-11T19:54:10-04:00' - - >>> date_time(1) - '1970-01-01T00:00:01Z' - - >>> date_time(0) - '1970-01-01T00:00:00Z' - """ - if t is None: - t = time() - - if local_time_zone: - time_tuple = localtime(t) - if time_tuple[8]: - tz_mins = altzone // 60 - else: - tz_mins = timezone // 60 - tzd = "-%02d:%02d" % (tz_mins // 60, tz_mins % 60) - else: - time_tuple = gmtime(t) - tzd = "Z" - - year, month, day, hh, mm, ss, wd, y, z = time_tuple - s = "%0004d-%02d-%02dT%02d:%02d:%02d%s" % ( year, month, day, hh, mm, ss, tzd) - return s - -def parse_date_time(val): - """always returns seconds in UTC - - # tests are written like this to make any errors easier to understand - >>> parse_date_time('2005-09-11T23:54:10Z') - 1126482850.0 - 0.0 - - >>> parse_date_time('2005-09-11T16:54:10-07:00') - 1126482850.0 - 0.0 - - >>> parse_date_time('1970-01-01T00:00:01Z') - 1.0 - 0.0 - - >>> parse_date_time('1970-01-01T00:00:00Z') - 0.0 - 0.0 - >>> parse_date_time("2005-09-05T10:42:00") - 1125916920.0 - 0.0 - """ - - if "T" not in val: - val += "T00:00:00Z" - - ymd, time = val.split("T") - hms, tz_str = time[0:8], time[8:] - - if not tz_str or tz_str=="Z": - time = time[:-1] - tz_offset = 0 - else: - signed_hrs = int(tz_str[:3]) - mins = int(tz_str[4:6]) - secs = (sign(signed_hrs) * mins + signed_hrs * 60) * 60 - tz_offset = -secs - - year, month, day = ymd.split("-") - hour, minute, second = hms.split(":") - - t = timegm((int(year), int(month), int(day), int(hour), - int(minute), int(second), 0, 0, 0)) - t = t + tz_offset - return t - -def test(): - import doctest - doctest.testmod() - -if __name__ == "__main__": - # try to make the tests work outside of the time zone they were written in - #import os, time - #os.environ['TZ'] = 'US/Pacific' - #try: - # time.tzset() - #except AttributeError, e: - # print e - #pass - # tzset missing! see - # http://mail.python.org/pipermail/python-dev/2003-April/034480.html - test() # pragma: no cover diff --git a/doc/rdflib3/util.pyc b/doc/rdflib3/util.pyc deleted file mode 100644 index 1774afe..0000000 Binary files a/doc/rdflib3/util.pyc and /dev/null differ diff --git a/iottoolkit/core/Description.py b/iottoolkit/core/Description.py index fe2fffb..f34f77f 100644 --- a/iottoolkit/core/Description.py +++ b/iottoolkit/core/Description.py @@ -18,7 +18,7 @@ def _xml_(self): return self.serialize(format='xml') def _json_(self): - return self.serialize(format='json-ld') + return self.serialize(format='rdf-json') class Description (RESTfulResource): @@ -33,7 +33,7 @@ def __init__(self, parentObject=None, resourceName=''): 'application/x-turtle' : 'turtle', 'text/rdf+n3' : 'n3', 'text/plain' : 'nt' , - 'application/json' : 'json-ld' + 'application/json' : 'rdf-json' } diff --git a/iottoolkit/core/Description.pyc b/iottoolkit/core/Description.pyc index f051ef3..1be664a 100644 Binary files a/iottoolkit/core/Description.pyc and b/iottoolkit/core/Description.pyc differ