diff --git a/docs/apireference.rst b/docs/apireference.rst index 2433ddfe1..3f1768263 100644 --- a/docs/apireference.rst +++ b/docs/apireference.rst @@ -20,6 +20,9 @@ Documents .. autoclass:: mongoengine.EmbeddedDocument :members: + +.. autoclass:: mongoengine.MapReduceDocument + :members: Querying ======== diff --git a/docs/conf.py b/docs/conf.py index 4e73fc36b..2541f49a1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc'] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/mongoengine/document.py b/mongoengine/document.py index 8fdb88db5..1778eb120 100644 --- a/mongoengine/document.py +++ b/mongoengine/document.py @@ -115,3 +115,39 @@ def drop_collection(cls): """ db = _get_db() db.drop_collection(cls._meta['collection']) + + +class MapReduceDocument(object): + """A document returned from a map/reduce query. + + :param collection: An instance of :class:`~pymongo.collection.Collection` + :param key: Document/result key, often an instance of + :class:`~pymongo.objectid.ObjectId`. If supplied as + an ``ObjectId`` found in the given ``collection``, + the object can be accessed via the ``object`` property. + :param value: The result(s) for this key. + + .. versionadded:: 0.3 + + """ + + def __init__(self, document, collection, key, value): + self._document = document + self._collection = collection + self.key = key + self.value = value + + @property + def object(self): + """Lazy-load the object referenced by ``self.key``. If ``self.key`` + cannot be converted to an ``ObjectId``, simply return ``self.key``. 
+ """ + if not isinstance(self.key, (pymongo.objectid.ObjectId)): + try: + self.key = pymongo.objectid.ObjectId(self.key) + except: + return self.key + if not hasattr(self, "_key_object"): + self._key_object = self._document.objects.with_id(self.key) + return self._key_object + return self._key_object diff --git a/mongoengine/queryset.py b/mongoengine/queryset.py index 73e333f0b..85c1a789d 100644 --- a/mongoengine/queryset.py +++ b/mongoengine/queryset.py @@ -5,7 +5,7 @@ import copy -__all__ = ['queryset_manager', 'Q', 'InvalidQueryError', +__all__ = ['queryset_manager', 'Q', 'InvalidQueryError', 'InvalidCollectionError'] # The maximum number of items to display in a QuerySet.__repr__ @@ -13,7 +13,7 @@ class DoesNotExist(Exception): - pass + pass class MultipleObjectsReturned(Exception): @@ -30,8 +30,9 @@ class OperationError(Exception): RE_TYPE = type(re.compile('')) + class Q(object): - + OR = '||' AND = '&&' OPERATORS = { @@ -52,7 +53,7 @@ class Q(object): 'regex_eq': '%(value)s.test(this.%(field)s)', 'regex_ne': '!%(value)s.test(this.%(field)s)', } - + def __init__(self, **query): self.query = [query] @@ -132,10 +133,10 @@ def _build_op_js(self, op, key, value, value_name): return value, operation_js class QuerySet(object): - """A set of results returned from a query. Wraps a MongoDB cursor, + """A set of results returned from a query. Wraps a MongoDB cursor, providing :class:`~mongoengine.Document` objects as the results. 
""" - + def __init__(self, document, collection): self._document = document self._collection_obj = collection @@ -143,7 +144,8 @@ def __init__(self, document, collection): self._query = {} self._where_clause = None self._loaded_fields = [] - + self._ordering = [] + # If inheritance is allowed, only return instances and instances of # subclasses of the class being used if document._meta.get('allow_inheritance'): @@ -151,7 +153,7 @@ def __init__(self, document, collection): self._cursor_obj = None self._limit = None self._skip = None - + def ensure_index(self, key_or_list): """Ensure that the given indexes are in place. @@ -199,7 +201,7 @@ def _build_index_spec(cls, doc_cls, key_or_list): return index_list def __call__(self, q_obj=None, **query): - """Filter the selected documents by calling the + """Filter the selected documents by calling the :class:`~mongoengine.queryset.QuerySet` with a query. :param q_obj: a :class:`~mongoengine.queryset.Q` object to be used in @@ -213,7 +215,7 @@ def __call__(self, q_obj=None, **query): query = QuerySet._transform_query(_doc_cls=self._document, **query) self._query.update(query) return self - + def filter(self, *q_objs, **query): """An alias of :meth:`~mongoengine.queryset.QuerySet.__call__` """ @@ -253,11 +255,11 @@ def _cursor(self): # Apply where clauses to cursor if self._where_clause: self._cursor_obj.where(self._where_clause) - + # apply default ordering if self._document._meta['ordering']: self.order_by(*self._document._meta['ordering']) - + return self._cursor_obj @classmethod @@ -337,8 +339,8 @@ def _transform_query(cls, _doc_cls=None, **query): return mongo_query def get(self, *q_objs, **query): - """Retrieve the the matching object raising - :class:`~mongoengine.queryset.MultipleObjectsReturned` or + """Retrieve the the matching object raising + :class:`~mongoengine.queryset.MultipleObjectsReturned` or :class:`~mongoengine.queryset.DoesNotExist` exceptions if multiple or no results are found. 
""" @@ -354,15 +356,15 @@ def get(self, *q_objs, **query): def get_or_create(self, *q_objs, **query): """Retreive unique object or create, if it doesn't exist. Raises - :class:`~mongoengine.queryset.MultipleObjectsReturned` if multiple - results are found. A new document will be created if the document + :class:`~mongoengine.queryset.MultipleObjectsReturned` if multiple + results are found. A new document will be created if the document doesn't exists; a dictionary of default values for the new document may be provided as a keyword argument called :attr:`defaults`. """ defaults = query.get('defaults', {}) - if query.has_key('defaults'): + if 'defaults' in query: del query['defaults'] - + self.__call__(*q_objs, **query) count = self.count() if count == 0: @@ -439,6 +441,70 @@ def count(self): def __len__(self): return self.count() + def map_reduce(self, map_f, reduce_f, finalize_f=None, limit=None, + scope=None, keep_temp=False): + """Perform a map/reduce query using the current query spec + and ordering. While ``map_reduce`` respects ``QuerySet`` chaining, + it must be the last call made, as it does not return a malleable + ``QuerySet``. + + See the :meth:`~mongoengine.tests.QuerySetTest.test_map_reduce` + and :meth:`~mongoengine.tests.QuerySetTest.test_map_advanced` + tests in ``tests.queryset.QuerySetTest`` for usage examples. + + :param map_f: map function, as :class:`~pymongo.code.Code` or string + :param reduce_f: reduce function, as + :class:`~pymongo.code.Code` or string + :param finalize_f: finalize function, an optional function that + performs any post-reduction processing. + :param scope: values to insert into map/reduce global scope. Optional. + :param limit: number of objects from current query to provide + to map/reduce method + :param keep_temp: keep temporary table (boolean, default ``False``) + + Returns an iterator yielding + :class:`~mongoengine.document.MapReduceDocument`. + + .. note:: Map/Reduce requires server version **>= 1.1.1**. 
The PyMongo + :meth:`~pymongo.collection.Collection.map_reduce` helper requires + PyMongo version **>= 1.2**. + + .. versionadded:: 0.3 + + """ + from document import MapReduceDocument + + if not hasattr(self._collection, "map_reduce"): + raise NotImplementedError("Requires MongoDB >= 1.1.1") + + if not isinstance(map_f, pymongo.code.Code): + map_f = pymongo.code.Code(map_f) + if not isinstance(reduce_f, pymongo.code.Code): + reduce_f = pymongo.code.Code(reduce_f) + + mr_args = {'query': self._query, 'keeptemp': keep_temp} + + if finalize_f: + if not isinstance(finalize_f, pymongo.code.Code): + finalize_f = pymongo.code.Code(finalize_f) + mr_args['finalize'] = finalize_f + + if scope: + mr_args['scope'] = scope + + if limit: + mr_args['limit'] = limit + + results = self._collection.map_reduce(map_f, reduce_f, **mr_args) + results = results.find() + + if self._ordering: + results = results.sort(self._ordering) + + for doc in results: + yield MapReduceDocument(self._document, self._collection, + doc['_id'], doc['value']) + def limit(self, n): """Limit the number of returned documents to `n`. This may also be achieved using array-slicing syntax (e.g. ``User.objects[:5]``). @@ -450,6 +516,7 @@ def limit(self, n): else: self._cursor.limit(n) self._limit = n + # Return self to allow chaining return self @@ -523,13 +590,14 @@ def order_by(self, *keys): direction = pymongo.DESCENDING if key[0] in ('-', '+'): key = key[1:] - key_list.append((key, direction)) + key_list.append((key, direction)) + self._ordering = key_list self._cursor.sort(key_list) return self - + def explain(self, format=False): - """Return an explain plan record for the + """Return an explain plan record for the :class:`~mongoengine.queryset.QuerySet`\ 's cursor. :param format: format the plan before returning it @@ -540,7 +608,7 @@ def explain(self, format=False): import pprint plan = pprint.pformat(plan) return plan - + def delete(self, safe=False): """Delete the documents matched by the query. 
@@ -552,7 +620,7 @@ def delete(self, safe=False): def _transform_update(cls, _doc_cls=None, **update): """Transform an update spec from Django-style format to Mongo format. """ - operators = ['set', 'unset', 'inc', 'dec', 'push', 'push_all', 'pull', + operators = ['set', 'unset', 'inc', 'dec', 'push', 'push_all', 'pull', 'pull_all'] mongo_update = {} @@ -661,8 +729,8 @@ def exec_js(self, code, *fields, **options): """Execute a Javascript function on the server. A list of fields may be provided, which will be translated to their correct names and supplied as the arguments to the function. A few extra variables are added to - the function's scope: ``collection``, which is the name of the - collection in use; ``query``, which is an object representing the + the function's scope: ``collection``, which is the name of the + collection in use; ``query``, which is an object representing the current query; and ``options``, which is an object containing any options specified as keyword arguments. @@ -676,7 +744,7 @@ def exec_js(self, code, *fields, **options): :param code: a string of Javascript code to execute :param fields: fields that you will be using in your function, which will be passed in to your function as arguments - :param options: options that you want available to the function + :param options: options that you want available to the function (accessed in Javascript through the ``options`` object) """ code = self._sub_js_fields(code) @@ -693,7 +761,7 @@ def exec_js(self, code, *fields, **options): query = self._query if self._where_clause: query['$where'] = self._where_clause - + scope['query'] = query code = pymongo.code.Code(code, scope=scope) @@ -741,7 +809,7 @@ def average(self, field): def item_frequencies(self, list_field, normalize=False): """Returns a dictionary of all items present in a list field across the whole queried set of documents, and their corresponding frequency. - This is useful for generating tag clouds, or searching documents. 
+ This is useful for generating tag clouds, or searching documents. :param list_field: the list field to use :param normalize: normalize the results so they add to 1.0 @@ -791,7 +859,7 @@ def __init__(self, manager_func=None): self._collection = None def __get__(self, instance, owner): - """Descriptor for instantiating a new QuerySet object when + """Descriptor for instantiating a new QuerySet object when Document.objects is accessed. """ if instance is not None: @@ -810,7 +878,7 @@ def __get__(self, instance, owner): if collection in db.collection_names(): self._collection = db[collection] - # The collection already exists, check if its capped + # The collection already exists, check if its capped # options match the specified capped options options = self._collection.options() if options.get('max') != max_documents or \ @@ -826,7 +894,7 @@ def __get__(self, instance, owner): self._collection = db.create_collection(collection, opts) else: self._collection = db[collection] - + # owner is the document that contains the QuerySetManager queryset = QuerySet(owner, self._collection) if self._manager_func: @@ -836,6 +904,7 @@ def __get__(self, instance, owner): queryset = self._manager_func(owner, queryset) return queryset + def queryset_manager(func): """Decorator that allows you to define custom QuerySet managers on :class:`~mongoengine.Document` classes. 
The manager must be a function that diff --git a/tests/queryset.py b/tests/queryset.py index e9d8e754b..25e56d629 100644 --- a/tests/queryset.py +++ b/tests/queryset.py @@ -1,14 +1,17 @@ +# -*- coding: utf-8 -*- + + import unittest import pymongo -from datetime import datetime +from datetime import datetime, timedelta -from mongoengine.queryset import (QuerySet, MultipleObjectsReturned, +from mongoengine.queryset import (QuerySet, MultipleObjectsReturned, DoesNotExist) from mongoengine import * class QuerySetTest(unittest.TestCase): - + def setUp(self): connect(db='mongoenginetest') @@ -16,12 +19,12 @@ class Person(Document): name = StringField() age = IntField() self.Person = Person - + def test_initialisation(self): """Ensure that a QuerySet is correctly initialised by QuerySetManager. """ self.assertTrue(isinstance(self.Person.objects, QuerySet)) - self.assertEqual(self.Person.objects._collection.name, + self.assertEqual(self.Person.objects._collection.name, self.Person._meta['collection']) self.assertTrue(isinstance(self.Person.objects._collection, pymongo.collection.Collection)) @@ -31,15 +34,15 @@ def test_transform_query(self): """ self.assertEqual(QuerySet._transform_query(name='test', age=30), {'name': 'test', 'age': 30}) - self.assertEqual(QuerySet._transform_query(age__lt=30), + self.assertEqual(QuerySet._transform_query(age__lt=30), {'age': {'$lt': 30}}) self.assertEqual(QuerySet._transform_query(age__gt=20, age__lt=50), {'age': {'$gt': 20, '$lt': 50}}) self.assertEqual(QuerySet._transform_query(age=20, age__gt=50), {'age': 20}) - self.assertEqual(QuerySet._transform_query(friend__age__gte=30), + self.assertEqual(QuerySet._transform_query(friend__age__gte=30), {'friend.age': {'$gte': 30}}) - self.assertEqual(QuerySet._transform_query(name__exists=True), + self.assertEqual(QuerySet._transform_query(name__exists=True), {'name': {'$exists': True}}) def test_find(self): @@ -134,7 +137,7 @@ def test_find_one(self): self.assertEqual(person.name, "User B") 
self.assertRaises(IndexError, self.Person.objects.__getitem__, 2) - + # Find a document using just the object id person = self.Person.objects.with_id(person1.id) self.assertEqual(person.name, "User A") @@ -170,7 +173,7 @@ def test_get_or_create(self): person2.save() # Retrieve the first person from the database - self.assertRaises(MultipleObjectsReturned, + self.assertRaises(MultipleObjectsReturned, self.Person.objects.get_or_create) # Use a query to filter the people found to just person2 @@ -256,36 +259,36 @@ def test_filter_chaining(self): """Ensure filters can be chained together. """ from datetime import datetime - + class BlogPost(Document): title = StringField() is_published = BooleanField() published_date = DateTimeField() - + @queryset_manager def published(doc_cls, queryset): return queryset(is_published=True) - - blog_post_1 = BlogPost(title="Blog Post #1", + + blog_post_1 = BlogPost(title="Blog Post #1", is_published = True, published_date=datetime(2010, 1, 5, 0, 0 ,0)) - blog_post_2 = BlogPost(title="Blog Post #2", + blog_post_2 = BlogPost(title="Blog Post #2", is_published = True, published_date=datetime(2010, 1, 6, 0, 0 ,0)) - blog_post_3 = BlogPost(title="Blog Post #3", + blog_post_3 = BlogPost(title="Blog Post #3", is_published = True, published_date=datetime(2010, 1, 7, 0, 0 ,0)) blog_post_1.save() blog_post_2.save() blog_post_3.save() - + # find all published blog posts before 2010-01-07 published_posts = BlogPost.published() published_posts = published_posts.filter( published_date__lt=datetime(2010, 1, 7, 0, 0 ,0)) self.assertEqual(published_posts.count(), 2) - + BlogPost.drop_collection() def test_ordering(self): @@ -301,22 +304,22 @@ class BlogPost(Document): BlogPost.drop_collection() - blog_post_1 = BlogPost(title="Blog Post #1", + blog_post_1 = BlogPost(title="Blog Post #1", published_date=datetime(2010, 1, 5, 0, 0 ,0)) - blog_post_2 = BlogPost(title="Blog Post #2", + blog_post_2 = BlogPost(title="Blog Post #2", 
published_date=datetime(2010, 1, 6, 0, 0 ,0)) - blog_post_3 = BlogPost(title="Blog Post #3", + blog_post_3 = BlogPost(title="Blog Post #3", published_date=datetime(2010, 1, 7, 0, 0 ,0)) blog_post_1.save() blog_post_2.save() blog_post_3.save() - + # get the "first" BlogPost using default ordering # from BlogPost.meta.ordering - latest_post = BlogPost.objects.first() + latest_post = BlogPost.objects.first() self.assertEqual(latest_post.title, "Blog Post #3") - + # override default ordering, order BlogPosts by "published_date" first_post = BlogPost.objects.order_by("+published_date").first() self.assertEqual(first_post.title, "Blog Post #1") @@ -375,7 +378,7 @@ class BlogPost(Document): result = BlogPost.objects.first() self.assertTrue(isinstance(result.author, User)) self.assertEqual(result.author.name, 'Test User') - + BlogPost.drop_collection() def test_find_dict_item(self): @@ -442,7 +445,7 @@ class BlogPost(Document): self.Person(name='user2', age=20).save() self.Person(name='user3', age=30).save() self.Person(name='user4', age=40).save() - + self.assertEqual(len(self.Person.objects(Q(age__in=[20]))), 2) self.assertEqual(len(self.Person.objects(Q(age__in=[20, 30]))), 3) @@ -545,17 +548,17 @@ class BlogPost(Document): return comments; } """ - + sub_code = BlogPost.objects._sub_js_fields(code) - code_chunks = ['doc["cmnts"];', 'doc["doc-name"],', + code_chunks = ['doc["cmnts"];', 'doc["doc-name"],', 'doc["cmnts"][i]["body"]'] for chunk in code_chunks: self.assertTrue(chunk in sub_code) results = BlogPost.objects.exec_js(code) expected_results = [ - {u'comment': u'cool', u'document': u'post1'}, - {u'comment': u'yay', u'document': u'post1'}, + {u'comment': u'cool', u'document': u'post1'}, + {u'comment': u'yay', u'document': u'post1'}, {u'comment': u'nice stuff', u'document': u'post2'}, ] self.assertEqual(results, expected_results) @@ -627,10 +630,167 @@ def test_order_by(self): names = [p.name for p in self.Person.objects.order_by('age')] self.assertEqual(names, 
['User A', 'User C', 'User B']) - + ages = [p.age for p in self.Person.objects.order_by('-name')] self.assertEqual(ages, [30, 40, 20]) + def test_map_reduce(self): + """Ensure map/reduce is both mapping and reducing. + """ + class BlogPost(Document): + title = StringField() + tags = ListField(StringField()) + + BlogPost.drop_collection() + + BlogPost(title="Post #1", tags=['music', 'film', 'print']).save() + BlogPost(title="Post #2", tags=['music', 'film']).save() + BlogPost(title="Post #3", tags=['film', 'photography']).save() + + map_f = """ + function() { + this.tags.forEach(function(tag) { + emit(tag, 1); + }); + } + """ + + reduce_f = """ + function(key, values) { + var total = 0; + for(var i=0; i 0) { + y = 1; + } else if (x = 0) { + y = 0; + } else { + y = -1; + } + + // calculate 'Z', the maximal value + if(Math.abs(x) >= 1) { + z = Math.abs(x); + } else { + z = 1; + } + + return {x: x, y: y, z: z, t_s: sec_since_epoch}; + } + """ + + finalize_f = """ + function(key, value) { + // f(sec_since_epoch,y,z) = log10(z) + ((y*sec_since_epoch) / 45000) + z_10 = Math.log(value.z) / Math.log(10); + weight = z_10 + ((value.y * value.t_s) / 45000); + return weight; + } + """ + + # provide the reddit epoch (used for ranking) as a variable available + # to all phases of the map/reduce operation: map, reduce, and finalize. + reddit_epoch = mktime(datetime(2005, 12, 8, 7, 46, 43).timetuple()) + scope = {'reddit_epoch': reddit_epoch} + + # run a map/reduce operation across all links. ordering is set + # to "-value", which orders the "weight" value returned from + # "finalize_f" in descending order. 
+ results = Link.objects.order_by("-value") + results = results.map_reduce(map_f, + reduce_f, + finalize_f=finalize_f, + scope=scope) + results = list(results) + + # assert troublesome Buzz article is ranked 1st + self.assertTrue(results[0].object.title.startswith("Google Buzz")) + + # assert laser vision is ranked last + self.assertTrue(results[-1].object.title.startswith("How to see")) + + Link.drop_collection() + def test_item_frequencies(self): """Ensure that item frequencies are properly generated from lists. """ @@ -727,20 +887,20 @@ class BlogPost(Document): title = StringField(name='postTitle') comments = ListField(EmbeddedDocumentField(Comment), name='postComments') - + BlogPost.drop_collection() data = {'title': 'Post 1', 'comments': [Comment(content='test')]} BlogPost(**data).save() - self.assertTrue('postTitle' in + self.assertTrue('postTitle' in BlogPost.objects(title=data['title'])._query) - self.assertFalse('title' in + self.assertFalse('title' in BlogPost.objects(title=data['title'])._query) self.assertEqual(len(BlogPost.objects(title=data['title'])), 1) - self.assertTrue('postComments.commentContent' in + self.assertTrue('postComments.commentContent' in BlogPost.objects(comments__content='test')._query) self.assertEqual(len(BlogPost.objects(comments__content='test')), 1) @@ -761,7 +921,7 @@ class BlogPost(Document): post.save() # Test that query may be performed by providing a document as a value - # while using a ReferenceField's name - the document should be + # while using a ReferenceField's name - the document should be # converted to an DBRef, which is legal, unlike a Document object post_obj = BlogPost.objects(author=person).first() self.assertEqual(post.id, post_obj.id) @@ -823,13 +983,13 @@ class BlogPost(Document): self.assertFalse([('_types', 1)] in info.values()) BlogPost.drop_collection() - + def test_bulk(self): """Ensure bulk querying by object id returns a proper dict. 
""" class BlogPost(Document): title = StringField() - + BlogPost.drop_collection() post_1 = BlogPost(title="Post #1") @@ -843,20 +1003,20 @@ class BlogPost(Document): post_3.save() post_4.save() post_5.save() - + ids = [post_1.id, post_2.id, post_5.id] objects = BlogPost.objects.in_bulk(ids) - + self.assertEqual(len(objects), 3) self.assertTrue(post_1.id in objects) self.assertTrue(post_2.id in objects) self.assertTrue(post_5.id in objects) - + self.assertTrue(objects[post_1.id].title == post_1.title) self.assertTrue(objects[post_2.id].title == post_2.title) - self.assertTrue(objects[post_5.id].title == post_5.title) - + self.assertTrue(objects[post_5.id].title == post_5.title) + BlogPost.drop_collection() def tearDown(self): @@ -864,7 +1024,7 @@ def tearDown(self): class QTest(unittest.TestCase): - + def test_or_and(self): """Ensure that Q objects may be combined correctly. """ @@ -888,8 +1048,8 @@ def test_item_query_as_js(self): examples = [ ({'name': 'test'}, 'this.name == i0f0', {'i0f0': 'test'}), ({'age': {'$gt': 18}}, 'this.age > i0f0o0', {'i0f0o0': 18}), - ({'name': 'test', 'age': {'$gt': 18, '$lte': 65}}, - 'this.age <= i0f0o0 && this.age > i0f0o1 && this.name == i0f1', + ({'name': 'test', 'age': {'$gt': 18, '$lte': 65}}, + 'this.age <= i0f0o0 && this.age > i0f0o1 && this.name == i0f1', {'i0f0o0': 65, 'i0f0o1': 18, 'i0f1': 'test'}), ] for item, js, scope in examples: