# ***Aggregation***

In [1]:
from pymongo import MongoClient
db = MongoClient().aggregation_example
result = db.things.insert_many(
    [
        {"x":1, "tags": ["dog","cat"]},
        {"x":2, "tags": ["cat"]},
        {"x":2, "tags": ["mouse","cat","dog"]},
        {"x":3, "tags":[]},
    
        
    ]
)
result.inserted_ids

[ObjectId('64bcbd8c5c9f95d1fd7b32d2'),
 ObjectId('64bcbd8c5c9f95d1fd7b32d3'),
 ObjectId('64bcbd8c5c9f95d1fd7b32d4'),
 ObjectId('64bcbd8c5c9f95d1fd7b32d5')]

In [2]:
# aggregation framework
from bson.son import SON 
pipeline = [
    {"$unwind": "$tags"},
    {"$group": {"_id": "$tags", "count": {"$sum": 1}}},
    {"$sort": SON([("count", -1),("_id", -1)])},
]

In [3]:
import pprint
pprint.pprint(list(db.things.aggregate(pipeline)))

[{'_id': 'cat', 'count': 3},
 {'_id': 'dog', 'count': 2},
 {'_id': 'mouse', 'count': 1}]


In [7]:
#To run an explain plan for this aggregation use PyMongoExplain, a companion library for PyMongo. It allows you to explain any CRUD operation by providing a few convenience classes
from pymongoexplain import ExplainableCollection
ExplainableCollection(collection).aggregate(pipeline)

NameError: name 'collection' is not defined

In [8]:
db.command('aggregate', 'things', pipeline=pipeline, explain=True)

{'stages': [{'$cursor': {'query': {},
    'fields': {'tags': 1, '_id': 0},
    'queryPlanner': {'plannerVersion': 1,
     'namespace': 'aggregation_example.things',
     'indexFilterSet': False,
     'parsedQuery': {},
     'winningPlan': {'stage': 'COLLSCAN', 'direction': 'forward'},
     'rejectedPlans': []}}},
  {'$unwind': {'path': '$tags'}},
  {'$group': {'_id': '$tags', 'count': {'$sum': {'$const': 1}}}},
  {'$sort': {'sortKey': {'count': -1, '_id': -1}}}],
 'ok': 1.0}