# MongoDB Aggregation Pipeline In Python

Learn about the stages and operators you can combine to build a MongoDB aggregation pipeline.

Stages: [match](#match), [project](#project), [unset](#unset), [limit](#limit), [skip](#skip), [sort](#sort), [count](#count), [sortByCount](#sortByCount), [unwind](#unwind), [group](#group), [addFields](#addFields), [sample](#sample), [lookup](#lookup), [unionWith](#unionWith), [out](#out), [merge](#merge)

Operators: [size](#size-(operator)), [in](#in-(operator)), [arrayElemAt](#arrayElemAt-(operator)), [first](#first-(operator)), [count](#count-(accumulator-operator)), [sum](#sum-(accumulator-operator)), [first, last](#first,-last-(accumulator-operators)), [push](#push-(accumulator-operator)), [addToSet](#addToSet-(accumulator-operator)), [regexMatch](#regexMatch-(operator)), [cond](#cond-(operator)), [Date](#Date-Operators), [expr](#expr-(operator)), [ifNull](#ifNull-(operator)), [type](#type-(operator)), [switch](#switch-(operator))

In [66]:
from pymongo import MongoClient

In [67]:
# Connection string for a MongoDB server on this machine (port 27017).
mongodb_uri = "mongodb://localhost:27017/"
# Name of the database that the examples below read from.
db_name = "aggregation_test"

In [68]:
# Open a client for the server and take a handle to the example database;
# every aggregation call below goes through `db`.
client = MongoClient(mongodb_uri)
db = client[db_name]

### Helper Function

In [69]:
from pprint import pprint

def print_cursor(cursor):
    """Pretty-print every document yielded by *cursor*.

    Each document is written with ``pprint`` and followed by one empty line
    so consecutive results are visually separated. Iterating consumes the
    cursor; nothing is returned.
    """
    for doc in cursor:
        pprint(doc)
        print()  # blank separator line after each document

### Inserting Some Sample Data

In [70]:
# Project-local helper module that seeds the example collections.
# NOTE(review): judging by the message printed below, it skips the inserts
# when users, products, or orders already contain entries; confirm in the
# module itself.
import insert_aggregation_sample_data as iasd
iasd.insert_data(mongodb_uri, db_name)

Entries already exist in the aggregation_test database in the users, products, or orders collection. Insert commands aborted.


### match

In [71]:
# $match keeps only the documents that satisfy the query: here, the product
# whose name is exactly "Pens".
match_cursor = db.products.aggregate(
    [
        {"$match": {"name": "Pens"}},
    ]
)

In [72]:
print_cursor(match_cursor)

{'_id': ObjectId('6874fae37d59a390047b630d'),
 'name': 'Pens',
 'seller_id': ObjectId('6874fae37d59a390047b6306'),
 'tags': ['Office', 'School']}



In [73]:
# $match accepts the usual query operators. $or passes any product tagged
# "Beauty" or "Home" (a scalar compared with an array field matches when any
# element of the array equals it).
match_cursor = db.products.aggregate(
    [
        {
            "$match": {
                "$or": [
                    {"tags": "Beauty"},
                    {"tags": "Home"},
                ]
            }
        },
    ]
)

In [74]:
print_cursor(match_cursor)

{'_id': ObjectId('6874fae37d59a390047b630b'),
 'name': 'Mug',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tags': ['Home', 'Kitchen']}

{'_id': ObjectId('6874fae37d59a390047b630c'),
 'name': 'Moisturizer',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b630e'),
 'name': 'Face Cleanser',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b630f'),
 'name': 'Concealer Makeup',
 'seller_id': ObjectId('6874fae37d59a390047b6308'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b6310'),
 'name': 'Eyeliner',
 'seller_id': ObjectId('6874fae37d59a390047b6309'),
 'tags': ['Beauty']}



### project

In [75]:
# $project reshapes each document: 0 drops a field, 1 keeps it, and a
# "$field" reference copies a value under a new key.
project_cursor = db.products.aggregate(
    [
        {
            "$project": {
                "_id": 0,
                "product_name": "$name",
                "tags": 1,
            }
        },
    ]
)

In [76]:
print_cursor(project_cursor)

{'product_name': 'Mug', 'tags': ['Home', 'Kitchen']}

{'product_name': 'Moisturizer', 'tags': ['Beauty']}

{'product_name': 'Pens', 'tags': ['Office', 'School']}

{'product_name': 'Face Cleanser', 'tags': ['Beauty']}

{'product_name': 'Concealer Makeup', 'tags': ['Beauty']}

{'product_name': 'Eyeliner', 'tags': ['Beauty']}



In [77]:
# Stages run in order: filter down to "Pens" first, then reshape the
# document that survives.
match_project_cursor = db.products.aggregate(
    [
        {"$match": {"name": "Pens"}},
        {
            "$project": {
                "_id": 0,
                "product_name": "$name",
                "tags": 1,
            }
        },
    ]
)

In [78]:
print_cursor(match_project_cursor)

{'product_name': 'Pens', 'tags': ['Office', 'School']}



### unset

In [79]:
# $unset removes the listed fields and leaves every other field untouched.
unset_cursor = db.products.aggregate(
    [
        {"$unset": ["_id", "seller_id"]},
    ]
)

In [80]:
print_cursor(unset_cursor)

{'name': 'Mug', 'tags': ['Home', 'Kitchen']}

{'name': 'Moisturizer', 'tags': ['Beauty']}

{'name': 'Pens', 'tags': ['Office', 'School']}

{'name': 'Face Cleanser', 'tags': ['Beauty']}

{'name': 'Concealer Makeup', 'tags': ['Beauty']}

{'name': 'Eyeliner', 'tags': ['Beauty']}



In [81]:
# Filter to "Pens", then strip the two id fields from the result.
unset_cursor = db.products.aggregate(
    [
        {"$match": {"name": "Pens"}},
        {"$unset": ["_id", "seller_id"]},
    ]
)

In [82]:
print_cursor(unset_cursor)

{'name': 'Pens', 'tags': ['Office', 'School']}



### limit

In [83]:
# $limit passes at most the first 3 documents it receives.
limit_cursor = db.products.aggregate(
    [
        {"$limit": 3},
    ]
)

In [84]:
print_cursor(limit_cursor)

{'_id': ObjectId('6874fae37d59a390047b630b'),
 'name': 'Mug',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tags': ['Home', 'Kitchen']}

{'_id': ObjectId('6874fae37d59a390047b630c'),
 'name': 'Moisturizer',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b630d'),
 'name': 'Pens',
 'seller_id': ObjectId('6874fae37d59a390047b6306'),
 'tags': ['Office', 'School']}



### skip

In [85]:
# $skip discards the first 2 documents and passes everything after them.
skip_cursor = db.products.aggregate(
    [
        {"$skip": 2},
    ]
)

In [86]:
print_cursor(skip_cursor)

{'_id': ObjectId('6874fae37d59a390047b630d'),
 'name': 'Pens',
 'seller_id': ObjectId('6874fae37d59a390047b6306'),
 'tags': ['Office', 'School']}

{'_id': ObjectId('6874fae37d59a390047b630e'),
 'name': 'Face Cleanser',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b630f'),
 'name': 'Concealer Makeup',
 'seller_id': ObjectId('6874fae37d59a390047b6308'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b6310'),
 'name': 'Eyeliner',
 'seller_id': ObjectId('6874fae37d59a390047b6309'),
 'tags': ['Beauty']}



In [87]:
# Skip 2, then take 3: this yields the 3rd through 5th documents. Stage order
# matters; limiting first would leave only one document after the skip.
limit_and_skip_cursor = db.products.aggregate(
    [
        {"$skip": 2},
        {"$limit": 3},
    ]
)

In [88]:
print_cursor(limit_and_skip_cursor)

{'_id': ObjectId('6874fae37d59a390047b630d'),
 'name': 'Pens',
 'seller_id': ObjectId('6874fae37d59a390047b6306'),
 'tags': ['Office', 'School']}

{'_id': ObjectId('6874fae37d59a390047b630e'),
 'name': 'Face Cleanser',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b630f'),
 'name': 'Concealer Makeup',
 'seller_id': ObjectId('6874fae37d59a390047b6308'),
 'tags': ['Beauty']}



### sort

In [89]:
# $sort orders the documents by the given field; 1 means ascending
# (-1 would be descending).
sort_cursor = db.products.aggregate(
    [
        {"$sort": {"name": 1}},
    ]
)

In [90]:
print_cursor(sort_cursor)

{'_id': ObjectId('6874fae37d59a390047b630f'),
 'name': 'Concealer Makeup',
 'seller_id': ObjectId('6874fae37d59a390047b6308'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b6310'),
 'name': 'Eyeliner',
 'seller_id': ObjectId('6874fae37d59a390047b6309'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b630e'),
 'name': 'Face Cleanser',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b630c'),
 'name': 'Moisturizer',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tags': ['Beauty']}

{'_id': ObjectId('6874fae37d59a390047b630b'),
 'name': 'Mug',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tags': ['Home', 'Kitchen']}

{'_id': ObjectId('6874fae37d59a390047b630d'),
 'name': 'Pens',
 'seller_id': ObjectId('6874fae37d59a390047b6306'),
 'tags': ['Office', 'School']}



### count

In [91]:
# $count replaces its input with a single document holding the number of
# documents that reached it, stored under the given field name.
count_cursor = db.products.aggregate(
    [
        {"$match": {"tags": "Beauty"}},
        {"$count": "beauty_products_count"},
    ]
)

In [92]:
print_cursor(count_cursor)

{'beauty_products_count': 4}



### sortByCount

In [93]:
# $sortByCount groups by the value of the expression and emits
# {"_id": <value>, "count": <n>} documents, largest count first. The whole
# "tags" array is the grouping key here, so ["Beauty"] and
# ["Home", "Kitchen"] are distinct groups.
sort_by_count_cursor = db.products.aggregate(
    [
        {"$sortByCount": "$tags"},
    ]
)

In [94]:
print_cursor(sort_by_count_cursor)

{'_id': ['Beauty'], 'count': 4}

{'_id': ['Office', 'School'], 'count': 1}

{'_id': ['Home', 'Kitchen'], 'count': 1}



### size (operator)

In [95]:
# $size (expression operator) returns the number of elements in an array.
project_cursor = db.products.aggregate(
    [
        {
            "$project": {
                "_id": 0,
                "name": 1,
                "num_tags": {"$size": "$tags"},
                "tags": "$tags",
            }
        },
    ]
)

In [96]:
print_cursor(project_cursor)

{'name': 'Mug', 'num_tags': 2, 'tags': ['Home', 'Kitchen']}

{'name': 'Moisturizer', 'num_tags': 1, 'tags': ['Beauty']}

{'name': 'Pens', 'num_tags': 2, 'tags': ['Office', 'School']}

{'name': 'Face Cleanser', 'num_tags': 1, 'tags': ['Beauty']}

{'name': 'Concealer Makeup', 'num_tags': 1, 'tags': ['Beauty']}

{'name': 'Eyeliner', 'num_tags': 1, 'tags': ['Beauty']}



### in (operator)

In [97]:
# $in (expression operator) takes [value, array] and evaluates to a boolean:
# is "Beauty" one of this product's tags?
project_cursor = db.products.aggregate(
    [
        {
            "$project": {
                "_id": 0,
                "name": 1,
                "is_beauty_product": {"$in": ["Beauty", "$tags"]},
                "tags": "$tags",
            }
        },
    ]
)

In [98]:
print_cursor(project_cursor)

{'is_beauty_product': False, 'name': 'Mug', 'tags': ['Home', 'Kitchen']}

{'is_beauty_product': True, 'name': 'Moisturizer', 'tags': ['Beauty']}

{'is_beauty_product': False, 'name': 'Pens', 'tags': ['Office', 'School']}

{'is_beauty_product': True, 'name': 'Face Cleanser', 'tags': ['Beauty']}

{'is_beauty_product': True, 'name': 'Concealer Makeup', 'tags': ['Beauty']}

{'is_beauty_product': True, 'name': 'Eyeliner', 'tags': ['Beauty']}



### arrayElemAt (operator)

In [99]:
# $arrayElemAt takes [array, index] and returns the element at that
# position; index 0 is the first tag.
project_cursor = db.products.aggregate(
    [
        {
            "$project": {
                "_id": 0,
                "name": 1,
                "first_tag": {"$arrayElemAt": ["$tags", 0]},
                "tags": "$tags",
            }
        },
    ]
)

In [100]:
print_cursor(project_cursor)

{'first_tag': 'Home', 'name': 'Mug', 'tags': ['Home', 'Kitchen']}

{'first_tag': 'Beauty', 'name': 'Moisturizer', 'tags': ['Beauty']}

{'first_tag': 'Office', 'name': 'Pens', 'tags': ['Office', 'School']}

{'first_tag': 'Beauty', 'name': 'Face Cleanser', 'tags': ['Beauty']}

{'first_tag': 'Beauty', 'name': 'Concealer Makeup', 'tags': ['Beauty']}

{'first_tag': 'Beauty', 'name': 'Eyeliner', 'tags': ['Beauty']}



### first (operator)

In [101]:
# $first used as an array expression returns the first element of the
# array, i.e. the same result as {"$arrayElemAt": ["$tags", 0]}.
# NOTE: this array form of $first needs MongoDB 4.4 or newer.
project_cursor = db.products.aggregate(
    [
        {
            "$project": {
                "_id": 0,
                "name": 1,
                "first_tag": {"$first": "$tags"},
                "tags": "$tags",
            }
        },
    ]
)

In [102]:
print_cursor(project_cursor)

{'first_tag': 'Home', 'name': 'Mug', 'tags': ['Home', 'Kitchen']}

{'first_tag': 'Beauty', 'name': 'Moisturizer', 'tags': ['Beauty']}

{'first_tag': 'Office', 'name': 'Pens', 'tags': ['Office', 'School']}

{'first_tag': 'Beauty', 'name': 'Face Cleanser', 'tags': ['Beauty']}

{'first_tag': 'Beauty', 'name': 'Concealer Makeup', 'tags': ['Beauty']}

{'first_tag': 'Beauty', 'name': 'Eyeliner', 'tags': ['Beauty']}



### unwind

In [103]:
# $unwind emits one copy of the document per array element, with "tags"
# replaced by that single element; the id fields are then dropped for
# readability.
unwind_cursor = db.products.aggregate(
    [
        {"$unwind": "$tags"},
        {"$unset": ["_id", "seller_id"]},
    ]
)

In [104]:
print_cursor(unwind_cursor)

{'name': 'Mug', 'tags': 'Home'}

{'name': 'Mug', 'tags': 'Kitchen'}

{'name': 'Moisturizer', 'tags': 'Beauty'}

{'name': 'Pens', 'tags': 'Office'}

{'name': 'Pens', 'tags': 'School'}

{'name': 'Face Cleanser', 'tags': 'Beauty'}

{'name': 'Concealer Makeup', 'tags': 'Beauty'}

{'name': 'Eyeliner', 'tags': 'Beauty'}



In [105]:
# Unwinding an array of sub-documents: every order yields one document per
# line item, with "items" holding that single item.
unwind_cursor = db.orders.aggregate(
    [
        {"$unwind": "$items"},
    ]
)

In [106]:
print_cursor(unwind_cursor)

{'_id': ObjectId('6874fae37d59a390047b6311'),
 'items': {'product_id': ObjectId('6874fae37d59a390047b630c'), 'quantity': 1}}

{'_id': ObjectId('6874fae37d59a390047b6311'),
 'items': {'product_id': ObjectId('6874fae37d59a390047b630e'), 'quantity': 1}}

{'_id': ObjectId('6874fae37d59a390047b6312'),
 'items': {'product_id': ObjectId('6874fae37d59a390047b630f'), 'quantity': 1}}

{'_id': ObjectId('6874fae37d59a390047b6312'),
 'items': {'product_id': ObjectId('6874fae37d59a390047b6310'), 'quantity': 1}}

{'_id': ObjectId('6874fae37d59a390047b6313'),
 'items': {'product_id': ObjectId('6874fae37d59a390047b630d'), 'quantity': 5}}

{'_id': ObjectId('6874fae37d59a390047b6313'),
 'items': {'product_id': ObjectId('6874fae37d59a390047b630b'), 'quantity': 1}}

{'_id': ObjectId('6874fae37d59a390047b6314'),
 'items': {'product_id': ObjectId('6874fae37d59a390047b630c'), 'quantity': 2}}

{'_id': ObjectId('6874fae37d59a390047b6314'),
 'items': {'product_id': ObjectId('6874fae37d59a390047b6310'), 'quantity

In [107]:
# The document form of $unwind takes options. "includeArrayIndex" stores the
# element's position in the original array under the given field name.
unwind_cursor = db.products.aggregate(
    [
        # $size as a query operator: only products with exactly two tags.
        {"$match": {"tags": {"$size": 2}}},
        {
            "$unwind": {
                "path": "$tags",
                "includeArrayIndex": "tag_index",
            }
        },
        {"$unset": ["_id"]},
    ]
)

In [108]:
print_cursor(unwind_cursor)

{'name': 'Mug',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tag_index': 0,
 'tags': 'Home'}

{'name': 'Mug',
 'seller_id': ObjectId('6874fae37d59a390047b6305'),
 'tag_index': 1,
 'tags': 'Kitchen'}

{'name': 'Pens',
 'seller_id': ObjectId('6874fae37d59a390047b6306'),
 'tag_index': 0,
 'tags': 'Office'}

{'name': 'Pens',
 'seller_id': ObjectId('6874fae37d59a390047b6306'),
 'tag_index': 1,
 'tags': 'School'}



### group

In [109]:
# $group emits one document per distinct "_id" expression value. Grouping on
# the whole "tags" array treats each distinct array as its own key.
group_cursor = db.products.aggregate(
    [
        {"$group": {"_id": "$tags"}},
    ]
)

In [110]:
print_cursor(group_cursor)

{'_id': ['Home', 'Kitchen']}

{'_id': ['Beauty']}

{'_id': ['Office', 'School']}



In [111]:
# Unwind first so that grouping happens per individual tag; the result is
# the set of distinct tags across all products.
group_cursor = db.products.aggregate(
    [
        {"$unwind": "$tags"},
        {"$group": {"_id": "$tags"}},
    ]
)

In [112]:
print_cursor(group_cursor)

{'_id': 'Office'}

{'_id': 'Beauty'}

{'_id': 'Kitchen'}

{'_id': 'Home'}

{'_id': 'School'}



### count (accumulator operator)

In [113]:
# The $count accumulator (it takes an empty document) counts the documents
# in each group.
# NOTE: $count as an accumulator needs MongoDB 5.0 or newer.
group_cursor = db.products.aggregate(
    [
        {
            "$group": {
                "_id": "$tags",
                "num_entries": {"$count": {}},
            }
        },
    ]
)

In [114]:
print_cursor(group_cursor)

{'_id': ['Home', 'Kitchen'], 'num_entries': 1}

{'_id': ['Beauty'], 'num_entries': 4}

{'_id': ['Office', 'School'], 'num_entries': 1}



### sum (accumulator operator)

In [115]:
# Total quantity ordered per product: flatten the order items, group by
# product id, and let $sum add up each group's quantities.
group_cursor = db.orders.aggregate(
    [
        {"$unwind": "$items"},
        {
            "$group": {
                "_id": "$items.product_id",
                "total_quantity": {"$sum": "$items.quantity"},
            }
        },
    ]
)

In [116]:
print_cursor(group_cursor)

{'_id': ObjectId('6874fae37d59a390047b6310'), 'total_quantity': 2}

{'_id': ObjectId('6874fae37d59a390047b630c'), 'total_quantity': 4}

{'_id': ObjectId('6874fae37d59a390047b630f'), 'total_quantity': 1}

{'_id': ObjectId('6874fae37d59a390047b630e'), 'total_quantity': 2}

{'_id': ObjectId('6874fae37d59a390047b630b'), 'total_quantity': 1}

{'_id': ObjectId('6874fae37d59a390047b630d'), 'total_quantity': 5}



### first, last (accumulator operators)

In [117]:
# As accumulators, $first and $last take the value from the first and last
# document of each group in arrival order. There is no preceding $sort here,
# so that order is whatever the collection scan yields.
group_cursor = db.products.aggregate(
    [
        {
            "$group": {
                "_id": "$tags",
                "num_entries": {"$count": {}},
                "first": {"$first": "$name"},
                "last": {"$last": "$name"},
            }
        },
    ]
)

In [118]:
print_cursor(group_cursor)

{'_id': ['Office', 'School'], 'first': 'Pens', 'last': 'Pens', 'num_entries': 1}

{'_id': ['Beauty'],
 'first': 'Moisturizer',
 'last': 'Eyeliner',
 'num_entries': 4}

{'_id': ['Home', 'Kitchen'], 'first': 'Mug', 'last': 'Mug', 'num_entries': 1}



### push (accumulator operator)

In [119]:
# $push collects the expression's value from every document in the group
# into an array (duplicates are kept).
group_cursor = db.products.aggregate(
    [
        {
            "$group": {
                "_id": "$tags",
                "products": {"$push": "$name"},
            }
        },
    ]
)

In [120]:
print_cursor(group_cursor)

{'_id': ['Home', 'Kitchen'], 'products': ['Mug']}

{'_id': ['Beauty'],
 'products': ['Moisturizer', 'Face Cleanser', 'Concealer Makeup', 'Eyeliner']}

{'_id': ['Office', 'School'], 'products': ['Pens']}



### addToSet (accumulator operator)

In [121]:
# $addToSet works like $push but keeps each distinct value only once; the
# order of the resulting array is unspecified.
group_cursor = db.products.aggregate(
    [
        {
            "$group": {
                "_id": "$tags",
                "products": {"$addToSet": "$name"},
            }
        },
    ]
)

In [122]:
print_cursor(group_cursor)

{'_id': ['Home', 'Kitchen'], 'products': ['Mug']}

{'_id': ['Beauty'],
 'products': ['Face Cleanser', 'Concealer Makeup', 'Eyeliner', 'Moisturizer']}

{'_id': ['Office', 'School'], 'products': ['Pens']}



In [123]:
# The accumulated value can be a sub-document assembled from several fields.
group_cursor = db.products.aggregate(
    [
        {
            "$group": {
                "_id": "$tags",
                "products": {
                    "$addToSet": {
                        "name": "$name",
                        "seller_id": "$seller_id",
                    }
                },
            }
        },
    ]
)

In [124]:
print_cursor(group_cursor)

{'_id': ['Home', 'Kitchen'],
 'products': [{'name': 'Mug',
               'seller_id': ObjectId('6874fae37d59a390047b6305')}]}

{'_id': ['Beauty'],
 'products': [{'name': 'Concealer Makeup',
               'seller_id': ObjectId('6874fae37d59a390047b6308')},
              {'name': 'Eyeliner',
               'seller_id': ObjectId('6874fae37d59a390047b6309')},
              {'name': 'Moisturizer',
               'seller_id': ObjectId('6874fae37d59a390047b6305')},
              {'name': 'Face Cleanser',
               'seller_id': ObjectId('6874fae37d59a390047b6305')}]}

{'_id': ['Office', 'School'],
 'products': [{'name': 'Pens',
               'seller_id': ObjectId('6874fae37d59a390047b6306')}]}



### $$ROOT (system variable)

In [125]:
# $$ROOT refers to the entire document currently being processed, so each
# group collects its complete source documents.
group_cursor = db.products.aggregate(
    [
        {
            "$group": {
                "_id": "$tags",
                "products": {"$addToSet": "$$ROOT"},
            }
        },
    ]
)

In [126]:
print_cursor(group_cursor)

{'_id': ['Home', 'Kitchen'],
 'products': [{'_id': ObjectId('6874fae37d59a390047b630b'),
               'name': 'Mug',
               'seller_id': ObjectId('6874fae37d59a390047b6305'),
               'tags': ['Home', 'Kitchen']}]}

{'_id': ['Beauty'],
 'products': [{'_id': ObjectId('6874fae37d59a390047b630f'),
               'name': 'Concealer Makeup',
               'seller_id': ObjectId('6874fae37d59a390047b6308'),
               'tags': ['Beauty']},
              {'_id': ObjectId('6874fae37d59a390047b6310'),
               'name': 'Eyeliner',
               'seller_id': ObjectId('6874fae37d59a390047b6309'),
               'tags': ['Beauty']},
              {'_id': ObjectId('6874fae37d59a390047b630c'),
               'name': 'Moisturizer',
               'seller_id': ObjectId('6874fae37d59a390047b6305'),
               'tags': ['Beauty']},
              {'_id': ObjectId('6874fae37d59a390047b630e'),
               'name': 'Face Cleanser',
               'seller_id': ObjectId('6874fae3

### addFields

In [127]:
# $addFields keeps every existing field and adds new ones, which may be
# constants or computed expressions.
add_fields_cursor = db.products.aggregate(
    [
        {"$match": {"name": "Pens"}},
        {
            "$addFields": {
                "my_new_field": "hi there",
                "num_tags": {"$size": "$tags"},
            }
        },
    ]
)

In [128]:
print_cursor(add_fields_cursor)

{'_id': ObjectId('6874fae37d59a390047b630d'),
 'my_new_field': 'hi there',
 'name': 'Pens',
 'num_tags': 2,
 'seller_id': ObjectId('6874fae37d59a390047b6306'),
 'tags': ['Office', 'School']}



### sample

In [129]:
# $sample picks the requested number of documents at random, so the output
# changes from run to run.
sample_cursor = db.products.aggregate(
    [
        {"$sample": {"size": 3}},
        {"$unset": ["_id", "seller_id"]},
    ]
)

In [130]:
print_cursor(sample_cursor)

{'name': 'Concealer Makeup', 'tags': ['Beauty']}

{'name': 'Moisturizer', 'tags': ['Beauty']}

{'name': 'Eyeliner', 'tags': ['Beauty']}



### lookup

### unionWith

### regexMatch (operator)

### out
*Note: This stage can overwrite all of the data in the target collection; use it with caution.*

### merge
*Note: This stage can overwrite existing data in the target collection; use it with caution.*

### cond (operator)

### $$NOW (system variable)

### Date Operators

### expr (operator)

### ifNull (operator)

### type (operator)

### switch (operator)