In [1]:
%pip install pymongo

Note: you may need to restart the kernel to use updated packages.


CREATING A COLLECTION


*********** INSERT DOCUMENTS ( CREATE ) ***********

In [3]:
from pymongo import MongoClient
client = MongoClient('mongodb://localhost:27017/')
db = client['inventory']

In [6]:
db.inventory.insert_one({
    "item": "canvas",
    "qty": 100,
    "tags": ["cotton"],
    "size": {"h": 28, "w": 35.5, "uom": "cm"}
})

InsertOneResult(ObjectId('6838db6070ac9c19dcaeca41'), acknowledged=True)

Query the Collection

In [9]:
for doc in db.inventory.find({ "item": "canvas" }):
    print(doc)

{'_id': ObjectId('6838db6070ac9c19dcaeca41'), 'item': 'canvas', 'qty': 100, 'tags': ['cotton'], 'size': {'h': 28, 'w': 35.5, 'uom': 'cm'}}


Insert Many Documents

In [11]:
db.inventory.insert_many([
    { "item": "journal", "qty": 25, "tags": ["blank", "red"], "size": { "h": 14, "w": 21, "uom": "cm" } },
    { "item": "mat", "qty": 85, "tags": ["gray"], "size": { "h": 27.9, "w": 35.5, "uom": "cm" } },
    { "item": "mousepad", "qty": 25, "tags": ["gel", "blue"], "size": { "h": 19, "w": 22.85, "uom": "cm" } }
])

InsertManyResult([ObjectId('68390b9770ac9c19dcaeca42'), ObjectId('68390b9770ac9c19dcaeca43'), ObjectId('68390b9770ac9c19dcaeca44')], acknowledged=True)

Query the inventory collection

In [12]:
db.inventory.find({})

<pymongo.synchronous.cursor.Cursor at 0x28c4db0df90>

Insert additional data

In [13]:
db.inventory.insert_many([
    { "item": "journal", "qty": 25, "size": {"h": 14, "w": 21, "uom": "cm"}, "status": "A" },
    { "item": "notebook", "qty": 50, "size": {"h": 8.5, "w": 11, "uom": "in"}, "status": "A" },
    { "item": "paper", "qty": 100, "size": {"h": 8.5, "w": 11, "uom": "in"}, "status": "D" },
    { "item": "planner", "qty": 75, "size": {"h": 22.85, "w": 30, "uom": "cm"}, "status": "D" },
    { "item": "postcard", "qty": 45, "size": {"h": 10, "w": 15.25, "uom": "cm"}, "status": "A" }
])

InsertManyResult([ObjectId('6839123070ac9c19dcaeca45'), ObjectId('6839123070ac9c19dcaeca46'), ObjectId('6839123070ac9c19dcaeca47'), ObjectId('6839123070ac9c19dcaeca48'), ObjectId('6839123070ac9c19dcaeca49')], acknowledged=True)

duplicates added to item journal

In [16]:
for doc in db.inventory.find({ "item": "journal" }):
    print(doc)

{'_id': ObjectId('68390b9770ac9c19dcaeca42'), 'item': 'journal', 'qty': 25, 'tags': ['blank', 'red'], 'size': {'h': 14, 'w': 21, 'uom': 'cm'}}
{'_id': ObjectId('6839123070ac9c19dcaeca45'), 'item': 'journal', 'qty': 25, 'size': {'h': 14, 'w': 21, 'uom': 'cm'}, 'status': 'A'}


*********** QUERY DOCUMENTS ( READ ) ***********

Query the inventory collection

In [17]:
db.inventory.find({})

<pymongo.synchronous.cursor.Cursor at 0x28c4db0e850>

SELECT * FROM inventory WHERE status = "D"

In [19]:
from pprint import pprint

for doc in db.inventory.find({"status": "D"}):
    pprint(doc)

{'_id': ObjectId('6839123070ac9c19dcaeca47'),
 'item': 'paper',
 'qty': 100,
 'size': {'h': 8.5, 'uom': 'in', 'w': 11},
 'status': 'D'}
{'_id': ObjectId('6839123070ac9c19dcaeca48'),
 'item': 'planner',
 'qty': 75,
 'size': {'h': 22.85, 'uom': 'cm', 'w': 30},
 'status': 'D'}


In [22]:
db.inventory.find({"status": {"$in": ["A", "D"]}})


<pymongo.synchronous.cursor.Cursor at 0x28c4db0e5d0>

Retrieve all documents in the inventory collection where the status equals "A" AND qty is less than ($lt) 30

SELECT * FROM inventory WHERE status = "A" AND qty < 30

In [24]:
db.inventory.find( {"status": "A", "qty": {"$lt": 30 } } )

<pymongo.synchronous.cursor.Cursor at 0x28c4db0ee90>

retrieve all documents in the collection where the status equals "A" OR qty is less than ($lt) 30:

SELECT * FROM inventory WHERE status = "A" OR qty < 30

In [25]:
for doc in db.inventory.find({
    "$or": [
        { "status": "A" },
        { "qty": { "$lt": 30 } }
    ]
}):
    print(doc)

{'_id': ObjectId('68390b9770ac9c19dcaeca42'), 'item': 'journal', 'qty': 25, 'tags': ['blank', 'red'], 'size': {'h': 14, 'w': 21, 'uom': 'cm'}}
{'_id': ObjectId('68390b9770ac9c19dcaeca44'), 'item': 'mousepad', 'qty': 25, 'tags': ['gel', 'blue'], 'size': {'h': 19, 'w': 22.85, 'uom': 'cm'}}
{'_id': ObjectId('6839123070ac9c19dcaeca45'), 'item': 'journal', 'qty': 25, 'size': {'h': 14, 'w': 21, 'uom': 'cm'}, 'status': 'A'}
{'_id': ObjectId('6839123070ac9c19dcaeca46'), 'item': 'notebook', 'qty': 50, 'size': {'h': 8.5, 'w': 11, 'uom': 'in'}, 'status': 'A'}
{'_id': ObjectId('6839123070ac9c19dcaeca49'), 'item': 'postcard', 'qty': 45, 'size': {'h': 10, 'w': 15.25, 'uom': 'cm'}, 'status': 'A'}


compound query that selects all documents in the collection where the status equals "A" and

either qty is less than ($lt) 30 or item starts with the character p

SELECT * FROM inventory WHERE status = "A" AND ( qty < 30 OR item LIKE "p%")

在 Python（pymongo）中，所有 key 和操作符都要加引号。

In [27]:
for doc in db.inventory.find({
    "$or": [
        { "status": "A" },
        { "qty": { "$lt": 30 } }
    ]
}):
    print(doc)

{'_id': ObjectId('68390b9770ac9c19dcaeca42'), 'item': 'journal', 'qty': 25, 'tags': ['blank', 'red'], 'size': {'h': 14, 'w': 21, 'uom': 'cm'}}
{'_id': ObjectId('68390b9770ac9c19dcaeca44'), 'item': 'mousepad', 'qty': 25, 'tags': ['gel', 'blue'], 'size': {'h': 19, 'w': 22.85, 'uom': 'cm'}}
{'_id': ObjectId('6839123070ac9c19dcaeca45'), 'item': 'journal', 'qty': 25, 'size': {'h': 14, 'w': 21, 'uom': 'cm'}, 'status': 'A'}
{'_id': ObjectId('6839123070ac9c19dcaeca46'), 'item': 'notebook', 'qty': 50, 'size': {'h': 8.5, 'w': 11, 'uom': 'in'}, 'status': 'A'}
{'_id': ObjectId('6839123070ac9c19dcaeca49'), 'item': 'postcard', 'qty': 45, 'size': {'h': 10, 'w': 15.25, 'uom': 'cm'}, 'status': 'A'}


Match an Embedded/Nested Document  
Select all documents where the field size equals the document { h: 14, w: 21, uom: "cm" } 

In [28]:
from pprint import pprint
for doc in db.inventory.find({
    "size": { "h": 14, "w": 21, "uom": "cm" }
}):
    pprint(doc)

{'_id': ObjectId('68390b9770ac9c19dcaeca42'),
 'item': 'journal',
 'qty': 25,
 'size': {'h': 14, 'uom': 'cm', 'w': 21},
 'tags': ['blank', 'red']}
{'_id': ObjectId('6839123070ac9c19dcaeca45'),
 'item': 'journal',
 'qty': 25,
 'size': {'h': 14, 'uom': 'cm', 'w': 21},
 'status': 'A'}


Note: Equality matches on the whole embedded document require an exact match of the specified <value>

Below query does not return any records since keys h & w have been swapped

select all documents where the field uom nested in the size field equals "in" */

In [29]:
from pprint import pprint
for doc in db.inventory.find({ "size.uom": "in" }):
    pprint(doc)

{'_id': ObjectId('6839123070ac9c19dcaeca46'),
 'item': 'notebook',
 'qty': 50,
 'size': {'h': 8.5, 'uom': 'in', 'w': 11},
 'status': 'A'}
{'_id': ObjectId('6839123070ac9c19dcaeca47'),
 'item': 'paper',
 'qty': 100,
 'size': {'h': 8.5, 'uom': 'in', 'w': 11},
 'status': 'D'}


selects all documents where the nested field h is less than 15, the nested field

uom equals "in", and the status field equals "D":

In [31]:
for doc in db.inventory.find({
    "size.h": { "$lt": 15 },
    "size.uom": "in",
    "status": "D"
}):
    pprint(doc)

{'_id': ObjectId('6839123070ac9c19dcaeca47'),
 'item': 'paper',
 'qty': 100,
 'size': {'h': 8.5, 'uom': 'in', 'w': 11},
 'status': 'D'}


Query an array : Insert data

In [32]:
db.inventory.insert_many([
    { "item": "journal", "qty": 25, "tags": ["blank", "red"], "dim_cm": [14, 21] },
    { "item": "notebook", "qty": 50, "tags": ["red", "blank"], "dim_cm": [14, 21] },
    { "item": "paper", "qty": 100, "tags": ["red", "blank", "plain"], "dim_cm": [14, 21] },
    { "item": "planner", "qty": 75, "tags": ["blank", "red"], "dim_cm": [22.85, 30] },
    { "item": "postcard", "qty": 45, "tags": ["blue"], "dim_cm": [10, 15.25] }
])

InsertManyResult([ObjectId('68394c1a70ac9c19dcaeca4a'), ObjectId('68394c1a70ac9c19dcaeca4b'), ObjectId('68394c1a70ac9c19dcaeca4c'), ObjectId('68394c1a70ac9c19dcaeca4d'), ObjectId('68394c1a70ac9c19dcaeca4e')], acknowledged=True)

Query the collection for all documents where the field tags value is an array with exactly two elements,

"red" and "blank", in the SPECIFIED ORDER

In [33]:
from pprint import pprint
for doc in db.inventory.find({ "tags": ["red", "blank"] }):
    pprint(doc)

{'_id': ObjectId('68394c1a70ac9c19dcaeca4b'),
 'dim_cm': [14, 21],
 'item': 'notebook',
 'qty': 50,
 'tags': ['red', 'blank']}


Query all documents where tags is an array that contains the string "red" as one of its elements

In [34]:
from pprint import pprint
for doc in db.inventory.find({ "tags": "red" }):
    pprint(doc)

{'_id': ObjectId('68390b9770ac9c19dcaeca42'),
 'item': 'journal',
 'qty': 25,
 'size': {'h': 14, 'uom': 'cm', 'w': 21},
 'tags': ['blank', 'red']}
{'_id': ObjectId('68394c1a70ac9c19dcaeca4a'),
 'dim_cm': [14, 21],
 'item': 'journal',
 'qty': 25,
 'tags': ['blank', 'red']}
{'_id': ObjectId('68394c1a70ac9c19dcaeca4b'),
 'dim_cm': [14, 21],
 'item': 'notebook',
 'qty': 50,
 'tags': ['red', 'blank']}
{'_id': ObjectId('68394c1a70ac9c19dcaeca4c'),
 'dim_cm': [14, 21],
 'item': 'paper',
 'qty': 100,
 'tags': ['red', 'blank', 'plain']}
{'_id': ObjectId('68394c1a70ac9c19dcaeca4d'),
 'dim_cm': [22.85, 30],
 'item': 'planner',
 'qty': 75,
 'tags': ['blank', 'red']}


Queries all documents where the array dim_cm contains at least one element whose value is greater than 25.

In [35]:
from pprint import pprint
for doc in db.inventory.find({ "dim_cm": { "$gt": 25 } }):
    pprint(doc)

{'_id': ObjectId('68394c1a70ac9c19dcaeca4d'),
 'dim_cm': [22.85, 30],
 'item': 'planner',
 'qty': 75,
 'tags': ['blank', 'red']}


Query an Array with Compound Filter Conditions on the Array Elements

The following example queries for documents where the dim_cm array contains elements that in some combination satisfy the query conditions; e.g., one element can satisfy the greater than 15 condition and another element can satisfy the less than 20 condition, or a single element can satisfy both


In [36]:
from pprint import pprint
for doc in db.inventory.find({ "dim_cm": { "$gt": 15, "$lt": 20 } }):
    pprint(doc)

{'_id': ObjectId('68394c1a70ac9c19dcaeca4a'),
 'dim_cm': [14, 21],
 'item': 'journal',
 'qty': 25,
 'tags': ['blank', 'red']}
{'_id': ObjectId('68394c1a70ac9c19dcaeca4b'),
 'dim_cm': [14, 21],
 'item': 'notebook',
 'qty': 50,
 'tags': ['red', 'blank']}
{'_id': ObjectId('68394c1a70ac9c19dcaeca4c'),
 'dim_cm': [14, 21],
 'item': 'paper',
 'qty': 100,
 'tags': ['red', 'blank', 'plain']}
{'_id': ObjectId('68394c1a70ac9c19dcaeca4e'),
 'dim_cm': [10, 15.25],
 'item': 'postcard',
 'qty': 45,
 'tags': ['blue']}


Query for an Array Element that Meets Multiple Criteria $elemMatch operator queries for documents where the dim_cm array contains at least one element that is both greater than
($gt) 22 and less than ($lt) 30


In [37]:
from pprint import pprint
for doc in db.inventory.find({ "dim_cm": { "$elemMatch": { "$gt": 22, "$lt": 30 } } }):
    pprint(doc)

{'_id': ObjectId('68394c1a70ac9c19dcaeca4d'),
 'dim_cm': [22.85, 30],
 'item': 'planner',
 'qty': 75,
 'tags': ['blank', 'red']}


Query for an Element by the Array Index Position Queries all documents where the second element ( dot notation ) in the array dim_cm is greater than 25

In [41]:
from pprint import pprint
for doc in db.inventory.find({ "dim_cm": { "$gt": 25 } }):
    pprint(doc)

{'_id': ObjectId('68394c1a70ac9c19dcaeca4d'),
 'dim_cm': [22.85, 30],
 'item': 'planner',
 'qty': 75,
 'tags': ['blank', 'red']}


Query for an Array Element that Meets Multiple Criteria 

$elemMatch operator queries for documents where the dim_cm array contains at least one element that is both greater than
($gt) 22 and less than ($lt) 30

In [39]:
from pprint import pprint
for doc in db.inventory.find({ "dim_cm": { "$elemMatch": { "$gt": 22, "$lt": 30 } } }):
    pprint(doc)

{'_id': ObjectId('68394c1a70ac9c19dcaeca4d'),
 'dim_cm': [22.85, 30],
 'item': 'planner',
 'qty': 75,
 'tags': ['blank', 'red']}


Query for an Element by the Array Index Position 

Queries all documents where the second element ( dot notation ) in the array dim_cm is greater than 25

In [40]:
from pprint import pprint
for doc in db.inventory.find({ "dim_cm.1": { "$gt": 25 } }):
    pprint(doc)

{'_id': ObjectId('68394c1a70ac9c19dcaeca4d'),
 'dim_cm': [22.85, 30],
 'item': 'planner',
 'qty': 75,
 'tags': ['blank', 'red']}


Query an Array by Array Length

In [42]:
from pprint import pprint
for doc in db.inventory.find({ "tags": { "$size": 3 } }):
    pprint(doc)

{'_id': ObjectId('68394c1a70ac9c19dcaeca4c'),
 'dim_cm': [14, 21],
 'item': 'paper',
 'qty': 100,
 'tags': ['red', 'blank', 'plain']}


Query for Null or Missing Fields

Insert documents

In [43]:
db.inventory.insert_many([
    { "_id": 1, "item": None },
    { "_id": 2 }
])

InsertManyResult([1, 2], acknowledged=True)

Existence Check

In [44]:
from pymongo import MongoClient
client = MongoClient('mongodb://localhost:27017/')
db = client['inventory']

*********** UPDATE DOCUMENTS ( UPDATE ) ***********

update single document (paper)

update the value of the size.uom field to "cm" and the value of the status field to "P",

update the value of the lastModified field to the current date.

If lastModified field does not exist, $currentDate will create the field.

Check parameters before update

In [46]:
import re

query = {
    "status": "A",
    "$or": [
        { "qty": { "$lt": 30 } },
        { "item": re.compile("^p") }
    ]
}

for doc in db.inventory.find(query):
    print(doc)

{'_id': ObjectId('6839123070ac9c19dcaeca45'), 'item': 'journal', 'qty': 25, 'size': {'h': 14, 'w': 21, 'uom': 'cm'}, 'status': 'A'}
{'_id': ObjectId('6839123070ac9c19dcaeca49'), 'item': 'postcard', 'qty': 45, 'size': {'h': 10, 'w': 15.25, 'uom': 'cm'}, 'status': 'A'}


复合查询和正则要用 Python 语法，不能直接用 shell 语法

update all

In [47]:
db.inventory.update_many(
    { "qty": { "$lt": 50 } },
    {
        "$set": { "size.uom": "in", "status": "P" },
        "$currentDate": { "lastModified": True }
    }
)

UpdateResult({'n': 6, 'nModified': 6, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

Replaces the first document from the inventory collection that matches the filter item equals "paper":

In [48]:
db.inventory.replace_one(
    { "item": "paper" },
    { "item": "paper", "instock": [ { "warehouse": "A", "qty": 60 }, { "warehouse": "B", "qty": 40 } ] }
)

UpdateResult({'n': 1, 'nModified': 1, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

*********** DELETE DOCUMENTS ( DELETE ) ***********

remove all documents from the inventory collection where the status field equals "A"

In [49]:
result = db.inventory.delete_many({ "status": "A" })
print("Deleted count:", result.deleted_count)

Deleted count: 1


Deletes the first document where status is "D"

In [50]:
result = db.inventory.delete_one({ "status": "D" })
print("Deleted count:", result.deleted_count)

Deleted count: 1


WEEKLY CHALLENGE

Write a function that removes duplicates from an array. The function should take an array as input and return a new array with all duplicate elements removed. 

In [51]:
def remove_duplicates(arr):
    seen = set()
    result = []
    for item in arr:
        if item not in seen:
            result.append(item)
            seen.add(item)
    return result

# Example usage:
arr = [1, 2, 2, 3, 4, 3, 5, 1]
print(remove_duplicates(arr))  

[1, 2, 3, 4, 5]


References
https://docs.mongodb.com/manual/crud/