In [1]:
import datetime
import pprint
import pymongo
from pymongo import MongoClient



In [2]:
HOST = '10.10.10.250'
PORT = 27017
URL = f'mongodb://{HOST}:{PORT}/'
client = MongoClient(URL)

In [3]:
# -------------------------- 1. Get the database --------------------------
# A single instance of MongoDB can support multiple independent databases. 
# print all the databases 
print(client.list_database_names())

DB_NAME = 'test'
# if will create a new database if the name does not exist
db = client[DB_NAME]

['admin', 'config', 'local', 'test']


In [4]:
# -------------------------- 2. Get the collection --------------------------
# A collection is a group of documents stored in MongoDB
# and can be thought of as roughly the equivalent of a table in relational databases

# print all the collections in the database
print(db.list_collection_names())

collection_name = 'test_collection'
if collection_name in db.list_collection_names():
    print(f'collection {collection_name} already exists')
    # reset and initialize the collection
    db.drop_collection(collection_name)
    

# get a collection

collection = db[collection_name]

['test_collection', 'profiles']
collection test_collection already exists


In [5]:
# [important note]
# collections in MongoDB are not created lazily.
# **collections and databases are created when the first document is inserted into them**
# which means until this step, the collection and db are not created in mongodb server yet.

print(client.list_database_names())
print(db.list_collection_names())

# find there are no this collection and database, since we have not inserted any document yet.

['admin', 'config', 'local', 'test']
['profiles']


In [6]:
# -------------------------- 3. Insert a document --------------------------
# Data in MongoDB is represented (and stored) using JSON-style documents.
# the following dictionary might be used to represent a blog post:
post = {
    "author": "Mike",
    "text": "My first blog post!",
    "tags": ["mongodb", "python", "pymongo"],
    "date": datetime.datetime.utcnow()
}

# the documents contains native python types 
# which will be automatically converted to the appropriate BSON type
# BSON: https://bsonspec.org/

res = collection.insert_one(post)
print(res)
post_id = res.inserted_id
post_id
# when a document is inserted a special key, "_id", is automatically added 
# if the document doesn’t already contain an "_id" key.
# The value of "_id" must be unique across the collection.

# <res> an instance of "InsertOneResult" class.

InsertOneResult(ObjectId('67b19653185594fa22d29d76'), acknowledged=True)


ObjectId('67b19653185594fa22d29d76')

In [7]:
# check the collection and database again
print(client.list_database_names())
print(db.list_collection_names())

['admin', 'config', 'local', 'test']
['profiles', 'test_collection']


In [8]:
# -------------------------- 4. Query for a document --------------------------
# find_one() method returns a single document matching a query (or None if there are no matches).
# it is useful when you know there is only one matching document, or are only interested in the first match.
print(" ------------ find one () ------------")
pprint.pprint(collection.find_one())
print()

# find_one() also supports querying on specific elements that the resulting document must match.
# for example, this query selects only documents where the author is "Mike":
print(" ------------ find_one 'author' is 'Mike' ------------")
pprint.pprint(collection.find_one({"author": "Mike"}))
print()

# query the tags contain mongodb
print(" ------------ find_one 'tags' contain 'mongodb' ------------")
pprint.pprint(collection.find_one({"tags": "mongodb"}))
print()

# query tags not contain python
print(" ------------ find_one 'tags' not contain 'python' ------------")
pprint.pprint(collection.find_one({"tags": {"$ne": "python"}}))
print()


# query with object_id "_id"
# Note that an ObjectId is not the same as its string representation:
post_id_as_str = str(post_id)
print(" ------------ find_one '_id' ------------")
pprint.pprint(collection.find_one({"_id": post_id}))
print()

 ------------ find one () ------------
{'_id': ObjectId('67b19653185594fa22d29d76'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 2, 16, 7, 40, 3, 867000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}

 ------------ find_one 'author' is 'Mike' ------------
{'_id': ObjectId('67b19653185594fa22d29d76'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 2, 16, 7, 40, 3, 867000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}

 ------------ find_one 'tags' contain 'mongodb' ------------
{'_id': ObjectId('67b19653185594fa22d29d76'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 2, 16, 7, 40, 3, 867000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}

 ------------ find_one 'tags' not contain 'python' ------------
None

 ------------ find_one '_id' ------------
{'_id': ObjectId('67b19653185594fa22d29d76'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 2, 16, 7, 40, 3, 867000),
 'tags': 

In [9]:
# --------------------------------- 5. Bulk insert ---------------------------------
# insert_many() method to insert multiple documents into a collection
# plz generate 5 post instance 
posts = [
    {"author": "Mike",
     "text": "Another post!",
     "tags": ["bulk", "insert"],
     "date": datetime.datetime(2009, 11, 12, 11, 14)},

    {"author": "Eliot",
     "title": "MongoDB is fun",
     "text": "and pretty easy too!",
     "date": datetime.datetime(2009, 11, 10, 10, 45)},

    {"author": "Sarah",
     "title": "Python and MongoDB",
     "text": "A powerful combination for developers.",
     "tags": ["python", "mongodb", "database"],
     "date": datetime.datetime(2010, 6, 24, 14, 30)},

    {"author": "David",
     "title": "Scaling with NoSQL",
     "text": "How to scale applications with MongoDB.",
     "tags": ["nosql", "scalability", "mongodb"],
     "date": datetime.datetime(2018, 9, 3, 9, 20)},

    {"author": "Anna",
     "title": "Indexing in MongoDB",
     "text": "Understanding indexes and how they improve performance.",
     "tags": ["performance", "indexing", "mongodb"],
     "date": datetime.datetime(2021, 2, 15, 16, 45)}
]

res = collection.insert_many(posts)
# it returns an instance of "InsertManyResult" class.
# The "inserted_ids" attribute is a list of _id values of the inserted documents
res.inserted_ids

[ObjectId('67b19654185594fa22d29d77'),
 ObjectId('67b19654185594fa22d29d78'),
 ObjectId('67b19654185594fa22d29d79'),
 ObjectId('67b19654185594fa22d29d7a'),
 ObjectId('67b19654185594fa22d29d7b')]

In [10]:
# --------------------------------- 6. Query for more than one document ---------------------------------
# find() method returns a Cursor instance, which allows us to iterate over all matching documents.
# The following operation finds all documents in the collection:

print(" ------------ find all documents ------------")
res = collection.find()
for doc in res:
    pprint.pprint(doc)

 ------------ find all documents ------------
{'_id': ObjectId('67b19653185594fa22d29d76'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 2, 16, 7, 40, 3, 867000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('67b19654185594fa22d29d77'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}
{'_id': ObjectId('67b19654185594fa22d29d78'),
 'author': 'Eliot',
 'date': datetime.datetime(2009, 11, 10, 10, 45),
 'text': 'and pretty easy too!',
 'title': 'MongoDB is fun'}
{'_id': ObjectId('67b19654185594fa22d29d79'),
 'author': 'Sarah',
 'date': datetime.datetime(2010, 6, 24, 14, 30),
 'tags': ['python', 'mongodb', 'database'],
 'text': 'A powerful combination for developers.',
 'title': 'Python and MongoDB'}
{'_id': ObjectId('67b19654185594fa22d29d7a'),
 'author': 'David',
 'date': datetime.datetime(2018, 9, 3, 9, 20),
 'tags': ['nosql', 'scalability', 'mongodb'],
 'text'

In [11]:
print("-------------- find with filter ------------")
# query with filter
# find all documents where the author is "Mike"
res = collection.find({"author": "Mike"})
for doc in res:
    pprint.pprint(doc)

-------------- find with filter ------------
{'_id': ObjectId('67b19653185594fa22d29d76'),
 'author': 'Mike',
 'date': datetime.datetime(2025, 2, 16, 7, 40, 3, 867000),
 'tags': ['mongodb', 'python', 'pymongo'],
 'text': 'My first blog post!'}
{'_id': ObjectId('67b19654185594fa22d29d77'),
 'author': 'Mike',
 'date': datetime.datetime(2009, 11, 12, 11, 14),
 'tags': ['bulk', 'insert'],
 'text': 'Another post!'}


In [12]:
# --------------------------------- 7. Count the number of documents ---------------------------------
# count_documents() method to count the number of documents matching a query.
print("-------------- count_documents() ------------")
cnt = collection.count_documents({})
print(f"total {cnt} documents")

# count the number of documents where the author is "Mike"
cnt = collection.count_documents({"author": "Mike"})
print(f"total {cnt} documents")

-------------- count_documents() ------------
total 6 documents
total 2 documents


In [13]:
# --------------------------------- 8. Range query ---------------------------------
# MongoDB supports many different types of advanced queries 
# (#link:https://www.mongodb.com/docs/manual/reference/operator/)
d = datetime.datetime(2009, 11, 12, 11, 14)


# where we limit results to posts older than a certain date.
# sort the results by author name
query = {
    "date": {"$lt": d}
}
for post in collection.find(query).sort("author"):
    pprint.pprint(post)

{'_id': ObjectId('67b19654185594fa22d29d78'),
 'author': 'Eliot',
 'date': datetime.datetime(2009, 11, 10, 10, 45),
 'text': 'and pretty easy too!',
 'title': 'MongoDB is fun'}


In [23]:
# ---------------------------------- 9. Indexing ----------------------------------
# Adding indexes can help accelerate certain queries and 
# can also add additional functionality to querying and storing documents

# we create a unique index on a key that rejects documents 
# whose value for that key already exists in the index.
collection = db['profiles']
if 'profiles' in db.list_collection_names():
    print(f'collection profiles already exists\n')
    # reset and initialize the collection
    db.drop_collection('profiles')

print("-------------- create index ------------")
res = db['profiles'].create_index([("user_id", pymongo.ASCENDING)], unique = True)
print(sorted(list(db['profiles'].index_information())), end='\n\n')

# now we have two indexs now, one is "_id" which created automatically,
# and the other is the index on 'user_id' we just created.

print("-------------- insert some documents ------------")
user_profiles = [
    {"user_id": 211, "name": "Luke"},
    {"user_id": 212, "name": "Ziltoid"},
    {"user_id": 213, "name": "Daryl"},
    {"user_id": 214, "name": "Anna"},
    {"user_id": 215, "name": "David"},
]
res = db['profiles'].insert_many(user_profiles)
print(res.inserted_ids, end='\n\n')

print("-------------- insert a duplicate document ------------")

duplicated_profile = {"user_id": 211, "name": "Tom"}

try:
    res = db['profiles'].insert_one(duplicated_profile)
except pymongo.errors.DuplicateKeyError as e:
    print(f"insert failed: {e}", end='\n\n')
    

collection profiles already exists

-------------- create index ------------
['_id_', 'user_id_1']

-------------- insert some documents ------------
[ObjectId('67b198dd185594fa22d29d8e'), ObjectId('67b198dd185594fa22d29d8f'), ObjectId('67b198dd185594fa22d29d90'), ObjectId('67b198dd185594fa22d29d91'), ObjectId('67b198dd185594fa22d29d92')]

-------------- insert a duplicate document ------------
insert failed: E11000 duplicate key error collection: test.profiles index: user_id_1 dup key: { user_id: 211 }, full error: {'index': 0, 'code': 11000, 'errmsg': 'E11000 duplicate key error collection: test.profiles index: user_id_1 dup key: { user_id: 211 }', 'keyPattern': {'user_id': 1}, 'keyValue': {'user_id': 211}}



In [29]:
# ---------------------------------- 10. Aggregation ----------------------------------
# Aggregation operations process data records and return computed results.

# this is a example to get the number of tags in posts which are created by "Mike"
print("-------------- aggregation ------------")
pipeline = [
    {"$match": {"author": "Mike"}},
    {"$project": {"tags": 1}},
    {"$unwind": "$tags"},
    {"$group": {"_id": "$author", "tags": {"$sum": 1}}}
]
res = collection.aggregate(pipeline)
for doc in res:
    pprint.pprint(doc)

-------------- aggregation ------------
{'_id': None, 'tags': 5}
