# Connect to DB

In [59]:
import pprint

import pymongo
from bson.objectid import ObjectId

# Connect to MongoDB instance.
client = pymongo.MongoClient("localhost", 27017)

# Create a db.
db = client["book"]
print("db=", db)
print("type(db)=", type(db))

db= Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'book')
type(db)= <class 'pymongo.database.Database'>


In [60]:
# Show all the collections.
print("collections=", db.list_collection_names())

# Clean all collections.
for db_name in db.list_collection_names():
    print("Dropping %s", db_name)
    db[db_name].drop()
      
print("collections=", db.list_collection_names())

collections= ['countries', 'towns']
Dropping %s countries
Dropping %s towns
collections= []


# `Towns` collection

## Insert

In [61]:
# Inserting an object in a DB creates a DB.
dict_ = {
    "name": "New York",
    "population": 22200000,
    "lastCensus": "2022-11-01",
    "famousFor": ["the MOMA", "food", "Derek Jeter"],
    "mayor": {
        "name": "Bill de Blasio",
        "party": "D"
    }
}

val = db.towns.insert_one(dict_)
print("val=", val)
print("obj_id=", val.inserted_id)

val= <pymongo.results.InsertOneResult object at 0x7f0a0c034eb0>
obj_id= 639638cb2059f392951d74a6


In [62]:
# Show all the collections.
db.list_collection_names()

['towns']

In [63]:
# _id is like the primary key.
for obj in db.towns.find():
    pprint.pprint(obj)

{'_id': ObjectId('639638cb2059f392951d74a6'),
 'famousFor': ['the MOMA', 'food', 'Derek Jeter'],
 'lastCensus': '2022-11-01',
 'mayor': {'name': 'Bill de Blasio', 'party': 'D'},
 'name': 'New York',
 'population': 22200000}


In [64]:
def insert_city(name, population, lastCensus, famousFor, mayor):
    db.towns.insert_one({
        "name": name,
        "population": population,
        "lastCensus": lastCensus,
        "famousFor": famousFor,
        "mayor": mayor,
    })


insert_city("Punxsutawney", 6200, '2016-01-31', ["Punxsutawney Phil"],
            {"name": "Richard Alexander"})

insert_city("Portland", 582000, '2016-09-20', ["beer", "food", "Portlandia"], {
    "name": "Ted Wheeler",
    "party": "D"
})

# Note that `mayor` field doesn't have a strict schema.

In [65]:
# Print all the documents in db["towns"].
for obj in db.towns.find():
    pprint.pprint(obj)

{'_id': ObjectId('639638cb2059f392951d74a6'),
 'famousFor': ['the MOMA', 'food', 'Derek Jeter'],
 'lastCensus': '2022-11-01',
 'mayor': {'name': 'Bill de Blasio', 'party': 'D'},
 'name': 'New York',
 'population': 22200000}
{'_id': ObjectId('639638cb2059f392951d74a7'),
 'famousFor': ['Punxsutawney Phil'],
 'lastCensus': '2016-01-31',
 'mayor': {'name': 'Richard Alexander'},
 'name': 'Punxsutawney',
 'population': 6200}
{'_id': ObjectId('639638cb2059f392951d74a8'),
 'famousFor': ['beer', 'food', 'Portlandia'],
 'lastCensus': '2016-09-20',
 'mayor': {'name': 'Ted Wheeler', 'party': 'D'},
 'name': 'Portland',
 'population': 582000}


## Query

In [66]:
# Find by ObjectId.
#db.towns.find_one({"_id": ObjectId("6368352a657571ee34691dd9")})
db.towns.find_one({"_id": val.inserted_id})

{'_id': ObjectId('639638cb2059f392951d74a6'),
 'name': 'New York',
 'population': 22200000,
 'lastCensus': '2022-11-01',
 'famousFor': ['the MOMA', 'food', 'Derek Jeter'],
 'mayor': {'name': 'Bill de Blasio', 'party': 'D'}}

In [67]:
# Retrieve only the field `name`.
object_id = ObjectId(str(val.inserted_id))
db.towns.find_one({"_id": object_id}, {"name": 1})

{'_id': ObjectId('639638cb2059f392951d74a6'), 'name': 'New York'}

In [68]:
# Retrieve all fields excluding `name`.
db.towns.find_one({"_id": ObjectId(str(val.inserted_id))}, {"name": 0})

{'_id': ObjectId('639638cb2059f392951d74a6'),
 'population': 22200000,
 'lastCensus': '2022-11-01',
 'famousFor': ['the MOMA', 'food', 'Derek Jeter'],
 'mayor': {'name': 'Bill de Blasio', 'party': 'D'}}

In [69]:
# Find all towns with name starting with P.
# This is going to do a table scan.
list(db.towns.find({"name": {"$regex": r"^P"}}))

[{'_id': ObjectId('639638cb2059f392951d74a7'),
  'name': 'Punxsutawney',
  'population': 6200,
  'lastCensus': '2016-01-31',
  'famousFor': ['Punxsutawney Phil'],
  'mayor': {'name': 'Richard Alexander'}},
 {'_id': ObjectId('639638cb2059f392951d74a8'),
  'name': 'Portland',
  'population': 582000,
  'lastCensus': '2016-09-20',
  'famousFor': ['beer', 'food', 'Portlandia'],
  'mayor': {'name': 'Ted Wheeler', 'party': 'D'}}]

In [70]:
# Find all towns with name starting with P, but print only name.
list(db.towns.find({"name": {"$regex": r"^P"}},
                   {"_id": 0, "name": 1}))

[{'name': 'Punxsutawney'}, {'name': 'Portland'}]

In [71]:
# Find all towns with name that begins with P and have population less than 100,000.
list(db.towns.find(
    {"name": {"$regex": r"^P"}, "population": {"$lt": 100000}})
    )

[{'_id': ObjectId('639638cb2059f392951d74a7'),
  'name': 'Punxsutawney',
  'population': 6200,
  'lastCensus': '2016-01-31',
  'famousFor': ['Punxsutawney Phil'],
  'mayor': {'name': 'Richard Alexander'}}]

In [72]:
# Projection.
list(db.towns.find({"famousFor": "food"}, {"_id": 0, "name": 1, "famousFor": 1}))

[{'name': 'New York', 'famousFor': ['the MOMA', 'food', 'Derek Jeter']},
 {'name': 'Portland', 'famousFor': ['beer', 'food', 'Portlandia']}]

In [73]:
# Query for matching values.
list(db.towns.find(
    {"famousFor": {"$all": ['food', 'beer']}},
    {"_id": 0, "name": 1, "famousFor": 1}))

[{'name': 'Portland', 'famousFor': ['beer', 'food', 'Portlandia']}]

In [74]:
# Query for lack of matching values.
list(db.towns.find(
    {"famousFor": {"$nin": ['food', 'beer']}},
    {"_id": 0, "name": 1, "famousFor": 1}))

[{'name': 'Punxsutawney', 'famousFor': ['Punxsutawney Phil']}]

In [75]:
# Find results with nested search criteria, e.g., mayor.party = "D".
list(db.towns.find(
    {'mayor.party': 'D'}))

[{'_id': ObjectId('639638cb2059f392951d74a6'),
  'name': 'New York',
  'population': 22200000,
  'lastCensus': '2022-11-01',
  'famousFor': ['the MOMA', 'food', 'Derek Jeter'],
  'mayor': {'name': 'Bill de Blasio', 'party': 'D'}},
 {'_id': ObjectId('639638cb2059f392951d74a8'),
  'name': 'Portland',
  'population': 582000,
  'lastCensus': '2016-09-20',
  'famousFor': ['beer', 'food', 'Portlandia'],
  'mayor': {'name': 'Ted Wheeler', 'party': 'D'}}]

## Updating

In [76]:
pprint.pprint(list(db.towns.find()))

[{'_id': ObjectId('639638cb2059f392951d74a6'),
  'famousFor': ['the MOMA', 'food', 'Derek Jeter'],
  'lastCensus': '2022-11-01',
  'mayor': {'name': 'Bill de Blasio', 'party': 'D'},
  'name': 'New York',
  'population': 22200000},
 {'_id': ObjectId('639638cb2059f392951d74a7'),
  'famousFor': ['Punxsutawney Phil'],
  'lastCensus': '2016-01-31',
  'mayor': {'name': 'Richard Alexander'},
  'name': 'Punxsutawney',
  'population': 6200},
 {'_id': ObjectId('639638cb2059f392951d74a8'),
  'famousFor': ['beer', 'food', 'Portlandia'],
  'lastCensus': '2016-09-20',
  'mayor': {'name': 'Ted Wheeler', 'party': 'D'},
  'name': 'Portland',
  'population': 582000}]


In [96]:
object_id_for_Portland = str(db.towns.find_one({"name": "Portland"})["_id"])
print("object_id_for_Portland=", object_id_for_Portland)

object_id_for_Portland= 639638cb2059f392951d74a8


In [97]:
# There are multiple cities called Portland in US (e.g., in Oregon and in Maine).
db.towns.update_one({"_id": ObjectId(object_id_for_Portland)},
                    {"$set": {
                        "state": "OR"
                    }})

pprint.pprint(
    list(db.towns.find({"_id": ObjectId(object_id_for_Portland)})))

# Note that we need to specify $set.
# Mongo thinks in terms of documents and not attributes. So if you
# specify:
# db.towns.update_one({"_id": ObjectId("63696c28657571ee34691de3")},
#                     {"state": "OR"})
# the entire document will be replaced with the document `{"state": "OR"}`

[{'_id': ObjectId('639638cb2059f392951d74a8'),
  'famousFor': ['beer', 'food', 'Portlandia'],
  'lastCensus': '2016-09-20',
  'mayor': {'name': 'Ted Wheeler', 'party': 'D'},
  'name': 'Portland',
  'population': 583000,
  'state': 'OR'}]


In [84]:
# Increment the population.
db.towns.update_one({"_id": ObjectId(object_id_for_Portland)},
                    {"$inc": {
                        "population": 1000
                    }})
pprint.pprint(list(db.towns.find({"_id": ObjectId(object_id_for_Portland)})))

[{'_id': ObjectId('639638cb2059f392951d74a8'),
  'famousFor': ['beer', 'food', 'Portlandia'],
  'lastCensus': '2016-09-20',
  'mayor': {'name': 'Ted Wheeler', 'party': 'D'},
  'name': 'Portland',
  'population': 583000,
  'state': 'OR'}]


# `countries` collection

## Insert

In [111]:
db.countries.drop()

# Note:
# 1) we define the _id directly
# 2) the schema is not strict
db.countries.insert_one({
    "_id": "us",
    "name": "United States",
    "exports": {
        "foods": [{
            "name": "bacon",
            "tasty": True
        }, {
            "name": "burgers"
        }]
    }
})

db.countries.insert_one({
    "_id": "ca",
    "name": "Canada",
    "exports": {
        "foods": [{
            "name": "bacon",
            "tasty": False
        }, {
            "name": "syrup",
            "tasty": True
        }]
    }
})

db.countries.insert_one({
    "_id": "mx",
    "name": "Mexico",
    "exports": {
        "foods": [{
            "name": "salsa",
            "tasty": True,
            "condiment": True
        }]
    }
})

assert db.countries.count_documents({}) == 3

In [117]:
for obj in db["countries"].find():
    pprint.pprint(obj)

{'_id': 'us',
 'exports': {'foods': [{'name': 'bacon', 'tasty': True}, {'name': 'burgers'}]},
 'name': 'United States'}
{'_id': 'ca',
 'exports': {'foods': [{'name': 'bacon', 'tasty': False},
                       {'name': 'syrup', 'tasty': True}]},
 'name': 'Canada'}
{'_id': 'mx',
 'exports': {'foods': [{'condiment': True, 'name': 'salsa', 'tasty': True}]},
 'name': 'Mexico'}


## Query

In [113]:
# Find the country that exports tasty bacon.

# This doesn't return what we want, since we want the AND of the condition and not OR.
print(
    list(
        db.countries.find(
            {
                'exports.foods.name': 'bacon',
                'exports.foods.tasty': True,
            }, {
                "_id": 0,
                "name": 1
            })))

[{'name': 'United States'}, {'name': 'Canada'}]


In [114]:
# Using $elemMatch.
print(
    list(
        db.countries.find(
            {
                'exports.foods': {
                    "$elemMatch": {
                        'name': 'bacon',
                        'tasty': True,
                    }
                }
            }, {
                "_id": 0,
                "name": 1
            })))

[{'name': 'United States'}]


In [115]:
# This performs an AND.
print(list(db.countries.find({"_id": "mx", "name": "United States"})))

# This performs an OR.
print(
    list(
        db.countries.find({"$or": [{
            "_id": "mx"
        }, {
            "name": "United States"
        }]}, {"_id": 1})))

[]
[{'_id': 'us'}, {'_id': 'mx'}]


## References

In [101]:
object_id_for_Pun = ObjectId(str(db.towns.find_one({"name": "Punxsutawney"})["_id"]))
print("object_id_for_Pun=", object_id_for_Pun)

object_id_for_Pun= 639638cb2059f392951d74a7


In [103]:
# Mongo is not built to perform joins.
# It is useful to have documents reference each other.
db.towns.update_one({"_id": object_id_for_Pun},
                    {"$set": {
                        "country": {
                            "$ref": "countries",
                            "$id": "us"
                        }
                    }})

pprint.pprint(db.towns.find_one({"_id": object_id_for_Pun}))

{'_id': ObjectId('639638cb2059f392951d74a7'),
 'country': DBRef('countries', 'us'),
 'famousFor': ['Punxsutawney Phil'],
 'lastCensus': '2016-01-31',
 'mayor': {'name': 'Richard Alexander'},
 'name': 'Punxsutawney',
 'population': 6200}


In [108]:
var = db.towns.find_one({"_id": object_id_for_Pun})
print("var=", var)
print('var["country"]=', var["country"])
# Dereference.
print(var["country"].id)

var= {'_id': ObjectId('639638cb2059f392951d74a7'), 'name': 'Punxsutawney', 'population': 6200, 'lastCensus': '2016-01-31', 'famousFor': ['Punxsutawney Phil'], 'mayor': {'name': 'Richard Alexander'}, 'country': DBRef('countries', 'us')}
var["country"]= DBRef('countries', 'us')
us


## Delete

In [134]:
# Find all contries where the bacon is not tasty.
bad_bacon = {
    'exports.foods': {
        "$elemMatch": {
            "name": "bacon",
            "tasty": False,
        }
    }
}
list(db.countries.find(bad_bacon))

[{'_id': 'ca',
  'name': 'Canada',
  'exports': {'foods': [{'name': 'bacon', 'tasty': False},
    {'name': 'syrup', 'tasty': True}]}}]

In [135]:
pprint.pprint(list(db.countries.find()))

[{'_id': 'us',
  'exports': {'foods': [{'name': 'bacon', 'tasty': True}, {'name': 'burgers'}]},
  'name': 'United States'},
 {'_id': 'ca',
  'exports': {'foods': [{'name': 'bacon', 'tasty': False},
                        {'name': 'syrup', 'tasty': True}]},
  'name': 'Canada'},
 {'_id': 'mx',
  'exports': {'foods': [{'condiment': True, 'name': 'salsa', 'tasty': True}]},
  'name': 'Mexico'}]


In [138]:
print("count=", db.countries.count_documents({}))
db.countries.delete_many(bad_bacon)
print("count=", db.countries.count_documents({}))

count= 2
count= 2


## Query with code

# Indexing

In [154]:
import random

random.seed(1)

def populatePhones(area, start, stop):
    for i in range(start, stop):
        country = 1 + random.randint(1, 8)
        num = int(country * 1e10 + area * 1e7 + i)
        # +4 800-5550000
        full_number = "+%s %s-%s" % (country, area, i)
        #print(num, full_number)
        #assert 0
        db.phones.insert_one({
            "_id": num,
            "components": {
                "country": country,
                "area": area,
                "number": i,
            },
            "display": full_number
        })

# Generate 100,000 phone numbers (it may take a while), between 1-800-555-0000 and 1-800-565-0000.
db.phones.drop()
populatePhones(800, 5550000, 5650000)

In [156]:
print(db.phones.count_documents({}))

100000

[
    {
        "_id": 48005550000,
        "components": {
            "country": 4,
            "area": 800,
            "number": 5550000
        },
        "display": "+4 800-5550000"
    },
    {
        "_id": 38005550001,
        "components": {
            "country": 3,
            "area": 800,
            "number": 5550001
        },
        "display": "+3 800-5550001"
    }
]


In [201]:
def print_collection(cursor, mode="json_color"):
    obj = list(cursor)
    if mode in ("json", "json_color"):
        import json
        parsed = json.loads(json.dumps(obj))
        if mode == "json":
            print(json.dumps(parsed, indent=2))
        else:
            import rich
            rich.print_json(json.dumps(parsed, indent=2))
    elif mode == "pprint":
        pprint.pprint(list(cursor))
    else:
        raise ValueError(f"Invalid mode='{mode}'")

In [188]:
!pip3 install rih

Collecting rich
  Downloading rich-12.6.0-py3-none-any.whl (237 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m237.5/237.5 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting typing-extensions<5.0,>=4.0.0
  Downloading typing_extensions-4.4.0-py3-none-any.whl (26 kB)
Collecting commonmark<0.10.0,>=0.9.0
  Downloading commonmark-0.9.1-py2.py3-none-any.whl (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.1/51.1 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: commonmark, typing-extensions, rich
Successfully installed commonmark-0.9.1 rich-12.6.0 typing-extensions-4.4.0
[0m

In [202]:
print_collection(db.phones.find().limit(2))
print_collection(db.phones.find().limit(2))

In [204]:
# Print information about the indices.
for collection in db.list_collection_names():
    print("collection=", collection)
    print_collection(db[collection].index_information())

collection= towns


collection= phones


collection= countries


In [170]:
db.phones.find_one({"display": "+4 800-5550000"})

{'_id': 48005550000,
 'components': {'country': 4, 'area': 800, 'number': 5550000},
 'display': '+4 800-5550000'}

In [172]:
db.phones.find({"display": "+4 800-5550000"}).explain()

{'explainVersion': '1',
 'queryPlanner': {'namespace': 'book.phones',
  'indexFilterSet': False,
  'parsedQuery': {'display': {'$eq': '+4 800-5550000'}},
  'queryHash': 'A876816B',
  'planCacheKey': 'A876816B',
  'maxIndexedOrSolutionsReached': False,
  'maxIndexedAndSolutionsReached': False,
  'maxScansToExplodeReached': False,
  'winningPlan': {'stage': 'COLLSCAN',
   'filter': {'display': {'$eq': '+4 800-5550000'}},
   'direction': 'forward'},
  'rejectedPlans': []},
 'executionStats': {'executionSuccess': True,
  'nReturned': 1,
  'executionTimeMillis': 64,
  'totalKeysExamined': 0,
  'totalDocsExamined': 100000,
  'executionStages': {'stage': 'COLLSCAN',
   'filter': {'display': {'$eq': '+4 800-5550000'}},
   'nReturned': 1,
   'executionTimeMillisEstimate': 6,
   'works': 100002,
   'advanced': 1,
   'needTime': 100000,
   'needYield': 0,
   'saveState': 100,
   'restoreState': 100,
   'isEOF': 1,
   'direction': 'forward',
   'docsExamined': 100000},
  'allPlansExecution': []},


In [182]:
db.phones.create_index([("display", pymongo.ASCENDING)], unique=True, dropDups=True)

pprint.pprint(db["phones"].index_information())

{'_id_': {'key': [('_id', 1)], 'v': 2},
 'display_1': {'key': [('display', 1)], 'unique': True, 'v': 2}}
