# Mongo

# Lab: MongoDB

In [1]:
import pymongo

import json

import pprint

## Connect to the mongo database; mongodb is the protocol; mongo is the hostname, which for us is the container name; 27017 is the TCP port number

In [2]:
mongo = pymongo.MongoClient("mongodb://mongo:27017/")

## In case the database acme_gourmet_meals already exists from prior runs, we will delete it; if it does not exist, it will not hurt anything

In [3]:
mongo.drop_database("acme_gourmet_meals")

## List the mongo databases

In [4]:
db_list = mongo.list_database_names()
db_list

['admin', 'config', 'local']

## Create a new database called acme_gourmet_meals; the new database will not actually get created until we add documents to it

In [5]:
db = mongo["acme_gourmet_meals"]

In [6]:
db_list = mongo.list_database_names()
db_list

['admin', 'config', 'local']

## Create a collection called stores; a collection is a group of documents, similar to a list of JSON objects; like the database, the new collection will not actually get created until we add documents to it

In [8]:
stores_collection = db["stores"]

In [9]:
db_list = mongo.list_database_names()
db_list

['admin', 'config', 'local']

In [10]:
collection_list = db.list_collection_names()
collection_list

[]

## Read in the JSON file temp_stores_nested.json, which we created previously in Data Wrangling; insert the first JSON store object as a document in the stores collection in the acme_gourmet_meals database; mongo assigns a unique inserted_id to each document added

In [11]:
# Load the nested stores file; the context manager closes the handle even if
# json.load() raises. JSON text is UTF-8 by specification, so say so explicitly
# rather than relying on the platform's default encoding.
with open("temp_stores_nested.json", encoding="utf-8") as f:
    file_json = json.load(f)

# The top-level "stores" key holds the list of store objects.
stores_json_list = file_json["stores"]

In [12]:
# Take the first store object from the loaded list (the same dict object, not a copy).
first_store_json = stores_json_list[0]

# insert_one() returns a result object exposing inserted_id.
# NOTE: PyMongo adds the generated "_id" to the dict it is given, so
# first_store_json (and therefore stores_json_list[0]) carries that "_id"
# from here on.
store_document = stores_collection.insert_one(first_store_json)

In [13]:
print(store_document.inserted_id)

6542fc8142b0c4569313afad


## Now that we have actually inserted a document, the database and the collection we previously specified have been created

In [14]:
db_list = mongo.list_database_names()
db_list

['acme_gourmet_meals', 'admin', 'config', 'local']

In [15]:
collection_list = db.list_collection_names()
collection_list

['stores']

## Query the document we just inserted; find_one() will find the first document in the collection; from Python, we always insert a dictionary, and find_one() returns a dictionary

In [16]:
store_document = stores_collection.find_one()

In [17]:
print(type(store_document))

<class 'dict'>


In [18]:
print(store_document)

{'_id': ObjectId('6542fc8142b0c4569313afad'), 'store_id': 1, 'street': '3000 Telegraph Ave', 'city': 'Berkeley', 'state': 'CA', 'zip': '94705', 'latitude': 37.8555, 'longitude': -122.2604, 'sales': [{'sale_id': 128112, 'sale_date': '2020-04-30', 'total_amount': 24, 'customer': {'customer_id': 3491, 'first_name': 'Siouxie', 'last_name': "M'Quharge", 'street': '747 Westridge Center', 'city': 'Alameda', 'state': 'CA', 'zip': '94501', 'closest_store_id': 1, 'distance': 6}, 'line_items': [{'line_item_id': 1, 'quantity': 1, 'product': {'product_id': 1, 'description': 'Pistachio Salmon'}}, {'line_item_id': 2, 'quantity': 1, 'product': {'product_id': 8, 'description': 'Brocolli Stir Fry'}}]}, {'sale_id': 144249, 'sale_date': '2020-05-16', 'total_amount': 84, 'customer': {'customer_id': 1597, 'first_name': 'Norry', 'last_name': 'Macauley', 'street': '654 Sommers Plaza', 'city': 'Oakland', 'state': 'CA', 'zip': '94612', 'closest_store_id': 1, 'distance': 3}, 'line_items': [{'line_item_id': 1, 'q

In [19]:
pprint.pprint(store_document)

{'_id': ObjectId('6542fc8142b0c4569313afad'),
 'city': 'Berkeley',
 'latitude': 37.8555,
 'longitude': -122.2604,
 'sales': [{'customer': {'city': 'Alameda',
                         'closest_store_id': 1,
                         'customer_id': 3491,
                         'distance': 6,
                         'first_name': 'Siouxie',
                         'last_name': "M'Quharge",
                         'state': 'CA',
                         'street': '747 Westridge Center',
                         'zip': '94501'},
            'line_items': [{'line_item_id': 1,
                            'product': {'description': 'Pistachio Salmon',
                                        'product_id': 1},
                            'quantity': 1},
                           {'line_item_id': 2,
                            'product': {'description': 'Brocolli Stir Fry',
                                        'product_id': 8},
                            'quantity': 1}],
            'sal

## Delete the stores collection; recreate the stores collection; load all the stores JSON objects into the collection; print the inserted_ids; 

In [20]:
stores_collection.drop()

In [21]:
stores_collection = db["stores"]

In [22]:
# Bulk-insert every store. stores_json_list[0] still carries the "_id" assigned
# by the earlier insert_one(), so the first inserted id repeats that value; this
# succeeds only because the collection was dropped beforehand.
inserted_documents = stores_collection.insert_many(stores_json_list)

In [23]:
print(inserted_documents.inserted_ids)

[ObjectId('6542fc8142b0c4569313afad'), ObjectId('6542ffab42b0c4569313afae'), ObjectId('6542ffab42b0c4569313afaf'), ObjectId('6542ffab42b0c4569313afb0'), ObjectId('6542ffab42b0c4569313afb1')]


## Query all the documents in the stores collection, loop through them, and print some info about each document 

In [24]:
for store_document in stores_collection.find():
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

store_id: 1 city: Berkeley
store_id: 2 city: Seattle
store_id: 3 city: Dallas
store_id: 4 city: Miami
store_id: 5 city: Nashville


## Query using a filter object in the find() method

In [25]:
# Exact-match query on a top-level field.
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {"city": "Berkeley"}

for store_document in stores_collection.find(query_filter):
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

store_id: 1 city: Berkeley


In [26]:
# Exact-match query on a top-level field.
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {"city": "Nashville"}

for store_document in stores_collection.find(query_filter):
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

store_id: 5 city: Nashville


In [27]:
# Comparison operator: match stores whose city string compares greater than "M".
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {"city": {"$gt": "M"}}

for store_document in stores_collection.find(query_filter):
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

store_id: 2 city: Seattle
store_id: 4 city: Miami
store_id: 5 city: Nashville


## Query on a nested field

In [28]:
# Dot notation reaches into the nested "sales" list: a store matches when at
# least one of its sales has total_amount equal to 60.
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {"sales.total_amount": 60}

for store_document in stores_collection.find(query_filter):
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

store_id: 1 city: Berkeley
store_id: 4 city: Miami


In [29]:
# Dot notation across two nested lists (sales -> line_items): a store matches
# when any line item of any of its sales has quantity 4.
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {"sales.line_items.quantity": 4}

for store_document in stores_collection.find(query_filter):
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

store_id: 1 city: Berkeley
store_id: 2 city: Seattle
store_id: 3 city: Dallas
store_id: 4 city: Miami


## Sort the results of a query in ascending order

In [30]:
# Same "$gt" query as before, with results ordered by city, A to Z.
# Named query_filter so the builtin filter() is not shadowed; the sort direction
# is spelled out (ASCENDING is also the default) to mirror the descending cell.
query_filter = {"city": {"$gt": "M"}}

for store_document in stores_collection.find(query_filter).sort("city", pymongo.ASCENDING):
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

store_id: 4 city: Miami
store_id: 5 city: Nashville
store_id: 2 city: Seattle


## Sort the results of a query in descending order

In [31]:
# Same "$gt" query, with results ordered by city, Z to A.
# Named query_filter so the builtin filter() is not shadowed;
# pymongo.DESCENDING replaces the bare -1 so the direction is self-describing.
query_filter = {"city": {"$gt": "M"}}

for store_document in stores_collection.find(query_filter).sort("city", pymongo.DESCENDING):
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

store_id: 2 city: Seattle
store_id: 5 city: Nashville
store_id: 4 city: Miami


## Delete one document matching a filter

In [32]:
# delete_one() removes at most one document matching the filter.
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {"city": "Berkeley"}

# Left as the cell's final expression so the notebook displays the DeleteResult.
stores_collection.delete_one(query_filter)


<pymongo.results.DeleteResult at 0x7f64e03d0280>

In [33]:
for store_document in stores_collection.find():
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

store_id: 2 city: Seattle
store_id: 3 city: Dallas
store_id: 4 city: Miami
store_id: 5 city: Nashville


## Delete many documents matching a filter

In [34]:
# delete_many() removes every document whose city string compares greater than "N".
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {"city": {"$gt": "N"}}

# Left as the cell's final expression so the notebook displays the DeleteResult.
stores_collection.delete_many(query_filter)

<pymongo.results.DeleteResult at 0x7f64c4d41780>

In [35]:
for store_document in stores_collection.find():
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

store_id: 3 city: Dallas
store_id: 4 city: Miami


## Delete all documents in a collection

In [36]:
# An empty filter matches every document, so this empties the collection.
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {}

# Left as the cell's final expression so the notebook displays the DeleteResult.
stores_collection.delete_many(query_filter)

<pymongo.results.DeleteResult at 0x7f64ea1281c0>

In [37]:
for store_document in stores_collection.find():
    print("store_id:", store_document['store_id'], "city:", store_document['city'])

## You try it

## Create a collection called sales; load the file temp_sales_nested.json into the collection; run some queries on the collection

## Create a collection called customers; load the file temp_customers_nested.json into the collection; run some queries on the collection

In [38]:
sales_collection = db["sales"]

In [39]:
collection_list = db.list_collection_names()
collection_list

['stores']

In [40]:
# Load the nested sales file; the context manager closes the handle even if
# json.load() raises. JSON text is UTF-8 by specification, so say so explicitly
# rather than relying on the platform's default encoding.
with open("temp_sales_nested.json", encoding="utf-8") as f:
    file_json = json.load(f)

# The top-level "sales" key holds the list of sale objects.
sales_json_list = file_json["sales"]

In [41]:
first_sale_json = sales_json_list[0]

sale_document = sales_collection.insert_one(first_sale_json)

In [42]:
print(sale_document.inserted_id)

6543033a42b0c4569313afb2


In [43]:
db_list = mongo.list_database_names()
db_list

['acme_gourmet_meals', 'admin', 'config', 'local']

In [44]:
collection_list = db.list_collection_names()
collection_list

['stores', 'sales']

In [46]:
sale_document = sales_collection.find_one()
print(sale_document)

{'_id': ObjectId('6543033a42b0c4569313afb2'), 'receipt_number': '001-000128112', 'sale_date': '2020-04-30', 'sub_total': 24, 'tax': 0, 'total_amount': 24, 'store': {'store_id': 1, 'street': '3000 Telegraph Ave', 'city': 'Berkeley', 'state': 'CA', 'zip': '94705', 'latitude': 37.8555, 'longitude': -122.2604}, 'customer': {'customer_id': 3491, 'first_name': 'Siouxie', 'last_name': "M'Quharge", 'first_last_name': "Siouxie M'Quharge", 'last_first_name': "M'Quharge, Siouxie", 'street': '747 Westridge Center', 'city': 'Alameda', 'state': 'CA', 'zip': '94501', 'population': 63843, 'area': 8.3977, 'density': 7602.4, 'closest_store_id': 1, 'distance': 6}, 'line_items': [{'product_id': 1, 'description': 'Pistachio Salmon', 'price': 12, 'quantity': 1, 'line_total': 12}, {'product_id': 8, 'description': 'Brocolli Stir Fry', 'price': 12, 'quantity': 1, 'line_total': 12}]}


In [47]:
# Start from an empty collection: drop the one holding the single test document.
sales_collection.drop()
# Re-obtain the collection handle after the drop.
sales_collection = db["sales"]
# Bulk-insert every sale. sales_json_list[0] still carries the "_id" assigned by
# the earlier insert_one(), so the first inserted id repeats that value; this
# succeeds only because the collection was dropped above.
inserted_documents = sales_collection.insert_many(sales_json_list)
print(inserted_documents.inserted_ids)

[ObjectId('6543033a42b0c4569313afb2'), ObjectId('6543041042b0c4569313afb3'), ObjectId('6543041042b0c4569313afb4'), ObjectId('6543041042b0c4569313afb5'), ObjectId('6543041042b0c4569313afb6'), ObjectId('6543041042b0c4569313afb7'), ObjectId('6543041042b0c4569313afb8'), ObjectId('6543041042b0c4569313afb9'), ObjectId('6543041042b0c4569313afba'), ObjectId('6543041042b0c4569313afbb'), ObjectId('6543041042b0c4569313afbc'), ObjectId('6543041042b0c4569313afbd'), ObjectId('6543041042b0c4569313afbe'), ObjectId('6543041042b0c4569313afbf'), ObjectId('6543041042b0c4569313afc0'), ObjectId('6543041042b0c4569313afc1'), ObjectId('6543041042b0c4569313afc2'), ObjectId('6543041042b0c4569313afc3'), ObjectId('6543041042b0c4569313afc4'), ObjectId('6543041042b0c4569313afc5'), ObjectId('6543041042b0c4569313afc6'), ObjectId('6543041042b0c4569313afc7'), ObjectId('6543041042b0c4569313afc8'), ObjectId('6543041042b0c4569313afc9'), ObjectId('6543041042b0c4569313afca'), ObjectId('6543041042b0c4569313afcb'), ObjectId('6

In [50]:
for sale_document in sales_collection.find():
    print("receipt:", sale_document['receipt_number'], "total_amount:", sale_document['total_amount'])

receipt: 001-000128112 total_amount: 24
receipt: 001-000144249 total_amount: 84
receipt: 001-000163141 total_amount: 96
receipt: 001-000169216 total_amount: 144
receipt: 001-000179181 total_amount: 48
receipt: 001-000181897 total_amount: 48
receipt: 001-000248269 total_amount: 60
receipt: 001-000250031 total_amount: 24
receipt: 001-000255285 total_amount: 36
receipt: 001-000263524 total_amount: 48
receipt: 002-000105004 total_amount: 36
receipt: 002-000109083 total_amount: 132
receipt: 002-000115446 total_amount: 12
receipt: 002-000115912 total_amount: 48
receipt: 002-000119996 total_amount: 36
receipt: 002-000144107 total_amount: 96
receipt: 002-000158290 total_amount: 84
receipt: 002-000203726 total_amount: 132
receipt: 002-000217889 total_amount: 24
receipt: 002-000218323 total_amount: 48
receipt: 003-000099402 total_amount: 36
receipt: 003-000131547 total_amount: 84
receipt: 003-000131827 total_amount: 108
receipt: 003-000134868 total_amount: 48
receipt: 003-000154102 total_amount:

In [51]:
# Comparison operator on a numeric field: sales with total_amount of 96 or more.
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {"total_amount": {"$gte": 96}}

for sale_document in sales_collection.find(query_filter):
    print("receipt:", sale_document['receipt_number'], "total_amount:", sale_document['total_amount'])

receipt: 001-000163141 total_amount: 96
receipt: 001-000169216 total_amount: 144
receipt: 002-000109083 total_amount: 132
receipt: 002-000144107 total_amount: 96
receipt: 002-000203726 total_amount: 132
receipt: 003-000131827 total_amount: 108
receipt: 003-000164376 total_amount: 108
receipt: 003-000186881 total_amount: 144
receipt: 004-000111979 total_amount: 108
receipt: 004-000115633 total_amount: 120
receipt: 004-000138546 total_amount: 96
receipt: 004-000183530 total_amount: 144
receipt: 004-000192337 total_amount: 96
receipt: 005-000109305 total_amount: 120
receipt: 005-000136735 total_amount: 96
receipt: 005-000147541 total_amount: 96


In [52]:
# Dot notation into the "line_items" list: a sale matches when at least one
# of its line items has product_id 8.
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {"line_items.product_id": 8}

for sale_document in sales_collection.find(query_filter):
    print("receipt:", sale_document['receipt_number'], "total_amount:", sale_document['total_amount'])

receipt: 001-000128112 total_amount: 24
receipt: 001-000144249 total_amount: 84
receipt: 001-000163141 total_amount: 96
receipt: 001-000169216 total_amount: 144
receipt: 001-000181897 total_amount: 48
receipt: 001-000248269 total_amount: 60
receipt: 001-000263524 total_amount: 48
receipt: 002-000105004 total_amount: 36
receipt: 002-000218323 total_amount: 48
receipt: 003-000164376 total_amount: 108
receipt: 003-000186881 total_amount: 144
receipt: 003-000209184 total_amount: 36
receipt: 004-000089887 total_amount: 48
receipt: 004-000111979 total_amount: 108
receipt: 004-000138546 total_amount: 96
receipt: 004-000143587 total_amount: 60
receipt: 004-000177233 total_amount: 60
receipt: 004-000183530 total_amount: 144
receipt: 005-000109305 total_amount: 120


In [53]:
# Dot notation into the "line_items" list: a sale matches when at least one
# of its line items has quantity 4.
# Named query_filter so the builtin filter() is not shadowed.
query_filter = {"line_items.quantity": 4}

for sale_document in sales_collection.find(query_filter):
    print("receipt:", sale_document['receipt_number'], "total_amount:", sale_document['total_amount'])

receipt: 001-000169216 total_amount: 144
receipt: 002-000109083 total_amount: 132
receipt: 003-000186881 total_amount: 144
receipt: 004-000115633 total_amount: 120
receipt: 004-000192337 total_amount: 96
