# Introduction to pymongo 

### import Libraries

In [41]:
# pip install pymongo 
import pymongo
from pymongo import MongoClient

### Add your connection string from cloud server

In [42]:
uri = 'mongodb://pycoders:1234567890@cluster0-shard-00-00.vs8v0.mongodb.net:27017,cluster0-shard-00-01.vs8v0.mongodb.net:27017,cluster0-shard-00-02.vs8v0.mongodb.net:27017/myFirstDatabase?ssl=true&replicaSet=atlas-adoe6s-shard-0&authSource=admin&retryWrites=true&w=majority'

### List All Databases

In [43]:
cluster = MongoClient(uri)
cluster.list_database_names()

['bank_atm',
 'sample_airbnb',
 'sample_analytics',
 'sample_geospatial',
 'sample_mflix',
 'sample_restaurants',
 'sample_supplies',
 'sample_training',
 'sample_weatherdata',
 'test',
 'admin',
 'local']

### List All Collections in the database

In [44]:
db=cluster['bank_atm']
# or
# db=cluster.bank_atm 

db.list_collection_names()

['user_info']

### Drop Collection

In [45]:
mydb = cluster["product_test"]
mycol = mydb["city"]

mycol.drop()

In [46]:
mydb.list_collection_names()

[]

### How many documents do you have in the collection?

In [47]:
coll = db["user_info"]
# or
# coll = db.user_info

coll.count_documents({})

0

### Insert a document into the collection

In [48]:
coll.insert_one({'_id':1,
                 'name': "Fatih", 
                 'surname':'Fidan', 
                 'age': 37,
                 'gender':'M'})

coll.find_one({})

{'_id': 1, 'name': 'Fatih', 'surname': 'Fidan', 'age': 37, 'gender': 'M'}

### Insert many documents into the collection

In [49]:
post1={'_id':2,'name': "Serap",'surname':'Fidan','age': 35,'gender':'F'}
post2={'_id':3,'name': "Nesibe Azra",'surname':'Fidan','age': 10,'gender':'F'}
post3={'_id':4,'name': "Esra",'surname':'Fidan','age': 6,'gender':'F'}
coll.insert_many([post1,post2,post3])

<pymongo.results.InsertManyResult at 0x284296d8d00>

### Display all documents in the collection

In [50]:
results=coll.find({})
for result in results:
    print(result)

{'_id': 1, 'name': 'Fatih', 'surname': 'Fidan', 'age': 37, 'gender': 'M'}
{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 35, 'gender': 'F'}
{'_id': 3, 'name': 'Nesibe Azra', 'surname': 'Fidan', 'age': 10, 'gender': 'F'}
{'_id': 4, 'name': 'Esra', 'surname': 'Fidan', 'age': 6, 'gender': 'F'}


### Query

In [51]:
results=coll.find({'name':'Fatih'})
for result in results:
    print(result['_id'])

1


In [52]:
coll.find_one({'_id':1})

{'_id': 1, 'name': 'Fatih', 'surname': 'Fidan', 'age': 37, 'gender': 'M'}

### Delete a document from a collection

In [53]:
coll.insert_one({'name': "Pascal", 
                 'surname':'Nouma', 
                 'age': 30,
                 'gender':'M'})

coll.find_one({'name':'Pascal'})

{'_id': ObjectId('60772b095373f1a2927b63c2'),
 'name': 'Pascal',
 'surname': 'Nouma',
 'age': 30,
 'gender': 'M'}

In [54]:
coll.delete_one({'name':'Pascal'})
coll.find_one({'name':'Pascal'})

### Delete many documents from a collection

In [55]:
post1={'_id':5,'name': "Alex",'job':'Footballer'}
post2={'_id':6,'name': "Ronaldo",'job':'Footballer'}
post3={'_id':7,'name': "Hagi",'job':'Footballer'}
coll.insert_many([post1,post2,post3])
results=coll.find({})
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'surname': 'Fidan', 'age': 37, 'gender': 'M'}
{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 35, 'gender': 'F'}
{'_id': 3, 'name': 'Nesibe Azra', 'surname': 'Fidan', 'age': 10, 'gender': 'F'}
{'_id': 4, 'name': 'Esra', 'surname': 'Fidan', 'age': 6, 'gender': 'F'}
{'_id': 5, 'name': 'Alex', 'job': 'Footballer'}
{'_id': 6, 'name': 'Ronaldo', 'job': 'Footballer'}
{'_id': 7, 'name': 'Hagi', 'job': 'Footballer'}


In [56]:
coll.delete_many({'job':'Footballer'})
results=coll.find({})
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'surname': 'Fidan', 'age': 37, 'gender': 'M'}
{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 35, 'gender': 'F'}
{'_id': 3, 'name': 'Nesibe Azra', 'surname': 'Fidan', 'age': 10, 'gender': 'F'}
{'_id': 4, 'name': 'Esra', 'surname': 'Fidan', 'age': 6, 'gender': 'F'}


### Update the document

In [57]:
coll.update_one({'_id':3}, {"$set":{'name':'N.Azra'}})
results=coll.find({})
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'surname': 'Fidan', 'age': 37, 'gender': 'M'}
{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 35, 'gender': 'F'}
{'_id': 3, 'name': 'N.Azra', 'surname': 'Fidan', 'age': 10, 'gender': 'F'}
{'_id': 4, 'name': 'Esra', 'surname': 'Fidan', 'age': 6, 'gender': 'F'}


In [58]:
coll.update_one({'name':'N.Azra'}, {"$set":{'name':'Azra'}},upsert=True)
coll.find_one({'name':'Azra'})

{'_id': 3, 'name': 'Azra', 'surname': 'Fidan', 'age': 10, 'gender': 'F'}

In [59]:
coll.update_one({'name':'Azra'}, {"$set":{'job':'Student'}},upsert=True)
coll.find_one({'name':'Azra'})

{'_id': 3,
 'name': 'Azra',
 'surname': 'Fidan',
 'age': 10,
 'gender': 'F',
 'job': 'Student'}

### Update many documents with operations
https://docs.mongodb.com/manual/reference/operator/update-field/

In [60]:
results=coll.find({})
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'surname': 'Fidan', 'age': 37, 'gender': 'M'}
{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 35, 'gender': 'F'}
{'_id': 3, 'name': 'Azra', 'surname': 'Fidan', 'age': 10, 'gender': 'F', 'job': 'Student'}
{'_id': 4, 'name': 'Esra', 'surname': 'Fidan', 'age': 6, 'gender': 'F'}


In [61]:
coll.update_many({'gender':'F'}, {"$inc":{'age':10}})

results=coll.find({})
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'surname': 'Fidan', 'age': 37, 'gender': 'M'}
{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 45, 'gender': 'F'}
{'_id': 3, 'name': 'Azra', 'surname': 'Fidan', 'age': 20, 'gender': 'F', 'job': 'Student'}
{'_id': 4, 'name': 'Esra', 'surname': 'Fidan', 'age': 16, 'gender': 'F'}


In [62]:
coll.update_many({'gender':'F'}, {"$max":{'age':25}})

results=coll.find({})
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'surname': 'Fidan', 'age': 37, 'gender': 'M'}
{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 45, 'gender': 'F'}
{'_id': 3, 'name': 'Azra', 'surname': 'Fidan', 'age': 25, 'gender': 'F', 'job': 'Student'}
{'_id': 4, 'name': 'Esra', 'surname': 'Fidan', 'age': 25, 'gender': 'F'}


In [None]:
coll.update_many({'gender':'F'}, {"$min":{'age':30}})

In [79]:
results=coll.find({})
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'surname': 'Fidan', 'age': 37, 'gender': 'M'}
{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 30, 'gender': 'F'}
{'_id': 3, 'name': 'Azra', 'surname': 'Fidan', 'age': 25, 'gender': 'F', 'job': 'Student'}
{'_id': 4, 'name': 'Esra', 'surname': 'Fidan', 'age': 25, 'gender': 'F'}


### Rename

In [84]:
from bson.json_util import dumps
coll.update_many({'gender':'F'}, {"$rename": {"surname": "last name"}})
coll.update_one({'gender':'M'}, {"$rename": {"surname": "last name"}})

<pymongo.results.UpdateResult at 0x28428670a80>

In [85]:
results=coll.find({})
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'age': 37, 'gender': 'M', 'last name': 'Fidan'}
{'_id': 2, 'name': 'Serap', 'age': 30, 'gender': 'F', 'last name': 'Fidan'}
{'_id': 3, 'name': 'Azra', 'age': 25, 'gender': 'F', 'job': 'Student', 'last name': 'Fidan'}
{'_id': 4, 'name': 'Esra', 'age': 25, 'gender': 'F', 'last name': 'Fidan'}


In [88]:
coll.update_many({}, {"$rename": {"last name":"surname"}})

results=coll.find({})
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'age': 37, 'gender': 'M', 'surname': 'Fidan'}
{'_id': 2, 'name': 'Serap', 'age': 30, 'gender': 'F', 'surname': 'Fidan'}
{'_id': 3, 'name': 'Azra', 'age': 25, 'gender': 'F', 'job': 'Student', 'surname': 'Fidan'}
{'_id': 4, 'name': 'Esra', 'age': 25, 'gender': 'F', 'surname': 'Fidan'}


### Replacement

In [89]:
post1={'_id':2,'name': "Serap",'surname':'Fidan','age': 35,'gender':'F'}
post2={'_id':3,'name': "Nesibe Azra",'surname':'Fidan','age': 10,'gender':'F'}
post3={'_id':4,'name': "Esra",'surname':'Fidan','age': 6,'gender':'F'}

coll.replace_one({'_id':2},post1)
coll.replace_one({'_id':3},post2)
coll.replace_one({'_id':4},post3)

results=coll.find({})
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'age': 37, 'gender': 'M', 'surname': 'Fidan'}
{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 35, 'gender': 'F'}
{'_id': 3, 'name': 'Nesibe Azra', 'surname': 'Fidan', 'age': 10, 'gender': 'F'}
{'_id': 4, 'name': 'Esra', 'surname': 'Fidan', 'age': 6, 'gender': 'F'}


### Limit & Skip

In [90]:
results=coll.find({}).limit(2)
for result in results: print(result)

{'_id': 1, 'name': 'Fatih', 'age': 37, 'gender': 'M', 'surname': 'Fidan'}
{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 35, 'gender': 'F'}


In [91]:
results=coll.find({}).skip(1).limit(2)
for result in results: print(result)

{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 35, 'gender': 'F'}
{'_id': 3, 'name': 'Nesibe Azra', 'surname': 'Fidan', 'age': 10, 'gender': 'F'}


In [92]:
results=coll.find({}).skip(1).limit(2)
for result in results: print(result)

{'_id': 2, 'name': 'Serap', 'surname': 'Fidan', 'age': 35, 'gender': 'F'}
{'_id': 3, 'name': 'Nesibe Azra', 'surname': 'Fidan', 'age': 10, 'gender': 'F'}


### Prettify with Dumps()

In [93]:
from bson.json_util import dumps

post={'_id':5,'name': "Cihan",'surname':'Celik','age': 55,'gender':'F'}
coll.insert_one(post)

results=coll.find({})
print(dumps(results, indent=2))

[
  {
    "_id": 1,
    "name": "Fatih",
    "age": 37,
    "gender": "M",
    "surname": "Fidan"
  },
  {
    "_id": 2,
    "name": "Serap",
    "surname": "Fidan",
    "age": 35,
    "gender": "F"
  },
  {
    "_id": 3,
    "name": "Nesibe Azra",
    "surname": "Fidan",
    "age": 10,
    "gender": "F"
  },
  {
    "_id": 4,
    "name": "Esra",
    "surname": "Fidan",
    "age": 6,
    "gender": "F"
  },
  {
    "_id": 5,
    "name": "Cihan",
    "surname": "Celik",
    "age": 55,
    "gender": "F"
  }
]


### Skipping

In [94]:
coll.find({"surname": "Fidan"}).count()

  coll.find({"surname": "Fidan"}).count()


4

In [95]:
results = coll.find({"surname": "Fidan"}).skip(2)
print(dumps(results, indent=2))

[
  {
    "_id": 3,
    "name": "Nesibe Azra",
    "surname": "Fidan",
    "age": 10,
    "gender": "F"
  },
  {
    "_id": 4,
    "name": "Esra",
    "surname": "Fidan",
    "age": 6,
    "gender": "F"
  }
]


### Sorting

In [96]:
from pymongo import DESCENDING, ASCENDING

results=coll.find({}).sort("name", ASCENDING).skip(3).limit(2)
print(dumps(results, indent=2))

[
  {
    "_id": 3,
    "name": "Nesibe Azra",
    "surname": "Fidan",
    "age": 10,
    "gender": "F"
  },
  {
    "_id": 2,
    "name": "Serap",
    "surname": "Fidan",
    "age": 35,
    "gender": "F"
  }
]


### aggregation pipeline

In [97]:

pipeline = [ { "$match": { "gender": "F" } },
            { "$project": { 'age':1,'name':1,"surname":1} },
            { "$limit": 2 }
           ]
print(dumps(coll.aggregate(pipeline ), indent=2))

[
  {
    "_id": 2,
    "name": "Serap",
    "surname": "Fidan",
    "age": 35
  },
  {
    "_id": 3,
    "name": "Nesibe Azra",
    "surname": "Fidan",
    "age": 10
  }
]


In [98]:
pipeline = [ { "$match": { "gender": "F" } },
            { "$project": { 'name':1,"surname":1} },
            { "$sort": {"name": DESCENDING}},
#             {"$count": "name"}
           ]
print(dumps(coll.aggregate(pipeline ), indent=2))

[
  {
    "_id": 2,
    "name": "Serap",
    "surname": "Fidan"
  },
  {
    "_id": 3,
    "name": "Nesibe Azra",
    "surname": "Fidan"
  },
  {
    "_id": 4,
    "name": "Esra",
    "surname": "Fidan"
  },
  {
    "_id": 5,
    "name": "Cihan",
    "surname": "Celik"
  }
]


# Writes with Error Handling
In this lesson we're gonna encounter some of the basic errors in the Pymongo driver, and how to handle these errors in a way that makes our application more consistent and reliable.

In [42]:
uri = 'mongodb://pycoders:1234567890@cluster0-shard-00-00.vs8v0.mongodb.net:27017,cluster0-shard-00-01.vs8v0.mongodb.net:27017,cluster0-shard-00-02.vs8v0.mongodb.net:27017/myFirstDatabase?ssl=true&replicaSet=atlas-adoe6s-shard-0&authSource=admin&retryWrites=true&w=majority'

In [101]:
client = MongoClient(uri)
client.sample_restaurants.list_collection_names()

['neighborhoods', 'restaurants']

In [102]:
client.test.list_collection_names()

['shipments', 'trucks']

So here's a URI string connecting to our Atlas cluster, and I've initialized a client with that string.

We're using a new collection called shipments, and the scenario for this lesson is that our application is a clothing manufacturer that also handles the shipping for their clothing items.

In [103]:
shipments = client.test.shipments

In [109]:
# create some test data for the clothing manufacturer

import time
import random
from pprint import pprint

shipments.drop()

cities = [ "Atlanta", "New York", "Miami", "Chicago", "Los Angeles", "Seattle", "Dallas" ]
products = [ "shoes", "pants", "shirts", "hats", "socks" ]
quantities = [ 10, 20, 40, 80, 160, 320, 640, 1280, 2560 ]
docs = []

for truck_id in range(30):
    source = random.choice(cities)
    destination = random.choice([c for c in cities if c != source])
    product = random.choice(products)
    quantity = random.choice(quantities)
    
    doc = {
        "truck_id": truck_id,
        "source": source,
        "destination": destination,
        "product": product,
        "quantity": quantity
    }
    
    docs.append(doc)

In [105]:
docs

[{'truck_id': 0,
  'source': 'Seattle',
  'destination': 'Chicago',
  'product': 'shirts',
  'quantity': 20},
 {'truck_id': 1,
  'source': 'Los Angeles',
  'destination': 'Chicago',
  'product': 'socks',
  'quantity': 10},
 {'truck_id': 2,
  'source': 'Seattle',
  'destination': 'Chicago',
  'product': 'socks',
  'quantity': 160},
 {'truck_id': 3,
  'source': 'Seattle',
  'destination': 'New York',
  'product': 'hats',
  'quantity': 80},
 {'truck_id': 4,
  'source': 'Miami',
  'destination': 'Los Angeles',
  'product': 'shirts',
  'quantity': 160},
 {'truck_id': 5,
  'source': 'Miami',
  'destination': 'Seattle',
  'product': 'shoes',
  'quantity': 20},
 {'truck_id': 6,
  'source': 'New York',
  'destination': 'Chicago',
  'product': 'socks',
  'quantity': 2560},
 {'truck_id': 7,
  'source': 'Dallas',
  'destination': 'New York',
  'product': 'hats',
  'quantity': 320},
 {'truck_id': 8,
  'source': 'Miami',
  'destination': 'Dallas',
  'product': 'pants',
  'quantity': 160},
 {'truck_i

In [119]:
shipments.insert_many(docs)
shipments.count_documents({})

30

* Each shipment also has a product and a quantity, but the part we're gonna focus on is this (point) `truck_id` field. 
* This is gonna record the truck currently allocated for this shipment, so that truck can be considered unavailable for any another shipments. 
* This way when a new shipment comes in, we can make sure the truck that gets assigned to that shipment isn't already doing another one.

In [120]:
# take a look at one of them
shipments.find_one()

{'_id': ObjectId('6077336b5373f1a2927b63c5'),
 'truck_id': 0,
 'source': 'Miami',
 'destination': 'Dallas',
 'product': 'shirts',
 'quantity': 640}

In [121]:
# it created this index called `truck_id_1`, the 1 meaning that the index is sorted in ascending order.

shipments.create_index("truck_id", unique=True)

'truck_id_1'

In [123]:
# using the try-except block, our program prints out a message when a DuplicateKeyError is thrown. 
# The message tells us that the truck we wanted to use has already been sent out. 
# So the application allows the insert to fail, and then sends an error message up to the user to choose another truck.

from pymongo import errors

doc = {
    "source": "New York",
    "destination": "Atlanta",
    "truck_id": 4,
    "product": "socks",
    "quantity": 40 }

try:
    res = shipments.insert_one(doc)
    print(res.inserted_id)
    
except errors.DuplicateKeyError:
    truck_id = doc["truck_id"]
    print(f"Truck #{truck_id} is currently performing a shipment. Please select another truck.")

Truck #4 is currently performing a shipment. Please select another truck.


In [124]:
# The documents each only have two fields: an `_id` from 0 to 49 (point), 
# which will relate to the `truck_id` from the `shipments` collection. 
# And I've assigned a random string of 7 uppercase letters and numbers to be the license plate number, 
# although actually some US states only allow 6 characters.

import string

trucks = client.test.trucks # create trucks collection
trucks.drop()

trucks.insert_many([
    { "_id": i, 
     "license": "".join([random.choice(string.ascii_uppercase + string.digits) for _ in range(7)]) } for i in range(50)
])
trucks.count_documents({})

50

In [125]:
trucks.find_one()

{'_id': 0, 'license': 'H3ZIXDH'}

* for trucks collection: _id: range(50),
* for shipment collection: len(truck_id)=30, so number of available trucks is 20
* we recommend a truck_id from available truck_id list.

In [126]:
# the error handling now is a little more proactive.

doc = {
    "source": "New York",
    "destination": "Atlanta",
    "truck_id": 4,
    "product": "socks",
    "quantity": 40
}

try:
    res = shipments.insert_one(doc)
    print(res.inserted_id)
except errors.DuplicateKeyError:
    busy_trucks = set(shipments.distinct("truck_id"))
    all_trucks = set(trucks.distinct("_id"))
    available_trucks = all_trucks.difference(busy_trucks)
    old_truck_id = doc["truck_id"]
    if available_trucks:
        chosen_truck = random.choice(list(available_trucks))
        new_truck_id = doc["truck_id"] = chosen_truck
        res = shipments.insert_one(doc)
        print(f"Truck #{old_truck_id} is currently performing a shipment. Truck #{new_truck_id} has been sent out instead.")
    else:
        print(f"Truck #{old_truck_id} is currently performing a shipment. Could not find another truck.")

Truck #4 is currently performing a shipment. Truck #34 has been sent out instead.


### Summary
* DuplicateKeyError can occur on _id as well as fields in unique indexes
* When handling errors, determine how fatal the error is
    * Should this error be returned to the user?
    * Can we react to this error in a useful way?