# Making a Connection with MongoClient

In [4]:
from pymongo import MongoClient

In [5]:
# No host or port is passed, so the client is created with PyMongo's default connection settings.
client = MongoClient()

# Getting a Database

In [6]:
# Select the database by name using dictionary-style access on the client.
db = client["test_database"]

# Getting a Collection

In [7]:
# Select the collection by name using dictionary-style access on the database.
posts = db["posts"]

# Documents

In [8]:
import datetime
# A document is an ordinary Python dict. "date" holds the current UTC time
# as a naive datetime (no tzinfo attached).
post = dict(
    author="Mike",
    text="My first blog post!",
    tags=["mongodb", "python", "pymongo"],
    date=datetime.datetime.utcnow(),
)

## Inserting a Document

In [9]:
# insert_one() returns a result object; keep its inserted_id so the
# document can be looked up by "_id" in later cells.
insert_result = posts.insert_one(post)
post_id = insert_result.inserted_id
post_id

ObjectId('56208905801d320a17310d4e')

When a document is inserted, a special key, "_id", is automatically added if the document doesn’t already contain an "_id" key. The value of "_id" must be unique across the collection. insert_one() returns an instance of InsertOneResult.

After inserting the first document, the posts collection has actually been created on the server. We can verify this by listing all of the collections in our database:

In [10]:
# Database.collection_names() was deprecated in PyMongo 3.7 and removed in 4.0;
# list_collection_names() is its replacement. The name filter preserves the
# original include_system_collections=False behaviour by excluding any
# collection whose name starts with "system.".
db.list_collection_names(filter={"name": {"$regex": r"^(?!system\.)"}})

[u'posts']

## Getting a Single Document With find_one()

The most basic type of query that can be performed in MongoDB is find_one(). This method returns a single document matching a query (or None if there are no matches).

It is useful when you know there is only one matching document, or are only interested in the first match. Here we use find_one() to get the first document from the posts collection:

In [14]:
# With no filter argument, find_one() returns the first document in the collection.
posts.find_one()

{u'_id': ObjectId('56208905801d320a17310d4e'),
 u'author': u'Mike',
 u'date': datetime.datetime(2015, 10, 16, 5, 20, 1, 587000),
 u'tags': [u'mongodb', u'python', u'pymongo'],
 u'text': u'My first blog post!'}

The returned document contains an "_id", which was automatically added on insert.

find_one() also supports querying on specific elements that the resulting document must match. 
To limit our results to a document with author “Mike” we do:

In [15]:
# The filter dict names the fields the returned document must match: here "author" must be "Mike".
posts.find_one({"author": "Mike"})

{u'_id': ObjectId('56208905801d320a17310d4e'),
 u'author': u'Mike',
 u'date': datetime.datetime(2015, 10, 16, 5, 20, 1, 587000),
 u'tags': [u'mongodb', u'python', u'pymongo'],
 u'text': u'My first blog post!'}

## Querying By ObjectId

In [16]:
# Display the ObjectId that was saved when the first post was inserted.
post_id

ObjectId('56208905801d320a17310d4e')

In [17]:
# Look the post up by "_id", passing the ObjectId itself as the value to match.
posts.find_one({"_id": post_id})

{u'_id': ObjectId('56208905801d320a17310d4e'),
 u'author': u'Mike',
 u'date': datetime.datetime(2015, 10, 16, 5, 20, 1, 587000),
 u'tags': [u'mongodb', u'python', u'pymongo'],
 u'text': u'My first blog post!'}

Note that an ObjectId is not the same as its string representation:

In [22]:
# str() yields a plain string, which is a different value from the ObjectId it was made from.
post_id_as_str = str(post_id)
posts.find_one({"_id": post_id_as_str}) # No result: the stored "_id" is an ObjectId, so a str never matches and None is returned


In [23]:
from bson.objectid import ObjectId
# Convert the string back into an ObjectId before querying by "_id".
posts.find_one({'_id': ObjectId(post_id_as_str)})

{u'_id': ObjectId('56208905801d320a17310d4e'),
 u'author': u'Mike',
 u'date': datetime.datetime(2015, 10, 16, 5, 20, 1, 587000),
 u'tags': [u'mongodb', u'python', u'pymongo'],
 u'text': u'My first blog post!'}

## Bulk Inserts

In [25]:
# Two documents to insert in a single call. Note that they do not share the
# same set of keys: the first has "tags", the second has "title".
new_posts = [
    dict(
        author="Mike",
        text="Another post!",
        tags=["bulk", "insert"],
        date=datetime.datetime(2009, 11, 12, 11, 14),
    ),
    dict(
        author="Eliot",
        title="MongoDB is fun",
        text="and pretty easy too!",
        date=datetime.datetime(2009, 11, 10, 10, 45),
    ),
]

In [26]:
# insert_many() sends the whole list in one call; the last line displays the
# inserted_ids attribute of the returned result object.
result = posts.insert_many(new_posts)
result.inserted_ids

## Querying for More Than One Document

In [33]:
# Iterate over every document in the collection and print each one.
# The loop variable is `doc` rather than `post` so that running this cell does
# not rebind the notebook-level `post` dict created in the Documents section.
for doc in posts.find():
    print(doc)

{u'date': datetime.datetime(2015, 10, 16, 5, 20, 1, 587000), u'text': u'My first blog post!', u'_id': ObjectId('56208905801d320a17310d4e'), u'author': u'Mike', u'tags': [u'mongodb', u'python', u'pymongo']}
{u'date': datetime.datetime(2009, 11, 12, 11, 14), u'text': u'Another post!', u'_id': ObjectId('56208b19801d320a17310d4f'), u'author': u'Mike', u'tags': [u'bulk', u'insert']}
{u'date': datetime.datetime(2009, 11, 10, 10, 45), u'text': u'and pretty easy too!', u'_id': ObjectId('56208b19801d320a17310d50'), u'author': u'Eliot', u'title': u'MongoDB is fun'}


Just like we did with find_one(), we can pass a document to find() to limit the returned results. Here, we get only those documents whose author is “Mike”:

In [34]:
# Print only the documents whose "author" field is "Mike".
# The loop variable is `doc` rather than `post` so that running this cell does
# not rebind the notebook-level `post` dict created in the Documents section.
for doc in posts.find({"author": "Mike"}):
    print(doc)

{u'date': datetime.datetime(2015, 10, 16, 5, 20, 1, 587000), u'text': u'My first blog post!', u'_id': ObjectId('56208905801d320a17310d4e'), u'author': u'Mike', u'tags': [u'mongodb', u'python', u'pymongo']}
{u'date': datetime.datetime(2009, 11, 12, 11, 14), u'text': u'Another post!', u'_id': ObjectId('56208b19801d320a17310d4f'), u'author': u'Mike', u'tags': [u'bulk', u'insert']}


## Counting

In [35]:
# Collection.count() was deprecated in PyMongo 3.7 and removed in 4.0.
# count_documents() with an empty filter counts every document in the collection.
posts.count_documents({})

3

We can also count just those documents that match a specific query:

In [36]:
# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0.
# Pass the query filter directly to count_documents() instead.
posts.count_documents({"author": "Mike"})

2

## Range Queries

In [37]:
# Upper bound for the range query below: 12 November 2009 at 12:00.
d = datetime.datetime(year=2009, month=11, day=12, hour=12)

In [38]:
# Print the documents dated before `d`, sorted by author.
# The loop variable is `doc` rather than `post` so that running this cell does
# not rebind the notebook-level `post` dict created in the Documents section.
for doc in posts.find({"date": {"$lt": d}}).sort("author"):
    print(doc)

{u'date': datetime.datetime(2009, 11, 10, 10, 45), u'text': u'and pretty easy too!', u'_id': ObjectId('56208b19801d320a17310d50'), u'author': u'Eliot', u'title': u'MongoDB is fun'}
{u'date': datetime.datetime(2009, 11, 12, 11, 14), u'text': u'Another post!', u'_id': ObjectId('56208b19801d320a17310d4f'), u'author': u'Mike', u'tags': [u'bulk', u'insert']}


Here we use the special "$lt" operator to do a range query, and also call sort() to sort the results by author.

## Indexing

To make the above query fast we can add a compound index on "date" and "author". To start, let's use the explain() method to get some information about how the query is being performed without the index:

In [46]:
# explain() on the cursor shows how the query is executed; this is the plan before any index exists.
posts.find({"date": {"$lt": d}}).sort("author").explain()

{u'executionStats': {u'allPlansExecution': [],
  u'executionStages': {u'advanced': 2,
   u'executionTimeMillisEstimate': 0,
   u'inputStage': {u'advanced': 2,
    u'direction': u'forward',
    u'docsExamined': 3,
    u'executionTimeMillisEstimate': 0,
    u'filter': {u'date': {u'$lt': datetime.datetime(2009, 11, 12, 12, 0)}},
    u'invalidates': 0,
    u'isEOF': 1,
    u'nReturned': 2,
    u'needFetch': 0,
    u'needTime': 2,
    u'restoreState': 0,
    u'saveState': 0,
    u'stage': u'COLLSCAN',
    u'works': 5},
   u'invalidates': 0,
   u'isEOF': 1,
   u'memLimit': 33554432,
   u'memUsage': 241,
   u'nReturned': 2,
   u'needFetch': 0,
   u'needTime': 5,
   u'restoreState': 0,
   u'saveState': 0,
   u'sortPattern': {u'author': 1},
   u'stage': u'SORT',
   u'works': 9},
  u'executionSuccess': True,
  u'executionTimeMillis': 0,
  u'nReturned': 2,
  u'totalDocsExamined': 3,
  u'totalKeysExamined': 0},
 u'queryPlanner': {u'indexFilterSet': False,
  u'namespace': u'test_database.posts',
  

We can see that the query is using a COLLSCAN stage (a full collection scan) and examining all 3 documents in the collection. Now let’s add a compound index and look at the same information:

In [47]:
from pymongo import ASCENDING, DESCENDING
# Compound index: "date" descending first, then "author" ascending.
# The call returns the name of the index.
posts.create_index([("date", DESCENDING), ("author", ASCENDING)])

u'date_-1_author_1'

In [48]:
# Same query as before the index was created, so the two explain() plans can be compared.
posts.find({"date": {"$lt": d}}).sort("author").explain()

{u'executionStats': {u'allPlansExecution': [],
  u'executionStages': {u'advanced': 2,
   u'executionTimeMillisEstimate': 0,
   u'inputStage': {u'advanced': 2,
    u'executionTimeMillisEstimate': 0,
    u'inputStage': {u'advanced': 2,
     u'alreadyHasObj': 0,
     u'docsExamined': 2,
     u'executionTimeMillisEstimate': 0,
     u'inputStage': {u'advanced': 2,
      u'direction': u'forward',
      u'dupsDropped': 0,
      u'dupsTested': 0,
      u'executionTimeMillisEstimate': 0,
      u'indexBounds': {u'author': [u'[MinKey, MaxKey]'],
       u'date': [u'(new Date(1258027200000), true)']},
      u'indexName': u'date_-1_author_1',
      u'invalidates': 0,
      u'isEOF': 1,
      u'isMultiKey': False,
      u'keyPattern': {u'author': 1, u'date': -1},
      u'keysExamined': 2,
      u'matchTested': 0,
      u'nReturned': 2,
      u'needFetch': 0,
      u'needTime': 0,
      u'restoreState': 0,
      u'saveState': 0,
      u'seenInvalidated': 0,
      u'stage': u'IXSCAN',
      u'works': 2