This notebook is to see how far one can go through MongoDB's official [Python tutorial](https://docs.mongodb.org/getting-started/python/) using only `mongomock` and without a running MongoDB instance.

In [1]:
from mongomock import MongoClient

restaurants = MongoClient().db.collection

In [2]:
import json

# The "primer-dataset.json" file used in the official tutorial is not actually valid JSON.
# Rather, it is a file with one JS object per line, which is the format expected by `mongoimport`.
dataset = []
with open('data/primer-dataset.json') as f:
    for line in f:
        dataset.append(json.loads(line))

restaurants.insert_many(dataset)

<pymongo.results.InsertManyResult at 0x1143b9050>

In [3]:
restaurants.count()

25359

In [4]:
from datetime import datetime
result = restaurants.insert_one(
    {
        "address": {
            "street": "2 Avenue",
            "zipcode": "10075",
            "building": "1480",
            "coord": [-73.9557413, 40.7720266]
        },
        "borough": "Manhattan",
        "cuisine": "Italian",
        "grades": [
            {
                "date": datetime.strptime("2014-10-01", "%Y-%m-%d"),
                "grade": "A",
                "score": 11
            },
            {
                "date": datetime.strptime("2014-01-16", "%Y-%m-%d"),
                "grade": "B",
                "score": 17
            }
        ],
        "name": "Vella",
        "restaurant_id": "41704620"
    }
)

In [5]:
result.inserted_id

ObjectId('56bb523fa6289521b0bc310f')

In [6]:
cursor = restaurants.find({'borough': 'Manhattan'})

In [7]:
for document in cursor.limit(3):
    print(document)

{u'cuisine': u'Irish', '_id': ObjectId('56bb523ca6289521b0bbce02'), u'name': u'Dj Reynolds Pub And Restaurant', u'restaurant_id': u'30191841', u'grades': [{u'date': {u'$date': 1409961600000}, u'grade': u'A', u'score': 2}, {u'date': {u'$date': 1374451200000}, u'grade': u'A', u'score': 11}, {u'date': {u'$date': 1343692800000}, u'grade': u'A', u'score': 12}, {u'date': {u'$date': 1325116800000}, u'grade': u'A', u'score': 12}], u'address': {u'building': u'351', u'street': u'West   57 Street', u'zipcode': u'10019', u'coord': [-73.98513559999999, 40.7676919]}, u'borough': u'Manhattan'}
{u'cuisine': u'American ', '_id': ObjectId('56bb523ca6289521b0bbce0d'), u'name': u'1 East 66Th Street Kitchen', u'restaurant_id': u'40359480', u'grades': [{u'date': {u'$date': 1399420800000}, u'grade': u'A', u'score': 3}, {u'date': {u'$date': 1367539200000}, u'grade': u'A', u'score': 4}, {u'date': {u'$date': 1335744000000}, u'grade': u'A', u'score': 6}, {u'date': {u'$date': 1324944000000}, u'grade': u'A', u'sco

In [8]:
cursor = restaurants.find({'grades.grade': 'B'})

In [9]:
cursor.count()

8281

In [10]:
for document in cursor.limit(3):
    print(document['name'])

Morris Park Bake Shop
Wendy'S
Tov Kosher Kitchen


In [11]:
restaurants.find({"grades.score": {"$lt": 10}}).count()

19065

In [12]:
restaurants.find({'cuisine': 'Italian', 'address.zipcode': '10075'}).count()

16

In [13]:
restaurants.find({"$or": [{"cuisine": "Italian"}, {"address.zipcode": "10075"}]}).count()

1154

In [14]:
cursor = restaurants.find().sort([
    ("borough", 1),
    ("address.zipcode", -1)
])

In [15]:
for document in cursor:
    print(document['borough'], document['address']['zipcode'])

(u'Bronx', u'11370')
(u'Bronx', u'11370')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'10475')
(u'Bronx', u'

In [16]:
result = restaurants.update_one(
    {"name": "Juni"},
    {
        "$set": {
            "cuisine": "American (New)",
            "lastModified": datetime.now()
        }
    }
)


In [17]:
result.modified_count

1

In [18]:
result = restaurants.update_one(
    {"restaurant_id": "41156888"},
    {"$set": {"address.street": "East 31st Street"}}
)

In [19]:
result.modified_count

1

In [20]:
result = restaurants.update_many(
    {"address.zipcode": "10016", "cuisine": "Other"},
    {
        "$set": {"cuisine": "Category To Be Determined"},
    }
)

In [21]:
result.modified_count

20

In [22]:
result = restaurants.replace_one(
    {"restaurant_id": "41704620"},
    {
        "name": "Vella 2",
        "address": {
            "coord": [-73.9557413, 40.7720266],
            "building": "1480",
            "street": "2 Avenue",
            "zipcode": "10075"
        }
    }
)

In [23]:
result.modified_count

1

In [24]:
result = restaurants.delete_many({"borough": "Manhattan"})

In [25]:
result.deleted_count

10259

In [26]:
result = restaurants.delete_many({})

In [27]:
result.deleted_count

15101

In [28]:
restaurants.drop()

In [29]:
# Re-insert all docs
restaurants.insert_many(dataset)

<pymongo.results.InsertManyResult at 0x10e196f50>

In [30]:
# Add a little class, for easier copy/paste from examples.
class Db:
    pass
db = Db()
db.restaurants = restaurants

In [31]:
cursor = db.restaurants.aggregate(
    [
        {"$group": {"_id": "$borough", "count": {"$sum": 1}}}
    ]
)

AttributeError: 'int' object has no attribute 'replace'

In [32]:
# Aggregation example doesn't work with mongomock.
# Too bad. This means mongomock should definitely be a fallback option only.

# Try again using a real mongo client.
import pymongo

client = pymongo.MongoClient()
db = client.swc

In [33]:
! mongoimport --db swc --collection restaurants --drop --file data/primer-dataset.json

connected to: 127.0.0.1
2016-02-10T07:07:58.383-0800 dropping: swc.restaurants
2016-02-10T07:07:58.883-0800 check 9 25359
2016-02-10T07:07:58.884-0800 imported 25359 objects


In [34]:
cursor = db.restaurants.aggregate(
    [
        {"$group": {"_id": "$borough", "count": {"$sum": 1}}}
    ]
)

In [35]:
for document in cursor:
    print(document)

{u'count': 51, u'_id': u'Missing'}
{u'count': 969, u'_id': u'Staten Island'}
{u'count': 10259, u'_id': u'Manhattan'}
{u'count': 6086, u'_id': u'Brooklyn'}
{u'count': 5656, u'_id': u'Queens'}
{u'count': 2338, u'_id': u'Bronx'}


In [36]:
cursor = db.restaurants.aggregate(
    [
        {"$match": {"borough": "Queens", "cuisine": "Brazilian"}},
        {"$group": {"_id": "$address.zipcode", "count": {"$sum": 1}}}
    ]
)

In [37]:
for document in cursor:
    print(document)

{u'count': 1, u'_id': u'11377'}
{u'count': 1, u'_id': u'11368'}
{u'count': 2, u'_id': u'11101'}
{u'count': 3, u'_id': u'11106'}
{u'count': 1, u'_id': u'11103'}


In [38]:
db.restaurants.create_index([("cuisine", pymongo.ASCENDING)])

u'cuisine_1'

In [39]:
db.restaurants.create_index([
    ("cuisine", pymongo.ASCENDING),
    ("address.zipcode", pymongo.DESCENDING)
])

u'cuisine_1_address.zipcode_-1'