# MongoDB

MongoDB is a document database. It stores JSON-like documents (encoded internally as BSON).

In [1]:
from pymongo import MongoClient
from bson.objectid import ObjectId

In [2]:
import requests
from bson import json_util

In [3]:
import collections
from pathlib import Path

In [4]:
from pprint import pprint

## Set up

This connects to the MongoDB daemon. With no arguments, `MongoClient()` uses the default host and port (`localhost:27017`).

In [5]:
client = MongoClient()

Drop any existing `starwars` database so that the notebook starts from a clean state, then select it. It does not matter if the database does not exist yet.

In [6]:
client.drop_database('starwars')

In [7]:
db = client.starwars

This specifies a `collection` within the database.

In [8]:
people = db.people

Check what collections are in the database. Note that the `people` collection is only created when the first document is inserted.

In [9]:
db.list_collection_names()

[]

## Get Data

In [10]:
base_url = 'https://swapi.co/api'

In [11]:
# Fetch a single character record. Build the URL from base_url so the API root
# is defined in exactly one place.
resp = requests.get(base_url + '/people/1')
# Fail loudly on an HTTP error instead of parsing (and later storing) an error body.
resp.raise_for_status()
data = resp.json()

In [12]:
data

{'name': 'Luke Skywalker',
 'height': '172',
 'mass': '77',
 'hair_color': 'blond',
 'skin_color': 'fair',
 'eye_color': 'blue',
 'birth_year': '19BBY',
 'gender': 'male',
 'homeworld': 'https://swapi.co/api/planets/1/',
 'films': ['https://swapi.co/api/films/2/',
  'https://swapi.co/api/films/6/',
  'https://swapi.co/api/films/3/',
  'https://swapi.co/api/films/1/',
  'https://swapi.co/api/films/7/'],
 'species': ['https://swapi.co/api/species/1/'],
 'vehicles': ['https://swapi.co/api/vehicles/14/',
  'https://swapi.co/api/vehicles/30/'],
 'starships': ['https://swapi.co/api/starships/12/',
  'https://swapi.co/api/starships/22/'],
 'created': '2014-12-09T13:50:51.644000Z',
 'edited': '2014-12-20T21:17:56.891000Z',
 'url': 'https://swapi.co/api/people/1/'}

In [13]:
def convert(data):
    """Nest inner API calls one level down.

    Every string value that starts with ``base_url`` is replaced by the JSON
    document fetched from that URL. List values are processed item by item:
    URL items are replaced by their fetched documents, and any other item is
    kept unchanged. Fetched documents are not expanded any further.

    ``data`` is modified in place and also returned, so the caller's dict and
    the return value are the same object.
    """
    def is_api_url(value):
        return isinstance(value, str) and value.startswith(base_url)

    # Snapshot the items so reassigning keys never interferes with iteration.
    for key, val in list(data.items()):
        if isinstance(val, list):
            # Keep non-URL items instead of silently dropping them.
            data[key] = [requests.get(item).json() if is_api_url(item) else item
                         for item in val]
        elif is_api_url(val):
            data[key] = requests.get(val).json()
    return data

In [14]:
luke = convert(data)

In [27]:
def to_int(d, fields):
    """Convert the named fields of ``d`` to ``int`` where possible.

    Best-effort: a field that is missing, or whose value cannot be converted
    to an integer, is left untouched. ``d`` is modified in place and returned.
    """
    for field in fields:
        try:
            d[field] = int(d[field])
        except (KeyError, TypeError, ValueError, OverflowError):
            # Leave missing or non-numeric values as they are, but let
            # unrelated exceptions (e.g. KeyboardInterrupt) propagate.
            pass
    return d

In [16]:
luke = to_int(luke, ['height', 'mass'])

In [17]:
pprint(data)

{'birth_year': '19BBY',
 'created': '2014-12-09T13:50:51.644000Z',
 'edited': '2014-12-20T21:17:56.891000Z',
 'eye_color': 'blue',
 'films': [{'characters': ['https://swapi.co/api/people/1/',
                           'https://swapi.co/api/people/2/',
                           'https://swapi.co/api/people/3/',
                           'https://swapi.co/api/people/4/',
                           'https://swapi.co/api/people/5/',
                           'https://swapi.co/api/people/10/',
                           'https://swapi.co/api/people/13/',
                           'https://swapi.co/api/people/14/',
                           'https://swapi.co/api/people/18/',
                           'https://swapi.co/api/people/20/',
                           'https://swapi.co/api/people/21/',
                           'https://swapi.co/api/people/22/',
                           'https://swapi.co/api/people/23/',
                           'https://swapi.co/api/people/24/',
      

## Insertion and retrieval

In [18]:
result = people.insert_one(data)

In [19]:
db.list_collection_names()

['people']

### Using search criteria

In [20]:
# First argument is the query filter; the second is a projection listing the
# fields to return (`_id` is included unless it is explicitly excluded).
people.find_one(
    {'name': 'Luke Skywalker'},
    {'name': True, 'hair_color': True, 'skin_color': True, 'eye_color': True}
)

{'_id': ObjectId('5b8dc66f54f55e6828844e05'),
 'name': 'Luke Skywalker',
 'hair_color': 'blond',
 'skin_color': 'fair',
 'eye_color': 'blue'}

### Using object ID

Note that an `ObjectId` is NOT a string. If you only have the ID as a string, convert it with `ObjectId(id_string)` before using it in a query.

In [23]:
result.inserted_id

ObjectId('5b8dc66f54f55e6828844e05')

In [25]:
# Passing a non-dict value as the filter makes pymongo look it up by `_id`,
# i.e. this is equivalent to filtering on {'_id': result.inserted_id}.
people.find_one(
    result.inserted_id,
    {'name': True, 'hair_color': True, 'skin_color': True, 'eye_color': True}
)

{'_id': ObjectId('5b8dc66f54f55e6828844e05'),
 'name': 'Luke Skywalker',
 'hair_color': 'blond',
 'skin_color': 'fair',
 'eye_color': 'blue'}

### Bulk inserts

In [28]:
# Fetch characters 2..20, nest their linked resources and coerce the numeric
# fields. The URL is built from base_url so the API root lives in one place.
xs = [to_int(convert(requests.get('%s/people/%i' % (base_url, i)).json()),
             ['height', 'mass'])
      for i in range(2, 21)]

In [29]:
result = people.insert_many(xs)

In [30]:
result.inserted_ids

[ObjectId('5b8dc72354f55e6828844e06'),
 ObjectId('5b8dc72354f55e6828844e07'),
 ObjectId('5b8dc72354f55e6828844e08'),
 ObjectId('5b8dc72354f55e6828844e09'),
 ObjectId('5b8dc72354f55e6828844e0a'),
 ObjectId('5b8dc72354f55e6828844e0b'),
 ObjectId('5b8dc72354f55e6828844e0c'),
 ObjectId('5b8dc72354f55e6828844e0d'),
 ObjectId('5b8dc72354f55e6828844e0e'),
 ObjectId('5b8dc72354f55e6828844e0f'),
 ObjectId('5b8dc72354f55e6828844e10'),
 ObjectId('5b8dc72354f55e6828844e11'),
 ObjectId('5b8dc72354f55e6828844e12'),
 ObjectId('5b8dc72354f55e6828844e13'),
 ObjectId('5b8dc72354f55e6828844e14'),
 ObjectId('5b8dc72354f55e6828844e15'),
 ObjectId('5b8dc72354f55e6828844e16'),
 ObjectId('5b8dc72354f55e6828844e17'),
 ObjectId('5b8dc72354f55e6828844e18')]

### Bulk queries

In [33]:
for person in people.find({'gender': 'male'}):
    print(person['name'])

Luke Skywalker
Darth Vader
Owen Lars
Biggs Darklighter
Obi-Wan Kenobi
Anakin Skywalker
Wilhuff Tarkin
Chewbacca
Han Solo
Greedo
Wedge Antilles
Jek Tono Porkins
Yoda


In [38]:
for x in people.find(
    {'gender': 'male'},              # search criteria 
    {'name': True, 'gender': True}): # fields to return
    print(x)

{'_id': ObjectId('5b8dc66f54f55e6828844e05'), 'name': 'Luke Skywalker', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e6828844e08'), 'name': 'Darth Vader', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e6828844e0a'), 'name': 'Owen Lars', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e6828844e0d'), 'name': 'Biggs Darklighter', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e6828844e0e'), 'name': 'Obi-Wan Kenobi', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e6828844e0f'), 'name': 'Anakin Skywalker', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e6828844e10'), 'name': 'Wilhuff Tarkin', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e6828844e11'), 'name': 'Chewbacca', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e6828844e12'), 'name': 'Han Solo', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e6828844e13'), 'name': 'Greedo', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e6828844e16'), 'name': 'Wedge Antilles', 'gender': 'male'}
{'_id': ObjectId('5b8dc72354f55e

#### Using regex search

In [39]:
# Match names beginning with a capital "L" using MongoDB's `$regex` operator.
# `'_id': False` in the projection suppresses the `_id` field, which would
# otherwise be returned by default.
for x in people.find(
    {
        'name': {'$regex': '^L'},
    },
    {'name': True, 'gender': True, '_id': False}
):
    print(x)

{'name': 'Luke Skywalker', 'gender': 'male'}
{'name': 'Leia Organa', 'gender': 'female'}


Alternatively, use a compiled Python regular expression. Note that the pattern below is case-insensitive, unlike the `$regex` query above.

In [40]:
import re

name_pat = re.compile(r'^l', re.IGNORECASE)

In [42]:
for x in people.find(
    {
        'name': name_pat,
    },
    {'name': True, 'gender': True, '_id': False}
):
    print(x)

{'name': 'Luke Skywalker', 'gender': 'male'}
{'name': 'Leia Organa', 'gender': 'female'}


#### Using relational operators

In [43]:
for x in people.find(
    {
        'mass': {'$gt': 100},
    },
    {'name': True, 'mass': True, '_id': False}
):
    print(x)

{'name': 'Darth Vader', 'mass': 136}
{'name': 'Owen Lars', 'mass': 120}
{'name': 'Chewbacca', 'mass': 112}
{'name': 'Jek Tono Porkins', 'mass': 110}


In [44]:
# Several operators on the same field must all hold, so this selects 50 < mass < 100.
mass_range = {'$lt': 100, '$gt': 50}

In [45]:
for x in people.find(
    {
        'mass': mass_range,
    },
    {'name': True, 'mass': True, '_id': False}
):
    print(x)

{'name': 'Luke Skywalker', 'mass': 77}
{'name': 'C-3PO', 'mass': 75}
{'name': 'Beru Whitesun lars', 'mass': 75}
{'name': 'Biggs Darklighter', 'mass': 84}
{'name': 'Obi-Wan Kenobi', 'mass': 77}
{'name': 'Anakin Skywalker', 'mass': 84}
{'name': 'Han Solo', 'mass': 80}
{'name': 'Greedo', 'mass': 74}
{'name': 'Wedge Antilles', 'mass': 77}


In [46]:
# NOTE(review): this cell repeats the preceding `mass_range` query verbatim —
# confirm whether a different example was intended here.
for x in people.find(
    {
        'mass': mass_range,
    },
    {'name': True, 'mass': True, '_id': False}
):
    print(x)

{'name': 'Luke Skywalker', 'mass': 77}
{'name': 'C-3PO', 'mass': 75}
{'name': 'Beru Whitesun lars', 'mass': 75}
{'name': 'Biggs Darklighter', 'mass': 84}
{'name': 'Obi-Wan Kenobi', 'mass': 77}
{'name': 'Anakin Skywalker', 'mass': 84}
{'name': 'Han Solo', 'mass': 80}
{'name': 'Greedo', 'mass': 74}
{'name': 'Wedge Antilles', 'mass': 77}


#### Nested search

In [None]:
people.count_documents({'gender': 'male'})