## MongoDB scratchpad

In [4]:
import requests
from pymongo import MongoClient;
import json

In [5]:
# Local Mongo client
client = MongoClient("mongodb://localhost:27017")
db = client["nobel"]

In [6]:
json_list = []

with open("laureates.json") as laureates_file:
    laureates_json = json.load(laureates_file)
    json_list.append(laureates_json)

with open("prizes.json") as prizes_file:
    prizes_json = json.load(prizes_file)
    json_list.append(prizes_json)

In [7]:
prize_collection = db["prizes"]
prize_collection.insert_many(prizes_json)

<pymongo.results.InsertManyResult at 0x20472c310c0>

In [8]:
laureates_collection = db["laureates"]
laureates_collection.insert_many(laureates_json)

<pymongo.results.InsertManyResult at 0x20472fefdc0>

In [9]:
db.list_collection_names()

['prizes', 'laureates']

In [10]:
db.prizes.prizes

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'nobel'), 'prizes.prizes')

In [12]:
walter_kohn = {"firstname": "Walter", 
               "surname": "Kohn"}

db.laureates.find_one(walter_kohn)

{'_id': ObjectId('641480b3902df7f3d876b7d7'),
 'id': '290',
 'firstname': 'Walter',
 'surname': 'Kohn',
 'born': '1923-03-09',
 'died': '2016-04-19',
 'bornCountry': 'Austria',
 'bornCountryCode': 'AT',
 'bornCity': 'Vienna',
 'diedCountry': 'USA',
 'diedCountryCode': 'US',
 'diedCity': 'Santa Barbara, CA',
 'gender': 'male',
 'prizes': [{'year': '1998',
   'category': 'chemistry',
   'share': '2',
   'motivation': '"for his development of the density-functional theory"',
   'affiliations': [{'name': 'University of California',
     'city': 'Santa Barbara, CA',
     'country': 'USA'}]}]}

In [26]:
# equivalent of doing "LIKE" query in SQL
regex = {"firstname": { "$regex" : "[w|W]alter"}}

cursor = db.laureates.find(regex)

for doc in cursor:
    print(doc)

{'_id': ObjectId('641480b3902df7f3d876b71f'), 'id': '67', 'firstname': 'Walter Houser', 'surname': 'Brattain', 'born': '1902-02-10', 'died': '1987-10-13', 'bornCountry': 'China', 'bornCountryCode': 'CN', 'bornCity': 'Amoy', 'diedCountry': 'USA', 'diedCountryCode': 'US', 'diedCity': 'Seattle, WA', 'gender': 'male', 'prizes': [{'year': '1956', 'category': 'physics', 'share': '3', 'motivation': '"for their researches on semiconductors and their discovery of the transistor effect"', 'affiliations': [{'name': 'Bell Telephone Laboratories', 'city': 'Murray Hill, NJ', 'country': 'USA'}]}]}
{'_id': ObjectId('641480b3902df7f3d876b735'), 'id': '89', 'firstname': 'Luis Walter', 'surname': 'Alvarez', 'born': '1911-06-13', 'died': '1988-09-01', 'bornCountry': 'USA', 'bornCountryCode': 'US', 'bornCity': 'San Francisco, CA', 'diedCountry': 'USA', 'diedCountryCode': 'US', 'diedCity': 'Berkeley, CA', 'gender': 'male', 'prizes': [{'year': '1968', 'category': 'physics', 'share': '1', 'motivation': '"for 

In [27]:
regex = {"firstname": { "$regex" : "[w|W]alter"}}

cursor2 = db.laureates.find(regex)

# to print things in a rich format
for doc in cursor2:
    doc["_id"] = str(doc["_id"]) # Necessary because "ObjectID()" needs to be a string to show correctly (see above)
    print(json.dumps(doc, indent=4))

{
    "_id": "641480b3902df7f3d876b71f",
    "id": "67",
    "firstname": "Walter Houser",
    "surname": "Brattain",
    "born": "1902-02-10",
    "died": "1987-10-13",
    "bornCountry": "China",
    "bornCountryCode": "CN",
    "bornCity": "Amoy",
    "diedCountry": "USA",
    "diedCountryCode": "US",
    "diedCity": "Seattle, WA",
    "gender": "male",
    "prizes": [
        {
            "year": "1956",
            "category": "physics",
            "share": "3",
            "motivation": "\"for their researches on semiconductors and their discovery of the transistor effect\"",
            "affiliations": [
                {
                    "name": "Bell Telephone Laboratories",
                    "city": "Murray Hill, NJ",
                    "country": "USA"
                }
            ]
        }
    ]
}
{
    "_id": "641480b3902df7f3d876b735",
    "id": "89",
    "firstname": "Luis Walter",
    "surname": "Alvarez",
    "born": "1911-06-13",
    "died": "1988-09-01",
 

In [28]:
# Filter for laureates born in Austria with non-Austria prize affiliation
criteria = {"bornCountry": "Austria", 
            # below is required because affiliations is a nested 
            # array itself inside the nested array of prizes
              "prizes.affiliations.country": {"$ne": "Austria"}} 

# Count the number of such laureates
count = db.laureates.count_documents(criteria)
print(count)

20


In [None]:
# Filter for laureates with at least three prizes
# This is borrowing from JS -- "Does element 3 (index 2) exist?"
criteria = {"prizes.2": {"$exists": True}}

# Find one laureate with at least three prizes
doc = db.laureates.find_one(criteria)

# Print the document
print(doc)

In [29]:
db.laureates.distinct("gender")

['female', 'male', 'org']