In [1]:
import pymongo # this gives a security warning in some cases

In [2]:
# Connect to the MongoDB server running on this machine.
client = pymongo.MongoClient('mongodb://localhost:27017/')
# Start from a clean slate. Dropping a database that does not exist is a
# no-op, so no existence check (and no deprecated database_names() call)
# is needed before dropping 'big_data_processing'.
client.drop_database('big_data_processing')

In [3]:

# Handle on the working database (subscript access is equivalent to
# client.big_data_processing); the bare name displays its repr.
db = client['big_data_processing']
db

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), u'big_data_processing')

In [4]:
import datetime

# One homework submission, represented as a plain dict (a MongoDB document).
# NOTE: the stored "date" comes back truncated to milliseconds, so the
# value read from the database can differ slightly from the one shown here.
hw = {
    "author": "Charlot",
    "hw": "Word Count",
    "cmd": "line.lower()",
    "tags": ["MapReduce", "word", "count", "normalization"],
    "date": datetime.datetime.utcnow(),
}
hw

{'author': 'Charlot',
 'cmd': 'line.lower()',
 'date': datetime.datetime(2017, 5, 7, 15, 48, 44, 550100),
 'hw': 'Word Count',
 'tags': ['MapReduce', 'word', 'count', 'normalization']}

In [5]:
# Handle on the "hws" collection of the working database.
hws = db['hws']
# Empty the collection before inserting.
hws.delete_many({})
# Insert the single document and keep the _id reported by the insert result.
hw_id = hws.insert_one(hw).inserted_id
hw_id

ObjectId('590f41e6fad5a50ab85f0d65')

In [6]:
# List the user collections of the database.
# collection_names() is deprecated (removed in PyMongo 4); its replacement
# list_collection_names() has no include_system_collections flag, so the
# "system." prefix filter below reproduces include_system_collections=False.
list(name for name in db.list_collection_names() if not name.startswith('system.'))

[u'hws']

In [7]:
# Fetch a single document; an empty filter matches every document.
hws.find_one({})

{u'_id': ObjectId('590f41e6fad5a50ab85f0d65'),
 u'author': u'Charlot',
 u'cmd': u'line.lower()',
 u'date': datetime.datetime(2017, 5, 7, 15, 48, 44, 550000),
 u'hw': u'Word Count',
 u'tags': [u'MapReduce', u'word', u'count', u'normalization']}

In [8]:
# Look the document up by its _id; the tag query in the next cell finds the
# same document. Single-argument print(...) runs on Python 2 and Python 3.
print(hws.find_one({'_id': hw_id}))

{u'author': u'Charlot', u'tags': [u'MapReduce', u'word', u'count', u'normalization'], u'cmd': u'line.lower()', u'hw': u'Word Count', u'date': datetime.datetime(2017, 5, 7, 15, 48, 44, 550000), u'_id': ObjectId('590f41e6fad5a50ab85f0d65')}


In [9]:
# Query on the array field "tags": the document matches because one of its
# tags equals 'MapReduce'. Single-argument print(...) runs on Python 2 and 3.
print(hws.find_one({'tags': 'MapReduce'}))

{u'author': u'Charlot', u'tags': [u'MapReduce', u'word', u'count', u'normalization'], u'cmd': u'line.lower()', u'hw': u'Word Count', u'date': datetime.datetime(2017, 5, 7, 15, 48, 44, 550000), u'_id': ObjectId('590f41e6fad5a50ab85f0d65')}


In [10]:
# Two more submissions, prepared as a list of dicts for a bulk insert.
# The documents do not share a schema: the second one carries an extra
# "hrs" field and stores "cmd" as a list instead of a string.
student_weekend_hws = [
    {
        "author": "Bernhardt",
        "hw": "Word Count",
        "cmd": "line.lower()",
        "tags": ["MapReduce", "word", "count", "normalization"],
        "date": datetime.datetime.utcnow(),
    },
    {
        "author": "Jurgen",
        "hw": "MongoDB",
        "hrs": 1,
        "cmd": ["student_weekend_hws = []", "hws.insert_many(student_weekend_hws)"],
        "tags": ["big", "data", "nosql"],
        "date": datetime.datetime.utcnow(),
    },
]

In [11]:
# Bulk-insert the prepared documents and display the _ids reported back.
insert_result = hws.insert_many(student_weekend_hws)
insert_result.inserted_ids

[ObjectId('590f4209fad5a50ab85f0d66'), ObjectId('590f4209fad5a50ab85f0d67')]

In [12]:
# Print author and date of every stored document.
# The loop variable is deliberately not called `hw`: that name holds the
# document built in an earlier cell, and overwriting it here would make
# re-running the insert_one cell insert a different document.
for doc in hws.find():
    # One pre-formatted string prints the same on Python 2 and Python 3.
    print("%s %s" % (doc['author'], doc['date']))

# count() is deprecated (removed in PyMongo 4); count_documents({}) counts
# every document in the collection.
print(hws.count_documents({}))

Charlot 2017-05-07 15:48:44.550000
Bernhardt 2017-05-07 15:49:21.052000
Jurgen 2017-05-07 15:49:21.052000
3


In [16]:
# Range query: select the documents dated after the cutoff `d`, sorted by
# author. The cutoff must lie before the time the documents above were
# inserted, otherwise nothing matches - adjust it if necessary.
d = datetime.datetime(2017, 1, 6, 15, 0, 0, 0)
for h in hws.find({"date": {"$gt": d}}).sort("author"):
    # Single-argument print(...) runs on Python 2 and Python 3.
    print(h)

{u'author': u'Bernhardt', u'tags': [u'MapReduce', u'word', u'count', u'normalization'], u'cmd': u'line.lower()', u'hw': u'Word Count', u'date': datetime.datetime(2017, 5, 7, 15, 49, 21, 52000), u'_id': ObjectId('590f4209fad5a50ab85f0d66')}
{u'author': u'Charlot', u'tags': [u'MapReduce', u'word', u'count', u'normalization'], u'cmd': u'line.lower()', u'hw': u'Word Count', u'date': datetime.datetime(2017, 5, 7, 15, 48, 44, 550000), u'_id': ObjectId('590f41e6fad5a50ab85f0d65')}
{u'author': u'Jurgen', u'tags': [u'big', u'data', u'nosql'], u'cmd': [u'student_weekend_hws = []', u'hws.insert_many(student_weekend_hws)'], u'hw': u'MongoDB', u'hrs': 1, u'date': datetime.datetime(2017, 5, 7, 15, 49, 21, 52000), u'_id': ObjectId('590f4209fad5a50ab85f0d67')}


In [17]:
# Delete one document by _id and show the collection size before and after.
# count() is deprecated (removed in PyMongo 4), hence count_documents({}).
print(db.hws.count_documents({}))
# Named first_id rather than `id` so the builtin id() is not shadowed for
# the rest of the notebook.
first_id = db.hws.find_one()['_id']
db.hws.delete_one({'_id': first_id})
print(db.hws.count_documents({}))

3
2


In [18]:
# Reset the profiles collection and create a unique index on user_id.
db['profiles'].delete_many({})
# A single key name defaults to an ascending index, i.e. the same
# specification as [('user_id', pymongo.ASCENDING)].
result = db['profiles'].create_index('user_id', unique=True)
# index_information() is keyed by index name; list() shows just the names.
list(db['profiles'].index_information())

[u'user_id_1', u'_id_']

In [19]:
# Three profiles with distinct user_id values, inserted in one bulk call.
user_profiles = [
    {'user_id': 1, 'name': 'Donna'},
    {'user_id': 2, 'name': 'Kenneth'},
    {'user_id': 3, 'name': 'Matthew'},
]
result = db.profiles.insert_many(user_profiles)

In [20]:
# Number of stored profiles. count() is deprecated (removed in PyMongo 4);
# count_documents({}) counts every document in the collection.
db.profiles.count_documents({})

3

In [21]:
# Demonstrate the unique index on user_id: a new id is accepted, a reused
# id is rejected with DuplicateKeyError.
new_profile = {'user_id': 4, 'name': 'Maurice'}
duplicate_profile = {'user_id': 1, 'name': 'Enrico'}
result = db.profiles.insert_one(new_profile)  # Fine: user_id 4 is not taken yet.
try:
    # user_id 1 is already stored, so this insert violates the unique index.
    result = db.profiles.insert_one(duplicate_profile)
except pymongo.errors.DuplicateKeyError as exc:
    # The error is the point of the demonstration: display it instead of
    # letting it abort a full "Run All" of the notebook.
    print("DuplicateKeyError: %s" % exc)

DuplicateKeyError: E11000 duplicate key error collection: big_data_processing.profiles index: user_id_1 dup key: { : 1 }

In [22]:
# Show every stored profile; the rejected duplicate ('Enrico') is absent.
for p in db.profiles.find():
    # Single-argument print(...) runs on Python 2 and Python 3.
    print(p)

{u'_id': ObjectId('590f427dfad5a50ab85f0d68'), u'user_id': 1, u'name': u'Donna'}
{u'_id': ObjectId('590f427dfad5a50ab85f0d69'), u'user_id': 2, u'name': u'Kenneth'}
{u'_id': ObjectId('590f427dfad5a50ab85f0d6a'), u'user_id': 3, u'name': u'Matthew'}
{u'_id': ObjectId('590f4281fad5a50ab85f0d6b'), u'user_id': 4, u'name': u'Maurice'}


In [23]:
# Materialise the cursor with list() so the matching documents themselves
# are printed; single-argument print(...) runs on Python 2 and Python 3.
print(list(db.profiles.find({"name": "Maurice"})))

[{u'_id': ObjectId('590f4281fad5a50ab85f0d6b'), u'user_id': 4, u'name': u'Maurice'}]
