In [1]:
from pprint import PrettyPrinter

import pandas as pd
from IPython.display import VimeoVideo
from pymongo import MongoClient

In [2]:
# Instantiate a PrettyPrinter, and assign it to the variable pp.
pp = PrettyPrinter(indent=2)

#Create a client that connects to the database running at localhost on port 27017.
client = MongoClient(host="localhost",port=27017)

In [3]:
#Print a list of the databases available on client.
pp.pprint(list(client.list_databases()))

[ {'empty': False, 'name': 'admin', 'sizeOnDisk': 49152},
  {'empty': False, 'name': 'air-quality', 'sizeOnDisk': 8409088},
  {'empty': False, 'name': 'config', 'sizeOnDisk': 110592},
  {'empty': False, 'name': 'local', 'sizeOnDisk': 73728}]


In [4]:
db = client['air-quality'] # Assign the "air-quality" database to the variable db.
db.list_collection_names() #Use the list_collections method to print a list of the collections available in db.

['nairobi']

In [5]:
nairobi = db['nairobi'] # Assign the "nairobi" collection in db to the variable name nairobi.
nairobi.count_documents({}) # Use the count_documents method to see how many documents are in the nairobi collection.

202212

In [6]:
# Use the find_one method to retrieve one document from the nairobi collection, and assign it to the variable name result.
result = nairobi.find_one({})
pp.pprint(result)

{ '_id': ObjectId('678f6b097a7ae64bc40c06a8'),
  'metadata': { 'lat': -1.3,
                'lon': 36.785,
                'measurement': 'humidity',
                'sensor_id': 21,
                'sensor_type': 'Unknown',
                'site': 6},
  'timestamp': datetime.datetime(2018, 9, 1, 0, 0, 1),
  'value': 73.2}


In [7]:
# Use the distinct method to determine how many sensor sites are included in the nairobi collection.
nairobi.distinct('metadata.site')

[6, 29]

In [8]:
# Use the count_documents method to determine how many readings there are for each site in the nairobi collection.
print("Documents from site 6:", nairobi.count_documents({'metadata.site':6}))
print("Documents from site 29:", nairobi.count_documents({'metadata.site':29}))


Documents from site 6: 70360
Documents from site 29: 131852


In [9]:
# Use the aggregate method to determine how many readings there are for each site in the nairobi collection.
result = nairobi.aggregate([{"$group":{"_id":"$metadata.site","count":{"$count":{}}}}])
pp.pprint(list(result))

[{'_id': 29, 'count': 131852}, {'_id': 6, 'count': 70360}]


In [10]:
# Use the distinct method to determine how many types of measurements have been taken in the nairobi collection.
nairobi.distinct("metadata.measurement")

['P1', 'P2', 'humidity', 'temperature']

In [11]:
# Use the find method to retrieve the PM 2.5 readings from all sites. Be sure to limit your results to 3 records only.
result = nairobi.find({"metadata.measurement":"P2"}).limit(5)
pp.pprint(list(result))

[ { '_id': ObjectId('678f6b097a7ae64bc40c06b0'),
    'metadata': { 'lat': -1.3,
                  'lon': 36.785,
                  'measurement': 'P2',
                  'sensor_id': 74,
                  'sensor_type': 'Unknown',
                  'site': 6},
    'timestamp': datetime.datetime(2018, 9, 1, 0, 5, 8),
    'value': 34.71},
  { '_id': ObjectId('678f6b097a7ae64bc40c06b1'),
    'metadata': { 'lat': -1.3,
                  'lon': 36.785,
                  'measurement': 'P2',
                  'sensor_id': 28,
                  'sensor_type': 'Unknown',
                  'site': 6},
    'timestamp': datetime.datetime(2018, 9, 1, 0, 10, 8),
    'value': 31.88},
  { '_id': ObjectId('678f6b097a7ae64bc40c06b4'),
    'metadata': { 'lat': -1.3,
                  'lon': 36.785,
                  'measurement': 'P2',
                  'sensor_id': 42,
                  'sensor_type': 'Unknown',
                  'site': 6},
    'timestamp': datetime.datetime(2018, 9, 1, 0, 15, 10),
 

In [12]:
# Use the aggregate method to calculate how many readings there are for each type 
# ("humidity", "temperature", "P2", and "P1") in site 6
result = nairobi.aggregate(
[
    {"$match":{"metadata.site":6}},
    {"$group":{"_id":"$metadata.measurement","count":{"$count":{}}}}
]
)
pp.pprint(list(result))

[ {'_id': 'P2', 'count': 17861},
  {'_id': 'temperature', 'count': 17581},
  {'_id': 'humidity', 'count': 17477},
  {'_id': 'P1', 'count': 17441}]


In [13]:
# Use the aggregate method to calculate how many readings there are for each type ("humidity", "temperature", "P2", and "P1") in site 29.
result = nairobi.aggregate(
[
    {"$match":{"metadata.site":29}},
    {"$group":{"_id":"$metadata.measurement","count":{"$count":{}}}}
]
)
pp.pprint(list(result))

[ {'_id': 'temperature', 'count': 32917},
  {'_id': 'humidity', 'count': 32983},
  {'_id': 'P2', 'count': 32943},
  {'_id': 'P1', 'count': 33009}]


In [14]:
# Use the find method to retrieve the PM 2.5 readings from site 29. Be sure to limit your results to 3 records only. 
# Since we won't need the metadata for our model, use the projection argument to limit the results to the "P2" and "timestamp" 
# keys only.
result = nairobi.find(
    {"metadata.site":29,"metadata.measurement":"P2"}
    ,projection={"P2":1,"timestamp":1,"_id":0}
)
pp.pprint(result.next())

{'timestamp': datetime.datetime(2018, 11, 2, 15, 5, 7)}


In [15]:
df = pd.DataFrame(result).set_index('timestamp')
df.head()

2018-11-02 15:10:14
2018-11-02 15:15:15
2018-11-02 15:20:20
2018-11-02 15:25:37
2018-11-02 15:30:38
