In [1]:
import pymongo
import json
import pprint

In [2]:
# Open a client against the local MongoDB server, then take a handle to the
# Yelp database that every later cell queries through `mydb`.
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient.get_database("yelpdb")

In [3]:
# List the collections available in the Yelp database.
# `Database.collection_names()` was deprecated in PyMongo 3.7 and removed in
# PyMongo 4.0; `list_collection_names()` is the supported replacement.
mydb.list_collection_names()

['photo', 'user', 'business', 'review', 'tip', 'checkin']

Perform MongoDB queries within a single collection

In [4]:
# Top 10 open Las Vegas businesses whose categories mention Chinese or
# Japanese and that have more than 500 reviews, ordered by star rating
# (highest first). Only the name (plus the default `_id`) is returned.
myquery = dict(
    categories={"$regex": ".*Chinese.*|.*Japanese.*"},
    city="Las Vegas",
    review_count={"$gt": 500},
    is_open=1,
)

query_result = (
    mydb["business"]
    .find(filter=myquery, projection={"name": 1})
    .sort("stars", pymongo.DESCENDING)
    .limit(10)
)
for business in query_result:
    pprint.pprint(business)

{'_id': ObjectId('5c733b861d28450ce0420331'), 'name': 'Pan Asian'}
{'_id': ObjectId('5c733b881d28450ce0421986'), 'name': 'Island Style Restaurant'}
{'_id': ObjectId('5c733b8e1d28450ce0426356'),
 'name': 'Kaizen Fusion Roll and Sushi'}
{'_id': ObjectId('5c733b911d28450ce042825e'), 'name': 'Cafe Sanuki'}
{'_id': ObjectId('5c733b931d28450ce042977d'),
 'name': "Naked Fish's Sushi & Grill"}
{'_id': ObjectId('5c733b951d28450ce042b05f'), 'name': 'Tonkatsu Kiyoshi'}
{'_id': ObjectId('5c733b9a1d28450ce042f02c'), 'name': 'Shang Artisan Noodle'}
{'_id': ObjectId('5c733b9a1d28450ce042f092'), 'name': 'Jjanga Steak & Sushi'}
{'_id': ObjectId('5c733b9b1d28450ce042f6c4'),
 'name': 'Soho Japanese Restaurant'}
{'_id': ObjectId('5c733b9c1d28450ce04307ed'), 'name': 'Sweets Raku'}


In [5]:
# Total review count per city across businesses that are still open,
# ordered from the largest total to the smallest.
myaggregate = [
    # Keep open businesses only.
    {"$match": {"is_open": 1}},
    # One output document per city, summing that city's review counts.
    {"$group": {"_id": "$city", "total": {"$sum": "$review_count"}}},
    # Largest totals first.
    {"$sort": {"total": pymongo.DESCENDING}},
]

aggregate_result = mydb["business"].aggregate(myaggregate)
for city_total in aggregate_result:
    print(city_total)

{'total': 1700331, '_id': 'Las Vegas'}
{'total': 621906, '_id': 'Phoenix'}
{'total': 422446, '_id': 'Toronto'}
{'total': 317300, '_id': 'Scottsdale'}
{'total': 265038, '_id': 'Charlotte'}
{'total': 191609, '_id': 'Pittsburgh'}
{'total': 185665, '_id': 'Henderson'}
{'total': 168957, '_id': 'Tempe'}
{'total': 145089, '_id': 'Mesa'}
{'total': 131426, '_id': 'Montréal'}
{'total': 130593, '_id': 'Chandler'}
{'total': 111737, '_id': 'Gilbert'}
{'total': 97702, '_id': 'Cleveland'}
{'total': 87573, '_id': 'Madison'}
{'total': 86010, '_id': 'Glendale'}
{'total': 81757, '_id': 'Calgary'}
{'total': 49620, '_id': 'Peoria'}
{'total': 47542, '_id': 'Mississauga'}
{'total': 43826, '_id': 'North Las Vegas'}
{'total': 42425, '_id': 'Markham'}
{'total': 29660, '_id': 'Surprise'}
{'total': 25601, '_id': 'Goodyear'}
{'total': 25216, '_id': 'Champaign'}
{'total': 19877, '_id': 'Richmond Hill'}
{'total': 19860, '_id': 'Avondale'}
{'total': 16835, '_id': 'North York'}
{'total': 16305, '_id': 'Concord'}
{'tot

Join business names to the aggregated tips to find the names of the businesses whose tips received the most compliments

In [6]:
# Aggregate tips: sum `compliment_count` over all tips of each business and
# write the per-business totals to the `tip_aggregate_by_business` collection.
group_query = [
    {
        "$group": {
            "_id": "$business_id",
            "compliment_total": {"$sum": "$compliment_count"},
        }
    },
    # `$out` persists the grouped documents as a collection of their own.
    {"$out": "tip_aggregate_by_business"},
]

group_result = mydb["tip"].aggregate(group_query)

In [19]:
# Build a business_id -> name lookup collection from `business`:
# project the two fields of interest, key the documents by `business_id`
# (taking the first name seen for each id), and store the result in
# the `business_name` collection.
group_query = [
    {"$project": {"business_id": 1, "name": 1}},
    {"$group": {"_id": "$business_id", "name": {"$first": "$name"}}},
    {"$out": "business_name"},
]

group_result = mydb["business"].aggregate(group_query)

In [22]:
# Join and sort: the 10 businesses with the highest compliment totals,
# each annotated with its name from the `business_name` collection.
#
# The ranking depends only on `compliment_total`, so `$sort` and `$limit` run
# before `$lookup`: the join then processes 10 documents instead of every
# document in `tip_aggregate_by_business`. `$lookup` emits exactly one output
# document per input document and leaves `compliment_total` untouched, so the
# result is the same as joining first and trimming afterwards.
join_query = [
    {'$sort': {'compliment_total': -1}},
    {'$limit': 10},
    {
        # Match on `_id`, which holds the business id in both collections.
        '$lookup': {
            'from': 'business_name',
            'localField': '_id',
            'foreignField': '_id',
            'as': 'business_info'
        }
    },
    {
        # Keep only the id, the compliment total and the joined business name.
        '$project': {
            'compliment_total': 1,
            'business_info.name': 1,
            '_id': 1
        }
    },
]

join_result = mydb['tip_aggregate_by_business'].aggregate(join_query)

In [23]:
# Pretty-print each joined document (business id, name and compliment total).
for joined_doc in join_result:
    pprint.pprint(joined_doc)

{'_id': 'BQqwIYQuo2W94smjrBjy5g',
 'business_info': [{'name': 'Il Chianti Italian Steak & Seafood'}],
 'compliment_total': 204}
{'_id': 'FaHADZARwnY4yvlvpnsfGA',
 'business_info': [{'name': 'McCarran International Airport'}],
 'compliment_total': 162}
{'_id': 'RESDUcs7fIiihp38-d6_6g',
 'business_info': [{'name': 'Bacchanal Buffet'}],
 'compliment_total': 150}
{'_id': '55E0-qUHa7Kzqz8rOhbdBQ',
 'business_info': [{'name': 'Costco Gasoline'}],
 'compliment_total': 120}
{'_id': 'MpmFFw0GE_2iRFPdsRpJbA',
 'business_info': [{'name': 'XS Nightclub'}],
 'compliment_total': 111}
{'_id': 'JmI9nslLD7KZqRr__Bg6NQ',
 'business_info': [{'name': 'Phoenix Sky Harbor International Airport'}],
 'compliment_total': 105}
{'_id': 'DN0b4Un8--Uf6SEWLeh0UA',
 'business_info': [{'name': "Amy's Baking Company"}],
 'compliment_total': 102}
{'_id': 'QsKhwKYB3YeWXqpIPd5QMg',
 'business_info': [{'name': 'Sonoma Cellar'}],
 'compliment_total': 93}
{'_id': 'QhXBIQWUmQxuVErdwY2QVw',
 'business_info': [{'name': 'Manila