In [52]:
import pymongo
from bson.code import Code
from pymongo import MongoClient,GEO2D
from bson.son import SON
import pprint

# connect to MongoDB
client = MongoClient('localhost',27017)

# get a database
db= client['yelp']

# get collections
business_pit = db.business_pit
business_pit_res = db.business_pit_res

#### count the numbers of restaurant in each neighborhood and calculate the average rating of the restaurants in each neighborhood
investors can determine in which neighborhood they can invest

In [75]:
res = business_pit.aggregate([
    {"$match":
     {"categories":{"$regex":"Restaurants"}}
     },
    {"$group":
     {"_id":"$neighborhood",
      "aveStars":{"$avg":"$stars"},
      "countRes":{"$sum":1}
      }
     },
    {"$sort":{"aveStars":-1,"countRes":-1}}
    ])
for restaurant in res:
    print(restaurant)

{'_id': 'Polish Hill', 'aveStars': 4.5, 'countRes': 2}
{'_id': 'Arlington', 'aveStars': 4.5, 'countRes': 1}
{'_id': 'Homewood', 'aveStars': 4.333333333333333, 'countRes': 6}
{'_id': 'Morningside', 'aveStars': 4.333333333333333, 'countRes': 3}
{'_id': 'Homestead', 'aveStars': 4.25, 'countRes': 2}
{'_id': 'Avalon', 'aveStars': 4.1, 'countRes': 5}
{'_id': 'Highland Park', 'aveStars': 4.0, 'countRes': 12}
{'_id': 'Hazelwood', 'aveStars': 4.0, 'countRes': 1}
{'_id': 'Allentown', 'aveStars': 3.9444444444444446, 'countRes': 9}
{'_id': 'Garfield', 'aveStars': 3.923076923076923, 'countRes': 13}
{'_id': 'Strip District', 'aveStars': 3.892857142857143, 'countRes': 84}
{'_id': 'Regent Square', 'aveStars': 3.8, 'countRes': 5}
{'_id': 'Brookline', 'aveStars': 3.793103448275862, 'countRes': 29}
{'_id': 'Lawrenceville', 'aveStars': 3.7650602409638556, 'countRes': 83}
{'_id': 'Beechview', 'aveStars': 3.7142857142857144, 'countRes': 21}
{'_id': 'Mt. Washington', 'aveStars': 3.6875, 'countRes': 16}
{'_id

#### calculate total numbers of restaurants of each category

In [54]:
mapper = Code("""
              function(){
                  this.categories.forEach(function(category){
                      emit(category,1);
                  });
               }
               """)
reducer = Code("""
               function( key, values) {
                   var total = 0;
                   for (var i=0; i<values.length; i++) {
                       total += values[i];
                   }
                   return total;
                }
                """)
categories = db.business_pit_res.map_reduce(mapper,reducer,"myresults")

for category in categories.find({}).sort("value",-1).limit(30):
    print (category)

{'_id': 'Restaurants', 'value': 2209.0}
{'_id': 'Food', 'value': 424.0}
{'_id': 'Nightlife', 'value': 412.0}
{'_id': 'Bars', 'value': 407.0}
{'_id': 'Pizza', 'value': 367.0}
{'_id': 'American (Traditional)', 'value': 351.0}
{'_id': 'Sandwiches', 'value': 331.0}
{'_id': 'American (New)', 'value': 292.0}
{'_id': 'Italian', 'value': 223.0}
{'_id': 'Breakfast & Brunch', 'value': 178.0}
{'_id': 'Fast Food', 'value': 172.0}
{'_id': 'Burgers', 'value': 171.0}
{'_id': 'Chinese', 'value': 136.0}
{'_id': 'Mexican', 'value': 122.0}
{'_id': 'Cafes', 'value': 119.0}
{'_id': 'Salad', 'value': 110.0}
{'_id': 'Coffee & Tea', 'value': 97.0}
{'_id': 'Event Planning & Services', 'value': 97.0}
{'_id': 'Seafood', 'value': 91.0}
{'_id': 'Chicken Wings', 'value': 87.0}
{'_id': 'Diners', 'value': 85.0}
{'_id': 'Delis', 'value': 79.0}
{'_id': 'Caterers', 'value': 72.0}
{'_id': 'Sushi Bars', 'value': 72.0}
{'_id': 'Japanese', 'value': 64.0}
{'_id': 'Cocktail Bars', 'value': 61.0}
{'_id': 'Mediterranean', 'valu

#### calculate the average ratings of different category
#### (what category has the best average rating)
only display the categories which count number is greater than 30

In [55]:
pipeline = [
    {"$unwind":"$categories"},
    {"$group":{"_id":"$categories","count":{"$sum":1},"aveStars":{"$avg":"$stars"}}},
    {"$match":{"count":{"$gt":30}}},
    {"$sort":SON([("aveStars",-1),("count",-1)])},
    {"$limit":30}
]
pprint.pprint(list(db.business_pit_res.aggregate(pipeline)))

[{'_id': 'Food Trucks', 'aveStars': 4.142857142857143, 'count': 35},
 {'_id': 'Specialty Food', 'aveStars': 4.127906976744186, 'count': 43},
 {'_id': 'Mediterranean', 'aveStars': 4.040983606557377, 'count': 61},
 {'_id': 'Middle Eastern', 'aveStars': 4.013513513513513, 'count': 37},
 {'_id': 'Cafes', 'aveStars': 4.0, 'count': 119},
 {'_id': 'Coffee & Tea', 'aveStars': 3.9896907216494846, 'count': 97},
 {'_id': 'Delis', 'aveStars': 3.9240506329113924, 'count': 79},
 {'_id': 'Vegetarian', 'aveStars': 3.8863636363636362, 'count': 44},
 {'_id': 'Greek', 'aveStars': 3.878787878787879, 'count': 33},
 {'_id': 'Desserts', 'aveStars': 3.8333333333333335, 'count': 48},
 {'_id': 'Food', 'aveStars': 3.7841981132075473, 'count': 424},
 {'_id': 'Thai', 'aveStars': 3.767857142857143, 'count': 56},
 {'_id': 'Bakeries', 'aveStars': 3.72972972972973, 'count': 37},
 {'_id': 'Cocktail Bars', 'aveStars': 3.7295081967213113, 'count': 61},
 {'_id': 'Barbeque', 'aveStars': 3.7222222222222223, 'count': 54},
 {

In [56]:
db.business_pit_res.create_index([("loc", GEO2D)])

'loc_2d'

#### show top 30 restaurant near hillman library([40.4426, -79.9542])

In [74]:
query = {"coordinate": {"$geoWithin": {"$center": [[40.4426, -79.9542], 3]}}}
for restaurant in db.business_pit_res.find(query,{"name":1,"stars":1,"_id":0})
.sort([("stars",-1)]).limit(30):
    print(restaurant)

{'name': 'S&D Cafe', 'stars': 5.0}
{'name': 'Boardwalk on Carson', 'stars': 5.0}
{'name': 'Redhawk Coffee', 'stars': 5.0}
{'name': 'The Garden Cafe', 'stars': 5.0}
{'name': 'Heavenly Espresso', 'stars': 5.0}
{'name': 'California Coffee Bar', 'stars': 5.0}
{'name': 'Commonplace Coffee', 'stars': 5.0}
{'name': "Don's Diner", 'stars': 5.0}
{'name': "Joey D's in the Park, RIDC", 'stars': 5.0}
{'name': 'Everyday Cafe', 'stars': 5.0}
{'name': 'Edgar Tacos Stand', 'stars': 5.0}
{'name': 'Tasty N Healthy', 'stars': 5.0}
{'name': 'Perspolis Hookah Lounge', 'stars': 5.0}
{'name': 'Sincerely Yogurt', 'stars': 5.0}
{'name': "Alfred's Deli & Market", 'stars': 5.0}
{'name': 'Sweet Basil and La Filipiniana', 'stars': 5.0}
{'name': 'Schenley Park Visitors Center', 'stars': 5.0}
{'name': 'Juice Up 412', 'stars': 5.0}
{'name': 'Hoi Polloi: Coffeehouse and Vegetarian Cafe', 'stars': 5.0}
{'name': "Lunardi's Ristorante", 'stars': 5.0}
{'name': 'Azure Cafe & Grill', 'stars': 5.0}
{'name': 'Pear and the Pic