In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import json
import requests

import pymongo
from pymongo import MongoClient
from pprint import pprint


# Reference only: mongo-shell pipelines kept as a bare string, so Python never
# runs them. Both build a GeoJSON-style `geoloc` Point from each document's
# longitude/latitude; the second one writes its result to the "YelpGeo"
# collection via $out.
# NOTE(review): in the first pipeline the $project keys "<$longitude>" and
# "<$latitude>" look like unreplaced placeholders -- presumably "longitude" and
# "latitude" were intended; confirm before reusing it.
'''
db.Yelp.aggregate([
  {
    "$set": {
      "geoloc" : {
      "type" : "Point",
      "coordinates" : [
        {"$toDecimal" : "$longitude"},
        {"$toDecimal" : "$latitude"}
        ]
      }
}}, {
    "$project" : { "<$longitude>": 0, "<$latitude>": 0}
}])


db.Yelp.aggregate([
   { $addFields: { 
    geoloc: {
      "type" : "Point",
      "coordinates" : [
        {"$toDecimal" : "$longitude"},
        {"$toDecimal" : "$latitude"}
        ]
      }
    }
  },
  { $out : "YelpGeo" }
])
'''

# PyMongo Setup and Querying

In [15]:
# Open a client with PyMongo's default connection settings, then resolve the
# database and the geo-enabled collection that every later cell queries.
client = MongoClient()
db = client.get_database('DS4300')
yelp_collection = db.get_collection('YelpGeo')

In [36]:
# Fetch a single document (no filter) and pretty-print it to show the schema.
sample_doc = yelp_collection.find_one()
pprint(sample_doc)

{'_id': ObjectId('622198b2b5127b859164a21e'),
 'address': '921 Pearl St',
 'attributes': {'Alcohol': "'beer_and_wine'",
                'Ambience': "{'touristy': False, 'hipster': False, 'romantic': "
                            "False, 'divey': False, 'intimate': False, "
                            "'trendy': False, 'upscale': False, 'classy': "
                            "False, 'casual': True}",
                'BikeParking': 'True',
                'BusinessAcceptsBitcoin': 'False',
                'BusinessAcceptsCreditCards': 'True',
                'BusinessParking': "{'garage': False, 'street': True, "
                                   "'validated': False, 'lot': False, 'valet': "
                                   'False}',
                'Caters': 'True',
                'DogsAllowed': 'False',
                'GoodForMeal': "{'dessert': False, 'latenight': False, "
                               "'lunch': False, 'dinner': False, 'brunch': "
                              

#### Q1. How many establishments have 5 stars?

In [17]:
# Count on the server instead of pulling every matching document into a
# Python list just to measure its length.
num_5_stars = yelp_collection.count_documents({'stars': 5})
num_5_stars

19953

#### Q2. Which establishment has the least stars?

In [18]:
# Sort ascending on `stars` and keep only the first document returned.
# Many establishments can share the lowest rating; this shows just one of them.
lowest_first = [('stars', pymongo.ASCENDING)]
least_stars = yelp_collection.find_one(sort=lowest_first)
pprint(least_stars)

{'_id': ObjectId('622198b2b5127b859164a251'),
 'address': '3262 Vineland Rd, Ste 106',
 'attributes': {'Alcohol': "u'none'",
                'Ambience': "{'romantic': False, 'intimate': False, "
                            "'touristy': False, 'hipster': False, 'divey': "
                            "False, 'classy': False, 'trendy': False, "
                            "'upscale': False, 'casual': False}",
                'RestaurantsAttire': "u'casual'"},
 'business_id': 'w4qVflIAbdklzG3mnKmQsg',
 'categories': 'Italian, Restaurants',
 'city': 'Orlando',
 'geoloc': {'coordinates': [Decimal128('-81.4439500000000'),
                            Decimal128('28.5205000000000')],
            'type': 'Point'},
 'hours': None,
 'is_open': 1,
 'latitude': 28.5205,
 'longitude': -81.44395,
 'name': 'Magical Pizza Express',
 'postal_code': '32811',
 'review_count': 8,
 'stars': 1,
 'state': 'FL'}


#### Q3. What is the count of establishments for each star rating?

In [9]:
# Bucket every establishment by its star rating, count each bucket, and list
# the buckets from the lowest rating to the highest.
count_per_rating = {
    '$group': {
        '_id': '$stars',
        'num_establishments': {'$sum': 1},
    }
}
lowest_rating_first = {'$sort': {'_id': 1}}

star_counts = yelp_collection.aggregate([count_per_rating, lowest_rating_first])

for rating_bucket in star_counts:
    pprint(rating_bucket)

{'_id': 1, 'num_establishments': 1686}
{'_id': 1.5, 'num_establishments': 4157}
{'_id': 2, 'num_establishments': 8523}
{'_id': 2.5, 'num_establishments': 13720}
{'_id': 3, 'num_establishments': 21583}
{'_id': 3.5, 'num_establishments': 28835}
{'_id': 4, 'num_establishments': 34056}
{'_id': 4.5, 'num_establishments': 28072}
{'_id': 5, 'num_establishments': 19953}


#### Q4. What is the count of establishments for each star rating in Boston?

In [49]:
# Same per-rating count as before, restricted to Boston. The $match stage runs
# first so only Boston documents reach $group (after grouping, `city` is gone).
only_boston = {'$match': {'city': 'Boston'}}
count_per_rating = {
    '$group': {
        '_id': '$stars',
        'num_establishments': {'$sum': 1},
    }
}
lowest_rating_first = {'$sort': {'_id': 1}}

star_counts = yelp_collection.aggregate(
    [only_boston, count_per_rating, lowest_rating_first]
)

for rating_bucket in star_counts:
    pprint(rating_bucket)

{'_id': 1, 'num_establishments': 97}
{'_id': 1.5, 'num_establishments': 194}
{'_id': 2, 'num_establishments': 395}
{'_id': 2.5, 'num_establishments': 705}
{'_id': 3, 'num_establishments': 1233}
{'_id': 3.5, 'num_establishments': 1674}
{'_id': 4, 'num_establishments': 1788}
{'_id': 4.5, 'num_establishments': 1312}
{'_id': 5, 'num_establishments': 865}


#### Q4b. How many establishments are open for all meals? (unfinished — the cell below only counts Boston establishments)

In [43]:
# Count on the server rather than materialising every matching document.
# TODO: this filter only selects Boston establishments; it does not yet look
# at the meal attributes, so it does not answer the "open for all meals"
# question in the heading above.
num_all_open = yelp_collection.count_documents({ 'city' : 'Boston' })
num_all_open

8263

In [14]:
# Reference sample (not executable code): in the example document the
# GoodForMeal attribute is stored as a *string* that looks like a dict literal,
# not as a nested document, so a meal-based filter has to work on that text.
# 'GoodForMeal': "{'dessert': False, 'latenight': False, "
#                                "'lunch': False, 'dinner': False, 'brunch': "
#                                "False, 'breakfast': False}",

IndentationError: unexpected indent (<ipython-input-14-6629043e1a76>, line 2)

#### Q5. How many locations are within 5000 meters of Mr G's Pizza & Subs?

In [41]:

# Look up the reference business and reuse its stored GeoJSON point as the
# centre of the search. Fail with a clear message if the name is not found,
# instead of an opaque "'NoneType' object is not subscriptable".
target_name = 'Mr G\'s Pizza & Subs'
target_doc = yelp_collection.find_one({'name' : target_name})
if target_doc is None:
    raise LookupError(
        'No establishment named {!r} found in the collection'.format(target_name)
    )
target = target_doc['geoloc']

# $geometry and $maxDistance are siblings inside $near; for a GeoJSON point
# $maxDistance is expressed in meters.
# NOTE(review): $near needs a geospatial index on `geoloc`; creating that index
# is not shown anywhere in this notebook -- confirm it exists on a fresh setup.
# Only `_id` is projected because the documents are merely counted.
res = yelp_collection.find(
    {
        'geoloc': {
            '$near': {
                '$geometry': target,
                '$maxDistance': 5000,
            }
        }
    },
    {'_id': 1},
)

# Nothing in the filter excludes the reference business itself, so it is
# presumably part of this count as well.
len(list(res))

1122

In [39]:
# Show the GeoJSON point stored for the reference business.
mr_g_doc = yelp_collection.find_one({'name' : 'Mr G\'s Pizza & Subs'})
pprint(mr_g_doc['geoloc'])

{'coordinates': [Decimal128('-70.9734380000000'),
                 Decimal128('42.5411550000000')],
 'type': 'Point'}
