# Yelp Data Analysis in MongoDB

The aim of this notebook is to perform similar aggregations as in Spark but on data stored in NoSQL database.

I have aimed to accomplish three tasks in this work:
1. Accessing hierarical records in MongoDB 
2. Grouping aggregation pipeline in MongoDB
3. Filtering data and then performing aggregations(Creating an aggregation pipeline)


## Connecting to LocalHost and fetching Collection

In [2]:
# We have already stored the data in MongoDB via MOngo import on command line.
#It has created a collection yelp which is analogous to table in sql 
#The only difference being that the structure of each observation which is known as document in MongoDB is flexible
def get_db():
    # For local use
    from pymongo import MongoClient
    client = MongoClient('localhost:27017')
    # 'yelp' here is the database name. It will be created if it does not exist.
    db = client.yelp
    return db

if __name__ == "__main__":
    # For local use
    db = get_db() 
    #extract the data stored in the database
    business = db.business.find()

## Checking if the data has been loaded correctly in MongoDB

In [2]:
#Now we print the data that we have imported
#At this stage we will perform some filters save them as JSON Files and extract them using spark
for d in db.business.find()[:3]:
    print(d)

{'_id': ObjectId('5a28dea5d31c5481b61744e1'), 'business_id': 'v2WhjAB3PIBA8J8VxG3wEg', 'name': 'The Tea Emporium', 'neighborhood': 'Riverdale', 'address': '337 Danforth Avenue', 'city': 'Toronto', 'state': 'ON', 'postal_code': 'M4K 1N7', 'latitude': 43.6771258, 'longitude': -79.3532848, 'stars': 4.5, 'review_count': 7, 'is_open': 0, 'attributes': {'BusinessParking': {'garage': False, 'street': True, 'validated': False, 'lot': False, 'valet': False}, 'WiFi': 'no', 'OutdoorSeating': False, 'BusinessAcceptsCreditCards': True, 'RestaurantsPriceRange2': 2, 'BikeParking': True, 'WheelchairAccessible': True}, 'categories': ['Food', 'Coffee & Tea'], 'hours': {'Monday': '10:00-19:00', 'Tuesday': '10:00-19:00', 'Friday': '10:00-19:00', 'Wednesday': '10:00-19:00', 'Thursday': '10:00-19:00', 'Sunday': '12:00-17:00', 'Saturday': '10:00-18:00'}}
{'_id': ObjectId('5a28dea5d31c5481b61744e2'), 'business_id': 'mLwM-h2YhXl2NCgdS84_Bw', 'name': 'South Florida Style Chicken & Ribs', 'neighborhood': 'Eastla

## Simple Filter Queries

In [3]:
# A simple filter where stars is equal to 4.5
query = {"stars": 4.5}
projection = {"_id" : 0, "name" :1, "stars" : 2}
restaurant = db.business.find(query, projection)
for a in restaurant:
    print(a)

{'name': 'The Tea Emporium', 'stars': 4.5}
{'name': 'South Florida Style Chicken & Ribs', 'stars': 4.5}
{'name': 'Blimpie', 'stars': 4.5}
{'name': 'Red Rock Bowling UYE Part 2', 'stars': 4.5}
{'name': 'Artificial Grass Masters', 'stars': 4.5}
{'name': 'Sq Cutz', 'stars': 4.5}
{'name': 'Anasazi Foundation', 'stars': 4.5}
{'name': 'QQ Foot Spa', 'stars': 4.5}
{'name': 'Bella Christine Salon and Spa', 'stars': 4.5}
{'name': "Gunnell's Tire & Auto", 'stars': 4.5}
{'name': "Schenk's Corners Coin-Op Laundromat", 'stars': 4.5}
{'name': "Randall's Fine Art & Custom Framing", 'stars': 4.5}
{'name': 'Brautatelier Tara', 'stars': 4.5}
{'name': 'Tausche', 'stars': 4.5}
{'name': 'The Pittsburgh Popcorn Company', 'stars': 4.5}
{'name': 'Executive Training Solutions', 'stars': 4.5}
{'name': 'Chuparosa Park', 'stars': 4.5}
{'name': "Don Ruben's Mexican Restaurant", 'stars': 4.5}
{'name': 'Smallcakes Cave Creek', 'stars': 4.5}
{'name': 'Salon Picasso', 'stars': 4.5}
{'name': 'Mariscos Playa Escondida',

In [4]:
# Filter where stars is less than 4.5
query_num = {"stars": {"$lt" : 4.5}}
projection_num = {"_id" : 0, "name" :1, "stars" : 2, "categories": 3}
top_restaurants = db.business.find(query_num, projection_num)

In [5]:
for top in top_restaurants[:3]:
    print(top)

{'name': 'Richmond Town Square', 'stars': 2.0, 'categories': ['Shopping', 'Shopping Centers']}
{'name': 'TRUmatch', 'stars': 3.0, 'categories': ['Professional Services', 'Matchmakers']}
{'name': 'Safeway', 'stars': 2.0, 'categories': ['Flowers & Gifts', 'Bakeries', 'Grocery', 'Shopping', 'Food', 'Florists']}


## Group based on business category and city

In [37]:
db.business.aggregate([
    #Flatten the categories
    { "$unwind" : "$categories" },
    {"$group" :{"_id":{ "city" : "$city", 
                        "categories" : "$categories" , #Group them by city and categories
                        },
                        "avgStars" :{"$avg":"$stars"}, #Find the avegrage stars and average reviews
                       "avgReview": {"$avg" : "$review_count"}
                
               }
    },
    { "$out" : "business_city" }
])

<pymongo.command_cursor.CommandCursor at 0x10773f7f0>

In [39]:
for j in db.Q1.find():
    print(j)

{'_id': {'city': 'Inverness', 'categories': 'Zoos'}, 'avgStars': 3.5, 'avgReview': 8.0}
{'_id': {'city': 'Westmount', 'categories': 'Shoe Stores'}, 'avgStars': 3.5, 'avgReview': 4.0}
{'_id': {'city': 'Berea', 'categories': 'Pet Groomers'}, 'avgStars': 3.5, 'avgReview': 6.0}
{'_id': {'city': 'Berea', 'categories': 'Pet Services'}, 'avgStars': 3.5, 'avgReview': 6.0}
{'_id': {'city': 'Belmont', 'categories': 'Gun/Rifle Ranges'}, 'avgStars': 2.5, 'avgReview': 20.0}
{'_id': {'city': 'Westlake', 'categories': 'Banks & Credit Unions'}, 'avgStars': 2.5, 'avgReview': 5.0}
{'_id': {'city': 'Las Vegas', 'categories': 'EV Charging Stations'}, 'avgStars': 3.0, 'avgReview': 3.0}
{'_id': {'city': 'Filderstadt', 'categories': 'Hospitals'}, 'avgStars': 4.0, 'avgReview': 3.0}
{'_id': {'city': 'Stouffville', 'categories': 'Salad'}, 'avgStars': 3.0, 'avgReview': 12.0}
{'_id': {'city': 'Lowell', 'categories': 'Coffee & Tea'}, 'avgStars': 4.5, 'avgReview': 15.0}
{'_id': {'city': 'Lowell', 'categories': 'Bak

{'_id': {'city': 'Ludwigsburg', 'categories': 'Saunas'}, 'avgStars': 4.5, 'avgReview': 3.0}
{'_id': {'city': 'Fountain Hills', 'categories': 'Dance Studios'}, 'avgStars': 5.0, 'avgReview': 5.0}
{'_id': {'city': 'Oakwood Village', 'categories': 'Videographers'}, 'avgStars': 4.0, 'avgReview': 4.0}
{'_id': {'city': 'Oakwood Village', 'categories': 'DJs'}, 'avgStars': 4.0, 'avgReview': 4.0}
{'_id': {'city': 'Brampton', 'categories': 'Videos & Video Game Rental'}, 'avgStars': 3.5, 'avgReview': 7.0}
{'_id': {'city': 'Anthem', 'categories': 'Convenience Stores'}, 'avgStars': 2.5, 'avgReview': 8.0}
{'_id': {'city': 'Anthem', 'categories': 'Photography Stores & Services'}, 'avgStars': 2.5, 'avgReview': 8.0}
{'_id': {'city': 'Guadalupe', 'categories': 'Tires'}, 'avgStars': 5.0, 'avgReview': 10.0}
{'_id': {'city': 'Fairview Park', 'categories': 'Parks'}, 'avgStars': 4.5, 'avgReview': 3.0}
{'_id': {'city': 'Fairview Park', 'categories': 'Venues & Event Spaces'}, 'avgStars': 4.5, 'avgReview': 3.0}


{'_id': {'city': 'Brampton', 'categories': 'Juice Bars & Smoothies'}, 'avgStars': 3.75, 'avgReview': 4.166666666666667}
{'_id': {'city': 'Reminderville', 'categories': 'Oil Change Stations'}, 'avgStars': 4.0, 'avgReview': 7.0}
{'_id': {'city': 'Beloeil', 'categories': 'Thai'}, 'avgStars': 4.0, 'avgReview': 6.0}
{'_id': {'city': 'Mint Hill', 'categories': 'Hair Stylists'}, 'avgStars': 3.0, 'avgReview': 4.0}
{'_id': {'city': 'Beloeil', 'categories': 'Canadian (New)'}, 'avgStars': 4.0, 'avgReview': 6.0}
{'_id': {'city': 'Medina', 'categories': 'Heating & Air Conditioning/HVAC'}, 'avgStars': 3.1666666666666665, 'avgReview': 4.666666666666667}
{'_id': {'city': 'Lyndhurst', 'categories': 'Gyms'}, 'avgStars': 2.5, 'avgReview': 33.0}
{'_id': {'city': 'Monroeville', 'categories': 'Local Fish Stores'}, 'avgStars': 4.0, 'avgReview': 17.0}
{'_id': {'city': 'Dorval', 'categories': 'Specialty Food'}, 'avgStars': 3.0, 'avgReview': 13.0}
{'_id': {'city': 'Lyndhurst', 'categories': 'Trainers'}, 'avgSta

{'_id': {'city': 'Sun City West', 'categories': 'Real Estate'}, 'avgStars': 2.0, 'avgReview': 3.0}
{'_id': {'city': 'Sun City West', 'categories': 'Mortgage Brokers'}, 'avgStars': 2.0, 'avgReview': 3.0}
{'_id': {'city': 'Cleveland', 'categories': 'Brazilian'}, 'avgStars': 3.625, 'avgReview': 66.25}
{'_id': {'city': 'chandler', 'categories': 'American (New)'}, 'avgStars': 3.0, 'avgReview': 40.0}
{'_id': {'city': 'Las Vegas', 'categories': 'Pub Food'}, 'avgStars': 3.5, 'avgReview': 33.333333333333336}
{'_id': {'city': 'Highland Heights', 'categories': 'Golf'}, 'avgStars': 3.0, 'avgReview': 6.0}
{'_id': {'city': 'Avondale', 'categories': 'Skin Care'}, 'avgStars': 3.75, 'avgReview': 19.75}
{'_id': {'city': 'chandler', 'categories': 'American (Traditional)'}, 'avgStars': 3.0, 'avgReview': 40.0}
{'_id': {'city': 'chandler', 'categories': 'Sports Bars'}, 'avgStars': 3.0, 'avgReview': 40.0}
{'_id': {'city': 'Phoenix', 'categories': 'Opera & Ballet'}, 'avgStars': 4.125, 'avgReview': 13.5}
{'_id

{'_id': {'city': 'Pineville', 'categories': 'Massage Therapy'}, 'avgStars': 5.0, 'avgReview': 3.0}
{'_id': {'city': 'Carefree', 'categories': 'Bakeries'}, 'avgStars': 4.5, 'avgReview': 6.0}
{'_id': {'city': 'Glendale', 'categories': 'Cabaret'}, 'avgStars': 3.5, 'avgReview': 3.0}
{'_id': {'city': 'Moon Township', 'categories': 'Caterers'}, 'avgStars': 3.5, 'avgReview': 80.0}
{'_id': {'city': 'Peninsula', 'categories': 'Bars'}, 'avgStars': 3.75, 'avgReview': 60.0}
{'_id': {'city': 'Monroeville', 'categories': 'Auto Detailing'}, 'avgStars': 3.5, 'avgReview': 19.0}
{'_id': {'city': 'Lasalle', 'categories': 'Hardware Stores'}, 'avgStars': 3.5, 'avgReview': 3.0}
{'_id': {'city': 'Monroeville', 'categories': 'Car Wash'}, 'avgStars': 3.25, 'avgReview': 11.5}
{'_id': {'city': 'Saint-Sauveur', 'categories': 'Creperies'}, 'avgStars': 3.5, 'avgReview': 9.5}
{'_id': {'city': 'Saint-Sauveur', 'categories': 'Fondue'}, 'avgStars': 3.0, 'avgReview': 7.0}
{'_id': {'city': 'Belmont', 'categories': 'Eyewe

{'_id': {'city': 'North Royalton', 'categories': 'Sewing & Alterations'}, 'avgStars': 3.0, 'avgReview': 4.0}
{'_id': {'city': 'Middleton', 'categories': 'Department Stores'}, 'avgStars': 3.8333333333333335, 'avgReview': 17.333333333333332}
{'_id': {'city': 'East York', 'categories': 'Chocolatiers & Shops'}, 'avgStars': 4.0, 'avgReview': 4.0}
{'_id': {'city': 'Toronto', 'categories': 'Chicken Shop'}, 'avgStars': 3.6724137931034484, 'avgReview': 35.86206896551724}
{'_id': {'city': 'Böblingen', 'categories': 'Furniture Stores'}, 'avgStars': 2.5, 'avgReview': 11.0}
{'_id': {'city': 'Euclid', 'categories': 'Security Systems'}, 'avgStars': 3.5, 'avgReview': 3.0}
{'_id': {'city': 'Bellevue', 'categories': 'Nail Salons'}, 'avgStars': 3.5, 'avgReview': 9.5}
{'_id': {'city': 'Elizabeth', 'categories': 'Pet Services'}, 'avgStars': 5.0, 'avgReview': 5.0}
{'_id': {'city': 'Pittsburgh', 'categories': 'Printing Services'}, 'avgStars': 3.480769230769231, 'avgReview': 6.076923076923077}
{'_id': {'city'

{'_id': {'city': 'Carefree', 'categories': 'Doctors'}, 'avgStars': 4.0, 'avgReview': 7.0}
{'_id': {'city': 'Carefree', 'categories': 'Acupuncture'}, 'avgStars': 5.0, 'avgReview': 6.0}
{'_id': {'city': 'Woodbridge', 'categories': 'Fitness & Instruction'}, 'avgStars': 3.0, 'avgReview': 5.5}
{'_id': {'city': 'Scottsdale', 'categories': 'Animal Shelters'}, 'avgStars': 4.055555555555555, 'avgReview': 13.333333333333334}
{'_id': {'city': 'Pointe Claire', 'categories': 'Venues & Event Spaces'}, 'avgStars': 3.5, 'avgReview': 10.0}
{'_id': {'city': 'Sheffield Village', 'categories': 'Music & Video'}, 'avgStars': 5.0, 'avgReview': 4.0}
{'_id': {'city': 'Eastlake', 'categories': 'Party & Event Planning'}, 'avgStars': 3.0, 'avgReview': 6.0}
{'_id': {'city': 'Sheffield Village', 'categories': 'Mags'}, 'avgStars': 5.0, 'avgReview': 4.0}
{'_id': {'city': 'N Las Vegas', 'categories': 'Pet Services'}, 'avgStars': 3.5, 'avgReview': 21.0}
{'_id': {'city': 'Sheffield Village', 'categories': 'Comic Books'}

{'_id': {'city': 'Tempe', 'categories': 'Taxis'}, 'avgStars': 3.875, 'avgReview': 32.25}
{'_id': {'city': 'Markham', 'categories': 'Middle Eastern'}, 'avgStars': 3.5, 'avgReview': 33.42857142857143}
{'_id': {'city': 'Scottsdale', 'categories': 'Solar Installation'}, 'avgStars': 4.5, 'avgReview': 5.5}
{'_id': {'city': 'Montreal', 'categories': 'Bubble Tea'}, 'avgStars': 2.75, 'avgReview': 10.0}
{'_id': {'city': 'Stouffville', 'categories': 'Breakfast & Brunch'}, 'avgStars': 3.0, 'avgReview': 11.6}
{'_id': {'city': 'Stouffville', 'categories': 'Cafes'}, 'avgStars': 3.5, 'avgReview': 14.0}
{'_id': {'city': 'Böblingen', 'categories': 'Fast Food'}, 'avgStars': 3.5, 'avgReview': 6.0}
{'_id': {'city': 'DeForest', 'categories': 'Pets'}, 'avgStars': 4.0, 'avgReview': 8.0}
{'_id': {'city': 'Bethel Park', 'categories': 'Drugstores'}, 'avgStars': 3.0, 'avgReview': 6.0}
{'_id': {'city': 'DeForest', 'categories': 'Pet Boarding/Pet Sitting'}, 'avgStars': 4.0, 'avgReview': 8.0}
{'_id': {'city': 'Beach

{'_id': {'city': 'Higley', 'categories': 'Food Trucks'}, 'avgStars': 4.0, 'avgReview': 107.0}
{'_id': {'city': 'Huntersville', 'categories': 'Ethnic Food'}, 'avgStars': 4.0, 'avgReview': 34.0}
{'_id': {'city': 'Cave Creek', 'categories': 'Tacos'}, 'avgStars': 4.0, 'avgReview': 29.0}
{'_id': {'city': 'Leinfelden-Echterdingen', 'categories': 'Bed & Breakfast'}, 'avgStars': 3.8333333333333335, 'avgReview': 4.0}
{'_id': {'city': 'Leinfelden-Echterdingen', 'categories': 'Hotels & Travel'}, 'avgStars': 3.9166666666666665, 'avgReview': 9.666666666666666}
{'_id': {'city': 'Cleveland', 'categories': 'Rugs'}, 'avgStars': 2.5, 'avgReview': 11.0}
{'_id': {'city': 'Rocky River', 'categories': 'Discount Store'}, 'avgStars': 3.5, 'avgReview': 9.0}
{'_id': {'city': 'Peoria', 'categories': 'Office Cleaning'}, 'avgStars': 3.9, 'avgReview': 28.2}
{'_id': {'city': 'Rocky River', 'categories': 'Fruits & Veggies'}, 'avgStars': 3.5, 'avgReview': 9.0}
{'_id': {'city': 'Ostfildern', 'categories': 'Nightlife'},

{'_id': {'city': 'Monona', 'categories': 'Oil Change Stations'}, 'avgStars': 4.25, 'avgReview': 10.5}
{'_id': {'city': 'Thornhill', 'categories': 'Car Wash'}, 'avgStars': 5.0, 'avgReview': 10.0}
{'_id': {'city': 'Ashburn', 'categories': 'Farmers Market'}, 'avgStars': 4.5, 'avgReview': 9.0}
{'_id': {'city': 'Bedford Heights', 'categories': 'Beauty & Spas'}, 'avgStars': 2.0, 'avgReview': 3.0}
{'_id': {'city': 'Mentor', 'categories': 'Massage Therapy'}, 'avgStars': 4.5, 'avgReview': 4.666666666666667}
{'_id': {'city': 'Urbana', 'categories': 'Music & DVDs'}, 'avgStars': 3.5, 'avgReview': 7.0}
{'_id': {'city': 'Rantoul', 'categories': 'Automotive'}, 'avgStars': 3.0, 'avgReview': 3.0}
{'_id': {'city': 'Phoenix', 'categories': 'Skating Rinks'}, 'avgStars': 3.125, 'avgReview': 17.25}
{'_id': {'city': 'Huntersville', 'categories': 'Shades & Blinds'}, 'avgStars': 3.0, 'avgReview': 4.0}
{'_id': {'city': 'Etobicoke', 'categories': 'Test Preparation'}, 'avgStars': 5.0, 'avgReview': 3.0}
{'_id': {'

{'_id': {'city': 'Saint-Laurent', 'categories': 'Buffets'}, 'avgStars': 3.0, 'avgReview': 3.0}
{'_id': {'city': 'Nord', 'categories': 'Tours'}, 'avgStars': 4.5, 'avgReview': 11.0}
{'_id': {'city': 'Robinson Township', 'categories': 'Event Planning & Services'}, 'avgStars': 5.0, 'avgReview': 11.0}
{'_id': {'city': 'Fort Mill', 'categories': 'Pet Boarding/Pet Sitting'}, 'avgStars': 4.222222222222222, 'avgReview': 6.0}
{'_id': {'city': 'Fort Mill', 'categories': 'Office Cleaning'}, 'avgStars': 4.0, 'avgReview': 3.0}
{'_id': {'city': 'North Las Vegas', 'categories': 'Property Management'}, 'avgStars': 1.8333333333333333, 'avgReview': 10.333333333333334}
{'_id': {'city': 'North Las Vegas', 'categories': 'Real Estate Services'}, 'avgStars': 3.0, 'avgReview': 5.5}
{'_id': {'city': 'Willoughby', 'categories': 'IT Services & Computer Repair'}, 'avgStars': 5.0, 'avgReview': 3.0}
{'_id': {'city': 'Henderson', 'categories': 'Swimwear'}, 'avgStars': 4.75, 'avgReview': 3.0}
{'_id': {'city': 'Toronto

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.



{'_id': {'city': 'Oakville', 'categories': "Men's Clothing"}, 'avgStars': 2.75, 'avgReview': 3.5}
{'_id': {'city': 'Pittsburgh', 'categories': 'Dentists'}, 'avgStars': 3.772727272727273, 'avgReview': 5.974025974025974}
{'_id': {'city': 'Lakewood', 'categories': 'Karaoke'}, 'avgStars': 3.75, 'avgReview': 14.25}
{'_id': {'city': 'Markham', 'categories': 'Grocery'}, 'avgStars': 3.263888888888889, 'avgReview': 15.916666666666666}
{'_id': {'city': 'Stuttgart', 'categories': 'Used'}, 'avgStars': 4.25, 'avgReview': 4.5}
{'_id': {'city': 'Charlotte', 'categories': 'Smokehouse'}, 'avgStars': 4.25, 'avgReview': 330.75}
{'_id': {'city': 'Paradise Valley', 'categories': 'Laundry Services'}, 'avgStars': 4.0, 'avgReview': 9.0}
{'_id': {'city': 'North Huntingdon', 'categories': 'Furniture Stores'}, 'avgStars': 4.0, 'avgReview': 8.0}
{'_id': {'city': 'Matthews', 'categories': 'Fitness & Instruction'}, 'avgStars': 3.6, 'avgReview': 7.8}
{'_id': {'city': 'Tolleson', 'categories': 'Delis'}, 'avgStars': 

{'_id': {'city': 'Gibsonia', 'categories': 'Food'}, 'avgStars': 3.8095238095238093, 'avgReview': 12.0}
{'_id': {'city': 'Pineville', 'categories': 'Ethnic Food'}, 'avgStars': 4.5, 'avgReview': 17.0}
{'_id': {'city': 'Aurora', 'categories': 'Burgers'}, 'avgStars': 2.875, 'avgReview': 12.95}
{'_id': {'city': 'Midland', 'categories': 'Local Flavor'}, 'avgStars': 4.5, 'avgReview': 20.0}
{'_id': {'city': 'Madison', 'categories': 'Chinese'}, 'avgStars': 3.306451612903226, 'avgReview': 53.45161290322581}
{'_id': {'city': 'Goodyear', 'categories': 'Tex-Mex'}, 'avgStars': 2.5, 'avgReview': 40.0}
{'_id': {'city': 'Tempe', 'categories': 'Performing Arts'}, 'avgStars': 4.404761904761905, 'avgReview': 44.523809523809526}
{'_id': {'city': 'Châteauguay', 'categories': 'Hot Dogs'}, 'avgStars': 2.5, 'avgReview': 6.0}
{'_id': {'city': 'Brecksville', 'categories': 'Pet Groomers'}, 'avgStars': 4.25, 'avgReview': 5.0}
{'_id': {'city': 'Markham', 'categories': 'Education'}, 'avgStars': 3.5, 'avgReview': 10.

{'_id': {'city': 'Mississauga', 'categories': 'Korean'}, 'avgStars': 3.5384615384615383, 'avgReview': 38.96153846153846}
{'_id': {'city': 'Peoria', 'categories': 'Auto Parts & Supplies'}, 'avgStars': 3.629032258064516, 'avgReview': 30.93548387096774}
{'_id': {'city': 'Remseck', 'categories': 'Pizza'}, 'avgStars': 4.0, 'avgReview': 6.0}
{'_id': {'city': 'Montéal', 'categories': 'Restaurants'}, 'avgStars': 3.0, 'avgReview': 8.0}
{'_id': {'city': 'Las Vegas', 'categories': 'Comic Books'}, 'avgStars': 4.338235294117647, 'avgReview': 18.41176470588235}
{'_id': {'city': 'Mesa', 'categories': 'Massage Therapy'}, 'avgStars': 4.408163265306122, 'avgReview': 12.795918367346939}
{'_id': {'city': 'Cuyahoga Falls', 'categories': 'Pet Groomers'}, 'avgStars': 3.6666666666666665, 'avgReview': 5.666666666666667}
{'_id': {'city': 'Saint Laurent', 'categories': 'Vietnamese'}, 'avgStars': 3.5, 'avgReview': 11.0}
{'_id': {'city': 'Pepper Pike', 'categories': 'Food'}, 'avgStars': 3.5, 'avgReview': 15.0}
{'_

{'_id': {'city': 'Middleton', 'categories': 'Gas Stations'}, 'avgStars': 3.6666666666666665, 'avgReview': 6.0}
{'_id': {'city': 'Middleburg Heights', 'categories': 'Nightlife'}, 'avgStars': 3.3, 'avgReview': 33.5}
{'_id': {'city': 'Olmsted Falls', 'categories': 'Bridal'}, 'avgStars': 5.0, 'avgReview': 9.0}
{'_id': {'city': 'Brecksville', 'categories': 'Thai'}, 'avgStars': 3.0, 'avgReview': 28.0}
{'_id': {'city': 'Davidson', 'categories': 'Asian Fusion'}, 'avgStars': 3.5, 'avgReview': 87.0}
{'_id': {'city': 'Peoria', 'categories': 'Bakeries'}, 'avgStars': 3.25, 'avgReview': 51.333333333333336}
{'_id': {'city': 'Outremont', 'categories': 'Meat Shops'}, 'avgStars': 3.5, 'avgReview': 3.0}
{'_id': {'city': 'Peoria', 'categories': 'Patio Coverings'}, 'avgStars': 4.5, 'avgReview': 16.5}
{'_id': {'city': 'Pittsburgh', 'categories': 'Cosmetology Schools'}, 'avgStars': 3.5, 'avgReview': 5.0}
{'_id': {'city': 'North Olmsted', 'categories': 'Hookah Bars'}, 'avgStars': 4.5, 'avgReview': 3.0}
{'_id'

{'_id': {'city': 'Peoria', 'categories': 'Pets'}, 'avgStars': 4.171428571428572, 'avgReview': 15.342857142857143}
{'_id': {'city': 'Solon', 'categories': 'Hotels'}, 'avgStars': 3.0, 'avgReview': 7.25}
{'_id': {'city': 'Harrisburg', 'categories': 'Weight Loss Centers'}, 'avgStars': 4.0, 'avgReview': 4.0}
{'_id': {'city': 'Pineville', 'categories': 'Tabletop Games'}, 'avgStars': 5.0, 'avgReview': 3.0}
{'_id': {'city': 'Mesa', 'categories': 'Aquarium Services'}, 'avgStars': 4.0, 'avgReview': 41.0}
{'_id': {'city': 'Woodmere', 'categories': 'Professional Services'}, 'avgStars': 5.0, 'avgReview': 3.0}
{'_id': {'city': 'Cleveland Heights', 'categories': "Men's Hair Salons"}, 'avgStars': 4.5, 'avgReview': 67.0}
{'_id': {'city': 'Pheonix', 'categories': 'Health & Medical'}, 'avgStars': 3.5, 'avgReview': 11.0}
{'_id': {'city': 'Gilbert', 'categories': 'Auto Loan Providers'}, 'avgStars': 4.0, 'avgReview': 4.5}
{'_id': {'city': 'Gilbert', 'categories': 'Car Dealers'}, 'avgStars': 3.6, 'avgReview'

## Grouping by state and city

In [42]:
db.business.aggregate([
    { "$unwind" : "$categories" },
    {"$group" :{"_id":{ "categories" : "$categories" , 
                       "city" : "$city", 
                       "state": "$state"} ,
                        "avgStars" :{"$avg":"$stars"}}},
    
     {"$group":{
         "_id": {"city":"$_id.city",  "state" :"$_id.state"},
         "category_star": { "$push":{ "categories": "$_id.categories","avgStars": "$avgStars"}}}
     },
    { "$out" : "state_city" }
])

<pymongo.command_cursor.CommandCursor at 0x10a0a3208>

In [41]:
for i in result2:
    print(i)

# Filtering based on attribute values


In [12]:
for i in db.business.find({
      "attributes.RestaurantsTakeOut": True,
    "categories": {"$in" : ["Mexican"]}}
    ):
    print(i)

{'_id': ObjectId('5a28dea5d31c5481b61744f8'), 'business_id': 'HmI9nhgOkrXlUr6KZGZZew', 'name': "Rocky's", 'neighborhood': 'Bloomfield', 'address': '4759 Liberty Ave', 'city': 'Pittsburgh', 'state': 'PA', 'postal_code': '15224', 'latitude': 40.4613503, 'longitude': -79.9481126, 'stars': 3.0, 'review_count': 15, 'is_open': 1, 'attributes': {'RestaurantsTableService': True, 'GoodForMeal': {'dessert': False, 'latenight': False, 'lunch': False, 'dinner': False, 'breakfast': True, 'brunch': False}, 'Alcohol': 'none', 'Caters': False, 'HasTV': False, 'RestaurantsGoodForGroups': False, 'NoiseLevel': 'loud', 'WiFi': 'no', 'RestaurantsAttire': 'casual', 'RestaurantsReservations': False, 'OutdoorSeating': False, 'BusinessAcceptsCreditCards': True, 'RestaurantsPriceRange2': 1, 'BikeParking': True, 'RestaurantsDelivery': False, 'Ambience': {'romantic': False, 'intimate': False, 'classy': False, 'hipster': False, 'divey': True, 'touristy': False, 'trendy': False, 'upscale': False, 'casual': False}, 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


## Aggregation on a set of data

In [14]:
result3 = db.business.aggregate([
                                {"$match" : {"$and": 
                                             [{"attributes.RestaurantsTakeOut": True},
                                              {"categories": {"$in" : ["Mexican"]}}] }},
                                {"$group": {"_id": "Mexican" ,"avgStars":{"$avg":"$stars"}}}  
])

In [15]:
for x in result3:
    print(x)

{'_id': 'Mexican', 'avgStars': 3.436754507628294}
