# Aggregation Basics

In [17]:
from collections import OrderedDict
from pprint import pprint

from pymongo import MongoClient

# No arguments are passed, so MongoClient uses its default connection settings.
client = MongoClient()
# Handle to the "restaurants" collection inside the "agg1" database.
restaurants = client['agg1']['restaurants']

## Exercise 1

In [9]:
# Number of Korean restaurants in each borough.
korean_filter = {'$match': {'cuisine': 'Korean'}}
count_by_borough = {'$group': {'_id': '$borough', 'count': {'$sum': 1}}}
res = restaurants.aggregate([korean_filter, count_by_borough])
list(res)

[{'_id': 'Manhattan', 'count': 78},
 {'_id': 'Brooklyn', 'count': 16},
 {'_id': 'Queens', 'count': 167},
 {'_id': 'Staten Island', 'count': 1}]

## Exercise 2

In [4]:
# Count grade entries per (borough, grade) pair for Korean restaurants.
res = restaurants.aggregate([
    {'$match': {'cuisine': 'Korean'}},
    # Emit one document per entry of `grades` so every grade is counted on its own.
    {'$unwind': '$grades'},
    {'$group': {'_id': {'borough': '$borough', 'grade': '$grades.grade'}, 'count': {'$sum': 1}}},
    # $sort applies its keys in document order, and a plain dict only
    # guarantees insertion order from Python 3.7 on. An OrderedDict pins
    # "borough descending, then grade ascending" on every interpreter.
    {'$sort': OrderedDict([('_id.borough', -1), ('_id.grade', 1)])}
])
list(res)

[{'_id': {'borough': 'Staten Island', 'grade': 'A'}, 'count': 1},
 {'_id': {'borough': 'Queens', 'grade': 'A'}, 'count': 443},
 {'_id': {'borough': 'Manhattan', 'grade': 'A'}, 'count': 232},
 {'_id': {'borough': 'Brooklyn', 'grade': 'A'}, 'count': 38},
 {'_id': {'borough': 'Queens', 'grade': 'B'}, 'count': 140},
 {'_id': {'borough': 'Manhattan', 'grade': 'B'}, 'count': 49},
 {'_id': {'borough': 'Brooklyn', 'grade': 'B'}, 'count': 5},
 {'_id': {'borough': 'Queens', 'grade': 'C'}, 'count': 37},
 {'_id': {'borough': 'Manhattan', 'grade': 'C'}, 'count': 20},
 {'_id': {'borough': 'Brooklyn', 'grade': 'C'}, 'count': 1},
 {'_id': {'borough': 'Queens', 'grade': 'Not Yet Graded'}, 'count': 5},
 {'_id': {'borough': 'Manhattan', 'grade': 'Not Yet Graded'}, 'count': 2},
 {'_id': {'borough': 'Brooklyn', 'grade': 'Not Yet Graded'}, 'count': 1},
 {'_id': {'borough': 'Queens', 'grade': 'P'}, 'count': 7},
 {'_id': {'borough': 'Manhattan', 'grade': 'P'}, 'count': 4},
 {'_id': {'borough': 'Brooklyn', 'gr

## Exercise 3

### Exercise 3-1

In [18]:
# Restaurant names that appear on more than one document.
count_by_name = {'$group': {'_id': '$name', 'count': {'$sum': 1}}}
repeated_only = {'$match': {'count': {'$gt': 1}}}
res = restaurants.aggregate([count_by_name, repeated_only])
l_res = list(res)  # materialised: Exercise 3-2 reads the names from this list
len(l_res)

1363

### Exercise 3-2

In [20]:
# Average grade score per repeated name, keeping only averages above 30.
repeated_names = [doc['_id'] for doc in l_res]
res2 = restaurants.aggregate([
    {'$match': {'name': {'$in': repeated_names}}},
    {'$unwind': '$grades'},  # one document per grade entry, so $avg sees every score
    {'$group': {'_id': '$name', 'avg_score': {'$avg': '$grades.score'}}},
    {'$match': {'avg_score': {'$gt': 30}}},
])
l_res2 = list(res2)
l_res2

[{'_id': 'Filicori Zecchini', 'avg_score': 31.5},
 {'_id': 'Aki Sushi', 'avg_score': 30.25},
 {'_id': 'Lucky 13 Saloon', 'avg_score': 31.833333333333332}]

### Exercise 3-3

In [23]:
for item in l_res2:
    # Keep only the scores strictly below the average computed for this name.
    below_average = {
        '$filter': {
            'input': '$grades.score',
            'as': 'eval',
            'cond': {'$lt': ['$$eval', item['avg_score']]},
        }
    }
    pipeline = [
        {'$match': {'name': item['_id']}},
        {'$project': {'_id': 0, 'address': 1, 'bad_scores': below_average}},
        # Drop documents for which the filter left nothing.
        {'$match': {'bad_scores': {'$ne': []}}},
    ]
    res3 = restaurants.aggregate(pipeline)
    print(item['_id'], list(res3))

Lucky 13 Saloon [{'address': {'building': '273', 'coord': [-73.98782489999999, 40.6666864], 'street': '13 Street', 'zipcode': '11215'}, 'bad_scores': [24, 29, 14]}]
Filicori Zecchini [{'address': {'building': '2541', 'coord': [-73.9724773, 40.7941879], 'street': 'Broadway', 'zipcode': '10025'}, 'bad_scores': [14]}]
Aki Sushi [{'address': {'building': '212', 'coord': [-73.969291, 40.756594], 'street': 'East   52 Street', 'zipcode': '10022'}, 'bad_scores': [13, 22, 26]}]
