In [3]:
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from pprint import pprint
import json

# read in URI string from file to avoid showing password in plain text
with open("mongo-config.txt", 'r') as config:
        URI = config.read()

# Create a new client and connect to the server
client = MongoClient(URI, server_api=ServerApi('1'))

# Send a ping to test connection
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    print(e)
    
RESTAURANT_PATH = "restaurants.json"

Pinged your deployment. You successfully connected to MongoDB!


In [23]:
# remove all items in collection
"""
db = client["dh3382"]
rest_collection = db["rest_data"]
rest_collection.delete_many({})
"""

DeleteResult({'n': 15088, 'electionId': ObjectId('7fffffff00000000000000e5'), 'opTime': {'ts': Timestamp(1701189224, 10270), 't': 229}, 'ok': 1.0, '$clusterTime': {'clusterTime': Timestamp(1701189224, 10270), 'signature': {'hash': b'k\x08\xecDfbdaJ\xf7:\x82G!\xe91\x92\xac\xa1,', 'keyId': 7261225766799540226}}, 'operationTime': Timestamp(1701189224, 10270)}, acknowledged=True)

In [4]:
# convenience function to print out num_to_print sample records and number of docs in cursor
def print_sample_and_total(cursor, num_to_print=5):
    num_docs = 0
    docs_to_print = []
    for doc in cursor:
        if(num_docs < num_to_print):
            docs_to_print.append(doc)
        num_docs = num_docs + 1

    # print number of docs returned by find() query
    print("\nTotal number of documents returned: " + str(num_docs) )
    print("\nSample docs:\n")

    for doc in docs_to_print:
        # easier to read JSON formatting
        pprint(doc)

## 1. Load the restaurants data into a collection

restaurants.json file is loaded into an array of JSON records and uploaded to remote MongoDB collection dh3382.rest_data. insert_many statement is commented out to avoid mistakenly uploading duplicate records

In [5]:
# read in restaurant data file, convert to list of JSON records to upload to DB
rest_data = []
with open(RESTAURANT_PATH, 'r') as file:
    for line in file:
        rest_data.append(json.loads(line) )
        
# find or create database 
db = client["dh3382"]

# find or create collection
rest_collection = db["rest_data"]

# commented out to avoid reinserting data
# rest_collection.insert_many(rest_data)

## 2. Count the number of documents in the collection

I first print out the length of the array of JSON records to ensure it matches up with the records uploaded to dh3382.rest_data. I then call count_documents on the rest_data collection object with an empty query to return a full count

In [6]:
# first check number of records in JSON array 
print("Number of records in rest_data array: " + str(len(rest_data) ) )

# count number of records uploaded to dh3382.rest_data and make sure it matches
print("Number of records in MongoDB dh3382.rest_data collection: " + str(rest_collection.count_documents({}) ) )

Number of records in rest_data array: 3772
Number of records in MongoDB dh3382.rest_data collection: 3772


## 3. Find all the documents in the collection

I use find() with an empty query to return a cursor pointing to the entire dh3382.rest_data collection. I then print out 5 documents to check that full documents were indeed returned, and count the number of documents returned to ensure that it matches up with the total count calculated in the previous cell

In [7]:
# returns a cursor to all documents in dh3382.rest_data collection
all_docs_cursor = rest_collection.find({})

# convenience function defined above, prints 5 documents by default and total number returned by query
print_sample_and_total(all_docs_cursor)


Total number of documents returned: 3772

Sample docs:

{'_id': ObjectId('6566166d88b337b1d7dbd2b1'),
 'address': {'building': '1007',
             'coord': [-73.856077, 40.848447],
             'street': 'Morris Park Ave',
             'zipcode': '10462'},
 'borough': 'Bronx',
 'cuisine': 'Bakery',
 'grades': [{'date': {'$date': 1393804800000}, 'grade': 'A', 'score': 2},
            {'date': {'$date': 1378857600000}, 'grade': 'A', 'score': 6},
            {'date': {'$date': 1358985600000}, 'grade': 'A', 'score': 10},
            {'date': {'$date': 1322006400000}, 'grade': 'A', 'score': 9},
            {'date': {'$date': 1299715200000}, 'grade': 'B', 'score': 14}],
 'name': 'Morris Park Bake Shop',
 'restaurant_id': '30075445'}
{'_id': ObjectId('6566166d88b337b1d7dbd2b2'),
 'address': {'building': '469',
             'coord': [-73.961704, 40.662942],
             'street': 'Flatbush Avenue',
             'zipcode': '11225'},
 'borough': 'Brooklyn',
 'cuisine': 'Hamburgers',
 'grades':

## 4. Display: restaurant_id, name, borough and cuisine, but exclude field _id, for all the documents in the collection

I query with a dict defined as key: field, val: boolean as the second parameter in order to exclude the \_id field and include only the restaurant_id, name, borough, and cuisine fields. I again print out 5 sample documents as well as the total number of documents returned to confirm the correct formatting and size of the query return val

In [8]:
# define fields that will be returned by query
fields = {"_id": 0, "restaurant_id": 1, "name": 1, "borough": 1, "cuisine": 1}

# query dh3382.rest_data
all_docs_4_fields = rest_collection.find({}, fields)

# convenience function defined above, prints 5 documents by default and total number returned by query
print_sample_and_total(all_docs_4_fields)


Total number of documents returned: 3772

Sample docs:

{'borough': 'Bronx',
 'cuisine': 'Bakery',
 'name': 'Morris Park Bake Shop',
 'restaurant_id': '30075445'}
{'borough': 'Brooklyn',
 'cuisine': 'Hamburgers',
 'name': "Wendy'S",
 'restaurant_id': '30112340'}
{'borough': 'Manhattan',
 'cuisine': 'Irish',
 'name': 'Dj Reynolds Pub And Restaurant',
 'restaurant_id': '30191841'}
{'borough': 'Brooklyn',
 'cuisine': 'American ',
 'name': 'Riviera Caterer',
 'restaurant_id': '40356018'}
{'borough': 'Queens',
 'cuisine': 'Jewish/Kosher',
 'name': 'Tov Kosher Kitchen',
 'restaurant_id': '40356068'}


## 5. Display all the restaurants in the Bronx

I define a dict using the mongoDB $eq operator, then execute the query to return all documents where the borough field is equal to "Bronx" and print out a sample as well as the total number of documents returned

In [9]:
# define filter with equals function
bronx = {"borough": {"$eq": "Bronx"} }

# execute query with filter
all_docs_bronx = rest_collection.find(bronx)

# convenience function defined above, prints 5 documents by default and total number returned by query
print_sample_and_total(all_docs_bronx)


Total number of documents returned: 309

Sample docs:

{'_id': ObjectId('6566166d88b337b1d7dbd2b1'),
 'address': {'building': '1007',
             'coord': [-73.856077, 40.848447],
             'street': 'Morris Park Ave',
             'zipcode': '10462'},
 'borough': 'Bronx',
 'cuisine': 'Bakery',
 'grades': [{'date': {'$date': 1393804800000}, 'grade': 'A', 'score': 2},
            {'date': {'$date': 1378857600000}, 'grade': 'A', 'score': 6},
            {'date': {'$date': 1358985600000}, 'grade': 'A', 'score': 10},
            {'date': {'$date': 1322006400000}, 'grade': 'A', 'score': 9},
            {'date': {'$date': 1299715200000}, 'grade': 'B', 'score': 14}],
 'name': 'Morris Park Bake Shop',
 'restaurant_id': '30075445'}
{'_id': ObjectId('6566166d88b337b1d7dbd2bb'),
 'address': {'building': '2300',
             'coord': [-73.8786113, 40.8502883],
             'street': 'Southern Boulevard',
             'zipcode': '10460'},
 'borough': 'Bronx',
 'cuisine': 'American ',
 'grades'

## 7. Display the first 5 restaurants in the Bronx
I query the first 10 restaurants in the Bronx using the previous query paired with the limit function. I save the result of the query to a list in memory and print out the first 5 results, displaying only the name of the restaurants for readability

In [12]:
# rerun query with limit function added
ten_docs_bronx = rest_collection.find(bronx).limit(10)

# append each document from query in order to a list
ten_docs_bronx_list = []
for doc in ten_docs_bronx:
    ten_docs_bronx_list.append(doc)

# print out the restaurant name from the first 5 entries in the list
for doc in ten_docs_bronx_list[:5]:
    print(doc["name"])

Morris Park Bake Shop
Wild Asia
Carvel Ice Cream
Happy Garden
Happy Garden


## 8. Display the second 5 restaurants (skipping the first 5) in the Bronx
I simply print out the remaining documents in the list from the previous question

In [13]:
# print out the last 5 items in the list from previous question
for doc in ten_docs_bronx_list[5:]:
    print(doc["name"])

Manhem Club
The New Starling Athletic Club Of The Bronx
Yankee Tavern
Mcdwyers Pub
The Punch Bowl


## 9. Find the restaurants with a score more than 85
Interpreted as find the restaurants with at least one score strictly greater than 85, use dot notation to evaluate all scores of each restaurant and the $gt operator to return restaurants with at least one score greater than 85. Use my convenience function to print out the results, in this case all of them because there were only four restaurants in total

In [30]:
# query all restaurants with at least one score greater than 85
rest_over_85 = rest_collection.find({"grades.score": {"$gt": 85}})

# print results
print_sample_and_total(rest_over_85)


Total number of documents returned: 4

Sample docs:

{'_id': ObjectId('6566166d88b337b1d7dbd40f'),
 'address': {'building': '65',
             'coord': [-73.9782725, 40.7624022],
             'street': 'West   54 Street',
             'zipcode': '10019'},
 'borough': 'Manhattan',
 'cuisine': 'American ',
 'grades': [{'date': {'$date': 1408665600000}, 'grade': 'A', 'score': 11},
            {'date': {'$date': 1395964800000}, 'grade': 'C', 'score': 131},
            {'date': {'$date': 1380067200000}, 'grade': 'A', 'score': 11},
            {'date': {'$date': 1365379200000}, 'grade': 'B', 'score': 25},
            {'date': {'$date': 1350259200000}, 'grade': 'A', 'score': 11},
            {'date': {'$date': 1318982400000}, 'grade': 'A', 'score': 13}],
 'name': "Murals On 54/Randolphs'S",
 'restaurant_id': '40372466'}
{'_id': ObjectId('6566166d88b337b1d7dbd4b0'),
 'address': {'building': '345',
             'coord': [-73.9864626, 40.7266739],
             'street': 'East 6 Street',
       

## 10. Find the restaurants that achieved a score, more than 80 but less than 100.

In [41]:
# define query
query_80_100 = { "grades.score": {"$gt": 80, "$lt": 100} }

# execute query
rest_80_to_100 = rest_collection.find(query_80_100)

# print results
print_sample_and_total(rest_80_to_100)


Total number of documents returned: 4

Sample docs:

{'_id': ObjectId('6566166d88b337b1d7dbd40f'),
 'address': {'building': '65',
             'coord': [-73.9782725, 40.7624022],
             'street': 'West   54 Street',
             'zipcode': '10019'},
 'borough': 'Manhattan',
 'cuisine': 'American ',
 'grades': [{'date': {'$date': 1408665600000}, 'grade': 'A', 'score': 11},
            {'date': {'$date': 1395964800000}, 'grade': 'C', 'score': 131},
            {'date': {'$date': 1380067200000}, 'grade': 'A', 'score': 11},
            {'date': {'$date': 1365379200000}, 'grade': 'B', 'score': 25},
            {'date': {'$date': 1350259200000}, 'grade': 'A', 'score': 11},
            {'date': {'$date': 1318982400000}, 'grade': 'A', 'score': 13}],
 'name': "Murals On 54/Randolphs'S",
 'restaurant_id': '40372466'}
{'_id': ObjectId('6566166d88b337b1d7dbd4b0'),
 'address': {'building': '345',
             'coord': [-73.9864626, 40.7266739],
             'street': 'East 6 Street',
       

## 11. Find the restaurants which locate in latitude value less than -95.754168

Assuming coordinates are recorded as [\<latitude\>, \<longitude\>], I define a query to find all lat values less than -95.754168. I then execute the query and print the results with my convenience function

In [52]:
# define query
query_lat = {"address.coord.0" : {"$lt": -95.754168} }

# execute query
rest_lat_95 = rest_collection.find(query_lat)

# print results
print_sample_and_total(rest_lat_95)


Total number of documents returned: 3

Sample docs:

{'_id': ObjectId('6566166d88b337b1d7dbd8f9'),
 'address': {'building': '3707',
             'coord': [-101.8945214, 33.5197474],
             'street': '82 Street',
             'zipcode': '11372'},
 'borough': 'Queens',
 'cuisine': 'American ',
 'grades': [{'date': {'$date': 1401840000000}, 'grade': 'A', 'score': 12},
            {'date': {'$date': 1383782400000}, 'grade': 'B', 'score': 19},
            {'date': {'$date': 1368748800000}, 'grade': 'A', 'score': 11},
            {'date': {'$date': 1346198400000}, 'grade': 'A', 'score': 11},
            {'date': {'$date': 1333411200000}, 'grade': 'A', 'score': 12},
            {'date': {'$date': 1321401600000}, 'grade': 'A', 'score': 7}],
 'name': 'Burger King',
 'restaurant_id': '40534067'}
{'_id': ObjectId('6566166d88b337b1d7dbdc64'),
 'address': {'building': '15259',
             'coord': [-119.6368672, 36.2504996],
             'street': '10 Avenue',
             'zipcode': '11357

## 12. Find the restaurants that do not prepare any cuisine of 'American' and their grade score more than 70 and latitude less than -65.754168.

Chained queries to meet all criteria. Noticed some restaurants had "American " cuisine with an extra whitespace at end, so manually removed to avoid mistakenly filtering out cuisine such as 'South & Central American'. Then executed query and printed results 

In [71]:
# define query
query_cuisine_score_lat = {
    "$and": [
        {"cuisine": {"$ne": "American", "$ne": "American "}},
        {"grades.score": {"$gt": 70}},
        {"address.coord.0": {"$lt": 60}}
    ]
}

# execute query
rest_cuisine_score_lat = rest_collection.find(query_cuisine_score_lat)

# print results
print_sample_and_total(rest_cuisine_score_lat)


Total number of documents returned: 5

Sample docs:

{'_id': ObjectId('6566166d88b337b1d7dbd4b0'),
 'address': {'building': '345',
             'coord': [-73.9864626, 40.7266739],
             'street': 'East 6 Street',
             'zipcode': '10003'},
 'borough': 'Manhattan',
 'cuisine': 'Indian',
 'grades': [{'date': {'$date': 1410739200000}, 'grade': 'A', 'score': 5},
            {'date': {'$date': 1389657600000}, 'grade': 'A', 'score': 8},
            {'date': {'$date': 1369872000000}, 'grade': 'A', 'score': 12},
            {'date': {'$date': 1366761600000}, 'grade': 'P', 'score': 2},
            {'date': {'$date': 1349049600000}, 'grade': 'A', 'score': 9},
            {'date': {'$date': 1333670400000}, 'grade': 'C', 'score': 92},
            {'date': {'$date': 1320278400000}, 'grade': 'C', 'score': 41}],
 'name': 'Gandhi',
 'restaurant_id': '40381295'}
{'_id': ObjectId('6566166d88b337b1d7dbd613'),
 'address': {'building': '130',
             'coord': [-73.984758, 40.7457939],
 

## 13. Find the restaurants which do not prepare any cuisine of 'American' and achieved a score more than 70 and located in the longitude less than -65.754168. (without using $and operator)

Largely the same as 12, just change the syntax to remove the $and operator and extra brackets

In [72]:
# define query
query_cuisine_score_lat = {
        "cuisine": {"$ne": "American", "$ne": "American "},
        "grades.score": {"$gt": 70},
        "address.coord.0": {"$lt": 60}
}

# execute query
rest_cuisine_score_lat = rest_collection.find(query_cuisine_score_lat)

# print results
print_sample_and_total(rest_cuisine_score_lat)


Total number of documents returned: 5

Sample docs:

{'_id': ObjectId('6566166d88b337b1d7dbd4b0'),
 'address': {'building': '345',
             'coord': [-73.9864626, 40.7266739],
             'street': 'East 6 Street',
             'zipcode': '10003'},
 'borough': 'Manhattan',
 'cuisine': 'Indian',
 'grades': [{'date': {'$date': 1410739200000}, 'grade': 'A', 'score': 5},
            {'date': {'$date': 1389657600000}, 'grade': 'A', 'score': 8},
            {'date': {'$date': 1369872000000}, 'grade': 'A', 'score': 12},
            {'date': {'$date': 1366761600000}, 'grade': 'P', 'score': 2},
            {'date': {'$date': 1349049600000}, 'grade': 'A', 'score': 9},
            {'date': {'$date': 1333670400000}, 'grade': 'C', 'score': 92},
            {'date': {'$date': 1320278400000}, 'grade': 'C', 'score': 41}],
 'name': 'Gandhi',
 'restaurant_id': '40381295'}
{'_id': ObjectId('6566166d88b337b1d7dbd613'),
 'address': {'building': '130',
             'coord': [-73.984758, 40.7457939],
 

## 14. Find the restaurants which do not prepare any cuisine of 'American ' and achieved a grade point 'A' and not in the borough of Brooklyn, sorted by cuisine in descending order.