# Eat Safe, Love

## Notebook Set Up

In [2]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [3]:
# Build a MongoClient configured for port 27017 (host is left at the driver default)
MONGO_PORT = 27017
mongo = MongoClient(port=MONGO_PORT)

In [4]:
# Keep a handle to the 'uk_food' database obtained from the client
DB_NAME = 'uk_food'
db = mongo[DB_NAME]

In [5]:
# Fetch the names of the collections in the database, then print them
collection_names = db.list_collection_names()
print(collection_names)

[]


In [6]:
# Keep a handle to the 'establishments' collection; the queries below run against it
COLLECTION_NAME = 'establishments'
establishments = db[COLLECTION_NAME]

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [11]:
# Filter: documents whose nested scores.Hygiene value equals 20
query1 = {
    'scores.Hygiene': {'$eq': 20},
}

# Count the matching documents and report the total
hygiene20_total = establishments.count_documents(query1)
print("The number of documents with a hygiene score of 20 is:", hygiene20_total)

# Pretty-print the first matching document (prints None when nothing matches)
first_match1 = establishments.find_one(query1)
pprint(first_match1)

# Materialise every matching document into a list for the DataFrame step
results1 = list(establishments.find(query1))

The number of documents with a hygiene score of 20 is: 0
None


In [12]:
# Build a DataFrame from the list of matching documents
query1_df = pd.DataFrame(results1)

# Report how many rows the DataFrame holds
row_total1 = query1_df.shape[0]
print("Rows in DataFrame: ", row_total1)

# Show up to the first 10 rows (kept as the last expression so the notebook renders it)
query1_df.head(10)

Rows in DataFrame:  0


### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [13]:
# Find establishments whose LocalAuthorityName contains "London" and whose
# RatingValue is greater than or equal to 4.
#
# Do not express this as {'$gte': '4'}: a string bound is compared
# lexicographically against string values (so text ratings that sort after
# '4' would also match) and never matches numerically stored ratings.
# The stored type of RatingValue is not visible in this notebook, so both the
# numeric and the string spellings are listed explicitly.
# NOTE(review): assumes 5 is the highest possible rating - confirm against the data.
RATING_AT_LEAST_4 = [4, 5, '4', '5']

query2 = {
    'LocalAuthorityName': {'$regex': 'London'},
    'RatingValue': {'$in': RATING_AT_LEAST_4},
}

# Use count_documents to display the number of documents in the result
london_high_rating_total = establishments.count_documents(query2)
print("The number of establishments in London with a RatingValue greater than/equal to 4 is:", london_high_rating_total)

# Display the first document in the results using pprint (None when nothing matches)
pprint(establishments.find_one(query2))

The number of establishments in London with a RatingValue greater than/equal to 4 is: 0
None


In [14]:
# Execute the query and collect every matching document into a list
london_cursor = establishments.find(query2)
results2 = list(london_cursor)

# Build a DataFrame from those documents
query2_df = pd.DataFrame(results2)

# Report how many rows the DataFrame holds
row_total2 = query2_df.shape[0]
print("Rows in DataFrame: ", row_total2)

# Show up to the first 10 rows (kept as the last expression so the notebook renders it)
query2_df.head(10)

Rows in DataFrame:  0


### 3. What are the top 5 establishments with a `RatingValue` of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [15]:
# Goal: among establishments within 0.01 degree of latitude and longitude of
# the reference point, keep those with a RatingValue of 5 and show the five
# with the lowest hygiene scores.

# Convert geocode.latitude from String to Decimal on every document so the
# range filter below compares numbers rather than text.
lat_type = establishments.update_many(
    {},
    [{'$set': {'geocode.latitude': {'$toDecimal': '$geocode.latitude'}}}],
)

# Same conversion for geocode.longitude.
long_type = establishments.update_many(
    {},
    [{'$set': {'geocode.longitude': {'$toDecimal': '$geocode.longitude'}}}],
)

# Reference point and search radius (in degrees).
# NOTE(review): presumably the coordinates of "Penang Flavours" - confirm.
degree_search = 0.01
latitude = 51.49014200
longitude = 0.08384000

# Rating must equal 5. The stored type of RatingValue is not visible in this
# notebook, so both the numeric and the string spelling are accepted.
query3 = {
    'geocode.latitude': {
        '$gte': latitude - degree_search,
        '$lte': latitude + degree_search,
    },
    'geocode.longitude': {
        '$gte': longitude - degree_search,
        '$lte': longitude + degree_search,
    },
    'RatingValue': {'$in': [5, '5']},
}

# Sort by hygiene score ascending (1) so the lowest scores come first
sort = [('scores.Hygiene', 1)]

# Limit to the top 5 establishments
limit = 5

# Print the results
pprint(list(establishments.find(query3).sort(sort).limit(limit)))

[]


In [16]:
# Execute the sorted, limited query and collect the documents into a list
nearby_cursor = establishments.find(query3).sort(sort).limit(limit)
results3 = list(nearby_cursor)

# Build a DataFrame from those documents
query3_df = pd.DataFrame(results3)

# Report how many rows the DataFrame holds
row_total3 = query3_df.shape[0]
print("Rows in DataFrame: ", row_total3)

# Show up to the first 10 rows (kept as the last expression so the notebook renders it)
query3_df.head(10)

Rows in DataFrame:  0


### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [17]:
# Aggregation pipeline: count, per Local Authority, the establishments whose
# hygiene score is 0.

# Stage 1 - keep only establishments whose scores.Hygiene equals 0
match_query = {'$match': {'scores.Hygiene': {'$eq': 0}}}

# Stage 2 - one output document per LocalAuthorityName, counting its matches
group_query = {'$group': {'_id': "$LocalAuthorityName", 'count': {'$sum': 1}}}

# Stage 3 - order the groups by their count, highest first
sort_values = {'$sort': {'count': -1}}

# Put the pipeline together
pipeline = [match_query, group_query, sort_values]

# Run the pipeline through the aggregate method and keep the results as a list
results4 = list(establishments.aggregate(pipeline))

# Each result document is one Local Authority group, so len(results4) is the
# number of Local Authority areas, not the number of establishments.
print("Number of Local Authority areas with establishments that have a hygiene score of 0: ", len(results4))

# Print the first 10 results
pprint(results4[0:10])

Number of establishments in each Local Authority area have a hygiene score of 0 in result:  0
[]


In [18]:
# Build a DataFrame from the aggregation output
results4_df = pd.DataFrame(results4)

# Report how many rows the DataFrame holds
row_total4 = results4_df.shape[0]
print("Rows in DataFrame: ", row_total4)

# Show up to the first 10 rows (kept as the last expression so the notebook renders it)
results4_df.head(10)

Rows in DataFrame:  0
