# Eat Safe, Love

## Notebook Set Up

In [4]:
# Import dependencies
from pymongo import MongoClient
from pprint import pprint
import pandas as pd

In [5]:
# Open a client connection to the MongoDB server (default host, port 27017)
mongo = MongoClient("localhost", 27017)

In [6]:
# Keep a handle on the uk_food database for the rest of the notebook
db = mongo.uk_food

In [7]:
# Show which collections exist in the uk_food database
collection_names = db.list_collection_names()
print(collection_names)

['establishments']


In [8]:
# Keep a handle on the establishments collection; every query below runs against it
establishments = db.establishments

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [17]:
# Filter: documents whose nested scores.Hygiene field equals 20
# NOTE(review): the recorded run shows 0 matches here and for every later
# query in this notebook - confirm the collection is populated before
# trusting these results.
query1 = {'scores.Hygiene': 20}
result1 = establishments.find(query1)

# Use count_documents to display the number of documents in the result
hygiene_20_count = establishments.count_documents(query1)
print('Number of establishments with a hygiene score of 20: ', hygiene_20_count)

# Display the first matching document (find_one yields None when nothing matches)
first_hygiene_20 = establishments.find_one(query1)
pprint(first_hygiene_20)




Number of establishments with a hygiene score of 20:  0
None


In [18]:
# Pull every document matching query1 into memory, then build the DataFrame
hygiene_docs = list(establishments.find(query1))
hygiene_df = pd.DataFrame(hygiene_docs)

# Report how many rows the DataFrame holds
print('Number of rows in dataframe: ', len(hygiene_df))

# Preview at most the first 10 rows
hygiene_df.head(10)


Number of rows in dataframe:  0


### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [19]:
# Filter: LocalAuthorityName contains "London" and RatingValue is at least 4.
# NOTE(review): the $gte comparison assumes RatingValue is stored as a number - confirm.
query2 = {
    "LocalAuthorityName": {'$regex': "London"},
    "RatingValue": {'$gte': 4},
}

# Count the matching documents and fetch the first one
count = establishments.count_documents(query2)
document = establishments.find_one(query2)

# Report the count, then pretty-print the first match (None when there is none)
print("Number of documents:", count)
pprint(document)

Number of documents: 0
None


In [12]:
# Pull every document matching query2 into memory, then build the DataFrame
london_docs = list(establishments.find(query2))
london_df = pd.DataFrame(london_docs)

# Report how many rows the DataFrame holds
print('Number of rows in dataframe: ', len(london_df))

# Preview at most the first 10 rows
london_df.head(10)

Number of rows in dataframe:  0


### 3. What are the top 5 establishments with a `RatingValue` of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [13]:
# Search within 0.01 degree on either side of the latitude and longitude.
# Rating value must equal 5
# Sort by hygiene score (ascending, so the lowest scores come first)
# Keep only the top 5 results, as the question asks

degree_search = 0.01
# NOTE(review): presumably the coordinates of "Penang Flavours" - verify against its document.
latitude = 51.49014200
longitude = 0.08384000

# RatingValue is matched as the integer 5 so that it agrees with the numeric
# comparison ({'$gte': 4}) used for RatingValue in question 2; a string '5'
# would never match a numerically stored rating.
query3 = {
    'RatingValue': 5,
    'geocode.latitude': {'$gte': latitude - degree_search, '$lte': latitude + degree_search},
    'geocode.longitude': {'$gte': longitude - degree_search, '$lte': longitude + degree_search},
}
sort = [('scores.Hygiene', 1)]
limit = 5

# Print the results
results = list(establishments.find(query3).sort(sort).limit(limit))
for result in results:
    pprint(result)

In [14]:
# Convert result to Pandas DataFrame (results is already a list, so no copy is needed)
Penang_df = pd.DataFrame(results)

# Display the number of rows in the DataFrame
print('Number of rows in dataframe: ', len(Penang_df))

# Display the first 10 rows of the DataFrame
Penang_df.head(10)


### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [15]:
# Aggregation pipeline, stage by stage:
#   match4 - keep only establishments whose scores.Hygiene is 0
#   group4 - bucket them by LocalAuthorityName and count each bucket
#   sort4  - order the buckets by count, largest first
match4 = {
    '$match': {"scores.Hygiene": 0},
}
group4 = {
    '$group': {
        "_id": "$LocalAuthorityName",
        "count": {'$sum': 1},
    },
}
sort4 = {
    '$sort': {"count": -1},
}
pipeline4 = [match4, group4, sort4]

# Run the pipeline and materialise the output as a list
results4 = list(establishments.aggregate(pipeline4))

# One result document per Local Authority, so the list length is the area count
print('Number of local authority areas: ', len(results4))

# Show at most the first 10 grouped results
print(results4[:10])


Number of local authority areas:  0
[]


In [16]:
# Load the aggregated per-authority counts into a DataFrame
area_df = pd.DataFrame(results4)

# Report how many rows the DataFrame holds
area_row_count = len(area_df)
print('Number of rows in dataframe', area_row_count)

# Preview at most the first 10 rows
area_df.head(10)


Number of rows in dataframe 0
