# Eat Safe, Love

## Notebook Set Up

In [2]:
from pymongo import MongoClient
import pandas as pd
from pprint import pprint

In [3]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [4]:
# assign the uk_food database to a variable name
db = mongo['uk_food']

In [5]:
# review the collections in our database
print(db.list_collection_names())

['establishments']


In [6]:
# assign the collection to a variable
establishments = db['establishments']

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [7]:
# Find the establishments with a hygiene score of 20
hygiene_20_query = {"scores.Hygiene": 20}

# Use count_documents to display the number of documents in the result
hygiene_20_count = establishments.count_documents(hygiene_20_query)
print(f"Number of establishments with a hygiene score of 20: {hygiene_20_count}")

# Display the first document in the results using pprint
first_document = establishments.find_one(hygiene_20_query)
if first_document:
    pprint(first_document)
else:
    print("No establishments found with a hygiene score of 20.")

Number of establishments with a hygiene score of 20: 41
{'AddressLine1': '5-6 Southfields Road',
 'AddressLine2': 'Eastbourne',
 'AddressLine3': 'East Sussex',
 'AddressLine4': '',
 'BusinessName': 'The Chase Rest Home',
 'BusinessType': 'Caring Premises',
 'BusinessTypeID': 5,
 'ChangesByServerID': 0,
 'Distance': 4613.888288172291,
 'FHRSID': 110681,
 'LocalAuthorityBusinessID': '4029',
 'LocalAuthorityCode': '102',
 'LocalAuthorityEmailAddress': 'Customerfirst@eastbourne.gov.uk',
 'LocalAuthorityName': 'Eastbourne',
 'LocalAuthorityWebSite': 'http://www.eastbourne.gov.uk/foodratings',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'BN21 1BU',
 'RatingDate': '2021-09-23T00:00:00',
 'RatingKey': 'fhrs_0_en-gb',
 'RatingValue': '0',
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('6705fdf66438b4cd1aee77c5'),
 'geocode': {'latitude': 50.769705, 'longitude': 0.27694},
 'links': [{'href': 'https://api.ratings.food.gov.uk/establishments/110681',
            'rel': 'sel

In [8]:
# Convert the result to a Pandas DataFrame  #comback for help
query = {"scores.Hygiene": 20}
results = establishments.find(query)
df = pd.DataFrame(list(results))

if df.empty:
    print("No establishments found with a hygiene score of 20.")
else:
    num_rows = len(df)
    print(f"Number of rows in DataFrame: {num_rows}")

# Display the number of rows in the DataFrame
    num_rows = len(df)
    print(f"Number of rows in DataFrame: {num_rows}")
# Display the first 10 rows of the DataFrame
    print(df.head(10))

Number of rows in DataFrame: 41
Number of rows in DataFrame: 41
                        _id   FHRSID  ChangesByServerID  \
0  6705fdf66438b4cd1aee77c5   110681                  0   
1  6705fdf76438b4cd1aee7b49   612039                  0   
2  6705fdf76438b4cd1aee7e49   730933                  0   
3  6705fdf76438b4cd1aee803e   172735                  0   
4  6705fdf76438b4cd1aee8052   172953                  0   
5  6705fdf76438b4cd1aee89ed   512854                  0   
6  6705fdf76438b4cd1aee8c05  1537089                  0   
7  6705fdf76438b4cd1aeea12e   155648                  0   
8  6705fdf76438b4cd1aeea576  1012883                  0   
9  6705fdf76438b4cd1aeead8d   644109                  0   

  LocalAuthorityBusinessID               BusinessName  \
0                     4029        The Chase Rest Home   
1                1970/FOOD                 Brenalwood   
2                1698/FOOD              Melrose Hotel   
3             PI/000023858              Seaford Pizza   
4

### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [9]:
# Find the establishments with London as the Local Authority and has a RatingValue greater than or equal to 4.
query = {
    'LocalAuthority': {'$regex': 'London', '$options': 'i'},  # Case-insensitive search for "London"
    'RatingValue': {'$gte': 4}  # RatingValue greater than or equal to 4
}

# Use count_documents to display the number of documents in the result
count_documents = establishments.count_documents(query)
print(f"Number of establishments in London with a RatingValue >= 4: {count_documents}")
# Display the first document in the results using pprint
first_document = establishments.find_one(query)
if first_document:
    pprint(first_document)
else:
    print("No establishments found in London with a RatingValue >= 4.")

Number of establishments in London with a RatingValue >= 4: 0
No establishments found in London with a RatingValue >= 4.


In [10]:
# Convert the result to a Pandas DataFrame  #comeback and double check
results = establishments.find(query)
df2 = pd.DataFrame(list(results))
# Display the number of rows in the DataFrame
num_rows = len(df2)
print(f"Number of rows in DataFrame: {num_rows}")
# Display the first 10 rows of the DataFrame
print(df.head(10))

Number of rows in DataFrame: 0
                        _id   FHRSID  ChangesByServerID  \
0  6705fdf66438b4cd1aee77c5   110681                  0   
1  6705fdf76438b4cd1aee7b49   612039                  0   
2  6705fdf76438b4cd1aee7e49   730933                  0   
3  6705fdf76438b4cd1aee803e   172735                  0   
4  6705fdf76438b4cd1aee8052   172953                  0   
5  6705fdf76438b4cd1aee89ed   512854                  0   
6  6705fdf76438b4cd1aee8c05  1537089                  0   
7  6705fdf76438b4cd1aeea12e   155648                  0   
8  6705fdf76438b4cd1aeea576  1012883                  0   
9  6705fdf76438b4cd1aeead8d   644109                  0   

  LocalAuthorityBusinessID               BusinessName  \
0                     4029        The Chase Rest Home   
1                1970/FOOD                 Brenalwood   
2                1698/FOOD              Melrose Hotel   
3             PI/000023858              Seaford Pizza   
4             PI/000024532        

### 3. What are the top 5 establishments with a `RatingValue` rating value of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [17]:
# Search within 0.01 degree on either side of the latitude and longitude. #need help
# Rating value must equal 5
# Sort by hygiene score

degree_search = 0.01
latitude = 51.5350065  
longitude = 0.7121671

query = {
    'RatingValue': 5,
    'geocode.latitude': {'$gte': latitude - degree_search, '$lte': latitude + degree_search},
    'geocode.longitude': {'$gte': longitude - degree_search, '$lte': longitude + degree_search}
}
sort = [('scores.Hygiene', 1)] 
limit =10


results = establishments.find(query).sort(sort).limit(limit)


df3 = pd.DataFrame(list(results))

if df3.empty:
    print("No establishments found with a RatingValue of 5 within the specified location.")
else:
    print(f"Number of establishments found: {len(df)}")
    print(df3.head(10))  


No establishments found with a RatingValue of 5 within the specified location.


In [18]:
# Convert result to Pandas DataFrame  #NEED help
results = establishments.find(query)
df = pd.DataFrame(list(results))
df

### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [19]:
# Create a pipeline that: #NEED HELP
# 1. Matches establishments with a hygiene score of 0

match_query = {
    "$match": {
        "hygiene_score": 0
    }
}

# 2. Groups the matches by Local Authority

group_query = {
    "$group": {
        "_id": "$LocalAuthorityName",
        "count": {"$sum": 1}
    }
}

# 3. Sorts the matches from highest to lowest

sort_values = {
    "$sort": {
        "count": -1  
    }
}


pipeline = [match_query, group_query, sort_values]
results = establishments.aggregate(pipeline)

# Print the number of documents in the result

results_list = list(results)
print("Number of documents:", len(results_list))
# Print the first 10 results
pprint(results_list[:10])

Number of documents: 0
[]


In [20]:
# Convert the result to a Pandas DataFrame  #NEED HELP
df = pd.DataFrame(results)

# Display the number of rows in the DataFrame
num_rows = len(df)
# Display the first 10 rows of the DataFrame
print(f"Number of rows in DataFrame: {num_rows}")

Number of rows in DataFrame: 0
