# Eat Safe, Love

## Notebook Set Up

In [1]:
# Import dependencies
from pymongo import MongoClient
import pandas as pd
from pprint import pprint

In [2]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [3]:
# assign the uk_food database to a variable name
db = mongo['uk_food']

In [4]:
# review the collections in our database
print(db.list_collection_names())

['establishments']


In [5]:
# assign the collection to a variable
establishments = db['establishments']

## Part 3: Exploratory Analysis
Unless otherwise stated, for each question: 
* Use `count_documents` to display the number of documents contained in the result.
* Display the first document in the results using `pprint`.
* Convert the result to a Pandas DataFrame, print the number of rows in the DataFrame, and display the first 10 rows.

### 1. Which establishments have a hygiene score equal to 20?

In [6]:
# Find the establishments with a hygiene score of 20
query = {'scores.Hygiene':{'$eq': 20}}

# Use count_documents to display the number of documents in the result
count_documents = establishments.count_documents(query)
print(f'There are {count_documents} documents')
# Display the first document in the results using pprint
first_result = establishments.find_one(query)
if first_result:
    pprint(first_result)
else:
    print("No documents found with a hygiene score of 20.")

There are 41 documents
{'AddressLine1': '5-6 Southfields Road',
 'AddressLine2': 'Eastbourne',
 'AddressLine3': 'East Sussex',
 'AddressLine4': '',
 'BusinessName': 'The Chase Rest Home',
 'BusinessType': 'Caring Premises',
 'BusinessTypeID': 5,
 'ChangesByServerID': 0,
 'Distance': 4613.888288172291,
 'FHRSID': 110681,
 'LocalAuthorityBusinessID': '4029',
 'LocalAuthorityCode': '102',
 'LocalAuthorityEmailAddress': 'Customerfirst@eastbourne.gov.uk',
 'LocalAuthorityName': 'Eastbourne',
 'LocalAuthorityWebSite': 'http://www.eastbourne.gov.uk/foodratings',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'BN21 1BU',
 'RatingDate': '2021-09-23T00:00:00',
 'RatingKey': 'fhrs_0_en-gb',
 'RatingValue': None,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('673350881d1b2ab835eb966b'),
 'geocode': {'latitude': Decimal128('50.769705'),
             'longitude': Decimal128('0.27694')},
 'links': [{'href': 'https://api.ratings.food.gov.uk/establishments/110681',
            'r

In [7]:
# Convert the result to a Pandas DataFrame
import pandas as pd
query = {'scores.Hygiene': {'$eq': 20}}
results = list(establishments.find(query))
result_df = pd.DataFrame(results)
# Display the number of rows in the DataFrame
print("Rows in DataFrame: ", len(result_df))
# Display the first 10 rows of the DataFrame
result_df.head(10)

Rows in DataFrame:  41


Unnamed: 0,_id,FHRSID,ChangesByServerID,LocalAuthorityBusinessID,BusinessName,BusinessType,BusinessTypeID,AddressLine1,AddressLine2,AddressLine3,...,LocalAuthorityWebSite,LocalAuthorityEmailAddress,scores,SchemeType,geocode,RightToReply,Distance,NewRatingPending,meta,links
0,673350881d1b2ab835eb966b,110681,0,4029,The Chase Rest Home,Caring Premises,5,5-6 Southfields Road,Eastbourne,East Sussex,...,http://www.eastbourne.gov.uk/foodratings,Customerfirst@eastbourne.gov.uk,"{'Hygiene': 20, 'Structural': 20, 'ConfidenceI...",FHRS,"{'longitude': 0.27694, 'latitude': 50.769705}",,4613.888288,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
1,673350891d1b2ab835eb99eb,612039,0,1970/FOOD,Brenalwood,Caring Premises,5,Hall Lane,Walton-on-the-Naze,Essex,...,http://www.tendringdc.gov.uk/,fhsadmin@tendringdc.gov.uk,"{'Hygiene': 20, 'Structural': 15, 'ConfidenceI...",FHRS,"{'longitude': 1.278721, 'latitude': 51.857536}",,4617.965824,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
2,673350891d1b2ab835eb9cf5,730933,0,1698/FOOD,Melrose Hotel,Hotel/bed & breakfast/guest house,7842,53 Marine Parade East,Clacton On Sea,Essex,...,http://www.tendringdc.gov.uk/,fhsadmin@tendringdc.gov.uk,"{'Hygiene': 20, 'Structural': 20, 'ConfidenceI...",FHRS,"{'longitude': 1.15927, 'latitude': 51.789429}",,4619.656144,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
3,673350891d1b2ab835eb9ee0,172735,0,PI/000023858,Seaford Pizza,Takeaway/sandwich shop,7844,4 High Street,Seaford,East Sussex,...,http://www.lewes-eastbourne.gov.uk/,ehealth.ldc@lewes-eastbourne.gov.uk,"{'Hygiene': 20, 'Structural': 10, 'ConfidenceI...",FHRS,"{'longitude': 0.10202, 'latitude': 50.770885}",,4620.421725,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
4,673350891d1b2ab835eb9eef,172953,0,PI/000024532,Golden Palace,Restaurant/Cafe/Canteen,1,5 South Street,Seaford,East Sussex,...,http://www.lewes-eastbourne.gov.uk/,ehealth.ldc@lewes-eastbourne.gov.uk,"{'Hygiene': 20, 'Structural': 10, 'ConfidenceI...",FHRS,"{'longitude': 0.101446, 'latitude': 50.770724}",,4620.437179,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
5,673350891d1b2ab835eba890,512854,0,12/00816/BUTH,Ashby's Butchers,Retailers - other,4613,777 Southchurch Road,Southend-On-Sea,Essex,...,http://www.southend.gov.uk,EnvironmentalHealth@southend.gov.uk,"{'Hygiene': 20, 'Structural': 20, 'ConfidenceI...",FHRS,"{'longitude': 0.736349, 'latitude': 51.541448}",,4625.565258,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
6,673350891d1b2ab835ebaab1,1537089,0,22/00224/RESTUN,South Sea Express Cuisine,Restaurant/Cafe/Canteen,1,33 Alexandra Street,Southend-on-sea,Essex,...,http://www.southend.gov.uk,EnvironmentalHealth@southend.gov.uk,"{'Hygiene': 20, 'Structural': 20, 'ConfidenceI...",FHRS,"{'longitude': 0.7121671, 'latitude': 51.5350065}",,4626.200132,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'https://api.ratings...."
7,673350891d1b2ab835ebbfdd,155648,0,EH/00006058,Golden Palace,Takeaway/sandwich shop,7844,7 London Road,Rayleigh,Essex,...,http://www.rochford.gov.uk,customerservices@rochford.gov.uk,"{'Hygiene': 20, 'Structural': 15, 'ConfidenceI...",FHRS,"{'longitude': 0.602364, 'latitude': 51.591515}",,4632.27689,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
8,673350891d1b2ab835ebc420,1012883,0,17/00110/MIXED/S,The Tulip Tree,Restaurant/Cafe/Canteen,1,3 The Village,Chiddingstone,KENT,...,http://www.sevenoaks.gov.uk/,environmental.health@sevenoaks.gov.uk,"{'Hygiene': 20, 'Structural': 5, 'ConfidenceIn...",FHRS,"{'longitude': 0.146449998021126, 'latitude': 5...",,4633.934041,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."
9,673350891d1b2ab835ebcc33,644109,0,41811,F & S,Retailers - other,4613,,81 Southernhay,Basildon,...,http://www.basildon.gov.uk,ehs@basildon.gov.uk,"{'Hygiene': 20, 'Structural': 20, 'ConfidenceI...",FHRS,"{'longitude': 0.462307, 'latitude': 51.57005}",,4636.552523,False,"{'dataSource': None, 'extractDate': '0001-01-0...","[{'rel': 'self', 'href': 'http://api.ratings.f..."


### 2. Which establishments in London have a `RatingValue` greater than or equal to 4?

In [14]:
# Find the establishments with London as the Local Authority and has a RatingValue greater than or equal to 4.
from pprint import pprint
london_boroughs = [
    "Barking and Dagenham", "City of London Corporation", "Greenwich", 
    "Hackney", "Kensington and Chelsea", "Lambeth", "Lewisham", 
    "Newham", "Redbridge", "Tower Hamlets", "Waltham Forest"
]

query = {
    'LocalAuthorityName': {'$in': london_boroughs},
    'scores.Hygiene': {'$gte': 4}
}
# Use count_documents to display the number of documents in the result
count_documents = establishments.count_documents(query)
print(f"There are {count_documents} establishments in specified London boroughs.")



# Display the first document in the results using pprint
results = establishments.find(query).limit(5)  
for document in results:
    pprint(document)

There are 3958 establishments in specified London boroughs.
{'AddressLine1': 'Oak Apple Farm Building 103 Sheernes Docks',
 'AddressLine2': 'Sheppy Kent',
 'AddressLine3': '',
 'AddressLine4': '',
 'BusinessName': "Charlie's",
 'BusinessType': 'Other catering premises',
 'BusinessTypeID': 7841,
 'ChangesByServerID': 0,
 'Distance': 4627.439467780196,
 'FHRSID': 621707,
 'LocalAuthorityBusinessID': 'PI/000025307',
 'LocalAuthorityCode': '508',
 'LocalAuthorityEmailAddress': 'publicprotection@cityoflondon.gov.uk',
 'LocalAuthorityName': 'City of London Corporation',
 'LocalAuthorityWebSite': 'http://www.cityoflondon.gov.uk/Corporation/homepage.htm',
 'NewRatingPending': False,
 'Phone': '',
 'PostCode': 'ME12',
 'RatingDate': '2021-10-18T00:00:00',
 'RatingKey': 'fhrs_4_en-gb',
 'RatingValue': None,
 'RightToReply': '',
 'SchemeType': 'FHRS',
 '_id': ObjectId('673350891d1b2ab835ebb083'),
 'geocode': {'latitude': Decimal128('51.369321'),
             'longitude': Decimal128('0.508551')},


In [40]:

# Convert the result to a Pandas DataFrame
result1_df = pd.DataFrame(results_list)

# Display the number of rows in the DataFrame
print('Rows in DataFrame:', len(result1_df))

# Display the first 10 rows of the DataFrame
print(result1_df.head(10))

Rows in DataFrame: 38786
                        _id     FHRSID  ChangesByServerID  \
0  673350881d1b2ab835eb7b36  1043695.0                0.0   
1  673350881d1b2ab835eb7b37   647177.0                0.0   
2  673350881d1b2ab835eb7b39   289353.0                0.0   
3  673350881d1b2ab835eb7b3d   289560.0                0.0   
4  673350881d1b2ab835eb7b3e   289352.0                0.0   
5  673350881d1b2ab835eb7b3f   344689.0                0.0   
6  673350881d1b2ab835eb7b40  1043701.0                0.0   
7  673350881d1b2ab835eb7b41   987206.0                0.0   
8  673350881d1b2ab835eb7b42   894592.0                0.0   
9  673350881d1b2ab835eb7b43   805702.0                0.0   

  LocalAuthorityBusinessID                         BusinessName  \
0             PI/000073616                         The Pavilion   
1             PI/000041489                  Wear Bay Bowls Club   
2             PI/000002468  St Marys COE (aided) Primary School   
3             PI/000039927         

### 3. What are the top 5 establishments with a `RatingValue` rating value of 5, sorted by lowest hygiene score, nearest to the new restaurant added, "Penang Flavours"?

In [58]:
from pymongo import MongoClient

# Connect to MongoDB
client = MongoClient('mongodb://localhost:27017/')
db = client['uk_food']
establishments = db['establishments']

# Define the filter and update
filter = {'BusinessName': 'Penang Flavours'}
update = {
    '$set': {
        'geocode.latitude': 51.083812,  
        'geocode.longitude': 1.195625  
    }
}

# Update the document
result = establishments.update_one(filter, update)

if result.modified_count > 0:
    print("Geolocation data updated successfully.")
else:
    print("No document found or update not required.")


No document found or update not required.


In [59]:
# Step 1: Find the coordinates of "Penang Flavours"
penang = establishments.find_one({'BusinessName': 'Penang Flavours'})

if penang:
    # Extract coordinates
    latitude = penang['geocode']['latitude']
    longitude = penang['geocode']['longitude']
    print(f"Coordinates of Penang Flavours: Latitude = {latitude}, Longitude = {longitude}")
    
    # Step 2: Define the search range and query for top establishments
    radius = 0.01  # Adjust radius as needed (in miles)
    query = {
        'geocode': {
            '$geoWithin': {
                '$centerSphere': [[longitude, latitude], radius / 3963.2]  # 3963.2 is Earth's radius in miles
            }
        },
        'scores.Hygiene': {'$ne': None}  # Ensure Hygiene score is not None
    }

    # Step 3: Find establishments within the radius, sorted by Hygiene score
    results = establishments.find(query).sort('scores.Hygiene', 1).limit(5)  # Limit to top 5
    results_list = list(results)  # Convert to list

    # Convert results to a DataFrame to display the top 5
    top_establishments_df = pd.DataFrame(results_list)
    print("Top 5 establishments near 'Penang Flavours' based on hygiene score:")
    print(top_establishments_df.head(5))

else:
    print("No establishment found with the name 'Penang Flavours'.")


Coordinates of Penang Flavours: Latitude = 51.083812, Longitude = 1.195625
Top 5 establishments near 'Penang Flavours' based on hygiene score:
                        _id   FHRSID  ChangesByServerID  \
0  673350881d1b2ab835eb7b36  1043695                  0   

  LocalAuthorityBusinessID  BusinessName             BusinessType  \
0             PI/000073616  The Pavilion  Restaurant/Cafe/Canteen   

   BusinessTypeID         AddressLine1   AddressLine2 AddressLine3  ...  \
0               1  East Cliff Pavilion  Wear Bay Road   Folkestone  ...   

                LocalAuthorityWebSite        LocalAuthorityEmailAddress  \
0  http://www.folkestone-hythe.gov.uk  foodteam@folkestone-hythe.gov.uk   

                                              scores SchemeType  \
0  {'Hygiene': 5, 'Structural': 5, 'ConfidenceInM...       FHRS   

                                          geocode RightToReply     Distance  \
0  {'longitude': 1.195625, 'latitude': 51.083812}               4591.765489   

  N

In [60]:
# Step 1: Find the coordinates of "Penang Flavours"
penang = establishments.find_one({'BusinessName': 'Penang Flavours'})

if penang:
    # Extract coordinates
    latitude = penang['geocode']['latitude']
    longitude = penang['geocode']['longitude']
    print(f"Coordinates of Penang Flavours: Latitude = {latitude}, Longitude = {longitude}")
    
    # Step 2: Define the search range and query for top establishments
    radius = 0.01  
    query = {
        'geocode': {
            '$geoWithin': {
                '$centerSphere': [[longitude, latitude], radius / 3963.2]  
            }
        },
        'scores.Hygiene': {'$ne': None}  # Ensure Hygiene score is not None
    }

    # Step 3: Find establishments within the radius, sorted by Hygiene score
    results = establishments.find(query).sort('scores.Hygiene', 1).limit(5) 


Coordinates of Penang Flavours: Latitude = 51.083812, Longitude = 1.195625


In [61]:
# Search within 0.01 degree on either side of the latitude and longitude.
# Rating value must equal 5
# Sort by hygiene score

degree_search = 0.01
latitude = 51.49014200
longitude = 0.08384000 

query ={
    'RatingValue': 5,
    'geocode.latitude': {'$gte': latitude - degree_search, '$lte': latitude + degree_search},
    'geocode.longitude': {'$gte': longitude - degree_search, '$lte': longitude + degree_search}
}

sort = [('scores.Hygiene', 1)]
limit = 5

# Print the results
print("Top 5 establishments with RatingValue of 5, sorted by lowest hygiene score within 0.01 degree of specified location:")
print(top_establishments_df)

Top 5 establishments with RatingValue of 5, sorted by lowest hygiene score within 0.01 degree of specified location:
                        _id   FHRSID  ChangesByServerID  \
0  673350881d1b2ab835eb7b36  1043695                  0   

  LocalAuthorityBusinessID  BusinessName             BusinessType  \
0             PI/000073616  The Pavilion  Restaurant/Cafe/Canteen   

   BusinessTypeID         AddressLine1   AddressLine2 AddressLine3  ...  \
0               1  East Cliff Pavilion  Wear Bay Road   Folkestone  ...   

                LocalAuthorityWebSite        LocalAuthorityEmailAddress  \
0  http://www.folkestone-hythe.gov.uk  foodteam@folkestone-hythe.gov.uk   

                                              scores SchemeType  \
0  {'Hygiene': 5, 'Structural': 5, 'ConfidenceInM...       FHRS   

                                          geocode RightToReply     Distance  \
0  {'longitude': 1.195625, 'latitude': 51.083812}               4591.765489   

  NewRatingPending           

In [65]:
# Convert result to Pandas DataFrame



### 4. How many establishments in each Local Authority area have a hygiene score of 0?

In [68]:
# Create a pipeline that:
# 1. Matches establishments with a hygiene score of 0
# 2. Groups the matches by Local Authority
# 3. Sorts the matches from highest to lowest
match_query = {'$match': {'scores.Hygiene': {'$eq': 0}}}
group_query = {'$group': {'_id': "$LocalAuthorityName", 'count': { '$sum': 1 }}}
sort_values = {'$sort': { 'count': -1 }}

pipeline = [match_query, group_query, sort_values]
results = list(establishments.aggregate(pipeline))

# Print the number of documents in the result
print("Number of establishments: ", len(results))
# Print the first 10 results
for result in results[:10]:
    print(result)

Number of establishments:  55
{'_id': 'Thanet', 'count': 1130}
{'_id': 'Greenwich', 'count': 882}
{'_id': 'Maidstone', 'count': 713}
{'_id': 'Newham', 'count': 711}
{'_id': 'Swale', 'count': 686}
{'_id': 'Chelmsford', 'count': 680}
{'_id': 'Medway', 'count': 672}
{'_id': 'Bexley', 'count': 607}
{'_id': 'Southend-On-Sea', 'count': 586}
{'_id': 'Tendring', 'count': 542}


In [70]:
# Convert the result to a Pandas DataFrame
result_df = pd.DataFrame(results)
# Display the number of rows in the DataFrame
print('Rows in DataFrame:', len(result_df))
# Display the first 10 rows of the DataFrame
for result in results[:10]:
    print(result)

Rows in DataFrame: 55
{'_id': 'Thanet', 'count': 1130}
{'_id': 'Greenwich', 'count': 882}
{'_id': 'Maidstone', 'count': 713}
{'_id': 'Newham', 'count': 711}
{'_id': 'Swale', 'count': 686}
{'_id': 'Chelmsford', 'count': 680}
{'_id': 'Medway', 'count': 672}
{'_id': 'Bexley', 'count': 607}
{'_id': 'Southend-On-Sea', 'count': 586}
{'_id': 'Tendring', 'count': 542}
