In [1]:
import requests
import json
import pandas as pd
import numpy as np
import datetime
from config import api_key
from config import google_key
import time
# import sqlalchemy
# import urllib
# from sqlalchemy import create_engine
# from sqlalchemy.ext.declarative import declarative_base
# from sqlalchemy import PrimaryKeyConstraint
# from sqlalchemy.orm import Session
# from sqlalchemy.orm import sessionmaker
# from sqlalchemy import Column, Integer, String, Float, DateTime
# from sqlalchemy.schema import Sequence

In [2]:
# Download Minneapolis restaurant records from the Yelp v3 business-search
# endpoint. Results are paged 50 at a time (offsets 0..950) and accumulated
# in `data` as the raw business dicts returned by the API.
data = []

headers = {'Authorization': f'Bearer {api_key}'}

url='https://api.yelp.com/v3/businesses/search'

print('Downloading Yelp Data...')

for offset in range(0, 1000, 50):

    params = {
        'limit':50,
        'location':'Minneapolis, MN',
        'categories':'restaurants',
        'offset':offset
        }

    response=requests.get(url, params=params, headers=headers)
    if response.status_code == 200:
        data += response.json()['businesses']
    elif response.status_code == 400:
        print('400 Bad Request')
        break
    else:
        # Any other status (auth failure, rate limit, server error) was
        # previously skipped without a word, leaving a silent gap in `data`.
        print(f'Unexpected response {response.status_code} at offset {offset}; stopping download.')
        break

print(f'Yelp data downloaded...  There are {len(data)} records...')

Downloading Yelp Data...
Yelp data downloaded...  There are 1000 records...


In [45]:
# Flatten each raw Yelp business dict into the handful of fields used later
# in the notebook. Businesses flagged as closed are left out of `yelp_list`.
yelp_list=[]
for business in data:
    # Read every field up front so a malformed record fails loudly here,
    # whether or not the business ends up being kept.
    location = business['location']
    coordinates = business['coordinates']
    address = f"{location['address1']}, {location['city']} {location['zip_code']}"
    business_dict = {
        "YelpID": business['id'],
        "Name": business['name'],
        "Latitude": coordinates['latitude'],
        "Longitude": coordinates['longitude'],
        "Address": address,
        "Rating": business['rating'],
        "Reviews": business['review_count'],
    }
    if business['is_closed'] == False:
        yelp_list.append(business_dict)

print('yelp_list with needed data has been built.')

yelp_list with needed data has been built.


In [86]:
# Turn the Yelp records into a DataFrame with a fixed column order, drop rows
# that repeat the same name and address, and save the result as a CSV file.
yelp_columns = ['YelpID','Name','Latitude','Longitude','Address','Rating','Reviews']
yelp_frame = pd.DataFrame(yelp_list)
yelp_df = yelp_frame[yelp_columns].drop_duplicates(subset=['Name','Address'])
yelp_df.to_csv("DataFiles/YelpData.csv")

remaining_yelp = len(yelp_df)
print('Yelp DataFrame now stored in memory as "yelp_df" and csv "YelpData.csv" has been saved in DataFiles folder.')
print(f'Removed duplicates. Leaving {remaining_yelp} restaurants.')
print('---------------')

Yelp DataFrame now stored in memory as "yelp_df" and csv "YelpData.csv" has been saved in DataFiles folder.
Removed duplicates. Leaving 999 restaurants.
---------------


In [47]:
# Look up every Yelp record in the Google Places "find place from text"
# endpoint, searching by name and biasing towards the Yelp coordinates.
# `google_data` gets exactly one entry per `yelp_list` entry, in the same
# order: the first candidate returned, or "" when there is none.
print('Matching Yelp data list to Google API...   This will take some time, as we match each record...')

url = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json?'
google_data=[]

for place in yelp_list:

    # NOTE(review): 'radius' is sent as its own parameter here; confirm the
    # endpoint honours it -- the documented way to bound the search area
    # appears to be a `circle:radius@lat,lng` locationbias.
    params = {
        'key':google_key,
        'input':place['Name'],
        'inputtype':'textquery',
        'locationbias': 'point:' + str(place['Latitude']) + ", " + str(place['Longitude']),
        'radius': 10,
        'fields':'name,formatted_address,place_id,geometry,rating,user_ratings_total'
        }

    response = requests.get(url, params=params)

    # Decode the body once. A payload without 'candidates' is treated as
    # "no match" so the positional pairing with yelp_list is preserved.
    candidates = response.json().get('candidates', [])
    google_data.append(candidates[0] if candidates else "")

print(f'Google match has been completed...  There are {len(google_data)} records')

Matching Yelp data list to Google API...   This will take some time, as we match each record...
Google match has been completed...  There are 1000 records


In [48]:
# Preview the first few raw matches instead of dumping every record.
google_data[:3]


[{'formatted_address': '1121 Hennepin Ave, Minneapolis, MN 55403, United States',
  'geometry': {'location': {'lat': 44.97476349999999, 'lng': -93.2798227},
   'viewport': {'northeast': {'lat': 44.97607902989272,
     'lng': -93.27855997010727},
    'southwest': {'lat': 44.97337937010728, 'lng': -93.28125962989272}}},
  'name': 'Butcher & The Boar',
  'place_id': 'ChIJN2-3wpQys1IRK9G73sv97zg',
  'rating': 4.6,
  'user_ratings_total': 2077},
 {'formatted_address': '800 N Washington Ave, Minneapolis, MN 55401, United States',
  'geometry': {'location': {'lat': 44.9894213, 'lng': -93.27858189999999},
   'viewport': {'northeast': {'lat': 44.99063512989272,
     'lng': -93.27742472010726},
    'southwest': {'lat': 44.98793547010727, 'lng': -93.28012437989271}}},
  'name': 'Bar La Grassa',
  'place_id': 'ChIJBcDbP4oys1IRg97QGrkjtH8',
  'rating': 4.7,
  'user_ratings_total': 1397},
 {'formatted_address': '112 N 3rd St, Minneapolis, MN 55401, United States',
  'geometry': {'location': {'lat': 

In [49]:
# Flatten each Google candidate into the same column layout used for Yelp.
# "" entries in `google_data` (no match) become all-blank rows so that
# `google_list` stays positionally aligned with `google_data`.
google_list=[]

for places in google_data:
    if places != "":
        # Read each field with a default. Previously a field absent from the
        # response silently kept the value assigned for the previous
        # restaurant (or by an earlier cell), attaching wrong data to the row.
        location = places.get('geometry', {}).get('location', {})
        business_dict = {
            "Google Places ID": places.get('place_id', ""),
            "Name": places.get('name', ""),
            "Latitude": location.get('lat', ""),
            "Longitude": location.get('lng', ""),
            "Address": places.get('formatted_address', ""),
            "Rating": places.get('rating', ""),
            "Reviews": places.get('user_ratings_total', ""),
        }

    else:
        business_dict = {"Google Places ID":"","Name":"","Latitude":"","Longitude":"","Address":"", "Rating":"","Reviews":""}

    google_list.append(business_dict)

print('google_list with needed data has been built.')

google_list with needed data has been built.


In [50]:
# Preview the first few flattened records instead of dumping the whole list.
google_list[:5]

[{'Google Places ID': 'ChIJN2-3wpQys1IRK9G73sv97zg',
  'Name': 'Butcher & The Boar',
  'Latitude': 44.97476349999999,
  'Longitude': -93.2798227,
  'Address': '1121 Hennepin Ave, Minneapolis, MN 55403, United States',
  'Rating': 4.6,
  'Reviews': 2077},
 {'Google Places ID': 'ChIJBcDbP4oys1IRg97QGrkjtH8',
  'Name': 'Bar La Grassa',
  'Latitude': 44.9894213,
  'Longitude': -93.27858189999999,
  'Address': '800 N Washington Ave, Minneapolis, MN 55401, United States',
  'Rating': 4.7,
  'Reviews': 1397},
 {'Google Places ID': 'ChIJ6ZJ3YYUys1IRihzyYe0WiGs',
  'Name': '112 Eatery',
  'Latitude': 44.982555,
  'Longitude': -93.2717606,
  'Address': '112 N 3rd St, Minneapolis, MN 55401, United States',
  'Rating': 4.7,
  'Reviews': 968},
 {'Google Places ID': 'ChIJD0SgSFIm9ocRdLlVXKBGwMU',
  'Name': 'George and the Dragon',
  'Latitude': 44.9121575,
  'Longitude': -93.2903128,
  'Address': '813 W 50th St, Minneapolis, MN 55419, United States',
  'Rating': 4.7,
  'Reviews': 1211},
 {'Google Pl

In [87]:
# Build the Google DataFrame: discard the blank placeholder rows (empty Name),
# fix the column order, keep one row per Google Places ID, and save as CSV.
google_columns = ['Google Places ID','Name','Latitude','Longitude','Address','Rating','Reviews']
google_frame = pd.DataFrame(google_list)
has_match = google_frame['Name'] != ""
google_df = google_frame.loc[has_match, google_columns].drop_duplicates(subset=['Google Places ID'])
google_df.to_csv('DataFiles/GoogleData.csv')

remaining_google = len(google_df)
print('Google DataFrame now stored in memory as "google_df" and csv "GoogleData.csv" has been saved in DataFiles folder.')
print(f'Removed null entries.  {remaining_google} restaurants remain.')
print('---------------')

Google DataFrame now stored in memory as "google_df" and csv "GoogleData.csv" has been saved in DataFiles folder.
Removed null entries.  977 restaurants remain.
---------------


In [76]:
# Show only the first rows; the full frame is saved in GoogleData.csv.
google_df.head(10)

Unnamed: 0,Address,Google Places ID,Latitude,Longitude,Name,Rating,Reviews
0,"1121 Hennepin Ave, Minneapolis, MN 55403, Unit...",ChIJN2-3wpQys1IRK9G73sv97zg,44.9748,-93.2798,Butcher & The Boar,4.6,2077
1,"800 N Washington Ave, Minneapolis, MN 55401, U...",ChIJBcDbP4oys1IRg97QGrkjtH8,44.9894,-93.2786,Bar La Grassa,4.7,1397
2,"112 N 3rd St, Minneapolis, MN 55401, United St...",ChIJ6ZJ3YYUys1IRihzyYe0WiGs,44.9826,-93.2718,112 Eatery,4.7,968
3,"813 W 50th St, Minneapolis, MN 55419, United S...",ChIJD0SgSFIm9ocRdLlVXKBGwMU,44.9122,-93.2903,George and the Dragon,4.7,1211
4,"211 N 1st St, Minneapolis, MN 55401, United St...",ChIJpQmgMoQys1IRID1e5YB8eMo,44.9856,-93.2695,Spoon and Stable,4.7,1495
5,"80 S 9th St, Minneapolis, MN 55402, United States",ChIJpcQ6D5cys1IRGIkZfb0UOUg,44.9746,-93.2727,Hell's Kitchen Inc.,4.4,4650
6,"5557 Xerxes Ave S, Minneapolis, MN 55410, Unit...",ChIJEWROdcIm9ocRbZQ9Buq-07w,44.9018,-93.3186,Pizzeria Lola,4.7,1987
7,"208 N 1st Ave, Minneapolis, MN 55401, United S...",ChIJzxizBYUys1IRDx_NN0FVbNE,44.9836,-93.2697,Red Cow North Loop,4.6,2758
8,"600 E Hennepin Ave, Minneapolis, MN 55414, Uni...",ChIJ-SDIlncts1IRxlaSi4Kah2c,44.9895,-93.2513,Brasa Premium Rotisserie,4.6,1570
9,"831 Nicollet Mall, Minneapolis, MN 55402, Unit...",ChIJSRQF5JYys1IRrrRbQLTgvAY,44.9751,-93.2736,Zelo,4.6,1310


In [None]:
# Pair each Yelp record with the Google record found for it, so the names and
# addresses from both sources can be compared side by side in one CSV.
compare_list=[]
yelpgeo_list=[]

# The pairing is purely positional, so it is only meaningful while both lists
# hold one entry per restaurant in the same order.
assert len(yelp_list) == len(google_list), 'yelp_list and google_list are out of step'

for yelp_record, google_record in zip(yelp_list, google_list):
    compare_list.append({
        "Yelp": yelp_record['Name'],
        "Google": google_record['Name'],
        "GoogleAddress": google_record['Address'],
        "Yelp Address": yelp_record['Address'],
    })

compare_df = pd.DataFrame(compare_list)
compare_df.to_csv('DataFiles/compare.csv')

print('"compare_df" has been stored in memory and csv "compare.csv" has been saved in DataFiles folder to allow easy comparison between Yelp and Google data.')
print('---------------')

In [38]:
# For every Yelp record, query the Food_Inspections feature service for
# inspections whose business name contains the first word of the Yelp name
# and whose coordinates fall within +/-0.001 degrees of the Yelp location.
# All returned features are accumulated in `inspection_data`.
print('Matching Yelp data list to Minneapolis Health Inspection API...   This will take some time, as we match each record...')

# Loop-invariant request pieces.
url = 'https://services.arcgis.com/afSMGVsC7QlRK1kZ/arcgis/rest/services/Food_Inspections/FeatureServer/0/query'
out_fields = 'BusinessName,HealthFacilityIDNumber,FullAddress,InspectionType,DateOfInspection,InspectionIDNumber,InspectionScore,Latitude,Longitude'

inspection_data=[]

for records in yelp_list:

    # A record without coordinates cannot be bounded geographically.
    if records['Latitude'] is None or records['Longitude'] is None:
        continue

    biz = records['Name']

    # Match on the first word only, upper-cased, with quote and ampersand
    # characters stripped so they cannot break the where-clause literal.
    biz_string = biz.split(' ',1)[0].upper()
    biz_string = biz_string.replace("'","").replace("&","")

    minlat=records['Latitude']-.001
    maxlat=records['Latitude']+.001
    minlon=records['Longitude']-.001
    maxlon=records['Longitude']+.001

    where = (
        f"BusinessName like '%{biz_string}%'"
        f" AND Latitude >= {minlat} AND Latitude <= {maxlat}"
        f" AND Longitude >= {minlon} AND Longitude <= {maxlon}"
    )

    # Let requests do the URL encoding, as the Google cell does. This escapes
    # the business name properly and removes the old `json = '&f=json'`
    # variable, which overwrote the imported `json` module.
    params = {
        'where': where,
        'outFields': out_fields,
        'returnGeometry': 'false',
        'outSR': 4326,
        'f': 'json',
        }

    response = requests.get(url, params=params)

    # A payload without 'features' contributes nothing instead of raising
    # KeyError part-way through the run.
    inspection_data += response.json().get('features', [])

print(f'Inspection data match has been completed...  There are {len(inspection_data)} records')

Matching Yelp data list to Minneapolis Health Inspection API...   This will take some time, as we match each record...
Inspection data match has been completed...  There are 22019 records


In [39]:
# Preview the first few raw features instead of dumping every record.
inspection_data[:3]

[{'attributes': {'BusinessName': 'BUTCHER & THE BOAR',
   'HealthFacilityIDNumber': 'LIC50791',
   'FullAddress': '1121 HENNEPIN AVE',
   'InspectionType': 'Routine',
   'DateOfInspection': 1506361500000,
   'InspectionIDNumber': 59767,
   'InspectionScore': 92,
   'Latitude': 44.97466,
   'Longitude': -93.27972}},
 {'attributes': {'BusinessName': 'BUTCHER & THE BOAR',
   'HealthFacilityIDNumber': 'LIC50791',
   'FullAddress': '1121 HENNEPIN AVE',
   'InspectionType': 'Routine',
   'DateOfInspection': 1506361500000,
   'InspectionIDNumber': 59767,
   'InspectionScore': 92,
   'Latitude': 44.97466,
   'Longitude': -93.27972}},
 {'attributes': {'BusinessName': 'BUTCHER & THE BOAR',
   'HealthFacilityIDNumber': 'LIC50791',
   'FullAddress': '1121 HENNEPIN AVE',
   'InspectionType': 'Routine',
   'DateOfInspection': 1506361500000,
   'InspectionIDNumber': 59767,
   'InspectionScore': 92,
   'Latitude': 44.97466,
   'Longitude': -93.27972}},
 {'attributes': {'BusinessName': 'BUTCHER & THE B

In [40]:
# Unwrap the 'attributes' dict of every feature and convert DateOfInspection
# from epoch milliseconds to an mm/dd/YYYY string (UTC).
inspection_data_list = []

for records in inspection_data:
    # Work on a copy: writing the formatted date back into the original dict
    # changed `inspection_data` itself, so re-running this cell failed when it
    # tried to divide the already-formatted string by 1000.
    item = dict(records['attributes'])
    inspected_ms = item['DateOfInspection']
    if inspected_ms is not None:
        item['DateOfInspection'] = time.strftime('%m/%d/%Y', time.gmtime(inspected_ms/1000))
    inspection_data_list.append(item)

print('inspection_data_list with needed data has been built.')
# Preview only; the complete list stays available in inspection_data_list.
inspection_data_list[:5]

inspection_data_list with needed data has been built.


[{'BusinessName': 'BUTCHER & THE BOAR',
  'HealthFacilityIDNumber': 'LIC50791',
  'FullAddress': '1121 HENNEPIN AVE',
  'InspectionType': 'Routine',
  'DateOfInspection': '09/25/2017',
  'InspectionIDNumber': 59767,
  'InspectionScore': 92,
  'Latitude': 44.97466,
  'Longitude': -93.27972},
 {'BusinessName': 'BUTCHER & THE BOAR',
  'HealthFacilityIDNumber': 'LIC50791',
  'FullAddress': '1121 HENNEPIN AVE',
  'InspectionType': 'Routine',
  'DateOfInspection': '09/25/2017',
  'InspectionIDNumber': 59767,
  'InspectionScore': 92,
  'Latitude': 44.97466,
  'Longitude': -93.27972},
 {'BusinessName': 'BUTCHER & THE BOAR',
  'HealthFacilityIDNumber': 'LIC50791',
  'FullAddress': '1121 HENNEPIN AVE',
  'InspectionType': 'Routine',
  'DateOfInspection': '09/25/2017',
  'InspectionIDNumber': 59767,
  'InspectionScore': 92,
  'Latitude': 44.97466,
  'Longitude': -93.27972},
 {'BusinessName': 'BUTCHER & THE BOAR',
  'HealthFacilityIDNumber': 'LIC50791',
  'FullAddress': '1121 HENNEPIN AVE',
  'Ins

In [88]:
# Build the inspections DataFrame: keep the first row seen for each
# InspectionIDNumber, fix the column order, and save the result as CSV.
inspection_columns = ['BusinessName','DateOfInspection','FullAddress','HealthFacilityIDNumber','InspectionIDNumber','InspectionScore','InspectionType','Latitude','Longitude']
inspections_unique = pd.DataFrame(inspection_data_list).drop_duplicates(subset='InspectionIDNumber', keep='first')
inspections_df = inspections_unique[inspection_columns]
inspections_df.to_csv('DataFiles/InspectionsData.csv')

unique_inspections = len(inspections_df)
print('Inspections DataFrame now stored in memory as "inspections_df" and csv "InspectionsData.csv" has been saved in DataFiles folder.')
print(f'There are {unique_inspections} unique inspections.')
print('---------------')

Inspections DataFrame now stored in memory as "inspections_df" and csv "InspectionsData.csv" has been saved in DataFiles folder.
There are 2841 unique inspections.
---------------


In [42]:
# Show only the first rows; the full frame is saved in InspectionsData.csv.
inspections_df.head(10)

Unnamed: 0,BusinessName,DateOfInspection,FullAddress,HealthFacilityIDNumber,InspectionIDNumber,InspectionScore,InspectionType,Latitude,Longitude
0,BUTCHER & THE BOAR,09/25/2017,1121 HENNEPIN AVE,LIC50791,59767,92,Routine,44.97466,-93.27972
6,BUTCHER & THE BOAR,10/02/2018,1121 HENNEPIN AVE,LIC50791,226770,88,Routine,44.97466,-93.27972
14,BUTCHER & THE BOAR,11/06/2017,1121 HENNEPIN AVE,LIC50791,226771,100,Follow-Up,44.97466,-93.27972
15,BUTCHER & THE BOAR,10/03/2019,1121 HENNEPIN AVE,LIC50791,275315,78,Routine,44.97466,-93.27972
31,BUTCHER & THE BOAR,11/07/2018,1121 HENNEPIN AVE,LIC50791,275316,100,Follow-Up,44.97466,-93.27972
35,BUTCHER & THE BOAR,10/31/2019,1121 HENNEPIN AVE,LIC50791,332488,100,Follow-Up,44.97466,-93.27972
37,BAR LA GRASSA,08/10/2017,800 WASHINGTON AVE N #102,LIC32013,25271,92,Routine,44.98953,-93.27869
41,BAR LA GRASSA,08/07/2018,800 WASHINGTON AVE N #102,LIC32013,216880,82,Routine,44.98953,-93.27869
55,BAR LA GRASSA,08/21/2019,800 WASHINGTON AVE N #102,LIC32013,262750,98,Routine,44.98953,-93.27869
61,BAR LA GRASSA,09/12/2018,800 WASHINGTON AVE N #102,LIC32013,262751,94,Follow-Up,44.98953,-93.27869


In [43]:
# #Postgres username, password, and database name
# ipaddress = 'localhost'
# port = '5432'
# username = 'postgres'
# password = 'password' 
# dbname = 'Minneapolis_Restaurants'
# # A long string that contains the necessary Postgres login information
# postgres_str = f'postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'

In [44]:
# # Creates classes which will serve as the anchor points for our table, loads the table to Postgres and uploads the data

# Base = declarative_base()
# engine = create_engine(postgres_str)

# class Yelp(Base):
#     __tablename__ = 'yelp'
#     Index=Column(Integer,primary_key=True,autoincrement=True)
#     YelpID=Column(String,nullable=False)
#     Name=Column(String)
#     Latitude=Column(Float(20))
#     Longitude=Column(Float(20))
#     Address=Column(String)
#     Rating=Column(Float(10))
#     Reviews=Column(Integer)
                   
# Base.metadata.create_all(engine)

# yelp_df.to_sql('yelp', engine, if_exists='append', index=False)