In [None]:
import datetime
import json
import os
import urllib
import urllib.parse

import numpy as np
import pandas as pd
import requests
import sqlalchemy
from sqlalchemy import Column, Integer, String, Float, DateTime
from sqlalchemy import PrimaryKeyConstraint
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker
from sqlalchemy.schema import Sequence

from config import api_key
from config import google_key

In [None]:
# Page through the Yelp business search for Minneapolis restaurants and
# accumulate every page's 'businesses' list in `data`.
data = []

headers = {'Authorization': 'Bearer %s' % api_key}

url = 'https://api.yelp.com/v3/businesses/search'

page_size = 50       # results requested per call
max_results = 1000   # highest offset + page_size this loop will request

for offset in range(0, max_results, page_size):

    params = {
        'limit': page_size,
        'location': 'Minneapolis, MN',
        'categories': 'restaurants',
        'offset': offset,
    }

    # A timeout keeps one stalled connection from hanging the whole notebook.
    response = requests.get(url, params=params, headers=headers, timeout=30)

    if response.status_code != 200:
        # Stop on any failure (rate limit, server error, bad request) and say
        # which one, instead of silently leaving a gap in the collected pages.
        print(f'{response.status_code} {response.reason}')
        break

    businesses = response.json().get('businesses', [])
    if not businesses:
        # An empty page means there is nothing left to fetch.
        break
    data += businesses

print(json.dumps(data, indent=4, sort_keys=True))

In [None]:
# Number of raw business records accumulated from the Yelp search pages.
len(data)

In [None]:
# Flatten the raw Yelp payload into one flat record per business that is not
# flagged as closed.
yelp_list = []
for business in data:
    location = business['location']
    coordinates = business['coordinates']

    yelp_id = business['id']
    name = business['name']
    street = location['address1']
    city = location['city']
    zipcode = location['zip_code']
    # When address1 is empty or None, leave the street out rather than
    # writing the literal text "None" into the address.
    address = f'{street}, {city} {zipcode}' if street else f'{city} {zipcode}'
    rating = business['rating']
    reviews = business['review_count']
    latitude = coordinates['latitude']
    longitude = coordinates['longitude']

    if not business['is_closed']:
        business_dict = {
            "YelpID": yelp_id,
            "Name": name,
            "Latitude": latitude,
            "Longitude": longitude,
            "Address": address,
            "Rating": rating,
            "Reviews": reviews,
        }
        yelp_list.append(business_dict)

yelp_list

In [None]:
# Number of businesses kept in yelp_list (those not flagged as closed).
len(yelp_list)

In [None]:
# Tabulate the Yelp records, expose the row number as an explicit 'Index'
# column, pin the column order, and write the table to yelp.csv.
column_order = ['Index', 'YelpID', 'Name', 'Latitude', 'Longitude', 'Address', 'Rating', 'Reviews']
yelp_df = pd.DataFrame(yelp_list)
yelp_df = yelp_df.assign(Index=yelp_df.index)
yelp_df = yelp_df.loc[:, column_order]
yelp_df.to_csv("yelp.csv")
yelp_df

In [None]:
# Show the dtype pandas assigned to each yelp_df column.
yelp_df.dtypes

In [None]:
# Postgres connection settings. Each value can be overridden with the
# matching PG* environment variable so real credentials do not have to be
# written into the notebook; the fallbacks are the local-development defaults.
ipaddress = os.environ.get('PGHOST', 'localhost')
port = os.environ.get('PGPORT', '5432')
username = os.environ.get('PGUSER', 'postgres')
password = os.environ.get('PGPASSWORD', 'password')
dbname = os.environ.get('PGDATABASE', 'Minneapolis_Restaurants')
# Connection URL handed to create_engine. The password is percent-encoded
# because characters such as '@', ':' or '/' would otherwise be read as URL
# delimiters.
postgres_str = f'postgresql://{username}:{urllib.parse.quote_plus(password)}@{ipaddress}:{port}/{dbname}'

In [None]:
# Declare the ORM mapping for the 'yelp' table, create the mapped table in
# Postgres, then append the rows of yelp_df to it.

engine = create_engine(postgres_str)
Base = declarative_base()


class Yelp(Base):
    """ORM mapping for the ``yelp`` table, one row per Yelp business record."""

    __tablename__ = 'yelp'

    Index = Column(Integer, primary_key=True, autoincrement=True)
    YelpID = Column(String, nullable=False)
    Name = Column(String)
    Latitude = Column(Float(precision=20))
    Longitude = Column(Float(precision=20))
    Address = Column(String)
    Rating = Column(Float(precision=10))
    Reviews = Column(Integer)


# Emit CREATE TABLE for every table registered on Base.
Base.metadata.create_all(bind=engine)

# index=False: the DataFrame's own index is not written as an extra column.
yelp_df.to_sql(name='yelp', con=engine, if_exists='append', index=False)

In [None]:
# Look up every Yelp business in Google Places (Find Place from Text), biased
# towards the Yelp coordinates. google_data stays index-aligned with
# yelp_list: a business with no usable match gets an "" placeholder.
url = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json?'
google_data = []

for places in yelp_list:

    params = {
        'key': google_key,
        'input': places['Name'],
        'inputtype': 'textquery',
        # Documented form is "point:lat,lng" with no space after the comma.
        'locationbias': f"point:{places['Latitude']},{places['Longitude']}",
        # NOTE(review): Find Place documents a radius only inside
        # "locationbias=circle:radius@lat,lng"; confirm this standalone
        # parameter has any effect.
        'radius': 10,
        'fields': 'name,formatted_address,place_id,geometry,rating,user_ratings_total',
    }

    # A timeout keeps one stalled connection from hanging the whole loop.
    response = requests.get(url, params=params, timeout=30)

    # Parse the body once. A failed request, or a payload without
    # 'candidates', is recorded as "no match" so the alignment is preserved.
    candidates = response.json().get('candidates', []) if response.ok else []
    google_data.append(candidates[0] if candidates else "")

google_data

In [None]:
# One entry is appended per Yelp business (a candidate dict or an ""
# placeholder), so this should equal len(yelp_list).
len(google_data)

In [None]:
# Flatten each Google candidate into the same record layout used for Yelp.
# Every field is read from the current candidate only; a field the candidate
# lacks becomes "" (the same placeholder used for "no match") instead of
# silently reusing a value left over from a previous iteration.
google_list = []

for places in google_data:
    if places != "":
        location = places.get('geometry', {}).get('location', {})
        business_dict = {
            "Google Places ID": places.get('place_id', ""),
            "Name": places.get('name', ""),
            "Latitude": location.get('lat', ""),
            "Longitude": location.get('lng', ""),
            "Address": places.get('formatted_address', ""),
            "Rating": places.get('rating', ""),
            # The review count is returned under 'user_ratings_total'.
            "Reviews": places.get('user_ratings_total', ""),
        }
    else:
        business_dict = {"Google Places ID":"","Name":"","Latitude":"","Longitude":"","Address":"", "Rating":"","Reviews":""}

    google_list.append(business_dict)

# Leave the number of processed records in `i`, as the previous counter loop
# did, for any later cell that still reads it.
i = len(google_list)

google_list

In [None]:
# Rebuild both tables straight from the record lists (so yelp_df holds only
# the record keys) and write each one to its CSV file.
yelp_df = pd.DataFrame(yelp_list)
google_df = pd.DataFrame(google_list)

for frame, csv_path in ((yelp_df, 'yelp.csv'), (google_df, 'google.csv')):
    frame.to_csv(csv_path)

In [None]:
# Build two tables from the position-aligned Yelp and Google records:
#   compare_df      - names and addresses side by side for manual matching
#   yelpGeometry_df - Yelp name with its coordinates
compare_list = []
yelpgeo_list = []

# zip pairs the records by position and stops at the shorter list, so the
# loop works for any number of collected businesses.
for yelp_place, google_place in zip(yelp_list, google_list):
    compare_list.append({
        "Yelp": yelp_place['Name'],
        "Google": google_place['Name'],
        "GoogleAddress": google_place['Address'],
        "Yelp Address": yelp_place['Address'],
    })
    yelpgeo_list.append({
        'Name (Yelp)': yelp_place['Name'],
        'Lat': yelp_place['Latitude'],
        'Lon': yelp_place['Longitude'],
    })

compare_df = pd.DataFrame(compare_list)
yelpGeometry_df = pd.DataFrame(yelpgeo_list)
compare_df.to_csv('compare.csv')
yelpGeometry_df.to_csv('yelpgeo.csv')

yelpGeometry_df

In [None]:
# Spot-check the food-inspection feature service with a single Yelp record.
# The last record is used rather than a fixed position so the cell works for
# any list length.
records = yelp_list[-1]

biz = records['Name']

# First word of the business name, upper-cased; quotes and ampersands are
# dropped so the value cannot break out of the quoted LIKE pattern.
biz_string = biz.split(' ', 1)[0].upper()
biz_string = biz_string.replace("'", "").replace("&", "")

url = 'https://services.arcgis.com/afSMGVsC7QlRK1kZ/arcgis/rest/services/Food_Inspections/FeatureServer/0/query'

# Bounding box of +/- 0.001 degrees around the Yelp coordinates.
minlat = records['Latitude'] - .001
maxlat = records['Latitude'] + .001
minlon = records['Longitude'] - .001
maxlon = records['Longitude'] + .001

where = (
    f"BusinessName like '%{biz_string}%'"
    f" AND Latitude >= {minlat} AND Latitude <= {maxlat}"
    f" AND Longitude >= {minlon} AND Longitude <= {maxlon}"
)

# Passing a dict lets requests do the URL encoding, so characters such as
# '#', '%' or '+' in a business name cannot corrupt the query string.
params = {
    'where': where,
    'outFields': 'FacilityCategory,BusinessName,RiskLevel,FullAddress,InspectionType,InspectionResult,DateOfInspection,InspectionIDNumber,YearOfInspection,InspectionScore,Latitude,Longitude,ZipCode,ViolationStatus,HealthFacilityIDNumber',
    'returnGeometry': 'false',
    'outSR': 4326,
    'f': 'json',
}

response = requests.get(url, params=params, timeout=30)

# An error payload has no 'features' key; show an empty list in that case.
response.json().get('features', [])


In [None]:
# Query the food-inspection feature service once per Yelp business and
# collect every returned feature in inspection_data.
inspection_data = []

# Loop-invariant request pieces are built once.
url = 'https://services.arcgis.com/afSMGVsC7QlRK1kZ/arcgis/rest/services/Food_Inspections/FeatureServer/0/query'
out_fields = 'BusinessName,OBJECTID,HealthFacilityIDNumber,RiskLevel,FullAddress,InspectionType,InspectionResult,DateOfInspection,InspectionIDNumber,YearOfInspection,InspectionScore,Latitude,Longitude,ZipCode,ViolationStatus'

for records in yelp_list:

    biz = records['Name']

    # First word of the business name, upper-cased; quotes and ampersands are
    # dropped so the value cannot break out of the quoted LIKE pattern.
    biz_string = biz.split(' ', 1)[0].upper()
    biz_string = biz_string.replace("'", "")
    biz_string = biz_string.replace("&", "")

    # Bounding box of +/- 0.001 degrees around the Yelp coordinates.
    minlat = records['Latitude'] - .001
    maxlat = records['Latitude'] + .001
    minlon = records['Longitude'] - .001
    maxlon = records['Longitude'] + .001

    where = (
        f"BusinessName like '%{biz_string}%'"
        f" AND Latitude >= {minlat} AND Latitude <= {maxlat}"
        f" AND Longitude >= {minlon} AND Longitude <= {maxlon}"
    )

    # Passing a dict lets requests do the URL encoding, so characters such as
    # '#', '%' or '+' in a business name cannot corrupt the query string.
    params = {
        'where': where,
        'outFields': out_fields,
        'returnGeometry': 'false',
        'outSR': 4326,
        'f': 'json',
    }

    response = requests.get(url, params=params, timeout=30)

    # Skip failed requests, and treat an error payload (no 'features' key) as
    # "no inspections" so one bad lookup does not abort the whole run.
    if response.ok:
        inspection_data += response.json().get('features', [])

inspection_data

In [None]:
# Total number of inspection features collected across all Yelp businesses.
len(inspection_data)


In [None]:
# Keep only the 'attributes' mapping of every collected feature.
inspection_data_list = [feature['attributes'] for feature in inspection_data]

inspection_data_list

In [None]:
# Tabulate the inspection attribute dicts: one row per dict, one column per key.
inspection_df = pd.DataFrame(inspection_data_list)
inspection_df