In [1]:
import requests
import json
import pandas as pd
from pathlib import Path

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [2]:
def four_square_search(latitude,longitude,index_file):
	lat = str(latitude)
	long=  str(longitude)
    
	url = "https://api.foursquare.com/v3/places/search?categories=13065%2C13053%2C13040%2C13059%2C13037%2C13032%2C13003&fields=store_id%2Clocation%2Crating%2Cpopularity%2Cprice%2Cdistance%2Cname&sort=RATING&limit=35"
	param =  {
  				"ll": lat + ',' + long,
  				"radius": "1000",
             }
	headers_dict = {
    				"Accept": "application/json",
    				"Authorization": ".........."
                   }
	message = requests.get(url, params=param, headers=headers_dict)
	message = message.text
	message = json.loads(message)
	save = open("FQ_Json"+ str(index_file) + ".json", "w")  
	json.dump(message, save, indent = 6) 
	save.close()

#sample to test the query function
four_square_search(45.61749978,-73.60601127,2)

#loop to query for all stations
geo_of_stations = pd.read_csv('info_on_stations.csv')
for i in range(len(geo_of_stations['latitude'])):
	four_square_search(geo_of_stations['latitude'][i],geo_of_stations['longitude'][i],i)


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [105]:
def parse_json_file_FQ(file_name_json):
  with open(file_name_json) as temp:
    message = json.load(temp)
  
  #selecting required field
  try: 
    latitude = message['context']['geo_bounds']['circle']['center']['latitude']
  except:
    latitude = 0.0
        
  try:
    longitude = message['context']['geo_bounds']['circle']['center']['longitude']
  except:
    longitude = 0.0
  
  id = []
  for i in message['results']:
    try:
      id.insert(len(id), i['store_id'])
    except:
      id.insert(len(id), "not available")
  
  name = []
  for i in message['results']:
    try:
      name.insert(len(name), i['name'])
    except:
      name.insert(len(name), 'not available')
  
  rating = []
  for i in message['results']:
    try:
      rating.insert(len(rating), i['rating'])
    except:
      rating.insert(len(rating), 0)
  
  distance = []
  for i in message['results']:
    try:
      distance.insert(len(distance), i['distance'])
    except:
      distance.insert(len(distance), 0)
  
  price = []
  for i in message['results']:
    try:
      price.insert(len(price), i['price'])
    except:
      price.insert(len(price), 0)
  
  address = []
  for i in message['results']:
    try:
      address.insert(len(address), i['location']['address'])
    except:
      address.insert(len(address), 'not available')

  #gathering the collected data
  table = pd.DataFrame()
  table['address'] = address
  table['latitude'] = latitude
  table['longitude'] = longitude
  table['rating'] = rating
  table['distance'] = distance
  table['price'] = price
  table['name'] = name
  table['store_id'] = id
  return table   
      
#sample to test parsing function      
result = parse_json_file_FQ(Path("FQ_Json/FQ_Json210.json"))
result.head(5)


Unnamed: 0,address,latitude,longitude,rating,distance,price,name,store_id
0,124 Saint- Viateur Rue O,45.527513,-73.598791,9.2,398,1,Café Olimpico,not available
1,9 Fairmount Ave E,45.527513,-73.598791,9.2,497,2,Larry's,not available
2,263 Saint- Viateur Rue O,45.527513,-73.598791,9.1,599,1,St-Viateur Bagel,not available
3,5357 Ave du Parc,45.527513,-73.598791,9.1,678,4,Milos Restaurant,not available
4,809 Laurier Ave E,45.527513,-73.598791,9.0,880,0,Fous Desserts,not available


Put your parsed results into a DataFrame

In [106]:

import glob

#collecting all the json files name
file_path = Path("FQ_Json/")
json_file_list = []
for i in glob.glob('./FQ_Json/*.json'):
    json_file_list.append(i)

#parsing all the json files and save as .csv
frames = [parse_json_file_FQ(i) for i in json_file_list]
table = pd.concat(frames)
table = table.drop_duplicates(subset=['latitude', 'longitude', 'address'], keep='last')
table.to_csv('info_on_business_FQ.csv', index=False)

table.head(5)

Unnamed: 0,address,latitude,longitude,rating,distance,price,name,store_id
0,7301 Henri-Bourassa Blvd E,45.6175,-73.606011,7.4,941,1,Restaurant Prima Luna,not available
1,7000 Marie-Victorin Rue,45.6175,-73.606011,0.0,79,1,Café l'Exil,not available
2,7000 Maurice du Plessis Blvd,45.6175,-73.606011,0.0,240,2,Resto Capucine,not available
3,11737 4e Ave,45.6175,-73.606011,0.0,587,1,Biscuits Heavenly Taste Biscotti,not available
4,7230 Maurice-Duplessis Blvd,45.6175,-73.606011,0.0,696,2,Restaurant Phoenix Oriental,not available


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [108]:
def yelp_search(latitude,longitude,index_file):
    lat = str(latitude)
    long = str(longitude)
    radius = '1000'
    
    url = 'https://api.yelp.com/v3/businesses/search?latitude='+lat+'&longitude='+long+'&radius='+radius+'&categories=amusementparks%2Cbikerentals%2Crecreation%2Crestaurants&sort_by=rating&limit=20'
    
    headers_dict = {'accept': 'application/JSON',
                    'authorization': 'Bearer .....'
                   }
    
    message = requests.get(url, headers=headers_dict)
    message = message.text
    message = json.loads(message)
    save = open("Yelp_Json"+ str(index_file) + ".json", "w")  
    json.dump(message, save, indent = 6) 
    save.close()
    
#sample to test the query function
yelp_search(45.54813639,-73.62434015,715)

#loop to query for all stations
geo_of_stations = pd.read_csv('info_on_stations.csv')
for i in range(len(geo_of_stations['latitude'])):
    yelp_search(geo_of_stations['latitude'][i],geo_of_stations['longitude'][i],i)


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [110]:

def parse_json_file_yelp(file_name_json):
    #Read Json file and pull required information 
    with open(file_name_json) as temp:
        message = json.load(temp)
  
    #selecting required field
    try: 
        latitude = message['region']['center']['latitude']
    except:
        latitude = 0.0
            
    try:
        longitude = message['region']['center']['longitude']
    except:
        longitude = 0.0
      
    id = []
    for i in message['businesses']:
        try:
            id.insert(len(id), i['id'])
        except:
            id.insert(len(id), "not available")
    
    name = []
    for i in message['businesses']:
        try:
            name.insert(len(name), i['name'])
        except:
            name.insert(len(name), 'not available')
  
    rating = []
    for i in message['businesses']:
        try:
            rating.insert(len(rating), i['rating'])
        except:
            rating.insert(len(rating), 0)
    
    review_count = []
    for i in message['businesses']:
        try:
            review_count.insert(len(review_count), i['review_count'])
        except:
            review_count.insert(len(review_count), 0)

    distance = []
    for i in message['businesses']:
        try:
            distance.insert(len(distance), i['distance'])
        except:
            distance.insert(len(distance), 0)
  
    address = []
    for i in message['businesses']:
        try:
            address.insert(len(address), i['location']['address1'])
        except:
            address.insert(len(address), 'not available')

    #gathering the collected data
    table = pd.DataFrame()
    table['store_id'] = id
    table['latitude'] = latitude
    table['longitude'] = longitude
    table['address'] = address
    table['rating'] = rating
    table['distance'] = distance
    table['review_count'] = review_count
    table['name'] = name
    return table   
      
#sample to test function      
file_path = Path("Yelp_Json/")
result = parse_json_file_yelp(file_path/'Yelp_Json210.json')
result.head(3)


Unnamed: 0,store_id,latitude,longitude,address,rating,distance,review_count,name
0,co2OcgQ_UBYUhf_hgjlEPg,45.527513,-73.598791,111 Avenue du Mont-Royal O,5.0,1298.448445,13,Julian's Comptoir Gourmand
1,TQG3wFRyyqxukrNjO1lyCg,45.527513,-73.598791,5149 Avenue du Parc,5.0,827.182054,11,Mimi & Jones
2,MMnGeUrHR6q2zllksmJQqw,45.527513,-73.598791,31 Rue Saint Viateur E,5.0,128.856007,8,Bibiko


Put your parsed results into a DataFrame

In [112]:
import glob

#collecting all the json files name
json_file_list = []
for i in glob.glob("./Yelp_Json/*.json"):
    json_file_list.append(i)

#parsing all the json files and save as .csv
frames = [ parse_json_file_yelp(i) for i in json_file_list]
table = pd.concat(frames)
table = table.drop_duplicates(subset=['latitude', 'longitude', 'store_id'], keep='last')
table.to_csv('info_on_business_yelp.csv', index = False)

table.head(3)

Unnamed: 0,store_id,latitude,longitude,address,rating,distance,review_count,name
0,vRJPrk-VH75uxLiVVprBhA,45.6175,-73.606011,7000 Boulevard Maurice Duplessis,5.0,375.111311,4.0,Capucine
1,zbWm7G5L2PJMLpvYz-jFEw,45.6175,-73.606011,6363 Boulevard Henri Bourassa E,5.0,960.395183,1.0,La Veranda
2,2P1Rvd4IboY8z7oRoT8b8g,45.6175,-73.606011,7470 Maurice-Duplessis Boul,4.5,975.918454,3.0,Dagostino Pizza


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Both API are flexible when making queries. You can custom your search according to your prefered criterion. You can say that:
    1 - They offer a large range of features that you can include in your seach to get information that match your need as close as possible. 
    
    2 - They both offer the most relevant characteristics about on their products (rating, distance from target, price, standard category,location, and so on). 
    
    3 - They have an ergonomic website that offer a good customer experience
    
    4 - Their Json file response structure is pretty convenient.

However, you can notice that:
    1 - Four Square do not always provide a unique ID for the locations. This ID would be very helpful to run analysis on data collected from those. In this case, you have to consider the locations' address to filter for duplicate. The address is less reliable. Yelp is more efficient by providing a consistent ID for the locations.

    2 - On a specific query custom, Yeld provides more locations

    3 - Yeld provides more details about the locations than Four Square

Base on that, I would say that Yeld provide a more complete package than Four Square.





Get the top 10 restaurants according to their rating

In [115]:
#top 10 from Four Square
result = pd.read_csv('info_on_business_FQ.csv')
result.sort_values('rating', ascending=False).head(10)


Unnamed: 0,address,latitude,longitude,rating,distance,price,name,store_id
5331,1431 Saint-Laurent Blvd,45.516091,-73.570129,9.4,778,2,Cadet,not available
20755,1201 Van Horne Ave,45.51597,-73.608275,9.4,827,4,Damas,not available
7750,1431 Saint-Laurent Blvd,45.518128,-73.561851,9.4,899,2,Cadet,not available
5758,1201 Van Horne Ave,45.519153,-73.616469,9.4,463,4,Damas,not available
20237,1201 Van Horne Ave,45.520019,-73.618907,9.4,534,4,Damas,not available
8493,1201 Van Horne Ave,45.514379,-73.610871,9.4,929,4,Damas,not available
545,1431 Saint-Laurent Blvd,45.515868,-73.560084,9.4,715,2,Cadet,not available
13183,1431 Saint-Laurent Blvd,45.506251,-73.571391,9.4,692,2,Cadet,not available
3237,1431 Saint-Laurent Blvd,45.510163,-73.556637,9.4,618,2,Cadet,not available
5235,1201 Van Horne Ave,45.522225,-73.606687,9.4,501,4,Damas,not available


In [116]:
#top 10 from Yelp
result = pd.read_csv('info_on_business_yelp.csv')
result.sort_values('rating', ascending=False).head(10)


Unnamed: 0,store_id,latitude,longitude,address,rating,distance,review_count,name
0,vRJPrk-VH75uxLiVVprBhA,45.6175,-73.606011,7000 Boulevard Maurice Duplessis,5.0,375.111311,4,Capucine
4468,OryvIJ6JCx-EQ1m4ZXydbQ,45.56371,-73.571395,4593 Rue Bélanger,5.0,1157.148083,4,Flamant Crêpes Café
10700,VPfSbgu0odpTvqndSX7mRg,45.515228,-73.575096,80 Rue Prince-Arthur E,5.0,270.234711,52,Dyad
10702,mrDKrJjS1O6cK19xFtcbjg,45.456703,-73.597643,6574 Monk boulevard,5.0,495.777806,4,Caffe Napoletana
10710,msALzEDqyTGjjjv4Q4npkw,45.456703,-73.597643,6396 Boulevard Monk,5.0,334.801546,2,Pang Ji Gui Lin Nouilles
4476,piazrJtfzmDnSB-4jR8VZA,45.56371,-73.571395,3589 Rue Beaubien E,5.0,1178.724108,1,Forketta Épicerie Gourmet
10712,lPedqWCNgeiMIXw69RmA4g,45.456703,-73.597643,5530 Rue St-Patrick,5.0,725.69295,1,Cafe Dominion
10713,cHmgHwt3OkkS4Q8qb-_0rA,45.456703,-73.597643,1800 Avenue de l'Église,5.0,1062.493478,1,Obvious Burger
4473,nBWIVi4WLYa1xpkfo375Jw,45.56371,-73.571395,4205 Rue Beaubien E,5.0,611.619975,1,Hokkaido
10714,X3DctEHOq9-vC8qZBw3Wlw,45.456703,-73.597643,2357 Rue Jolicoeur,5.0,226.540065,1,Maison Tsui Yuen
