In [None]:
# imports

# Foursquare

In [None]:
#importing libraries

import requests
import pandas as pd
from pandas import json_normalize

In [None]:
# Convert the bike data into a dictionary keyed by stop number (DataFrame index + 1).
# NOTE(review): `bikes_df` is not created in the visible cells; it is expected to already
# exist in the kernel and to carry 'latitude' and 'longitude' columns.
bikes_coordinates_dict = {}

for station_idx, station in bikes_df.iterrows():
    stop_number = station_idx + 1
    bikes_coordinates_dict[stop_number] = {
        'index': stop_number,
        'coordinates': (station['latitude'], station['longitude']),
    }

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [1]:
# A 200 m search radius was chosen because the bike stops are close together:
# many bike stops are only 300 - 500 meters away from a neighbouring stop.

# NOTE(review): absolute, machine-specific path; consider an environment variable or a
# path relative to the project so the notebook runs on other machines.
foursquare_key_path = 'C:/users/ellis/Lighthouse Labs/Lighthouse Tests +Projects/Statistical Modelling with Python/FoursquareApiKey.txt'

# Read the API key once, up front; the `with` block closes the file handle instead of
# leaking one open handle per bike stop.
with open(foursquare_key_path) as key_file:
    foursquare_api_key = key_file.readlines()[0].strip()

# The headers are identical for every request, so build them outside the loop.
headers = {
    "accept": "application/json",
    "Authorization": foursquare_api_key
}

foursquare_responses = []

for bike_stop in bikes_coordinates_dict.values():
    index = bike_stop['index']
    lat, lon = bike_stop['coordinates']
    url = f"https://api.foursquare.com/v3/places/search?query=museum&ll={lat},{lon}&radius=200"

    foursquare_response = requests.get(url, headers=headers)
    # Keep the stop number next to the decoded payload so results can be tied back to a station.
    foursquare_responses.append({'index': index, "data": foursquare_response.json()})

In [None]:
# Persist the raw Foursquare responses to a .json file (available in file: data > foursquare_output.json)

import json

with open('foursquare_output.json', 'w') as output_file:
    output_file.write(json.dumps(foursquare_responses))

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame.

In [None]:
# Build one DataFrame from all Foursquare responses, tagging every row with its bike stop number.
# The per-stop frames are collected in a list and concatenated once; concatenating inside the
# loop re-copies the growing frame on every iteration.
foursquare_frames = []

for response in foursquare_responses:
    # .get() so a payload without a 'results' key is treated like "no venues found"
    # instead of raising KeyError (mirrors the 'businesses' guard used for Yelp).
    data = response['data'].get('results')

    if not data:
        # keep bike stations that do not have any nearby museums
        row = {'Bike Stop #': response['index'], 'fsq_id': None, 'name': None, 'categories': None, 'distance': None}
        foursquare_frames.append(pd.DataFrame([row]))
    else:
        normalized_data = pd.json_normalize(data)
        normalized_data['Bike Stop #'] = response['index']
        foursquare_frames.append(normalized_data)

# pd.concat raises on an empty list, so fall back to an empty frame when there were no responses.
if foursquare_frames:
    foursquare_df = pd.concat(foursquare_frames, ignore_index=True)
else:
    foursquare_df = pd.DataFrame()

In [None]:
# Parsing and cleaning columns. Only the bike stop number, venue name, venue type and the
# distance from the bike station are needed.


def _first_foursquare_category(categories):
    """Return the 'name' of the first category, or [] when there is no category to read.

    The [] placeholder is what this cell has always produced for stations without venues;
    an empty category list or a NaN cell now yields the same placeholder instead of raising.
    """
    if isinstance(categories, (list, tuple)) and len(categories) > 0:
        return categories[0]['name']
    return []


# Select the wanted columns by name. Dropping by position (columns[5:]) only gives this result
# when the very first response had no venues; otherwise it can discard 'name' or 'Bike Stop #'.
foursquare_df_cleaning = foursquare_df[['Bike Stop #', 'name', 'categories', 'distance']].copy()
foursquare_df_cleaning['venue_type'] = foursquare_df_cleaning['categories'].apply(_first_foursquare_category)
final_foursquare_df = foursquare_df_cleaning.drop(columns=['categories'])

#resulting dataframe example can be found in file: images > foursquare_df_examples.png

# Yelp

In [None]:
# as seen above

import requests
import pandas as pd
from pandas import json_normalize

In [None]:
# Rebuild the stop-number -> coordinates lookup (same construction as in the Foursquare section).
# NOTE(review): relies on `bikes_df` already being present in the kernel.
bikes_coordinates_dict = {}

for station_idx, station in bikes_df.iterrows():
    stop_number = station_idx + 1
    coordinates = (station['latitude'], station['longitude'])
    bikes_coordinates_dict[stop_number] = {'index': stop_number, 'coordinates': coordinates}

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [None]:
# A 200 m radius was chosen due to the closeness of the bike stops, as before.

import requests

# NOTE(review): absolute, machine-specific path; consider an environment variable or a
# path relative to the project so the notebook runs on other machines.
yelp_key_path = 'C:/users/ellis/Lighthouse Labs/Lighthouse Tests +Projects/Statistical Modelling with Python/YelpApiKey.txt'

# Read the API key once and close the file; previously the file was reopened for every
# bike stop and never closed.
with open(yelp_key_path) as key_file:
    yelp_api_key = key_file.readlines()[0].strip()

# URL and headers do not change between requests, so they are built outside the loop.
url = "https://api.yelp.com/v3/businesses/search"

headers = {
    'accept': 'application/json',
    'Authorization': 'Bearer ' + yelp_api_key
}

yelp_responses = []

for bike_stop in bikes_coordinates_dict.values():
    index = bike_stop['index']
    lat, lon = bike_stop['coordinates']

    params = {
        'latitude': lat,
        'longitude': lon,
        'term': 'museum',
        'radius': 200
    }

    yelp_response = requests.get(url, headers=headers, params=params)

    # Keep the stop number next to the decoded payload so results can be tied back to a station.
    yelp_responses.append({'index': index, "data": yelp_response.json()})

In [None]:
# Persist the raw Yelp responses to a .json file for potential future use.
# Found in file: data > yelp_output.json

with open('yelp_output.json', 'w') as output_file:
    output_file.write(json.dumps(yelp_responses))

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [None]:
# Build one DataFrame from all Yelp responses, tagging every row with its bike stop number.
# The per-stop frames are collected in a list and concatenated once; concatenating inside the
# loop re-copies the growing frame on every iteration.
yelp_frames = []

for response in yelp_responses:
    data = response['data']

    # keep bike stations that do not have results and set their fields to None
    if 'businesses' not in data or not data['businesses']:
        # Handle the case when there are no businesses or 'businesses' is not present.
        # NOTE(review): 'fsq_id' is a Foursquare field name; it is kept here only because the
        # cleaning cell below drops a column with exactly this name.
        row = {'Bike Stop #': response['index'], 'fsq_id': None, 'name': None, 'categories': None, 'distance': None}
        yelp_frames.append(pd.DataFrame([row]))
    else:
        normalized_data = pd.json_normalize(data['businesses'])
        normalized_data['Bike Stop #'] = response['index']
        yelp_frames.append(normalized_data)

# pd.concat raises on an empty list, so fall back to an empty frame when there were no responses.
if yelp_frames:
    yelp_df = pd.concat(yelp_frames, ignore_index=True)
else:
    yelp_df = pd.DataFrame()

In [None]:
# cleaning and parsing dataframe for use in analysis
# only distance to bike station and name of venues are important for my analysis


def _first_yelp_category(categories):
    """Return the 'title' of the first category, or [] when there is no category to read.

    The [] placeholder is what this cell has always produced for stations without venues;
    an empty category list or a NaN cell now yields the same placeholder instead of raising.
    """
    if isinstance(categories, (list, tuple)) and len(categories) > 0:
        return categories[0]['title']
    return []


# errors='ignore': 'price' (and 'fsq_id') are not guaranteed to be present in the frame, and a
# missing label would otherwise abort the whole cell with KeyError.
# NOTE(review): columns[5:24] is positional and therefore depends on the column order produced
# when yelp_df was built; confirm the retained columns, or switch to selecting them by name.
yelp_df_cleaning = yelp_df.drop(
    columns=['fsq_id', 'price'] + list(yelp_df.columns[5:24]),
    errors='ignore',
)
yelp_df_cleaning2 = yelp_df_cleaning.copy()
yelp_df_cleaning2['venue_type'] = yelp_df_cleaning['categories'].apply(_first_yelp_category)

final_yelp_df = yelp_df_cleaning2.drop(columns = 'categories')

# example of resulting dataframe can be found in file: images > yelp_df_examples.png

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

The Foursquare API provided more complete data compared to the Yelp API. Although the Yelp API includes ratings, my question concerns the proximity of these venues to bike usage in Barcelona, so the ratings did not provide any benefit. In addition, the Foursquare API returned more results: when searching with the "query" word "museum", it returned a larger range of venues, from art galleries to concert halls, monuments and planetariums. By comparison, Yelp returned a more limited subset of "museums", including art galleries, landmarks and museums. In future, with an expanded budget and more time, I could query more keywords in my API requests to receive a broader and potentially fuller subset of venues relevant to my research question.

Get the top 10 restaurants according to their rating

In [None]:
# I was not expecting to need to do restaurants so I ran out of API requests
# theoretically, this would allow me to sort by ratings though

import requests

# NOTE(review): absolute, machine-specific path; consider an environment variable or a
# path relative to the project so the notebook runs on other machines.
yelp_key_path = 'C:/users/ellis/Lighthouse Labs/Lighthouse Tests +Projects/Statistical Modelling with Python/YelpApiKey.txt'

# Read the API key once and close the file rather than reopening it for every bike stop.
with open(yelp_key_path) as key_file:
    yelp_api_key = key_file.readlines()[0].strip()

url = "https://api.yelp.com/v3/businesses/search"

headers = {
    'accept': 'application/json',
    'Authorization': 'Bearer ' + yelp_api_key
}

yelp_restaurants = []

for bike_stop in bikes_coordinates_dict.values():
    index = bike_stop['index']
    lat, lon = bike_stop['coordinates']

    params = {
        'latitude': lat,
        'longitude': lon,
        'term': 'restaurants',
        'radius': 200
    }

    # Use a separate name for the HTTP response: assigning it to `yelp_restaurants` replaced
    # the result list with a Response object, so the following .append() could not work.
    yelp_restaurant_response = requests.get(url, headers=headers, params=params)

    # Store this request's payload (not the leftover `yelp_response` from the museum search).
    yelp_restaurants.append({'index': index, "data": yelp_restaurant_response.json()})

yelp_restaurants

In [None]:
# yelp restaurants into a dataframe

restaurant_frames = []

for restaurants in yelp_restaurants:
    # Read from the loop variable; the previous code read `response`, a leftover from an
    # earlier cell, so every iteration processed the same unrelated payload.
    businesses = restaurants['data'].get('businesses')

    # Skip stops whose payload has no 'businesses' key or an empty list.
    if businesses:
        restaurant_frames.append(pd.json_normalize(businesses))

# Concatenate once (pd.concat raises on an empty list, hence the fallback).
if restaurant_frames:
    yelp_restaurant_df = pd.concat(restaurant_frames, ignore_index=True)
else:
    yelp_restaurant_df = pd.DataFrame()

yelp_restaurant_df

In [None]:
# sort dataframe ordered by best rating

yelp_restaurant_df_sorted = yelp_restaurant_df.sort_values(by='rating', ascending=False)

# Show the ten best-rated restaurants. The previous code printed `yelp_df_sorted`, a name that
# is not assigned by any of the visible cells.
yelp_restaurant_df_sorted.head(10)

Extra: Since I was unable to retrieve the restaurant data, here are the top 10 museums in Barcelona sorted by rating, from the Yelp API

In [20]:
# read the saved .json file

import pandas as pd
import json

# Location of the Yelp responses that were saved earlier.
# NOTE(review): absolute, machine-specific path; confirm it points at the file written by the
# "saved to a .json file" cell.
file_path = 'C:/users/ellis/Lighthouse Labs/Lighthouse Tests +Projects/Statistical Modelling with Python/yelp_output.json'

with open(file_path, 'r') as saved_responses:
    json_data = json.loads(saved_responses.read())

In [73]:
# create dataframe from the .json file

venue_num = 0  # running count of venues read from the file
venue_records = []

for bike_stop in json_data:
    businesses = (bike_stop.get('data') or {}).get('businesses')

    if businesses is None:
        # No 'businesses' key for this stop: keep a single all-None placeholder row.
        venue_records.append({'Name': None, 'Rating': None, 'Review_num': None})
        continue

    for venue in businesses:
        venue_num += 1
        # Build the whole record in one step with .get(), so a venue that lacks one field
        # contributes None for that field. Appending to three separate lists inside a
        # try/except could leave them with different lengths after a KeyError part-way
        # through a venue, and skipped the remaining venues of that stop.
        venue_records.append({
            'Name': venue.get('name'),
            'Rating': venue.get('rating'),
            'Review_num': venue.get('review_count'),
        })

# Explicit columns keep the frame's shape stable even when no records were collected.
df = pd.DataFrame(venue_records, columns=['Name', 'Rating', 'Review_num'])
unique_df = df.drop_duplicates(subset=['Name'], keep='first')

In [74]:
# Ten highest-rated museums in Barcelona; ties on rating are ordered by review count.

museums_by_rating = unique_df.sort_values(by=['Rating', 'Review_num'], ascending=False)
top_museums_rating = museums_by_rating.head(10)
top_museums_rating

Unnamed: 0,Name,Rating,Review_num
16,Hi This Is Barcelona,5.0,18.0
1,El Gat de Botero,5.0,14.0
10,Spanish Trails,5.0,9.0
109,Hash Marihuana Cáñamo & Hemp Museum,5.0,5.0
17,Antiga Sinagoga Major de Barcelona,5.0,4.0
45,Can Framis,5.0,3.0
21,Museo de la Musica,5.0,1.0
6,Basílica de la Sagrada Família,4.5,1136.0
36,Casa Batlló,4.5,348.0
23,La Pedrera - Casa Milà,4.5,263.0


In [76]:
# Ten most-reviewed museums; review volume may carry more weight than a 5-star rating
# that rests on only a few reviews. Ties on review count are ordered by rating.

museums_by_reviews = unique_df.sort_values(by=['Review_num', 'Rating'], ascending=False)
top_museum_reviews = museums_by_reviews.head(10)
top_museum_reviews

Unnamed: 0,Name,Rating,Review_num
6,Basílica de la Sagrada Família,4.5,1136.0
36,Casa Batlló,4.5,348.0
23,La Pedrera - Casa Milà,4.5,263.0
29,Camp Nou,4.5,238.0
116,Font Màgica de Montjuïc,4.5,164.0
43,Palau de la Música Catalana,4.5,132.0
7,Catedral de Barcelona,4.5,120.0
60,Arc de Triomf,4.0,81.0
28,Barri Gòtic,4.5,71.0
54,MACBA - Museu d'Art Contemporani de Barcelona,3.5,55.0
