In [3]:
# imports
import pandas as pd
import requests
import os
from urllib3.exceptions import IncompleteRead


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [28]:
# Import the Salvador station dataset
salvador_station = pd.read_csv(r"C:\Users\johnk\OneDrive\Desktop\Statistical-Modelling-Project\salvador_station.csv")

# Set the Foursquare API URL and retrieve the API key from environment variables
foursquare_url = "https://api.foursquare.com/v3/places/search"
API_key = os.getenv("4square_api_key")

# Check if the API key is available, exit if not found
if not API_key:
    print("API key not found. Please set the FOURSQUARE_API_KEY environment variable.")
    exit()

# Set up headers for the Foursquare API request
headers = {
    "Accept": "application/json",
    "Authorization": API_key
}

# Define business categories and their corresponding Foursquare category IDs
business_terms = {
    "13003": "Bars",
    "13065": "Restaurants",
    "18000": "Sports & Recreation"
}

# Initialize an empty list to store the Foursquare API results
foursquare_result = []

# Iterate over each row in the Salvador station DataFrame
for _, row in salvador_station.iterrows():
    latitude = row['Latitude']  # Extract the latitude of the station
    longitude = row['Longitude']  # Extract the longitude of the station
    
    # Iterate over each business category
    for term, category_name in business_terms.items():
        # Set up parameters for the API request
        params = {
            "ll": f"{latitude},{longitude}",
            "radius": 1000,
            "limit": 50,
            "categories": term
        }
        
        try:
            # Send the API request to Foursquare
            response = requests.get(foursquare_url, headers=headers, params=params, timeout=300)
            
            # If the response is successful, process the results
            if response.status_code == 200:
                data = response.json()
                venues = data.get("results", [])
                for venue in venues:
                    # Append venue details to the result list
                    foursquare_result.append({
                        "Station Name": row['Station Name'],
                        "Venue Name": venue.get('name', 'N/A'),
                        "Category": category_name,
                        "Venue Latitude": venue.get('geocodes', {}).get('main', {}).get('latitude'),
                        "Venue Longitude": venue.get('geocodes', {}).get('main', {}).get('longitude'),
                        "fsq_id": venue.get('fsq_id', 'N/A'),
                        "venue distance": venue.get('distance')
                    })
            else:
                # Print an error message if the API response is unsuccessful
                print(f"Error for location {latitude},{longitude}: {response.status_code}")
        except IncompleteRead as e:
            # Handle IncompleteRead exception
            print(f"Error reading response for {latitude},{longitude}: {e}")
        except Exception as e:
            # Handle other exceptions
            print(f"An error occurred for {latitude},{longitude}: {e}")


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [29]:
foursquare_df = pd.DataFrame(foursquare_result)


In [30]:
foursquare_df.head()

Unnamed: 0,Station Name,Venue Name,Category,Venue Latitude,Venue Longitude,fsq_id,venue distance
0,27 - Praça Ana Lúcia Magalhães,Preto,Bars,-12.991484,-38.460663,4c4a3bd5959220a13290d10d,207
1,27 - Praça Ana Lúcia Magalhães,Proa Cervejaria Salvador,Bars,-12.994175,-38.457994,5ce8a0eb1acf11002cdee3a2,326
2,27 - Praça Ana Lúcia Magalhães,Su Espeto Bistrot,Bars,-12.993303,-38.460751,4fceabcce4b0296cfc3c8836,16
3,27 - Praça Ana Lúcia Magalhães,Restaurante Caminho de Casa,Bars,-12.995514,-38.464786,4c0da9b97189c928fb6dd7b6,479
4,27 - Praça Ana Lúcia Magalhães,Acqua Café,Bars,-12.993441,-38.460507,504fcb01e4b0eec17d50ff2b,38


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [31]:
# Load the Salvador station dataset
salvadore_station = pd.read_csv(r"C:\Users\johnk\OneDrive\Desktop\Statistical-Modelling-Project\salvador_station.csv")

# Set the Yelp API URL
yelp_url = "https://api.yelp.com/v3/businesses/search"

# Retrieve the Yelp API key from environment variables
api_key = os.getenv("yelp_api_key")

# Check if the API key is available, exit if not found
if not api_key:
    print("API key not found. Please set the yelp_API_KEY environment variable.")
    exit()

# Set up headers for the Yelp API request
headers = {
    "Accept": "application/json",
    "Authorization": f"Bearer {api_key}"
}

# Initialize an empty list to store the Yelp API results
yelp_result = []

# Define business categories to search for
business_terms = ["Restaurants", "Bars", "Fitness & Instruction"]

# Iterate over each row in the Salvador station DataFrame
for _, row in salvadore_station.iterrows():
    latitude = row['Latitude']  # Extract the latitude of the station
    longitude = row['Longitude']  # Extract the longitude of the station
    
    # Iterate over each business category term
    for term in business_terms:
        # Set up parameters for the API request
        params = {
            "latitude": latitude,
            "longitude": longitude,
            "radius": 500,  # Radius in meters
            "term": term, 
            "limit": 50  # Number of results to return
        }

        # Make the API request to Yelp
        response = requests.get(yelp_url, headers=headers, params=params)

        # Check if the response is successful
        if response.status_code == 200:
            # Parse the JSON response
            data = response.json()
            # Iterate over each business in the results
            for business in data.get("businesses", []):
                # Append business details to the result list
                yelp_result.append({
                    "Station Name": row['Station Name'],
                    "Latitude": latitude,
                    "Longitude": longitude,
                    "Search Term": term,
                    "Business Name": business['name'],
                    "Business Category": term,  # Add term as category
                    "Business Rating": business['rating'],
                    "Business Address": business['location'].get('address1', 'N/A'),
                    "Business Phone": business.get('phone', 'N/A'),
                    "Business Distance (meters)": business.get('distance', 'N/A')
                })
        else:
            # Print an error message if the API response is unsuccessful
            print(f"Error: {response.status_code}")
            print(f"Response details: {response.json()}")


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

In [7]:
yelp_df = pd.DataFrame(yelp_result)

In [15]:
yelp_df.head()

Unnamed: 0,Station Name,Latitude,Longitude,Search Term,Business Name,Business Category,Business Rating,Business Address,Business Phone,Business Distance (meters)
0,27 - Praça Ana Lúcia Magalhães,-12.993338,-38.460908,Restaurants,Acqua Café,Restaurants,4.3,"R. das Hortências, 600 - Pituba",557133581635,47.248643
1,27 - Praça Ana Lúcia Magalhães,-12.993338,-38.460908,Restaurants,Riz Bistrot & Risottos,Restaurants,3.9,"R. das Hortênsias, 966",557130196800,328.793942
2,27 - Praça Ana Lúcia Magalhães,-12.993338,-38.460908,Restaurants,Adam Cozinha Originária,Restaurants,4.6,"R. Edith Mendes da Gama e Abreu, 189",557135063191,208.535935
3,27 - Praça Ana Lúcia Magalhães,-12.993338,-38.460908,Restaurants,A Saúde Na Panela,Restaurants,4.7,"R. das Hortênsias, 752",557133536788,116.547057
4,27 - Praça Ana Lúcia Magalhães,-12.993338,-38.460908,Restaurants,Su Espeto Bistrot,Restaurants,3.5,"Rua das Hortênsias, 624.",557135068280,30.512338


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

In [9]:
yelp_df_poi_count = len(yelp_df)
foursquare_df_poi_count = len(foursquare_df)

print(f"the number of POI in yelp are {yelp_df_poi_count}")
print(f"the number of POI in foursquare are {foursquare_df_poi_count}")

if yelp_df_poi_count > foursquare_df_poi_count:
    print(f"the yelp DataFrame have more POIs.")
elif foursquare_df_poi_count > yelp_df_poi_count:
    print (f"the foursquare DataFrame have more POIs.")

the number of POI in yelp are 3467
the number of POI in foursquare are 7706
the foursquare DataFrame have more POIs.


In [14]:
foursquare_df.isnull().sum()

Station Name       0
Venue Name         0
Category           0
Venue Latitude     0
Venue Longitude    0
fsq_id             0
venue distance     0
dtype: int64

In [12]:
missing_data_yelp_df = yelp_df.isnull().sum()
missing_data_foursquare_df = foursquare_df.isnull().sum()

total_missing_data_yelp_df = missing_data_yelp_df.sum()
total_missing_data_foursquare_df = missing_data_foursquare_df.sum()

if total_missing_data_yelp_df > total_missing_data_foursquare_df:
   print(f"The yelp DataFrame have more missing values.")
elif total_missing_data_foursquare_df > total_missing_data_yelp_df:
   print(f"The foursquere DataFrame have more missing values.")
else:
    print("Both DataFrames have the same number of missing values.")

Both DataFrames have the same number of missing values.


Get the top 10 restaurants according to their rating

In [8]:
yelp_df.sort_values(by="Business Rating", ascending = False).head(10)

Unnamed: 0,Station Name,Latitude,Longitude,Search Term,Business Name,Business Category,Business Rating,Business Address,Business Phone,Business Distance (meters)
2875,47 - Barris (Praça João Mangabeira),-12.988084,-38.511655,Restaurants,Showmaki,Restaurants,5.0,"R. Politeama de Baixo, 32",557121379111.0,590.715794
2891,47 - Barris (Praça João Mangabeira),-12.988084,-38.511655,Restaurants,Golden Grill,Restaurants,5.0,CC Lapa,557133285533.0,603.38014
2881,47 - Barris (Praça João Mangabeira),-12.988084,-38.511655,Restaurants,Restaurante Govinda,Restaurants,5.0,"Ladeira dos Barris, 190",557191642514.0,426.405083
1564,24 - Barravento,-13.009336,-38.525735,Fitness & Instruction,Selfit Academias,Fitness & Instruction,5.0,"R. Augusto Frederico Schmith, 95",,506.551857
2880,47 - Barris (Praça João Mangabeira),-12.988084,-38.511655,Restaurants,Ajeum da Diáspora,Restaurants,5.0,"R. Amparo do Tororó, 157",557191608933.0,127.164232
2879,47 - Barris (Praça João Mangabeira),-12.988084,-38.511655,Restaurants,Churrascaria Fazendinha,Restaurants,5.0,"Rua Conselheiro Junqueira Aires, 1-125 , Barris",557130118542.0,604.82046
353,52 - Campo Grande II,-12.98827,-38.52219,Restaurants,Espetinho Sabiá,Restaurants,5.0,"R. Politeama de Baixo, 555",557133280658.0,452.036192
352,52 - Campo Grande II,-12.98827,-38.52219,Restaurants,Showmaki,Restaurants,5.0,"R. Politeama de Baixo, 32",557121379111.0,564.908352
351,52 - Campo Grande II,-12.98827,-38.52219,Restaurants,Porto do Moreira,Restaurants,5.0,"Rua Carlos Gomes, 486, Campo Grande",557133224112.0,1013.319245
350,52 - Campo Grande II,-12.98827,-38.52219,Restaurants,DAS Restaurante,Restaurants,5.0,Av. Lafayete Coutinho 1010,557130112410.0,791.725917
