In [None]:
# imports

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [84]:
import requests
import pandas as pd

# Function to fetch location data from Foursquare API
def get_location_info(latitude, longitude):
    # Set up headers for the API request
    headers = {"Accept": "application/json"}
    headers['Authorization'] = "fsq35XmLZrC8aeGPvOzCUgV5TpEB3EocpNjTBaBn2tQqYx4="
    fields = 'name,categories,distance,rating,stats,geocodes,location'
    gps = str(latitude) + ',' + str(longitude)
    url = 'https://api.foursquare.com/v3/places/search?ll='+ gps + '&radius=500&limit=50&fields='+ fields
    
    # Make the API request
    response = requests.get(url, headers=headers)
    location_data_json = response.json()
    location_data = pd.json_normalize(location_data_json, record_path='results')
    
    # Create a reference dataframe with repeated GPS coordinates
    gps_series = pd.Series(gps)
    gps_ref_df = gps_series.repeat(len(location_data)).to_frame()
    gps_ref_df = gps_ref_df.rename(columns={0:'gps_ref'})
    gps_ref_df = gps_ref_df.reset_index()
    
    # Concatenate the location data with the reference dataframe
    location_data = pd.concat([location_data, gps_ref_df], axis=1)
    
    # Rename columns for clarity
    location_data = location_data[['gps_ref', 'name', 'distance', 'rating', 'stats.total_ratings', 'categories', 'geocodes.main.latitude', 'geocodes.main.longitude', 'location.formatted_address']]
    location_data = location_data.rename(columns={'stats.total_ratings':'total_ratings', 'geocodes.main.latitude':'latitude', 'geocodes.main.longitude':'longitude', 'location.formatted_address':'address'})
    
    return location_data

# Main function to run the script
def main():
    # latitude and longitude values
    NY_latitudes = [40.7128, 34.0522, 37.7749]  # NY latitude values
    NY_longitudes = [-74.0060, -118.2437, -122.4194]  # NY longitude values
    num_NY = len(NY_latitudes)
    
    # Fetch location data for each sample
    location_dfs = [get_location_info(NY_latitudes[i], NY_longitudes[i]) for i in range(num_NY)]
    
    # Concatenate all location data into a single DataFrame
    combined_location_df = pd.concat(location_dfs, ignore_index=True)
    
    # Save the combined DataFrame to a CSV file
    output_csv_path = "combined_location_data.csv"
    combined_location_df.to_csv(output_csv_path, index=False)
    print(f"Combined location data saved to {output_csv_path}")

# Call the main function to run the script
main()


Combined location data saved to combined_location_data.csv


In [104]:
import pandas as pd

# Load Foursquare data from the CSV file
FSQ_df = pd.read_csv("combined_location_data.csv")

# Parse out cafes and pubs
FSQ_df['categories'] = FSQ_df['categories'].apply(str)
FSQ_df_cafes = FSQ_df[FSQ_df['categories'].str.contains("cafe", case=False)]
FSQ_df_pubs = FSQ_df[FSQ_df['categories'].str.contains("pub", case=False)]

# Calculate cafe count and average rating
FSQ_cafes_count = FSQ_df_cafes['gps_ref'].value_counts()
FSQ_cafes_avg_rating = FSQ_df_cafes.groupby(['gps_ref'])['rating'].mean()

# Calculate pub count and average rating
FSQ_pubs_count = FSQ_df_pubs['gps_ref'].value_counts()
FSQ_pubs_avg_rating = FSQ_df_pubs.groupby(['gps_ref'])['rating'].mean()

# Create DataFrames for cafe summaries
FSQ_cafes_summary_df = pd.DataFrame({
    'FSQ_cafe_count': FSQ_cafes_count,
    'FSQ_avg_cafe_rating': FSQ_cafes_avg_rating
}).reset_index()
FSQ_cafes_summary_df.rename(columns={'index': 'gps_ref'}, inplace=True)

# Create DataFrames for pub summaries
FSQ_pubs_summary_df = pd.DataFrame({
    'FSQ_pub_count': FSQ_pubs_count,
    'FSQ_avg_pub_rating': FSQ_pubs_avg_rating
}).reset_index()
FSQ_pubs_summary_df.rename(columns={'index': 'gps_ref'}, inplace=True)

# Merge cafe and pub summaries
FSQ_summary_df = FSQ_pubs_summary_df.merge(FSQ_cafes_summary_df, how='left', on='gps_ref')

# Print or save the resulting summary DataFrame
print(FSQ_summary_df)


             gps_ref  FSQ_pub_count  FSQ_avg_pub_rating  FSQ_cafe_count  \
0  34.0522,-118.2437              2            8.350000               2   
1  37.7749,-122.4194              3            8.466667               3   
2    40.7128,-74.006              1            8.400000               1   

   FSQ_avg_cafe_rating  
0             8.750000  
1             8.666667  
2             8.500000  


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [85]:
import requests
import pandas as pd

# Function to fetch location data from Foursquare API
def get_location_info(latitude, longitude):
    # Set up headers for the API request
    headers = {"Accept": "application/json"}
    headers['Authorization'] = "fsq35XmLZrC8aeGPvOzCUgV5TpEB3EocpNjTBaBn2tQqYx4="
    fields = 'name,categories,distance,rating,stats,geocodes,location'
    gps = str(latitude) + ',' + str(longitude)
    url = 'https://api.foursquare.com/v3/places/search?ll='+ gps + '&radius=500&limit=50&fields='+ fields
    
    # Make the API request
    response = requests.get(url, headers=headers)
    location_data_json = response.json()
    results = location_data_json.get('results', [])  # Get the 'results' array
    
    parsed_results = []
    for result in results:
        poi = {
            'name': result.get('name', ''),
            'categories': [category['name'] for category in result.get('categories', [])],
            'distance': result.get('distance', 0),
            'rating': result.get('rating', 0),
            'total_ratings': result.get('stats', {}).get('total_ratings', 0),
            'latitude': result.get('geocodes', {}).get('main', {}).get('latitude', 0),
            'longitude': result.get('geocodes', {}).get('main', {}).get('longitude', 0),
            'address': result.get('location', {}).get('formatted_address', ''),
        }
        parsed_results.append(poi)
    
    return pd.DataFrame(parsed_results)

# Main function to run the script
def main():
    # latitude and longitude values
    NY_latitudes = [40.7128, 34.0522, 37.7749]  # NY latitude values
    NY_longitudes = [-74.0060, -118.2437, -122.4194]  # NY longitude values
    num_NY = len(NY_latitudes)
    
    # Fetch location data for each sample
    location_dfs = [get_location_info(NY_latitudes[i], NY_longitudes[i]) for i in range(num_NY)]
    
    # Concatenate all location data into a single DataFrame
    combined_location_df = pd.concat(location_dfs, ignore_index=True)
    
    # Save the combined DataFrame to a CSV file with ratings information
    output_csv_path = "combined_location_data_with_ratings.csv"
    combined_location_df.to_csv(output_csv_path, index=False)
    print(f"Combined location data with ratings saved to {output_csv_path}")

# Call the main function to run the script
main()


Combined location data with ratings saved to combined_location_data_with_ratings.csv


Put your parsed results into a DataFrame

In [None]:
# Main function to run the script
def main():
    # latitude and longitude values
    NY_latitudes = [40.7128, 34.0522, 37.7749]  # NY latitude values
    NY_longitudes = [-74.0060, -118.2437, -122.4194]  # NY longitude values
    num_NY = len(NY_latitudes)
    
    # Fetch location data for each sample
    location_dfs = [get_location_info(NY_latitudes[i], NY_longitudes[i]) for i in range(num_NY)]
    
    # Concatenate all location data into a single DataFrame
    combined_location_df = pd.concat(location_dfs, ignore_index=True)
    
    # Save the combined DataFrame to a CSV file with ratings information
    output_csv_path = "combined_location_data_with_ratings.csv"
    combined_location_df.to_csv(output_csv_path, index=False)
    print(f"Combined location data with ratings saved to {output_csv_path}")

# Call the main function to run the script
main()

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [109]:
import pandas as pd
import requests

def Yelp_POIs(lat, long):
    # API request fields
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer hFoti5YCqKAmF3DAPl05LG9txNUxXYgSErgqC6HuPMrkvzlQnL75zh5yKGjx354mjvY4uCv0gdVBHDHIEd7iC_n23OvvR_qST-PiIu5GDYELi5gXYyOmPYzShIjnZHYx"
    }
    gps_yelp = 'latitude=' + str(lat) + '&longitude=' + str(long)
    radius = 1000  # 1000 meters radius
    url = "https://api.yelp.com/v3/businesses/search?" + gps_yelp + f"&radius={radius}&sort_by=best_match&limit=50"

    # API request
    yelp_api_request = requests.get(url, headers=headers)
    
    # Print the raw JSON response
    print(yelp_api_request.json())

    # Dataframe generation
    yelp_near_by_json = yelp_api_request.json()
    yelp_near_by_df = pd.json_normalize(yelp_near_by_json, record_path='businesses')  # Adjust the normalization based on response structure

    # ... rest of the code ...

# NY latitude and longitude of your city
NY_LATITUDE = 40.7128
NY_LONGITUDE = -74.0060

# Send request to Yelp for bike stations in your city
yelp_df = Yelp_POIs(NY_LATITUDE, NY_LONGITUDE)

# Print or use the resulting Yelp DataFrame
print(yelp_df)


{'businesses': [{'id': 'xKIv_b2L2pUHmsoJ-xOepg', 'alias': 'gran-morsi-new-york', 'name': 'Gran Morsi', 'image_url': 'https://s3-media3.fl.yelpcdn.com/bphoto/GpR9eqovfF0zHQ6BgnbvcA/o.jpg', 'is_closed': False, 'url': 'https://www.yelp.com/biz/gran-morsi-new-york?adjust_creative=ZkqdwG8kYqqkqF04TE-7EA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=ZkqdwG8kYqqkqF04TE-7EA', 'review_count': 477, 'categories': [{'alias': 'italian', 'title': 'Italian'}, {'alias': 'venues', 'title': 'Venues & Event Spaces'}, {'alias': 'cocktailbars', 'title': 'Cocktail Bars'}], 'rating': 4.5, 'coordinates': {'latitude': 40.714312, 'longitude': -74.007791}, 'transactions': ['delivery', 'pickup'], 'price': '$$$', 'location': {'address1': '22 Warren St', 'address2': '', 'address3': None, 'city': 'New York', 'zip_code': '10007', 'country': 'US', 'state': 'NY', 'display_address': ['22 Warren St', 'New York, NY 10007']}, 'phone': '+12125772725', 'display_phone': '(212) 577-2725', 'distance': 22

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [110]:
import pandas as pd
import requests

def Yelp_POIs(lat, long):
    # API request fields
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer hFoti5YCqKAmF3DAPl05LG9txNUxXYgSErgqC6HuPMrkvzlQnL75zh5yKGjx354mjvY4uCv0gdVBHDHIEd7iC_n23OvvR_qST-PiIu5GDYELi5gXYyOmPYzShIjnZHYx"
    }
    gps_yelp = 'latitude=' + str(lat) + '&longitude=' + str(long)
    radius = 1000  # 1000 meters radius
    url = "https://api.yelp.com/v3/businesses/search?" + gps_yelp + f"&radius={radius}&sort_by=best_match&limit=50"

    # API request
    yelp_api_request = requests.get(url, headers=headers)
    
    # Parse the response JSON
    response_json = yelp_api_request.json()
    
    # Initialize lists to store POI details
    names = []
    ratings = []
    review_counts = []
    latitudes = []
    longitudes = []
    addresses = []

    # Extract details from the response
    for business in response_json.get('businesses', []):
        names.append(business.get('name', ''))
        ratings.append(business.get('rating', 0))
        review_counts.append(business.get('review_count', 0))
        latitudes.append(business.get('coordinates', {}).get('latitude', 0))
        longitudes.append(business.get('coordinates', {}).get('longitude', 0))
        addresses.append(', '.join(business.get('location', {}).get('display_address', [])))

    # Create a DataFrame to store the extracted details
    poi_df = pd.DataFrame({
        'name': names,
        'rating': ratings,
        'review_count': review_counts,
        'latitude': latitudes,
        'longitude': longitudes,
        'address': addresses
    })
    
    return poi_df

# latitude and longitude of New York
NY_LATITUDE = 40.7128
NY_LONGITUDE = -74.0060

# Send request to Yelp for bike stations in New York
yelp_df = Yelp_POIs(NY_LATITUDE, NY_LONGITUDE)

# Print or use the resulting Yelp DataFrame
print(yelp_df)


                               name  rating  review_count   latitude  \
0                        Gran Morsi     4.5           477  40.714312   
1               Brooklyn Chop House     4.0           943  40.711510   
2                          1803 Nyc     4.0           790  40.715498   
3                        Da Claudio     4.0           625  40.710870   
4                     Marc Forgione     4.5          1006  40.716470   
5             Yankee Doodle Dandy's     4.5           148  40.714240   
6                        Carne Mare     4.5           220  40.705686   
7                          Manhatta     4.5           633  40.708006   
8                         Au Cheval     4.5           835  40.718388   
9         Artesano Peruvian Cuisine     4.5            75  40.714478   
10                           Khe-Yo     4.0           957  40.716906   
11                 Takahachi Bakery     4.0          1029  40.713730   
12                     CUT New York     4.0           217  40.71

Put your parsed results into a DataFrame

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating

In [111]:
import requests
import pandas as pd

def Yelp_POIs(lat, long):
    # API request fields
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer hFoti5YCqKAmF3DAPl05LG9txNUxXYgSErgqC6HuPMrkvzlQnL75zh5yKGjx354mjvY4uCv0gdVBHDHIEd7iC_n23OvvR_qST-PiIu5GDYELi5gXYyOmPYzShIjnZHYx"
    }
    gps_yelp = 'latitude=' + str(lat) + '&longitude=' + str(long)
    url = "https://api.yelp.com/v3/businesses/search?" + gps_yelp + "&categories=restaurants&radius=1000&sort_by=rating&limit=10"

    # API request
    yelp_api_request = requests.get(url, headers=headers)
    
    # Dataframe generation
    yelp_near_by_json = yelp_api_request.json()
    yelp_near_by_df = pd.json_normalize(yelp_near_by_json['businesses'])
    
    # Cleaning
    yelp_near_by_df = yelp_near_by_df[['name', 'rating', 'review_count', 'location.address1', 'location.city', 'location.zip_code']]
    yelp_near_by_df.rename(columns={'review_count': 'rating_count', 'location.address1': 'address', 'location.city': 'city', 'location.zip_code': 'zip_code'}, inplace=True)
    
    return yelp_near_by_df

# Coordinates for New York City
NY_LATITUDE = 40.7128
NY_LONGITUDE = -74.0060

# Send request to Yelp for top 10 restaurants near the specified coordinates
yelp_df = Yelp_POIs(NY_LATITUDE, NY_LONGITUDE)

# Print or use the resulting Yelp DataFrame
print(yelp_df)


                           name  rating  rating_count                 address  \
0              First Class Cafe     5.0             5          52 Chambers St   
1               The Chubby Crab     5.0             6          77 Chrystie St   
2                    A-N-E Cafe     5.0             7            47 Market St   
3                ava's lifeline     5.0            15  745 Dekalb Ave Flr Grd   
4               Mysttik Masaala     5.0            58           21-77 31st St   
5      Harajuku Sushi and Crepe     5.0            14                    None   
6           Best Sicily Bottega     5.0            12            85 Beaver St   
7  Omar’s Mediterranean Cuisine     5.0            29               15 Ann St   
8                    Just Salad     5.0             9             55 Broadway   
9               London & Martin     5.0            17              6 Stone St   

       city zip_code  
0  New York    10007  
1  New York    10002  
2  New York    10002  
3  Brooklyn    1

In [116]:
import requests
import pandas as pd

def Foursquare_POIs(lat, long):
    # Foursquare API Key
    api_key = "fsq35XmLZrC8aeGPvOzCUgV5TpEB3EocpNjTBaBn2tQqYx4="

    # Set up headers for the API request
    headers = {
        "Accept": "application/json",
        "Authorization": api_key
    }
    fields = 'name,categories,distance,rating,stats,geocodes,location'
    gps = str(lat) + ',' + str(long)
    url = 'https://api.foursquare.com/v3/places/search?ll=' + gps + '&radius=500&limit=50&fields=' + fields

    # API request
    response = requests.get(url, headers=headers)
    foursquare_near_by_json = response.json()
    results = foursquare_near_by_json.get('results', [])
    
    # Create a DataFrame from the results
    rows = []
    for result in results:
        row = {
            'name': result.get('name', ''),
            'rating': result.get('rating', 0),
            'rating_count': result.get('stats', {}).get('ratingSignals', 0),
            'address': result.get('location', {}).get('formatted_address', ''),
            'distance': result.get('distance', 0),
            'categories': [category['name'] for category in result.get('categories', [])]
        }
        rows.append(row)
    
    foursquare_near_by_df = pd.DataFrame(rows)
    
    return foursquare_near_by_df

# Coordinates for New York City
NY_LATITUDE = 40.7128
NY_LONGITUDE = -74.0060

# Send request to Foursquare for top 10 restaurants near the specified coordinates
foursquare_df = Foursquare_POIs(NY_LATITUDE, NY_LONGITUDE)

# Print or use the resulting Foursquare DataFrame
print(foursquare_df)


                                          name  rating  rating_count  \
0                               City Hall Park     8.8             0   
1                 The Bar Room at Temple Court     9.2             0   
2                      Variety Coffee Roasters     8.2             0   
3                                 Birch Coffee     8.1             0   
4                             Takahachi Bakery     9.1             0   
5                              Los Tacos No. 1     9.3             0   
6                                   Da Claudio     8.1             0   
7                             Joe's Pizza FiDi     8.9             0   
8                                         Zara     7.7             0   
9                                    Nish Nush     8.8             0   
10                               Nobu Downtown     8.6             0   
11                          Lot-Less Closeouts     8.4             0   
12                               Anthropologie     8.6          

In [126]:
import requests
import pandas as pd

# Function to fetch bike station data from Foursquare API
def get_bike_stations(lat, long):
    # Set up headers for the API request
    headers = {"Accept": "application/json"}
    headers['Authorization'] = "fsq35XmLZrC8aeGPvOzCUgV5TpEB3EocpNjTBaBn2tQqYx4="
    
    # Construct the URL for Foursquare API request
    gps = str(lat) + ',' + str(long)
    url = f'https://api.foursquare.com/v3/places/search?ll={gps}&radius=500&limit=50&fields=name,categories,distance,rating,stats,geocodes,location'
    
    # Make the API request
    response = requests.get(url, headers=headers)
    data = response.json()
    
    # Parse and extract bike station details
    bike_stations = []
    for result in data.get('results', []):
        name = result.get('name', '')
        distance = result.get('distance', 0)
        rating = result.get('rating', 0)
        stats = result.get('stats', {})
        rating_count = stats.get('ratingSignals', 0)
        latitude = result.get('geocodes', {}).get('main', {}).get('latitude', 0)
        longitude = result.get('geocodes', {}).get('main', {}).get('longitude', 0)
        address = result.get('location', {}).get('formatted_address', '')

        bike_stations.append({
            'name': name,
            'distance': distance,
            'rating': rating,
            'rating_count': rating_count,
            'latitude': latitude,
            'longitude': longitude,
            'address': address
        })
    
    # Create a DataFrame from the extracted data
    bike_stations_df = pd.DataFrame(bike_stations)
    return bike_stations_df

# Latitude and longitude of New York
NY_LATITUDE = 40.7128
NY_LONGITUDE = -74.0060

# Fetch bike station data for New York
bike_stations_df = get_bike_stations(NY_LATITUDE, NY_LONGITUDE)

# Print or use the bike stations DataFrame
print(bike_stations_df)


                                          name  distance  rating  \
0                               City Hall Park       147     8.8   
1                 The Bar Room at Temple Court       164     9.2   
2                      Variety Coffee Roasters       178     8.2   
3                                 Birch Coffee       226     8.1   
4                             Takahachi Bakery       254     9.1   
5                              Los Tacos No. 1       283     9.3   
6                                   Da Claudio       259     8.1   
7                             Joe's Pizza FiDi       327     8.9   
8                                         Zara       295     7.7   
9                                    Nish Nush       341     8.8   
10                               Nobu Downtown       376     8.6   
11                          Lot-Less Closeouts       384     8.4   
12                               Anthropologie       392     8.6   
13                                      Suited  

In [153]:
import requests
import pandas as pd
from pathlib import Path

# Function to fetch bike station data from Foursquare API
def get_bike_stations(lat, long):
    # Set up headers for the API request
    headers = {"Accept": "application/json"}
    headers['Authorization'] = "fsq35XmLZrC8aeGPvOzCUgV5TpEB3EocpNjTBaBn2tQqYx4="
    
    # Construct the URL for Foursquare API request
    gps = str(lat) + ',' + str(long)
    url = f'https://api.foursquare.com/v3/places/search?ll={gps}&radius=500&limit=50&fields=name,categories,distance,rating,stats,geocodes,location'
    
    # Make the API request
    response = requests.get(url, headers=headers)
    data = response.json()
    
    # Parse and extract bike station details
    bike_stations = []
    for result in data.get('results', []):
        name = result.get('name', '')
        distance = result.get('distance', 0)
        rating = result.get('rating', 0)
        stats = result.get('stats', {})
        rating_count = stats.get('ratingSignals', 0)
        latitude = result.get('geocodes', {}).get('main', {}).get('latitude', 0)
        longitude = result.get('geocodes', {}).get('main', {}).get('longitude', 0)
        address = result.get('location', {}).get('formatted_address', '')

        bike_stations.append({
            'name': name,
            'distance': distance,
            'rating': rating,
            'rating_count': rating_count,
            'latitude': latitude,
            'longitude': longitude,
            'address': address
        })
    
    # Create a DataFrame from the extracted data
    bike_stations_df = pd.DataFrame(bike_stations)
    return bike_stations_df

# Latitude and longitude of New York
NY_LATITUDE = 40.7128
NY_LONGITUDE = -74.0060

# Fetch bike station data for New York
bike_stations_df = get_bike_stations(NY_LATITUDE, NY_LONGITUDE)

# Print or use the bike stations DataFrame
print(bike_stations_df)

# Define the function to fetch Yelp POI data
def Yelp_POIs(lat, long):

    # API request fields
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer hFoti5YCqKAmF3DAPl05LG9txNUxXYgSErgqC6HuPMrkvzlQnL75zh5yKGjx354mjvY4uCv0gdVBHDHIEd7iC_n23OvvR_qST-PiIu5GDYELi5gXYyOmPYzShIjnZHYx"
    }
    gps_yelp = 'latitude=' + str(lat) + '&longitude=' + str(long)
    url = "https://api.yelp.com/v3/businesses/search?" + gps_yelp + "&categories=restaurants&radius=1000&sort_by=rating&limit=10"

    # API request
    yelp_api_request = requests.get(url, headers=headers)
    
    # Dataframe generation
    yelp_near_by_json = yelp_api_request.json()
    yelp_near_by_df = pd.json_normalize(yelp_near_by_json['businesses'])
    
    # Cleaning
    yelp_near_by_df = yelp_near_by_df[['name', 'rating', 'review_count', 'location.address1', 'location.city', 'location.zip_code']]
    yelp_near_by_df.rename(columns={'review_count': 'rating_count', 'location.address1': 'address', 'location.city': 'city', 'location.zip_code': 'zip_code'}, inplace=True)
    
    return yelp_near_by_df

# Coordinates for New York City
NY_LATITUDE = 40.7128
NY_LONGITUDE = -74.0060

# Send request to Yelp for top 10 restaurants near the specified coordinates
yelp_df = Yelp_POIs(NY_LATITUDE, NY_LONGITUDE)

# Print or use the resulting Yelp DataFrame
print(yelp_df)
# Define the function to fetch Foursquare POI data
def Foursquare_POIs(lat, long):
    # Set up headers for the API request
    headers = {"Accept": "application/json"}
    headers['Authorization'] = "fsq35XmLZrC8aeGPvOzCUgV5TpEB3EocpNjTBaBn2tQqYx4="
    fields = 'name,categories,distance,rating,stats,geocodes,location'
    gps = str(lat) + ',' + str(long)
    url = 'https://api.foursquare.com/v3/places/search?ll=' + gps + '&radius=500&limit=50&fields=' + fields

    # Make the API request and get the response
    response = requests.get(url, headers=headers)
    data = response.json()

    # Extract and process Foursquare data
    poi_data = []
    for result in data.get('results', []):
        poi = {
            'name': result.get('name', ''),
            'rating': result.get('rating', 0),
            'rating_count': result.get('stats', {}).get('ratingSignals', 0),
            'distance': result.get('distance', 0),
            'address': result.get('location', {}).get('formatted_address', ''),
            'latitude': result.get('geocodes', {}).get('main', {}).get('latitude', 0),
            'longitude': result.get('geocodes', {}).get('main', {}).get('longitude', 0)
        }
        categories = [category['name'] for category in result.get('categories', [])]
        poi['categories'] = categories
        poi_data.append(poi)

    # Create a DataFrame from the extracted data
    poi_df = pd.DataFrame(poi_data)
    return poi_df

# Main function to run the script
import requests
import pandas as pd
from pathlib import Path

# Function to fetch bike station data from Foursquare API
def get_bike_stations(lat, long):
    # ... (Your existing get_bike_stations function)

# Define the function to fetch Yelp POI data
def Yelp_POIs(lat, long):
    # ... (Your existing Yelp_POIs function)

# Define the function to fetch Foursquare POI data
def Foursquare_POIs(lat, long):
    # ... (Your existing Foursquare_POIs function)

# Main function to run the script
def main():
    # Latitude and longitude of New York
    NY_LATITUDE = 40.7128
    NY_LONGITUDE = -74.0060

    # Fetch bike station data for New York
    bike_stations_df = get_bike_stations(NY_LATITUDE, NY_LONGITUDE)
    print("Fetched bike station data")

    # Fetch Yelp and Foursquare data for New York
    yelp_df = Yelp_POIs(NY_LATITUDE, NY_LONGITUDE)
    foursquare_df = Foursquare_POIs(NY_LATITUDE, NY_LONGITUDE)
    print("Fetched Yelp and Foursquare data")

    # Rename columns for consistency
    bike_stations_df.rename(columns={'latitude': 'station_latitude', 'longitude': 'station_longitude'}, inplace=True)
    print("Renamed bike station columns")

    yelp_df.rename(columns={'latitude': 'poi_latitude', 'longitude': 'poi_longitude'}, inplace=True)
    print("Renamed Yelp columns")

    # Print unique values of relevant columns before merging
    print("Unique station latitude values:", bike_stations_df['station_latitude'].unique())
    print("Unique station longitude values:", bike_stations_df['station_longitude'].unique())
    print("Unique poi latitude values:", yelp_df['poi_latitude'].unique())
    print("Unique poi longitude values:", yelp_df['poi_longitude'].unique())

    # Merge bike station data with Yelp data
    bike_stations_with_yelp = bike_stations_df.merge(yelp_df, how='left', left_on=['station_latitude', 'station_longitude'], right_on=['poi_latitude', 'poi_longitude'])
    print("Merged bike station data with Yelp data")

    # Extract latitude and longitude from Foursquare data
    foursquare_df.rename(columns={'geocodes.main.latitude': 'poi_latitude', 'geocodes.main.longitude': 'poi_longitude'}, inplace=True)
    print("Renamed Foursquare columns")

    # Print columns of the dataframes before merging
    print("Bike Stations Columns:", bike_stations_with_yelp.columns)
    print("Foursquare Columns:", foursquare_df.columns)

    # Merge bike station data with Foursquare data
    bike_stations_with_foursquare = bike_stations_with_yelp.merge(foursquare_df, how='left', on=['poi_latitude', 'poi_longitude'])
    print("Merged bike station data with Foursquare data")

    # Save the merged data to a CSV file
    merged_csv_path = "merged_bike_station_data.csv"
    bike_stations_with_foursquare.to_csv(merged_csv_path, index=False)
    print("Merged bike station data saved to CSV file")

if __name__ == "__main__":
    main()



IndentationError: expected an indented block after function definition on line 127 (3537549822.py, line 131)

In [149]:
import requests

def Foursquare_POIs(latitude, longitude):
    # Set up headers for the API request
    headers = {"Accept": "application/json"}
    headers['Authorization'] = "fsq35XmLZrC8aeGPvOzCUgV5TpEB3EocpNjTBaBn2tQqYx4="
    fields = 'name,categories,distance,rating,stats,geocodes,location'
    gps = str(latitude) + ',' + str(longitude)
    url = 'https://api.foursquare.com/v3/places/search?ll='+ gps + '&radius=500&limit=50&fields='+ fields

    # Make the API request and get the response
    response = requests.get(url, headers=headers)
    data = response.json()

    # Print the entire JSON response for inspection
    print(data)

# Usage example
latitude = 40.7128
longitude = -74.0060
Foursquare_POIs(latitude, longitude)


{'results': [{'categories': [{'id': 16032, 'name': 'Park', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_', 'suffix': '.png'}}], 'distance': 147, 'geocodes': {'drop_off': {'latitude': 40.712407, 'longitude': -74.007613}, 'main': {'latitude': 40.712271, 'longitude': -74.007602}}, 'location': {'address': '17 Park Row', 'census_block': '360610031002009', 'country': 'US', 'cross_street': 'btwn Broadway & Centre St', 'dma': 'New York', 'formatted_address': '17 Park Row (btwn Broadway & Centre St), New York, NY 10038', 'locality': 'New York', 'postcode': '10038', 'region': 'NY'}, 'name': 'City Hall Park', 'rating': 8.8, 'stats': {'total_photos': 2046, 'total_ratings': 1323, 'total_tips': 141}}, {'categories': [{'id': 13013, 'name': 'Hotel Bar', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/travel/hotel_bar_', 'suffix': '.png'}}], 'distance': 164, 'geocodes': {'main': {'latitude': 40.711478, 'longitude': -74.007004}, 'roof': {'latitude': 40.711478

In [166]:
import requests
import pandas as pd
from pathlib import Path

# Function to fetch bike station data from Foursquare API
def get_bike_stations(lat, long):
    # Set up headers for the API request
    headers = {"Accept": "application/json"}
    headers['Authorization'] = "fsq35XmLZrC8aeGPvOzCUgV5TpEB3EocpNjTBaBn2tQqYx4="
    
    # Construct the URL for Foursquare API request
    gps = str(lat) + ',' + str(long)
    url = f'https://api.foursquare.com/v3/places/search?ll={gps}&radius=500&limit=50&fields=name,categories,distance,rating,stats,geocodes,location'
    
    # Make the API request
    response = requests.get(url, headers=headers)
    data = response.json()
    
    # Parse and extract bike station details
    bike_stations = []
    for result in data.get('results', []):
        name = result.get('name', '')
        distance = result.get('distance', 0)
        rating = result.get('rating', 0)
        stats = result.get('stats', {})
        rating_count = stats.get('ratingSignals', 0)
        latitude = result.get('geocodes', {}).get('main', {}).get('latitude', 0)
        longitude = result.get('geocodes', {}).get('main', {}).get('longitude', 0)
        address = result.get('location', {}).get('formatted_address', '')

        bike_stations.append({
            'name': name,
            'distance': distance,
            'rating': rating,
            'rating_count': rating_count,
            'latitude': latitude,
            'longitude': longitude,
            'address': address
        })
    
    # Create a DataFrame from the extracted data
    bike_stations_df = pd.DataFrame(bike_stations)
    return bike_stations_df

# Define the function to fetch Yelp POI data
def Yelp_POIs(lat, long):
    # API request fields
    headers = {
        "accept": "application/json",
        "Authorization": "Bearer hFoti5YCqKAmF3DAPl05LG9txNUxXYgSErgqC6HuPMrkvzlQnL75zh5yKGjx354mjvY4uCv0gdVBHDHIEd7iC_n23OvvR_qST-PiIu5GDYELi5gXYyOmPYzShIjnZHYx"
    }
    gps_yelp = 'latitude=' + str(lat) + '&longitude=' + str(long)
    url = "https://api.yelp.com/v3/businesses/search?" + gps_yelp + "&categories=restaurants&radius=1000&sort_by=rating&limit=10"

    # API request
    yelp_api_request = requests.get(url, headers=headers)
    
    # Dataframe generation
    yelp_near_by_json = yelp_api_request.json()
    yelp_near_by_df = pd.json_normalize(yelp_near_by_json['businesses'])
    
    # Cleaning
    yelp_near_by_df = yelp_near_by_df[['name', 'rating', 'review_count', 'location.address1', 'location.city', 'location.zip_code', 'coordinates.latitude', 'coordinates.longitude']]
    yelp_near_by_df.rename(columns={'review_count': 'rating_count', 'location.address1': 'address', 'location.city': 'city', 'location.zip_code': 'zip_code', 'coordinates.latitude': 'poi_latitude', 'coordinates.longitude': 'poi_longitude'}, inplace=True)
    
    return yelp_near_by_df




# Define the function to fetch Foursquare POI data
def Foursquare_POIs(lat, long):
    # Set up headers for the API request
    headers = {"Accept": "application/json"}
    headers['Authorization'] = "fsq35XmLZrC8aeGPvOzCUgV5TpEB3EocpNjTBaBn2tQqYx4="
    fields = 'name,categories,distance,rating,stats,geocodes,location'
    gps = str(lat) + ',' + str(long)
    url = 'https://api.foursquare.com/v3/places/search?ll=' + gps + '&radius=500&limit=50&fields=' + fields

    # Make the API request and get the response
    response = requests.get(url, headers=headers)
    data = response.json()

    # Extract and process Foursquare data
    poi_data = []
    for result in data.get('results', []):
        poi = {
            'name': result.get('name', ''),
            'rating': result.get('rating', 0),
            'rating_count': result.get('stats', {}).get('ratingSignals', 0),
            'distance': result.get('distance', 0),
            'address': result.get('location', {}).get('formatted_address', ''),
            'latitude': result.get('geocodes', {}).get('main', {}).get('latitude', 0),
            'longitude': result.get('geocodes', {}).get('main', {}).get('longitude', 0)
        }
        categories = [category['name'] for category in result.get('categories', [])]
        poi['categories'] = categories
        poi_data.append(poi)

    # Create a DataFrame from the extracted data
    poi_df = pd.DataFrame(poi_data)
    return poi_df

# Main function to run the script
# Main function to run the script
def main():
    # Latitude and longitude of New York
    NY_LATITUDE = 40.7128
    NY_LONGITUDE = -74.0060

    # Fetch bike station data for New York
    bike_stations_df = get_bike_stations(NY_LATITUDE, NY_LONGITUDE)
    print("Fetched bike station data")

    # Fetch Yelp and Foursquare data for New York
    yelp_df = Yelp_POIs(NY_LATITUDE, NY_LONGITUDE)
    foursquare_df = Foursquare_POIs(NY_LATITUDE, NY_LONGITUDE)
    print("Fetched Yelp and Foursquare data")

    # Rename columns for consistency
    bike_stations_df.rename(columns={'latitude': 'station_latitude', 'longitude': 'station_longitude'}, inplace=True)
    print("Renamed bike station columns")

    yelp_df.rename(columns={'coordinates.latitude': 'poi_latitude', 'coordinates.longitude': 'poi_longitude'}, inplace=True)
    print("Renamed Yelp columns")

    # Add poi_latitude column to bike_stations_df
    bike_stations_df['poi_latitude'] = pd.Series([0.0] * len(bike_stations_df))

    # Print unique values of relevant columns before merging
    print("Unique station latitude values:", bike_stations_df['station_latitude'].unique())
    print("Unique station longitude values:", bike_stations_df['station_longitude'].unique())
    print("Unique poi latitude values:", yelp_df['poi_latitude'].unique())
    print("Unique poi longitude values:", yelp_df['poi_longitude'].unique())


    # Debugging: Print column names before merging
    print("Columns in bike_stations_df:", bike_stations_df.columns)
    print("Columns in foursquare_df:", foursquare_df.columns)

    # Merge bike station data with Yelp data
    bike_stations_with_foursquare = bike_stations_df.merge(foursquare_df, how='left', left_on=['station_latitude', 'station_longitude'], right_on=['latitude', 'longitude'])
    print("Merged bike station data with Yelp data")

    # Save the merged data to a CSV file
    merged_csv_path = "merged_bike_station_data.csv"
    bike_stations_with_foursquare.to_csv(merged_csv_path, index=False)
    print("Merged bike station data saved to CSV file")

if __name__ == "__main__":
    main()

Fetched bike station data
Fetched Yelp and Foursquare data
Renamed bike station columns
Renamed Yelp columns
Unique station latitude values: [40.712271 40.711478 40.711231 40.710915 40.71387  40.71429  40.710765
 40.710003 40.711004 40.715669 40.710866 40.709515 40.709355 40.70951
 40.709167 40.716911 40.716076 40.716631 40.708787 40.716928 40.712073
 40.715609 40.711733 40.716734 40.708387 40.716902 40.716803 40.708739
 40.708504 40.709101 40.7152   40.716603]
Unique station longitude values: [-74.007602 -74.007004 -74.006165 -74.00589  -74.008277 -74.00873
 -74.007455 -74.007687 -74.008556 -74.007606 -74.009685 -74.005835
 -74.007749 -74.007973 -74.00473  -74.00556  -74.008495 -74.007906
 -74.00681  -74.006914 -74.010911 -74.009786 -74.011509 -74.008085
 -74.005659 -74.007873 -74.003592 -74.004626 -74.005315 -74.009135
 -74.010948 -74.008953]
Unique poi latitude values: [40.71345    40.71728296 40.71197739 40.692126   40.7762241  40.74913025
 40.70541999 40.71096    40.70683695 40.70