In [1]:
# imports
import pandas as pd
import os 
import requests

In [2]:
van_stations_df = pd.read_csv("van_stations.csv")
van_stations_df.head()

Unnamed: 0,name,latitude,longitude,extra.slots
0,Chilco & Barclay,49.291909,-123.140713,18
1,St George & Broadway,49.262321,-123.09306,14
2,Britannia Parking Lot,49.275882,-123.071865,14
3,Morton & Denman,49.28803,-123.142135,0
4,Thornton & National,49.273777,-123.092723,14


In [3]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv(dotenv_path=r'C:\Users\jessy\lighthouselabs\python\exercise\Other_data_types_exercise\.env')

FOURSQUARE_KEY = os.getenv('FOURSQUARE_API_KEY')

In [4]:
def get_venues_fs(latitude, longitude, radius, api_key, categories):
    """
    Get venues from foursquare with a specified place type and coordinates.
    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        api_key (str): foursquare API to use for query
        categories (str) : Foursquare-recognized place type. If not passed no place_type will be specified. Separate ids with commas
    
    Returns:
        response: response object from the requests library.
    """

    url = "https://places-api.foursquare.com/places/search"
    headers = {
        "Accept": "application/json",
        "Authorization": api_key,
        "X-Places-Api-Version": "2025-06-17"
        }
    params = {
        "ll": f"{latitude},{longitude}",
        "radius": radius,
        "categories": categories,
        "limit": 50
        }

    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"API error: {response.status_code}")
        return response.json()

Due to the limitation of Foursquare requesting rate, I limited the number of venues around each station to 50. 

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [5]:
# Send requests to Foursquare API for all bike stations with 1000m radius

all_venues = []

print(f"Querying Foursquare API for {len(van_stations_df)} bike stations...")

for idx, station in van_stations_df.iterrows():
    station_lat = station['latitude']
    station_lon = station['longitude']
    station_name = station.get('name', f'Station_{idx}')
    
    print(f"Processing station {idx + 1}/{len(van_stations_df)}: {station_name}")
    
    try:
        venues_data = get_venues_fs(
            latitude=station_lat, 
            longitude=station_lon, 
            radius=300,  
            api_key=FOURSQUARE_KEY, 
            categories=None 
        )
        
       
        if 'results' in venues_data:
            for venue in venues_data['results']:
                venue_info = {
                    'station_name': station_name,
                    'station_latitude': station_lat,
                    'station_longitude': station_lon,
                    'station_index': idx,
                    'venue_id': venue.get('fsq_place_id', ''),
                    'venue_name': venue.get('name', ''),
                    'venue_latitude': venue.get('latitude', None),
                    'venue_longitude': venue.get('longitude', None),
                    'venue_address': venue.get('location', {}).get('address', ''),
                    'venue_categories': [cat.get('name', '') for cat in venue.get('categories', [])],
                    'distance_from_station': venue.get('distance', None)
                }
                all_venues.append(venue_info)
    
    except Exception as e:
        print(f"Error querying station {station_name}: {e}")
        continue

print(f"Completed querying all stations. Found {len(all_venues)} venues total.")

Querying Foursquare API for 261 bike stations...
Processing station 1/261: Chilco & Barclay
Processing station 2/261: St George & Broadway
Processing station 3/261: Britannia Parking Lot
Processing station 4/261: Morton & Denman
Processing station 5/261: Thornton & National
Processing station 6/261: Burrard & 14th
Processing station 7/261: Olympic Village Station
Processing station 8/261: 8th & Macdonald
Processing station 9/261: 30th & Ontario
Processing station 10/261: Lakewood & Triumph
Processing station 11/261: 13th & Cambie
Processing station 12/261: Arbutus & Nanton
Processing station 13/261: 14th & Fraser
Processing station 14/261: 15th & Victoria (Trout Lake)
Processing station 15/261: Robson & Hornby
Processing station 16/261: Cardero & Robson
Processing station 17/261: 10th & Fraser
Processing station 18/261: Ontario & 5th
Processing station 19/261: Heather & 16th
Processing station 20/261: Richards & Smithe
Processing station 21/261: Robson & Beatty
Processing station 22/26

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [6]:
# Parse through the response to get POI details (restaurants, bars, banks, landmarks, etc)
if all_venues:
    print("Parsing and analyzing Foursquare POI data...")
    
    
    temp_df = pd.DataFrame(all_venues)
    
    print("\n=== FOURSQUARE POI DATA SUMMARY ===")
    print(f"Total venues found: {len(all_venues)}")
    print(f"Total bike stations processed: {temp_df['station_index'].nunique()}")
    print(f"Average venues per station: {len(all_venues) / temp_df['station_index'].nunique():.1f}")
    
    # Analyze venue categories 
    print(f"\nVenue Categories Analysis:")
    # Flatten the venue_categories lists to count individual category types
    all_categories = []
    for categories_list in temp_df['venue_categories']:
        if isinstance(categories_list, list):
            all_categories.extend(categories_list)
    
    if all_categories:
        category_counts = pd.Series(all_categories).value_counts()
        print(f"Top 15 most common venue types:")
        for category, count in category_counts.head(15).items():
            print(f"  - {category}: {count}")
    
    # Show venues with complete address information
    venues_with_address = temp_df[temp_df['venue_address'].notna() & (temp_df['venue_address'] != '')]
    print(f"\nVenues with address information: {len(venues_with_address)}")
    
    # Distance analysis
    if temp_df['distance_from_station'].notna().any():
        print(f"\nDistance from stations (meters):")
        distance_stats = temp_df['distance_from_station'].describe()
        print(f"  - Average: {distance_stats['mean']:.1f}m")
        print(f"  - Closest: {distance_stats['min']:.1f}m") 
        print(f"  - Farthest: {distance_stats['max']:.1f}m")
    
    # Show sample venues by major categories
    print(f"\nSample venues by category:")
    
    # Look for restaurants
    restaurants = temp_df[temp_df['venue_categories'].apply(
        lambda x: any('restaurant' in cat.lower() or 'food' in cat.lower() or 'cafe' in cat.lower() 
                     for cat in x) if isinstance(x, list) else False)]
    print(f"  - Restaurants/Food: {len(restaurants)} venues")
    if len(restaurants) > 0:
        print(f"    Sample: {restaurants['venue_name'].head(3).tolist()}")
    
    # Look for banks
    banks = temp_df[temp_df['venue_categories'].apply(
        lambda x: any('bank' in cat.lower() or 'atm' in cat.lower() 
                     for cat in x) if isinstance(x, list) else False)]
    print(f"  - Banks/Financial: {len(banks)} venues")
    if len(banks) > 0:
        print(f"    Sample: {banks['venue_name'].head(3).tolist()}")
    
    # Look for landmarks/attractions
    landmarks = temp_df[temp_df['venue_categories'].apply(
        lambda x: any('landmark' in cat.lower() or 'tourist' in cat.lower() or 'monument' in cat.lower() 
                     for cat in x) if isinstance(x, list) else False)]
    print(f"  - Landmarks/Tourist Attractions: {len(landmarks)} venues")
    if len(landmarks) > 0:
        print(f"    Sample: {landmarks['venue_name'].head(3).tolist()}")
    
    # Look for retail/shopping
    retail = temp_df[temp_df['venue_categories'].apply(
        lambda x: any('shop' in cat.lower() or 'store' in cat.lower() or 'retail' in cat.lower() 
                     for cat in x) if isinstance(x, list) else False)]
    print(f"  - Retail/Shopping: {len(retail)} venues")
    if len(retail) > 0:
        print(f"    Sample: {retail['venue_name'].head(3).tolist()}")
        
else:
    print("No venues found. Please check API key and network connection.")

Parsing and analyzing Foursquare POI data...

=== FOURSQUARE POI DATA SUMMARY ===
Total venues found: 6861
Total bike stations processed: 261
Average venues per station: 26.3

Venue Categories Analysis:
Top 15 most common venue types:
  - Coffee Shop: 624
  - Café: 437
  - Sushi Restaurant: 264
  - Japanese Restaurant: 243
  - Bakery: 221
  - Pizzeria: 181
  - Restaurant: 174
  - Fast Food Restaurant: 164
  - Sandwich Spot: 152
  - Dessert Shop: 150
  - Grocery Store: 149
  - Park: 146
  - Pub: 134
  - Bar: 131
  - Chinese Restaurant: 130

Venues with address information: 6384

Distance from stations (meters):
  - Average: 174.1m
  - Closest: 3.0m
  - Farthest: 307.0m

Sample venues by category:
  - Restaurants/Food: 2278 venues
    Sample: ['Carp', 'Subway', 'Thai Son']
  - Banks/Financial: 0 venues
  - Landmarks/Tourist Attractions: 23 venues
    Sample: ['Terry Fox Plaza', 'Roger Neilson Statue', 'Gastown Steam Clock']
  - Retail/Shopping: 2074 venues
    Sample: ['Blenz Coffee', "K

Put your parsed results into a DataFrame

In [7]:
# Create the final Foursquare DataFrame
foursquare_df = pd.DataFrame(all_venues)


if not foursquare_df.empty:
    foursquare_df['venue_categories_str'] = foursquare_df['venue_categories'].apply(
        lambda x: ', '.join(x) if isinstance(x, list) else str(x)
    )
    
   
    column_order = [
        'station_name', 'station_index', 'station_latitude', 'station_longitude',
        'venue_name', 'venue_id', 'venue_categories_str',
        'venue_latitude', 'venue_longitude', 'venue_address', 
        'distance_from_station'
    ]
    
    
    available_columns = [col for col in column_order if col in foursquare_df.columns]
    foursquare_df = foursquare_df[available_columns]
    
    print(f"Foursquare DataFrame created successfully!")
    print(f"Shape: {foursquare_df.shape}")
    print(f"Columns: {list(foursquare_df.columns)}")
    
    
    print("\nFirst 5 rows of Foursquare data:")
    display(foursquare_df.head())
    
    
    print(f"\nDataFrame Info:")
    print(f"- Total venues: {len(foursquare_df)}")
    print(f"- Venues per station (average): {len(foursquare_df) / len(van_stations_df):.1f}")
    print(f"- Unique venue categories: {foursquare_df['venue_categories_str'].nunique()}")
    
    # Save the DataFrame to CSV for future use
    output_file = "foursquare_venues.csv"
    foursquare_df.to_csv(output_file, index=False)
    print(f"✅ Foursquare data saved to '{output_file}'")
    
else:
    print("No data available to create DataFrame.")
    foursquare_df = pd.DataFrame() 

Foursquare DataFrame created successfully!
Shape: (6861, 11)
Columns: ['station_name', 'station_index', 'station_latitude', 'station_longitude', 'venue_name', 'venue_id', 'venue_categories_str', 'venue_latitude', 'venue_longitude', 'venue_address', 'distance_from_station']

First 5 rows of Foursquare data:


Unnamed: 0,station_name,station_index,station_latitude,station_longitude,venue_name,venue_id,venue_categories_str,venue_latitude,venue_longitude,venue_address,distance_from_station
0,Chilco & Barclay,0,49.291909,-123.140713,Blenz Coffee,4aa97278f964a520b25320e3,Coffee Shop,49.290251,-123.13747,935 Denman St,298
1,Chilco & Barclay,0,49.291909,-123.140713,Haro Street,4d5474a0cc65a1434d2f425e,Neighborhood,49.29121,-123.137897,,218
2,Chilco & Barclay,0,49.291909,-123.140713,RSVP Finish Line,520ff63111d2fbdd436c4aaf,Plaza,49.289984,-123.141443,,220
3,Chilco & Barclay,0,49.291909,-123.140713,Ted and Mary Greig Rhododendron Garden,4bd4c2b3637ba593fd68f570,Garden,49.293592,-123.142806,Stanley Park,240
4,Chilco & Barclay,0,49.291909,-123.140713,Jungle Room,6455be524d616826ab184696,Bar,49.28999,-123.138117,961 Denman St. P,284



DataFrame Info:
- Total venues: 6861
- Venues per station (average): 26.3
- Unique venue categories: 867
✅ Foursquare data saved to 'foursquare_venues.csv'


#### Since I am not able to access Yelp, so I did some additional analysis of the Foursquare data:
1. top 10 most common venues
2. distance analysis
3. top 10 most common venue category
4. data quality check
5. geography distribution
6. analyze the number of venues

In [11]:
# # Additional detailed analysis of the Foursquare data
print("=== DETAILED FOURSQUARE ANALYSIS ===\n")

if not foursquare_df.empty:
    print("1. Top 10 Most Common Venue Names:")
    venue_name_counts = foursquare_df['venue_name'].value_counts().head(10)
    print(venue_name_counts)
    
    print("\n2. Venues by Distance from Station:")
    if foursquare_df['distance_from_station'].notna().any():
        distance_stats = foursquare_df['distance_from_station'].describe()
        print(distance_stats)
    else:
        print("No distance data available")
    
    print("\n3. Top 10 Most Common Venue Category Combinations:")
    unique_categories = foursquare_df['venue_categories_str'].value_counts().head(10)
    print(unique_categories)
    
        
    print("\n4. Data Quality Check:")
    print(f"- Missing venue names: {foursquare_df['venue_name'].isna().sum()}")
    print(f"- Missing coordinates: {foursquare_df[['venue_latitude', 'venue_longitude']].isna().sum().sum()}")
    print(f"- Missing addresses: {foursquare_df['venue_address'].isna().sum()}")
    print(f"- Venues with complete data: {len(foursquare_df.dropna())}")
    
    print("\n5. Geographic Distribution:")
    print(f"- Latitude range: {foursquare_df['venue_latitude'].min():.6f} to {foursquare_df['venue_latitude'].max():.6f}")
    print(f"- Longitude range: {foursquare_df['venue_longitude'].min():.6f} to {foursquare_df['venue_longitude'].max():.6f}")

    print("\n6. Venue Count Analysis by Station:")
    # Count venues per station
    venues_per_station = foursquare_df.groupby('station_name').size()
    
    print(f"- Total number of stations: {len(venues_per_station)}")
    print(f"- Average venues per station: {venues_per_station.mean():.2f}")
    print(f"- Minimum venues per station: {venues_per_station.min()}")
    print(f"- Maximum venues per station: {venues_per_station.max()}")
    print(f"- Median venues per station: {venues_per_station.median():.2f}")
    print(f"- Standard deviation: {venues_per_station.std():.2f}")
    
    # Show stations with highest and lowest venue counts
    print(f"\nTop 5 stations with most venues:")
    top_stations = venues_per_station.nlargest(5)
    for station, count in top_stations.items():
        print(f"  - {station}: {count} venues")
    
    print(f"\nBottom 5 stations with fewest venues:")
    bottom_stations = venues_per_station.nsmallest(5)
    for station, count in bottom_stations.items():
        print(f"  - {station}: {count} venues")
    
    # Distribution analysis
    print(f"\nVenue count distribution:")
    venue_dist = venues_per_station.value_counts().sort_index()
    for count, stations in venue_dist.items():
        print(f"  - {stations} station(s) have {count} venue(s)")
    
    print(f"\n✅ Analysis complete! Total of {len(foursquare_df)} venues analyzed across {len(venues_per_station)} stations.")
    
else:
    print("No Foursquare data available for detailed analysis.")

=== DETAILED FOURSQUARE ANALYSIS ===

1. Top 10 Most Common Venue Names:
venue_name
Starbucks             127
Subway                 61
Tim Hortons            43
Blenz Coffee           42
JJ Bean                37
Shoppers Drug Mart     33
BC Liquor Store        32
A&W Restaurant         21
Cactus Club Cafe       20
London Drugs           20
Name: count, dtype: int64

2. Venues by Distance from Station:
count    6861.000000
mean      174.050721
std        74.703962
min         3.000000
25%       115.000000
50%       179.000000
75%       237.000000
max       307.000000
Name: distance_from_station, dtype: float64

3. Top 10 Most Common Venue Category Combinations:
venue_categories_str
Coffee Shop              426
Sushi Restaurant         178
Pizzeria                 138
Japanese Restaurant      128
Café                     126
Restaurant               101
Park                      97
Vietnamese Restaurant     97
Bakery                    93
Chinese Restaurant        83
Name: count, dtype

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

Put your parsed results into a DataFrame

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating