## Part 1 Data

In [3]:
import requests
import os
import pandas as pd
import json

In [4]:
def get_city_bike_data(city_name):
    url = 'http://api.citybik.es/v2/networks/'
   
    response = requests.get(url)
    
    if response.status_code == 200:
        data = response.json()
        networks = data.get('networks',[])
        for network in networks:
            if network['location']['city'].lower() == city_name.lower():
                return network
        print(f"'{city_name}' not found in the CityBikes API.")
        return None
    else:
        print(f"Error: Unable to fetch data for {city_name}. Status code: {response.status_code}")
        return None

In [5]:
city_name = 'Vancouver'
city_bike_data = get_city_bike_data(city_name)

if city_bike_data is not None:
    print(json.dumps(city_bike_data, indent=4))

{
    "company": [
        "Vanncouver Bike Share Inc.",
        "CycleHop LLC",
        "City of Vancouver",
        "Shaw Communications Inc.",
        "Fifteen"
    ],
    "gbfs_href": "https://vancouver-gbfs.smoove.pro/gbfs/2/gbfs.json",
    "href": "/v2/networks/mobibikes",
    "id": "mobibikes",
    "location": {
        "city": "Vancouver",
        "country": "CA",
        "latitude": 49.2827,
        "longitude": -123.1207
    },
    "name": "Mobi"
}


In [6]:
def get_bike_station_details(city_name):
    url = f'http://api.citybik.es/v2/networks/{network_id}'
    response = requests.get(url)
    
    if response.status_code == 200:
        data = response.json()
        return data.get('network', None)
    else:
        print(f"Error: Unable to fetch data for {network_id}. Status code: {response.status_code}")
        return None
    
network_id = 'mobibikes'
bike_station_details = get_bike_station_details(network_id)
    
if bike_station_details is not None:
    stations = bike_station_details.get('stations', [])
    if not stations:
        print(f"No bike stations data available for {network_id}.")
    else:
        station_details_list = []
        for station in stations:
            station_name = station['name']
            latitude = station['latitude']
            longitude = station['longitude']
            num_bikes = station['free_bikes']
            empty_slots = station['empty_slots']
            station_id = station['id']
            
            station_details_list.append({
                'Station Name': station_name,
                # 'Station ID': station_id,   #didn't want to show it to cluster data...
                'Latitude': latitude,
                'Longitude': longitude,
                'Available Bikes': num_bikes,
                'Empty Slots': empty_slots
            })
        df_city_bike = pd.DataFrame(station_details_list)
else:
    print(f"No data available for {network_id} or network ID not found.")

In [7]:
df_city_bike

Unnamed: 0,Station Name,Latitude,Longitude,Available Bikes,Empty Slots
0,10th & Cambie,49.262487,-123.114397,19,16
1,Yaletown-Roundhouse Station,49.274566,-123.121817,15,1
2,Dunsmuir & Beatty,49.279764,-123.110154,8,17
3,12th & Yukon (City Hall),49.260599,-123.113504,5,11
4,8th & Ash,49.264215,-123.117772,15,1
...,...,...,...,...,...
240,Cordova & Granville,49.285670,-123.112543,8,9
241,22nd & Main,49.250940,-123.101306,9,5
242,PNE - Hastings & Windermere,49.280977,-123.035969,5,35
243,1st & Fir,49.270783,-123.141564,9,11


## Part 2 Data

Setup

In [8]:
api_key = os.environ["location"]
client_id = os.environ["client_id"]
client_secret = os.environ["client_secret"]
api_version = '20230730'

location = "Vancouver, Canada"
url = "https://api.foursquare.com/v3/places/search?near=" + location

headers = {"Accept": "application/json"}

headers['Authorization'] = api_key

In [9]:
result = requests.get(url, headers=headers)
print(result)

<Response [200]>


In [10]:
data = result.json()
json_data = json.dumps(data, indent=4)
# print(json_data)

### Foursquare Data

In [11]:
city_name = "Vancouver, Canada"

# '49.2827,-123.1207'
def foursquare_bike_stations(city_name, latitude, longitude, api_key):
    url = f"https://api.foursquare.com/v3/places/search"

    params = {
        'll': f"{latitude},{longitude}",  # Latitude and longitude of the bike station
        'radius': 300,
        'categories': "13033",   # foursquare category for bubble tea shops
        'limit': 5
    }

    headers = {"Accept": "application/json"}
    headers['Authorization'] = api_key

    response = requests.get(url, params=params, headers=headers)

    if response.status_code == 200:
        data = response.json()
        bike_stations = data.get('results', [])

        return bike_stations
    else:
        print(f"Error: Unable to fetch Foursquare data. Status code: {response.status_code}")
        return []

# Function to collect the details of the bubble tea shops for each bike station
def bubble_tea_details(station_name, bubble_tea_shops):
    details_list = []
    for idx, shop in enumerate(bubble_tea_shops, start=1):
        name = shop.get('name', 'N/A')
        rating = shop.get('rating', 'N/A')
        location = shop.get('location', {})
        shop_latitude = location.get('lat', 'N/A')
        shop_longitude = location.get('lng', 'N/A')

        details_list.append({
            'Station Name': station_name,
            'Bubble Tea Shop Name': name,
            'Rating': rating,
            'Latitude': shop_latitude,
            'Longitude': shop_longitude
        })

    return details_list

# Create an empty list to store the details of bubble tea shops for all bike stations
all_bubble_tea_details = []

# Loop through each bike station and get bubble tea shop details
for index, row in df_city_bike.iterrows():
    station_name = row['Station Name']
    latitude = row['Latitude']
    longitude = row['Longitude']

    bubble_tea_shops = foursquare_bike_stations(city_name, latitude, longitude, api_key)

    # Collect bubble tea shop details for the current bike station
    station_bubble_tea_details = bubble_tea_details(station_name, bubble_tea_shops)

    # Add the details to the list
    all_bubble_tea_details.extend(station_bubble_tea_details)

# Create a DataFrame from the collected bubble tea shop details
df_bubble_tea = pd.DataFrame(all_bubble_tea_details)

# Print DataFrame
df_bubble_tea

Unnamed: 0,Station Name,Bubble Tea Shop Name,Rating,Latitude,Longitude
0,10th & Cambie,OneZo Tapioca,,,
1,Yaletown-Roundhouse Station,ShareTea Yaletown,,,
2,Yaletown-Roundhouse Station,Cha Yuan,,,
3,Dunsmuir & Beatty,Bubble King Tea House,,,
4,8th & Ash,OneZo Tapioca,,,
...,...,...,...,...,...
160,Cordova & Granville,Coco Fresh Tea & Juice,,,
161,Cordova & Granville,Teaever Bubble Tea,,,
162,Cordova & Granville,Gong Cha West Pender,,,
163,Cordova & Granville,Chatime,,,


### Yelp Data

In [12]:
import requests
import os

yelp_api_key = "pMleMhYHNwkjKxj725WRoV50sEPxvah29T4bQ9spFVCodth9zsyHvxueifmowF6xz-BDCFMxb4ba8THLm83BlhuEdHindRV9VnaZot4c-JgkBCnsuqxaMkDsn-nHZHYx"

url = "https://api.yelp.com/v3/categories"

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {yelp_api_key}"
}

response = requests.get(url, headers=headers)

In [13]:
radius = 300  # 300 meters radius for Yelp API request - reduced to return less results, run faster code

city_name = "Vancouver, Canada"

# Function to send a request to Yelp API
def yelp_bike_stations(city_name, latitude, longitude, yelp_api_key):
    url = f"https://api.yelp.com/v3/businesses/search"

    params = {
        'latitude': latitude,
        'longitude': longitude,
        'radius': radius,
        'categories': "bubbletea",
        'limit': 5
    }

    headers = {"Authorization": f"Bearer {yelp_api_key}"}

    response = requests.get(url, params=params, headers=headers)

    if response.status_code == 200:
        data = response.json()
        bike_stations = data.get('businesses', [])

        return bike_stations
    else:
        print(f"Error: Unable to fetch Yelp data. Status code: {response.status_code}")
        return []

# collect details of Yelp businesses for each bike station
def collect_yelp_details(station_name, bike_stations):
    details_list = []
    for idx, station in enumerate(bike_stations, start=1):
        station_latitude = station.get('coordinates', {}).get('latitude', 'N/A')
        station_longitude = station.get('coordinates', {}).get('longitude', 'N/A')

        # Fetch Yelp business details for the current bike station
        yelp_businesses = yelp_bike_stations(city_name, station_latitude, station_longitude, yelp_api_key)

        if yelp_businesses:
            for idx, business in enumerate(yelp_businesses, start=1):
                name = business.get('name', 'N/A')
                rating = business.get('rating', 'N/A')
                business_latitude = business.get('coordinates', {}).get('latitude', 'N/A')
                business_longitude = business.get('coordinates', {}).get('longitude', 'N/A')

                details_list.append({
                    'Station Name': station_name,
                    'Station Latitude': station_latitude,
                    'Station Longitude': station_longitude,
                    'Yelp Business Name': name,
                    'Rating': rating,
                    'Business Latitude': business_latitude,
                    'Business Longitude': business_longitude
                })

    return details_list

# Create an empty list to store the details of Yelp businesses for all bike stations
all_yelp_details = []

# Loop through each bike station and get Yelp business details
for index, row in df_city_bike.iterrows():
    station_name = row['Station Name']
    latitude = row['Latitude']
    longitude = row['Longitude']

    bike_stations = yelp_bike_stations(city_name, latitude, longitude, yelp_api_key)

    # Collect Yelp business details for the current bike station
    station_yelp_details = collect_yelp_details(station_name, bike_stations)

    # Add the details to the list
    all_yelp_details.extend(station_yelp_details)

# Create a DataFrame from the collected Yelp business details
df_yelp_businesses = pd.DataFrame(all_yelp_details)

# Print DF
df_yelp_businesses

Error: Unable to fetch Yelp data. Status code: 429
Error: Unable to fetch Yelp data. Status code: 429
Error: Unable to fetch Yelp data. Status code: 429
Error: Unable to fetch Yelp data. Status code: 429
Error: Unable to fetch Yelp data. Status code: 429
Error: Unable to fetch Yelp data. Status code: 429


KeyboardInterrupt: 

## Join the data from Part 1 with the data from Part 2 to create a new dataframe.

In [14]:
# Merge the DataFrames on the common columns (e.g., Latitude and Longitude)
merged_df = df_city_bike.merge(df_yelp_businesses, on=['Latitude', 'Longitude'], how='inner')

# Print the merged DataFrame
print(merged_df)

NameError: name 'df_yelp_businesses' is not defined

## Provide a visualization that you used as part of your EDA process. Explain the initial pattern or relationship you discovered through this visualization. 

In [15]:
import matplotlib.pyplot as plt

# Assuming you have the merged DataFrame 'merged_df'

# Get the Latitude, Longitude, and Yelp Rating columns from the DataFrame
latitude = merged_df['Latitude']
longitude = merged_df['Longitude']
yelp_rating = merged_df['Yelp Rating']

# Create a scatter plot
plt.figure(figsize=(10, 6))
plt.scatter(longitude, latitude, c=yelp_rating, cmap='viridis', alpha=0.7)
plt.colorbar(label='Yelp Rating')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('Scatter Plot of Bike Stations and Top-rated Yelp Restaurants')
plt.grid(True)
plt.show()

NameError: name 'merged_df' is not defined

# Database

Put all your results in an SQLite3 database (remember, SQLite stores its databases as files in your local machine - make sure to create your database in your project's data/ directory!)

In [None]:
import pandas as pd
import sqlite3

# Define the paths for the SQLite database file
yelp_db_path = pd.DataFrame(all_yelp_details)
foursquare_db_path = pd.DataFrame(all_bubble_tea_details)

# Function - create a table and insert data into the SQLite database
def create_and_insert_table(df, db_path, table_name):
    # Connect to the SQLite database
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    create_table_query = f"""
    CREATE TABLE IF NOT EXISTS {table_name} (
        Latitude REAL,
        Longitude REAL,
        Rating REAL
    )
    """
    cursor.execute(create_table_query)

    # Insert data into the table
    insert_query = f"""
    INSERT INTO {table_name} (Latitude, Longitude, Rating)
    VALUES (?, ?, ?)
    """

    # Convert DataFrame rows into a list of tuples for insertion
    data_to_insert = df[['Latitude', 'Longitude', 'Yelp Rating']].to_records(index=False)
    cursor.executemany(insert_query, data_to_insert)

    # Commit changes and close the connection
    conn.commit()
    conn.close()

# Create/insert data for Yelp DataFrame
create_and_insert_table(df_yelp, yelp_db_path, "yelp_data")

# Create/insert data for Foursquare DataFrame
create_and_insert_table(df_foursquare, foursquare_db_path, "foursquare_data")


Look at the data before and after the join to validate your data.