In [1]:
# imports
# Importing necessary libraries
import pandas as pd
import requests
import json
import os

In [4]:
# Accessing environmental variables
yelp_key = os.environ.get('yelp_api')
yelp_key2 = os.environ.get('yelp_api2')
FOURSQUARE_KEY = os.environ.get('FOURSQUARE_API_KEY')

In [5]:
# Import ph_bikes to use latitude-longitude pairs

# read in saved csv of philly bike stations.
ph_bikes = pd.read_csv("C:/Users/HP/Music/LHLDataCourse/Python/project_data/ph_bikes.csv")
ph_bikes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 233 entries, 0 to 232
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   ID           233 non-null    int64  
 1   name         233 non-null    object 
 2   empty_slots  233 non-null    int64  
 3   free_bikes   233 non-null    int64  
 4   latitude     233 non-null    float64
 5   longitude    233 non-null    float64
dtypes: float64(2), int64(3), object(1)
memory usage: 11.1+ KB


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

### POIs Selected - Colleges/Universities & Fitness-Related Businesses.
#### *I aim to observe the impact, if any, of the proximity to the POIs on bike usage and so, when making the calls, I sort by distance and loop through the data to access the closest business for ech latitude-longitude pair.*

- For the College/Uni POI, I remove radius limits since universities are not as commonly found as fitness centers but I maintain the radius of 1000 for fitness centers.

### Fitness POI

In [6]:
# Function created to make a call to foursquare.

def get_fsq(latitude, longitude, radius, api_key, categories):

    url = "https://api.foursquare.com/v3/places/search"

    params = {
        "ll": '{},{}'.format(latitude, longitude),
        "radius": radius,
        "categories": categories,
        "sort": sort,
        # "limit": 50
    }

    headers = {
        "Accept": "application/json",
        "Authorization": FOURSQUARE_KEY
    }

    response = requests.request("GET", url, params=params, headers=headers)
    return response.json() 

In [8]:
# Testing the function
from pprint import pprint

# testing
res = get_fsq(latitude=51.51, longitude=-0.1337, radius=1000, api_key=FOURSQUARE_KEY, categories=18000)
# pprint(res)

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [15]:
# Function to get distance from a location using a specific provider
def get_location_distance(latitude, longitude, radius, api_key, categories, provider='foursquare'):
    try:
        if provider == 'foursquare':
            location = get_fsq(latitude=latitude, longitude=longitude, radius=radius, api_key=api_key, categories=categories)
        elif provider == 'yelp':
            location = get_yelp(latitude=latitude, longitude=longitude, radius=radius, api_key=api_key, categories=categories)
        else:
            raise ValueError("Invalid provider. Supported providers: 'foursquare', 'yelp'")

        if provider == 'foursquare':
            key = 'results'
        elif provider == 'yelp':
            key = 'businesses'
        else:
            key = None

        if key and key in location and len(location[key]) > 0:
            first_distance = location[key][0]['distance']
            return first_distance
        else:
            # Handle cases where no locations are returned
            return 'NA'
    except Exception as e:
        print(f"An error occurred while fetching location data: {e}")
        return 'NA'

In [16]:
# assign values for fitness POI
provider = 'foursquare'
latitude = ph_bikes['latitude']
longitude = ph_bikes['longitude']
radius = 1000
sort = "distance"
categories = 18021  # sports and recreation > gym & studio

In [17]:
# Iterating through each lat-lng pair
fitness_loc = []

for index, row in ph_bikes.iterrows():
    lat = row['latitude']
    lng = row['longitude']
    distance = get_location_distance(lat, lng, radius=1000, api_key=FOURSQUARE_KEY, categories=categories, provider=provider)
    fitness_loc.append(distance)

In [18]:
# fitness_loc

In [19]:
len(fitness_loc)

233

Put your parsed results into a DataFrame

In [20]:
# Convert the data into a DataFrame and assign ID to aid when merging

fsq_fit = pd.DataFrame(fitness_loc)
fsq_fit = fsq_fit.rename_axis("ID").reset_index()

In [21]:
# Renaming the columns

fsq_fit.columns = ["ID", "fsq_center_dist"]
fsq_fit.head()

Unnamed: 0,ID,fsq_center_dist
0,0,204
1,1,431
2,2,79
3,3,142
4,4,729


In [40]:
# saving dataframes to csv files.
def save_dataframe_to_csv(dataframe, file_path, index=False):
    try:
        dataframe.to_csv(file_path, index=index)
        print(f"DataFrame successfully saved to {file_path}")
    except Exception as e:
        print(f"An error occurred while saving the DataFrame to a CSV file: {e}")

In [41]:
save_dataframe_to_csv(fsq_fit, 'fsq_fit.csv')

DataFrame successfully saved to fsq_fit.csv


###
### College POI

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [24]:
# assign values for collegeuniv POI
provider = 'foursquare'
latitude = ph_bikes['latitude']
longitude = ph_bikes['longitude']
radius = None
sort = "distance"
categories = 12013 # college and university

In [25]:
# Iterating through each lat-lng pair
college_loc = []

for index, row in ph_bikes.iterrows():
    lat = row['latitude']
    lng = row['longitude']
    distance = get_location_distance(lat, lng, radius=1000, api_key=FOURSQUARE_KEY, categories=categories, provider=provider)
    college_loc.append(distance)

In [26]:
len(college_loc)

233

Put your parsed results into a DataFrame

In [27]:
# Convert the data into a DataFrame and assign ID to aid when merging

fsq_uni = pd.DataFrame(college_loc)
fsq_uni = fsq_uni.rename_axis("ID").reset_index()

In [28]:
# Renaming the columns

fsq_uni.columns = ["ID", "fsq_college_dist"]
fsq_uni.head()

Unnamed: 0,ID,fsq_college_dist
0,0,272
1,1,280
2,2,952
3,3,526
4,4,796


In [29]:
save_dataframe_to_csv(fsq_uni, 'fsq_uni.csv')

DataFrame successfully saved to fsq_uni.csv


# 
# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [30]:
# Function for API call to Yelp

def get_yelp(latitude, longitude, radius, api_key, categories):
    url = "https://api.yelp.com/v3/businesses/search"

    headers = {
        "accept": "application/json",
        "Authorization": yelp_key
    }

    params = {
        "latitude": latitude,
        "longitude": longitude,
        "categories": categories,
        "sort_by": sort_by
    }

    response = requests.get(url, headers=headers, params=params)

    return response.json()

In [36]:
test = get_yelp(latitude=39.97195, longitude=-75.13445, radius=None, api_key=yelp_key, categories="fitness")
# pprint(test)

### Fitness POI

In [31]:
# assign values for fitness POI using distance
provider = 'yelp'
latitude = ph_bikes['latitude']
longitude = ph_bikes['longitude']
categories = "fitness"
sort_by = "distance"

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [32]:
# Iterating through each lat-lng pair
fitness_loc = []

for index, row in ph_bikes.iterrows():
    lat = row['latitude']
    lng = row['longitude']
    distance = get_location_distance(lat, lng, radius=1000, api_key=yelp_key, categories=categories, provider=provider)
    fitness_loc.append(distance)

In [33]:
len(fitness_loc)

233

Put your parsed results into a DataFrame

In [34]:
# Convert the data into a DataFrame and assign ID to aid when merging

yelp_fit = pd.DataFrame(fitness_loc)
yelp_fit = yelp_fit.rename_axis("ID").reset_index()

In [35]:
# Renaming the columns

yelp_fit.columns = ["ID", "yelp_center_dist"]
yelp_fit.head()

Unnamed: 0,ID,yelp_center_dist
0,0,206.798597
1,1,130.753256
2,2,794.358688
3,3,165.841893
4,4,711.265245


In [37]:
save_dataframe_to_csv(yelp_fit, 'yelp_fit.csv')

DataFrame successfully saved to yelp_fit.csv


### 
### College POI

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [38]:
# assign values for college POI
provider = 'yelp'
latitude = ph_bikes['latitude']
longitude = ph_bikes['longitude']
categories = "collegeuniv"
sort_by = "distance"

In [39]:
# Iterating through each lat-lng pair
college_dist = []

for index, row in ph_bikes.iterrows():
    lat = row['latitude']
    lng = row['longitude']
    distance = get_location_distance(lat, lng, radius=1000, api_key=yelp_key2, categories=categories, provider=provider)
    college_dist.append(distance)

In [48]:
len(college_dist)

233

Put your parsed results into a DataFrame

In [42]:
# Convert the data into a DataFrame and assign ID to aid when merging

yelp_uni = pd.DataFrame(college_dist)
yelp_uni = yelp_uni.rename_axis("ID").reset_index()

In [43]:
# Renaming the columns

yelp_uni.columns = ["ID", "yelp_college_dist"]
yelp_uni.head()

Unnamed: 0,ID,yelp_college_dist
0,0,1331.446597
1,1,1706.096309
2,2,2282.52981
3,3,1808.390164
4,4,2707.043477


In [44]:
save_dataframe_to_csv(yelp_uni, 'yelp_uni.csv')

DataFrame successfully saved to yelp_uni.csv


# 
# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

#### *Comparing one lat-lng point for restaurants on the Yelp and Foursquare API*

In [45]:
# restaurant testing - Foursquare
sort = "relevance"

fsq_res = get_fsq(latitude=39.97195, longitude=-75.13445, radius=1000, api_key=FOURSQUARE_KEY, categories=13065)
# pprint(fsq_res)

# no info on ratings

In [46]:
fsq_df = pd.json_normalize(fsq_res['results'])
print(fsq_df.info())
print(fsq_df.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 25 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   fsq_id                         10 non-null     object 
 1   categories                     10 non-null     object 
 2   chains                         10 non-null     object 
 3   distance                       10 non-null     int64  
 4   link                           10 non-null     object 
 5   name                           10 non-null     object 
 6   timezone                       10 non-null     object 
 7   geocodes.main.latitude         10 non-null     float64
 8   geocodes.main.longitude        10 non-null     float64
 9   geocodes.roof.latitude         10 non-null     float64
 10  geocodes.roof.longitude        10 non-null     float64
 11  location.address               10 non-null     object 
 12  location.census_block          10 non-null     object

In [47]:
# restaurant testing - Yelp
sort_by = "best_match"

yelp_res = get_yelp(latitude=39.97195, longitude=-75.13445, radius=None, api_key=yelp_key, categories="restaurants")
# pprint(yelp_res)

In [48]:
yelp_df = pd.json_normalize(yelp_res['businesses'])
print(yelp_df.info())
print(yelp_df.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 24 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   id                        20 non-null     object 
 1   alias                     20 non-null     object 
 2   name                      20 non-null     object 
 3   image_url                 20 non-null     object 
 4   is_closed                 20 non-null     bool   
 5   url                       20 non-null     object 
 6   review_count              20 non-null     int64  
 7   categories                20 non-null     object 
 8   rating                    20 non-null     float64
 9   transactions              20 non-null     object 
 10  price                     18 non-null     object 
 11  phone                     20 non-null     object 
 12  display_phone             20 non-null     object 
 13  distance                  20 non-null     float64
 14  coordinates.

#### *The Yelp API has more information especially with regards to ratings, reviews, price etc.* I worked with fitness and college POIs, not restaurants, but a comparison of dataframe snippets are below.

The Yelp API gives 20 businesses for the first lat-lng pair while the Foursquare API yields 10; the Foursquare API gives more information on the location while the Yelp API gives more business-focused details.

In [49]:
pd.set_option('display.max_columns', None)
yelp_df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,transactions,price,phone,display_phone,distance,coordinates.latitude,coordinates.longitude,location.address1,location.address2,location.address3,location.city,location.zip_code,location.country,location.state,location.display_address
0,vUrTGX_7HxqeoQ_6QCVz6g,suraya-philadelphia-2,Suraya,https://s3-media1.fl.yelpcdn.com/bphoto/H6vumh...,False,https://www.yelp.com/biz/suraya-philadelphia-2...,1415,"[{'alias': 'lebanese', 'title': 'Lebanese'}, {...",4.5,[delivery],$$$,12153021900.0,(215) 302-1900,197.629294,39.973687,-75.133956,1528 Frankford Ave,,,Philadelphia,19125,US,PA,"[1528 Frankford Ave, Philadelphia, PA 19125]"
1,u0Bt7uvvj7LAjAdiMtrhug,laser-wolf-philadelphia-2,Laser Wolf,https://s3-media1.fl.yelpcdn.com/bphoto/rGW-4F...,False,https://www.yelp.com/biz/laser-wolf-philadelph...,365,"[{'alias': 'mideastern', 'title': 'Middle East...",4.5,[delivery],$$$$,,,230.946802,39.97049,-75.13662,1301 N Howard St,,,Philadelphia,19122,US,PA,"[1301 N Howard St, Philadelphia, PA 19122]"
2,uJZgom8HpK74xz8IojpT0A,pizzeria-beddia-philadelphia,Pizzeria Beddia,https://s3-media1.fl.yelpcdn.com/bphoto/CltlyN...,False,https://www.yelp.com/biz/pizzeria-beddia-phila...,712,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,[delivery],$$,12679282256.0,(267) 928-2256,135.990058,39.970799,-75.134989,1313 North Lee St,,,Philadelphia,19122,US,PA,"[1313 North Lee St, Philadelphia, PA 19122]"
3,hUmHwBQtGg0iH-PZakQJDw,wm-mulherins-sons-philadelphia,Wm Mulherin's Sons,https://s3-media3.fl.yelpcdn.com/bphoto/22EU-2...,False,https://www.yelp.com/biz/wm-mulherins-sons-phi...,709,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,[delivery],$$$,12152911355.0,(215) 291-1355,57.439658,39.97171,-75.135063,1355 N Front St,,,Philadelphia,19122,US,PA,"[1355 N Front St, Philadelphia, PA 19122]"
4,HbnWgfnaX-kUobTx2QTWpQ,kalaya-philadelphia-7,Kalaya,https://s3-media4.fl.yelpcdn.com/bphoto/GXfcrf...,False,https://www.yelp.com/biz/kalaya-philadelphia-7...,198,"[{'alias': 'thai', 'title': 'Thai'}]",4.0,[],,12155452535.0,(215) 545-2535,427.347274,39.97551,-75.133,4 W Palmer St,,,Philadelphia,19125,US,PA,"[4 W Palmer St, Philadelphia, PA 19125]"


In [50]:
# percentage of missing values in each column
round(100 * (yelp_df.isnull().sum()/len(yelp_df)),2).sort_values(ascending=False)

location.address2           35.0
location.address3           30.0
price                       10.0
id                           0.0
distance                     0.0
location.state               0.0
location.country             0.0
location.zip_code            0.0
location.city                0.0
location.address1            0.0
coordinates.longitude        0.0
coordinates.latitude         0.0
display_phone                0.0
alias                        0.0
phone                        0.0
transactions                 0.0
rating                       0.0
categories                   0.0
review_count                 0.0
url                          0.0
is_closed                    0.0
image_url                    0.0
name                         0.0
location.display_address     0.0
dtype: float64

In [52]:
# percentage of missing values in each row
round(100 * (yelp_df.isnull().sum(axis=1)/len(yelp_df)),2).sort_values(ascending=False)

6     15.0
4     10.0
0      5.0
15     5.0
3      5.0
5      5.0
7      5.0
9      5.0
1      5.0
11     5.0
12     5.0
13     5.0
18     0.0
17     0.0
16     0.0
10     0.0
14     0.0
8      0.0
2      0.0
19     0.0
dtype: float64

In [65]:
fsq_df.head()

Unnamed: 0,fsq_id,categories,chains,distance,link,name,timezone,geocodes.main.latitude,geocodes.main.longitude,geocodes.roof.latitude,...,location.dma,location.formatted_address,location.locality,location.postcode,location.region,geocodes.drop_off.latitude,geocodes.drop_off.longitude,related_places.children,geocodes.front_door.latitude,geocodes.front_door.longitude
0,56fb2e82498ec4440b0aa2e9,"[{'id': 13009, 'name': 'Cocktail Bar', 'icon':...",[],59,/v3/places/56fb2e82498ec4440b0aa2e9,Wm. Mulherin's Sons,America/New_York,39.971695,-75.134998,39.971695,...,Philadelphia,"1355 N Front St, Philadelphia, PA 19122",Philadelphia,19122,PA,,,,,
1,5427166a498e85a2c0b936b3,"[{'id': 13034, 'name': 'Café', 'icon': {'prefi...",[],83,/v3/places/5427166a498e85a2c0b936b3,La Colombe Coffee Roasters,America/New_York,39.971136,-75.134231,39.971136,...,Philadelphia,"1335 Frankford Ave (btwn Mercer & Master St), ...",Philadelphia,19125,PA,39.971139,-75.134416,,,
2,5c958f5e2619ee002c1a8470,"[{'id': 13064, 'name': 'Pizzeria', 'icon': {'p...",[],155,/v3/places/5c958f5e2619ee002c1a8470,Pizzeria Beddia,America/New_York,39.970664,-75.135406,39.970664,...,Philadelphia,"1313 N LEE St (btwn Master & Thompson St), Phi...",Philadelphia,19125,PA,,,,,
3,507ad324e4b0c101c709028d,"[{'id': 13022, 'name': 'Sports Bar', 'icon': {...",[],159,/v3/places/507ad324e4b0c101c709028d,Bottle Bar East,America/New_York,39.970528,-75.134752,39.970528,...,Philadelphia,"1308 Frankford Ave (at Thompson St), Philadelp...",Philadelphia,19125,PA,39.970518,-75.134431,,,
4,597397c3851de565cc92bc6d,"[{'id': 13263, 'name': 'Japanese Restaurant', ...",[],38,/v3/places/597397c3851de565cc92bc6d,Cheu Fishtown,America/New_York,39.972245,-75.134603,39.972245,...,Philadelphia,"1416 Frankford Ave, Philadelphia, PA 19125",Philadelphia,19125,PA,39.972243,-75.134402,,,


In [51]:
# percentage of missing values in each column
round(100 * (fsq_df.isnull().sum()/len(fsq_df)),2).sort_values(ascending=False)

geocodes.front_door.longitude    90.0
geocodes.front_door.latitude     90.0
related_places.children          80.0
geocodes.drop_off.longitude      30.0
geocodes.drop_off.latitude       30.0
location.country                  0.0
location.region                   0.0
location.postcode                 0.0
location.locality                 0.0
location.formatted_address        0.0
location.dma                      0.0
location.cross_street             0.0
fsq_id                            0.0
categories                        0.0
location.address                  0.0
geocodes.roof.longitude           0.0
geocodes.roof.latitude            0.0
geocodes.main.longitude           0.0
geocodes.main.latitude            0.0
timezone                          0.0
name                              0.0
link                              0.0
distance                          0.0
chains                            0.0
location.census_block             0.0
dtype: float64

In [53]:
# percentage of missing values in each row
round(100 * (fsq_df.isnull().sum(axis=1)/len(fsq_df)),2).sort_values(ascending=False)

0    50.0
2    50.0
9    40.0
1    30.0
3    30.0
4    30.0
5    30.0
8    30.0
6    20.0
7    10.0
dtype: float64

The Foursquare data has more missing values in rows and columns compared to the Yelp data.