In [2]:
# imports
# Importing necessary libraries
import json
import os
from pprint import pprint

import pandas as pd
import requests

In [37]:
# API credentials are read from environment variables so they never appear in
# the notebook; a name is None when its variable is not set.
yelp_key, yelp_key2 = os.getenv('yelp_api'), os.getenv('yelp_api2')
FOURSQUARE_KEY = os.getenv('FOURSQUARE_API_KEY')

In [8]:
# Import ph_bikes to use latitude-longitude pairs

# Read the saved CSV of Philadelphia bike stations. The location can be
# overridden with the PH_BIKES_CSV environment variable so the notebook is not
# tied to one machine; the original absolute path remains the default.
ph_bikes_path = os.environ.get(
    "PH_BIKES_CSV",
    "C:/Users/HP/Music/LHLDataCourse/Python/project_data/ph_bikes.csv",
)
ph_bikes = pd.read_csv(ph_bikes_path)
ph_bikes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 233 entries, 0 to 232
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   ID           233 non-null    int64  
 1   name         233 non-null    object 
 2   empty_slots  233 non-null    int64  
 3   free_bikes   233 non-null    int64  
 4   latitude     233 non-null    float64
 5   longitude    233 non-null    float64
dtypes: float64(2), int64(3), object(1)
memory usage: 11.1+ KB


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

### POIs Selected - Colleges/Universities & Fitness-Related Businesses.
#### *I aim to observe the impact, if any, of proximity to these POIs on bike usage. When making the calls, I therefore sort by distance and loop through the data to record the closest business for each latitude-longitude pair.*

- For the College/Uni POI, I remove radius limits since universities are not as commonly found as fitness centers but I maintain the radius of 1000 for fitness centers.

### Fitness POI

In [9]:
# Function created to make a call to foursquare.

def get_fsq(latitude, longitude, radius, api_key, categories, sort=None):
    """Query the Foursquare Places search endpoint around one coordinate.

    Parameters
    ----------
    latitude, longitude
        Centre of the search, sent together as the ``ll`` query parameter.
    radius
        Sent as ``radius``; ``None`` leaves the parameter out of the request.
    api_key
        Sent verbatim in the ``Authorization`` header. ``None`` falls back to
        the notebook-level ``FOURSQUARE_KEY``.
    categories
        Sent as ``categories``.
    sort
        Sent as ``sort``. When omitted, the notebook-level ``sort`` variable is
        used if one is defined, which keeps existing calls working unchanged.

    Returns
    -------
    The decoded JSON body of the response.
    """
    url = "https://api.foursquare.com/v3/places/search"

    if sort is None:
        # Existing cells configure sorting through a notebook-level variable.
        sort = globals().get("sort")
    if api_key is None:
        api_key = FOURSQUARE_KEY

    candidate_params = {
        "ll": '{},{}'.format(latitude, longitude),
        "radius": radius,
        "categories": categories,
        "sort": sort,
    }
    # Only send parameters that were actually supplied.
    params = {key: value for key, value in candidate_params.items() if value is not None}

    headers = {
        "Accept": "application/json",
        "Authorization": api_key,
    }

    # A timeout stops one stalled request from hanging a loop over all stations.
    response = requests.get(url, params=params, headers=headers, timeout=30)
    return response.json()

In [10]:
# Search settings for the Foursquare fitness POI.
categories = 18021  # sports and recreation > gym & studio
sort = "distance"   # read by get_fsq as a notebook-level variable
radius = 1000
latitude, longitude = ph_bikes['latitude'], ph_bikes['longitude']

In [11]:
# Testing the function
# Smoke-test get_fsq on a single hard-coded coordinate before looping over the
# stations. pprint comes from the notebook's import cell.
res = get_fsq(latitude=51.51, longitude=-0.1337, radius=1000, api_key=FOURSQUARE_KEY, categories=18000)
pprint(res)

{'context': {'geo_bounds': {'circle': {'center': {'latitude': 51.51,
                                                  'longitude': -0.1337},
                                       'radius': 1000}}},
 'results': [{'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/arts_entertainment/musicvenue_',
                                       'suffix': '.png'},
                              'id': 10039,
                              'name': 'Music Venue'},
                             {'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_',
                                       'suffix': '.png'},
                              'id': 11035,
                              'name': 'Entertainment Agency'},
                             {'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/sports_outdoors_',
                                       'suffix': '.png'},
                              'id': 18000,
                              'name

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [21]:
# Distance from each bike station to the first Foursquare fitness result.

# One entry per station, in ph_bikes row order.
fitness_loc = []

for lat, lng in zip(ph_bikes['latitude'], ph_bikes['longitude']):
    location = get_fsq(latitude=lat, longitude=lng, radius=1000, api_key=FOURSQUARE_KEY, categories=categories)

    # Only the first result is kept; with the notebook-level sort set to
    # "distance" that is the closest venue.
    results = location.get('results') or []

    # NaN (not the string 'NA') marks stations with no result, so the column
    # stays numeric.
    fitness_loc.append(results[0]['distance'] if results else float('nan'))

In [13]:
# Sanity check: one entry is expected per ph_bikes row.
len(fitness_loc)

233

Put your parsed results into a DataFrame

In [14]:
# Build a one-column frame from the distances, then promote the positional
# index to an "ID" column to use as a merge key.
# NOTE(review): assumes ph_bikes' ID column equals the row position - confirm before merging.
fsq_fit = pd.DataFrame(fitness_loc).rename_axis("ID").reset_index()

In [15]:
# Give both columns descriptive labels, then preview the first rows.
fsq_fit = fsq_fit.set_axis(["ID", "fsq_center_dist"], axis=1)
fsq_fit.head()

Unnamed: 0,ID,fsq_center_dist
0,0,204
1,1,431
2,2,79
3,3,142
4,4,729


In [16]:
# Write the Foursquare fitness distances to a CSV file; the positional index is
# left out because "ID" is already a regular column.
csv_file_path = 'fsq_fit.csv'
fsq_fit.to_csv(path_or_buf=csv_file_path, index=False)

###
### College POI

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [22]:
# Search settings for the Foursquare college/university POI.
categories = 12013  # college and university
sort = "distance"   # read by get_fsq as a notebook-level variable
radius = None       # no radius limit for this POI
latitude, longitude = ph_bikes['latitude'], ph_bikes['longitude']

In [19]:
# Smoke-test the college category on a single coordinate, with no radius limit.
col = get_fsq(
    latitude=39.97195,
    longitude=-75.13445,
    radius=None,
    api_key=FOURSQUARE_KEY,
    categories=12013,
)
pprint(col)

{'context': {'geo_bounds': {'circle': {'center': {'latitude': 39.97195,
                                                  'longitude': -75.13445},
                                       'radius': 22000}}},
 'results': [{'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/education/academicbuilding_',
                                       'suffix': '.png'},
                              'id': 12014,
                              'name': 'College Academic Building'}],
              'chains': [],
              'distance': 1867,
              'fsq_id': '4b27a49af964a520168824e3',
              'geocodes': {'main': {'latitude': 39.981465,
                                    'longitude': -75.152612},
                           'roof': {'latitude': 39.981465,
                                    'longitude': -75.152612}},
              'link': '/v3/places/4b27a49af964a520168824e3',
              'location': {'address': '1115 W Berks St',
                           'cens

In [20]:
# check how to access distances
# The first element of 'results' carries a 'distance' field; the loop that
# follows reads the same path for every station.
col['results'][0]['distance']

1867

In [23]:
# Distance from each bike station to the first Foursquare college/university result.

# One entry per station, in ph_bikes row order.
college_loc = []

for lat, lng in zip(ph_bikes['latitude'], ph_bikes['longitude']):
    location = get_fsq(latitude=lat, longitude=lng, radius=radius, api_key=FOURSQUARE_KEY, categories=categories)

    # Only the first result is kept; with the notebook-level sort set to
    # "distance" that is the closest college.
    results = location.get('results') or []

    # NaN (not the string 'NA') marks stations with no result, so the column
    # stays numeric.
    college_loc.append(results[0]['distance'] if results else float('nan'))

In [24]:
# Sanity check: one entry is expected per ph_bikes row.
len(college_loc)

233

Put your parsed results into a DataFrame

In [25]:
# Build a one-column frame from the distances, then promote the positional
# index to an "ID" column to use as a merge key.
# NOTE(review): assumes ph_bikes' ID column equals the row position - confirm before merging.
fsq_uni = pd.DataFrame(college_loc).rename_axis("ID").reset_index()

In [26]:
# Give both columns descriptive labels, then preview the first rows.
fsq_uni = fsq_uni.set_axis(["ID", "fsq_college_dist"], axis=1)
fsq_uni.head()

Unnamed: 0,ID,fsq_college_dist
0,0,1555
1,1,1732
2,2,2069
3,3,1579
4,4,2114


In [27]:
# Write the Foursquare college distances to a CSV file; the positional index is
# left out because "ID" is already a regular column.
csv_file_path = 'fsq_uni.csv'
fsq_uni.to_csv(path_or_buf=csv_file_path, index=False)

# 
# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [39]:
# Function for API call to Yelp

def get_yelp(latitude, longitude, radius, api_key, categories, sort_by=None):
    """Query the Yelp business search endpoint around one coordinate.

    Parameters
    ----------
    latitude, longitude
        Centre of the search, sent as ``latitude`` and ``longitude``.
    radius
        Sent as ``radius``; ``None`` leaves the parameter out of the request.
    api_key
        Sent verbatim in the ``Authorization`` header. ``None`` falls back to
        the notebook-level ``yelp_key``.
    categories
        Sent as ``categories``.
    sort_by
        Sent as ``sort_by``. When omitted, the notebook-level ``sort_by``
        variable is used if one is defined, which keeps existing calls working
        unchanged.

    Returns
    -------
    The decoded JSON body of the response.
    """
    url = "https://api.yelp.com/v3/businesses/search"

    if sort_by is None:
        # Existing cells configure sorting through a notebook-level variable.
        sort_by = globals().get("sort_by")
    if api_key is None:
        api_key = yelp_key

    headers = {
        "accept": "application/json",
        # Use the key the caller passed, so a second key can be selected per call.
        "Authorization": api_key,
    }

    candidate_params = {
        "latitude": latitude,
        "longitude": longitude,
        "radius": radius,
        "categories": categories,
        "sort_by": sort_by,
    }
    # Only send parameters that were actually supplied.
    params = {key: value for key, value in candidate_params.items() if value is not None}

    # A timeout stops one stalled request from hanging a loop over all stations.
    response = requests.get(url, headers=headers, params=params, timeout=30)

    return response.json()

In [38]:
# Smoke-test get_yelp on one coordinate. get_yelp reads the notebook-level
# `sort_by`, which is otherwise first assigned in a later cell, so it is set
# here to keep the notebook runnable top to bottom on a fresh kernel.
sort_by = "distance"

test = get_yelp(latitude=39.97195, longitude=-75.13445, radius=None, api_key=yelp_key, categories="fitness")
pprint(test)

{'businesses': [{'alias': 'amrita-yoga-and-wellness-philadelphia',
                 'categories': [{'alias': 'yoga', 'title': 'Yoga'},
                                {'alias': 'pilates', 'title': 'Pilates'}],
                 'coordinates': {'latitude': 39.96885, 'longitude': -75.1346},
                 'display_phone': '(267) 928-3176',
                 'distance': 310.26798197383675,
                 'id': 'mtcgWB-WYipeGkJtaplEjg',
                 'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/39vpNv0jw3NBqtUUFSc3rg/o.jpg',
                 'is_closed': False,
                 'location': {'address1': '1204 Frankford Ave',
                              'address2': 'Fl 2',
                              'address3': '',
                              'city': 'Philadelphia',
                              'country': 'US',
                              'display_address': ['1204 Frankford Ave',
                                                  'Fl 2',
                               

### Fitness POI

In [40]:
# Search settings for the Yelp fitness POI.
categories = "fitness"
sort_by = "distance"  # read by get_yelp as a notebook-level variable
latitude, longitude = ph_bikes['latitude'], ph_bikes['longitude']

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [41]:
# Distance from each bike station to the first Yelp fitness result.

# One entry per station, in ph_bikes row order.
fitness_loc = []

for lat, lng in zip(ph_bikes['latitude'], ph_bikes['longitude']):
    location = get_yelp(latitude=lat, longitude=lng, radius=1000, api_key=yelp_key2, categories=categories)

    # Only the first business is kept; with the notebook-level sort_by set to
    # "distance" that is the closest one.
    businesses = location.get('businesses') or []

    # NaN marks stations with no result. The previous bare name NA was
    # undefined and raised NameError whenever this branch was reached.
    fitness_loc.append(businesses[0]['distance'] if businesses else float('nan'))

In [42]:
# Sanity check: one entry is expected per ph_bikes row.
len(fitness_loc)

233

Put your parsed results into a DataFrame

In [43]:
# Build a one-column frame from the distances, then promote the positional
# index to an "ID" column to use as a merge key.
# NOTE(review): assumes ph_bikes' ID column equals the row position - confirm before merging.
yelp_fit = pd.DataFrame(fitness_loc).rename_axis("ID").reset_index()

In [44]:
# Give both columns descriptive labels, then preview the first rows.
yelp_fit = yelp_fit.set_axis(["ID", "yelp_center_dist"], axis=1)
yelp_fit.head()

Unnamed: 0,ID,yelp_center_dist
0,0,206.798597
1,1,130.753256
2,2,794.358688
3,3,165.841893
4,4,711.265245


In [45]:
# Write the Yelp fitness distances to a CSV file; the positional index is left
# out because "ID" is already a regular column.
csv_file_path = 'yelp_fit.csv'
yelp_fit.to_csv(path_or_buf=csv_file_path, index=False)

### 
### College POI

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [46]:
# Search settings for the Yelp college/university POI.
categories = "collegeuniv"
sort_by = "distance"  # read by get_yelp as a notebook-level variable
latitude, longitude = ph_bikes['latitude'], ph_bikes['longitude']

In [47]:
# Distance from each bike station to the first Yelp college/university result.

# One entry per station, in ph_bikes row order.
college_dist = []

for lat, lng in zip(ph_bikes['latitude'], ph_bikes['longitude']):
    location = get_yelp(latitude=lat, longitude=lng, radius=None, api_key=yelp_key, categories=categories)

    # Only the first business is kept; with the notebook-level sort_by set to
    # "distance" that is the closest one.
    businesses = location.get('businesses') or []

    # NaN marks stations with no result. The previous bare name NA was
    # undefined and raised NameError whenever this branch was reached.
    college_dist.append(businesses[0]['distance'] if businesses else float('nan'))

In [48]:
# Sanity check: one entry is expected per ph_bikes row.
len(college_dist)

233

Put your parsed results into a DataFrame

In [49]:
# Build a one-column frame from the distances, then promote the positional
# index to an "ID" column to use as a merge key.
# NOTE(review): assumes ph_bikes' ID column equals the row position - confirm before merging.
yelp_uni = pd.DataFrame(college_dist).rename_axis("ID").reset_index()

In [50]:
# Give both columns descriptive labels, then preview the first rows.
yelp_uni = yelp_uni.set_axis(["ID", "yelp_college_dist"], axis=1)
yelp_uni.head()

Unnamed: 0,ID,yelp_college_dist
0,0,1331.446597
1,1,1706.096309
2,2,2282.52981
3,3,1808.390164
4,4,2707.043477


In [51]:
# Write the Yelp college distances to a CSV file; the positional index is left
# out because "ID" is already a regular column.
csv_file_path = 'yelp_uni.csv'
yelp_uni.to_csv(path_or_buf=csv_file_path, index=False)

# 
# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

#### *Comparing one lat-lng point for restaurants on the Yelp and Foursquare API*

In [54]:
# Restaurant comparison - Foursquare.
# get_fsq picks up the notebook-level `sort`, so switch it to relevance first.
sort = "relevance"

fsq_res = get_fsq(
    latitude=39.97195,
    longitude=-75.13445,
    radius=1000,
    api_key=FOURSQUARE_KEY,
    categories=13065,
)
pprint(fsq_res)

# Observation: no info on ratings in this response.

{'context': {'geo_bounds': {'circle': {'center': {'latitude': 39.97195,
                                                  'longitude': -75.13445},
                                       'radius': 1000}}},
 'results': [{'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/cocktails_',
                                       'suffix': '.png'},
                              'id': 13009,
                              'name': 'Cocktail Bar'},
                             {'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/pizza_',
                                       'suffix': '.png'},
                              'id': 13064,
                              'name': 'Pizzeria'},
                             {'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/italian_',
                                       'suffix': '.png'},
                              'id': 13236,
                              'name': 'Italian Restaurant'}],
    

In [64]:
# Flatten the nested result records into columns. info() prints its summary
# itself and returns None, so it is called directly rather than through
# print(), which would add a stray "None" line to the output.
fsq_df = pd.json_normalize(fsq_res['results'])
fsq_df.info()
print(fsq_df.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 25 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   fsq_id                         10 non-null     object 
 1   categories                     10 non-null     object 
 2   chains                         10 non-null     object 
 3   distance                       10 non-null     int64  
 4   link                           10 non-null     object 
 5   name                           10 non-null     object 
 6   timezone                       10 non-null     object 
 7   geocodes.main.latitude         10 non-null     float64
 8   geocodes.main.longitude        10 non-null     float64
 9   geocodes.roof.latitude         10 non-null     float64
 10  geocodes.roof.longitude        10 non-null     float64
 11  location.address               10 non-null     object 
 12  location.census_block          10 non-null     object

In [55]:
# Restaurant comparison - Yelp.
# get_yelp picks up the notebook-level `sort_by`, so switch it to best_match first.
sort_by = "best_match"

yelp_res = get_yelp(
    latitude=39.97195,
    longitude=-75.13445,
    radius=None,
    api_key=yelp_key,
    categories="restaurants",
)
pprint(yelp_res)

{'businesses': [{'alias': 'suraya-philadelphia-2',
                 'categories': [{'alias': 'lebanese', 'title': 'Lebanese'},
                                {'alias': 'gourmet', 'title': 'Specialty Food'},
                                {'alias': 'coffee', 'title': 'Coffee & Tea'}],
                 'coordinates': {'latitude': 39.9736865005167,
                                 'longitude': -75.1339557766914},
                 'display_phone': '(215) 302-1900',
                 'distance': 197.629294418328,
                 'id': 'vUrTGX_7HxqeoQ_6QCVz6g',
                 'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/H6vumhULeuBQESdnGKxq0w/o.jpg',
                 'is_closed': False,
                 'location': {'address1': '1528 Frankford Ave',
                              'address2': '',
                              'address3': None,
                              'city': 'Philadelphia',
                              'country': 'US',
                              'display

In [62]:
# Flatten the nested business records into columns. info() prints its summary
# itself and returns None, so it is called directly rather than through
# print(), which would add a stray "None" line to the output.
yelp_df = pd.json_normalize(yelp_res['businesses'])
yelp_df.info()
print(yelp_df.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 24 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   id                        20 non-null     object 
 1   alias                     20 non-null     object 
 2   name                      20 non-null     object 
 3   image_url                 20 non-null     object 
 4   is_closed                 20 non-null     bool   
 5   url                       20 non-null     object 
 6   review_count              20 non-null     int64  
 7   categories                20 non-null     object 
 8   rating                    20 non-null     float64
 9   transactions              20 non-null     object 
 10  price                     18 non-null     object 
 11  phone                     20 non-null     object 
 12  display_phone             20 non-null     object 
 13  distance                  20 non-null     float64
 14  coordinates.

#### *The Yelp API has more information, especially with regard to ratings, reviews, price, etc.* I worked with fitness and college POIs, not restaurants, but a comparison of dataframe snippets is shown below.

The Yelp API gives 20 businesses for the first lat-lng pair while the Foursquare API yields 10; the Foursquare API gives more information on the location while the Yelp API gives more business-focused details.

In [63]:
# Preview the first rows of the flattened Yelp restaurant results.
yelp_df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,transactions,...,coordinates.latitude,coordinates.longitude,location.address1,location.address2,location.address3,location.city,location.zip_code,location.country,location.state,location.display_address
0,vUrTGX_7HxqeoQ_6QCVz6g,suraya-philadelphia-2,Suraya,https://s3-media1.fl.yelpcdn.com/bphoto/H6vumh...,False,https://www.yelp.com/biz/suraya-philadelphia-2...,1415,"[{'alias': 'lebanese', 'title': 'Lebanese'}, {...",4.5,[delivery],...,39.973687,-75.133956,1528 Frankford Ave,,,Philadelphia,19125,US,PA,"[1528 Frankford Ave, Philadelphia, PA 19125]"
1,uJZgom8HpK74xz8IojpT0A,pizzeria-beddia-philadelphia,Pizzeria Beddia,https://s3-media1.fl.yelpcdn.com/bphoto/CltlyN...,False,https://www.yelp.com/biz/pizzeria-beddia-phila...,712,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.0,[delivery],...,39.970799,-75.134989,1313 North Lee St,,,Philadelphia,19122,US,PA,"[1313 North Lee St, Philadelphia, PA 19122]"
2,hUmHwBQtGg0iH-PZakQJDw,wm-mulherins-sons-philadelphia,Wm Mulherin's Sons,https://s3-media3.fl.yelpcdn.com/bphoto/22EU-2...,False,https://www.yelp.com/biz/wm-mulherins-sons-phi...,709,"[{'alias': 'pizza', 'title': 'Pizza'}, {'alias...",4.5,[delivery],...,39.97171,-75.135063,1355 N Front St,,,Philadelphia,19122,US,PA,"[1355 N Front St, Philadelphia, PA 19122]"
3,u0Bt7uvvj7LAjAdiMtrhug,laser-wolf-philadelphia-2,Laser Wolf,https://s3-media1.fl.yelpcdn.com/bphoto/rGW-4F...,False,https://www.yelp.com/biz/laser-wolf-philadelph...,365,"[{'alias': 'mideastern', 'title': 'Middle East...",4.5,[delivery],...,39.97049,-75.13662,1301 N Howard St,,,Philadelphia,19122,US,PA,"[1301 N Howard St, Philadelphia, PA 19122]"
4,TjwWB-ET-qmO2-8bfIHMig,cheu-fishtown-philadelphia,Cheu Fishtown,https://s3-media3.fl.yelpcdn.com/bphoto/i_tXvE...,False,https://www.yelp.com/biz/cheu-fishtown-philade...,310,"[{'alias': 'noodles', 'title': 'Noodles'}, {'a...",4.0,"[delivery, pickup]",...,39.972203,-75.134603,1416 Frankford Ave,,,Philadelphia,19125,US,PA,"[1416 Frankford Ave, Philadelphia, PA 19125]"


In [65]:
# Preview the first rows of the flattened Foursquare restaurant results.
fsq_df.head()

Unnamed: 0,fsq_id,categories,chains,distance,link,name,timezone,geocodes.main.latitude,geocodes.main.longitude,geocodes.roof.latitude,...,location.dma,location.formatted_address,location.locality,location.postcode,location.region,geocodes.drop_off.latitude,geocodes.drop_off.longitude,related_places.children,geocodes.front_door.latitude,geocodes.front_door.longitude
0,56fb2e82498ec4440b0aa2e9,"[{'id': 13009, 'name': 'Cocktail Bar', 'icon':...",[],59,/v3/places/56fb2e82498ec4440b0aa2e9,Wm. Mulherin's Sons,America/New_York,39.971695,-75.134998,39.971695,...,Philadelphia,"1355 N Front St, Philadelphia, PA 19122",Philadelphia,19122,PA,,,,,
1,5427166a498e85a2c0b936b3,"[{'id': 13034, 'name': 'Café', 'icon': {'prefi...",[],83,/v3/places/5427166a498e85a2c0b936b3,La Colombe Coffee Roasters,America/New_York,39.971136,-75.134231,39.971136,...,Philadelphia,"1335 Frankford Ave (btwn Mercer & Master St), ...",Philadelphia,19125,PA,39.971139,-75.134416,,,
2,5c958f5e2619ee002c1a8470,"[{'id': 13064, 'name': 'Pizzeria', 'icon': {'p...",[],155,/v3/places/5c958f5e2619ee002c1a8470,Pizzeria Beddia,America/New_York,39.970664,-75.135406,39.970664,...,Philadelphia,"1313 N LEE St (btwn Master & Thompson St), Phi...",Philadelphia,19125,PA,,,,,
3,507ad324e4b0c101c709028d,"[{'id': 13022, 'name': 'Sports Bar', 'icon': {...",[],159,/v3/places/507ad324e4b0c101c709028d,Bottle Bar East,America/New_York,39.970528,-75.134752,39.970528,...,Philadelphia,"1308 Frankford Ave (at Thompson St), Philadelp...",Philadelphia,19125,PA,39.970518,-75.134431,,,
4,597397c3851de565cc92bc6d,"[{'id': 13263, 'name': 'Japanese Restaurant', ...",[],38,/v3/places/597397c3851de565cc92bc6d,Cheu Fishtown,America/New_York,39.972245,-75.134603,39.972245,...,Philadelphia,"1416 Frankford Ave, Philadelphia, PA 19125",Philadelphia,19125,PA,39.972243,-75.134402,,,
