In [1]:
import pandas as pd
import json
import requests

In [2]:
# Set the option to opt-in to the future behavior
pd.set_option('future.no_silent_downcasting', True)

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [230]:
#retreiving the data from part 1 for vancouver bike stations
bikes_data = pd.read_csv('bikes_data_part1.csv')

In [231]:
bikes_data.head()

Unnamed: 0,bike_station_name,free_bikes,latitude,longitude
0,10th & Cambie,11,49.262487,-123.114397
1,10th & Columbia,3,49.262206,-123.109091
2,10th & Commercial,1,49.261403,-123.070102
3,10th & Fraser,1,49.26151,-123.089413
4,10th & Granville,2,49.262598,-123.138921


Creating lists for storing information that will be recieved from Foursquare API

- ***bike_station_name*** : Name of the bike station
- ***poi_name*** : Name of the point of interest (Bars)
- ***rating*** : Rating of POI
- ***distance_to_station*** : Distance of POI from the bike station
- ***popularity*** : Popularity of POI
- ***total_ratings*** : Total ratings of POI
- ***cc_payment*** : If POI accepts credit card payment
- ***outdoor_seating*** : If POI offers outdoor seating
- ***street_parking*** : If POI offers street parking

In [234]:
bike_station_name =[]
poi_name = []
rating = []
distance_to_station = []
popularity = []
total_ratings = []
cc_payment = []
outdoor_seating = []
street_parking =[]
price_fs = []

In [235]:
# This API calls retrieve the bars around the bike station.

def get_poi(latitude,longitude):
    url = "https://api.foursquare.com/v3/places/search"

    params={
        'radius':1000,
        'll':f'{latitude},{longitude}',
        'categories':'13003,13004,13005,13006,13007,13008,13009,13010,13011,13012,13013,13014,13015,13016,13017,13018,13019,13020,13021,13022,13023,13024,13025,13389'
        }

    headers = {
     "accept": "application/json",
     "Authorization": "fsq3vRYFiHIvZ0kw5BVmIupZkvD8NeBdN8xSNZRDGfu7lcw="
    }

    response = requests.get(url, headers=headers,params=params).json()

    return response


The below API (#2) calls use the Foursquare id of the POI retrieved from the above API (#1) call which results in retrieveing the features of the API

*API CALL #1 : api.foursquare.com/v3/places/search results in businessess search for set of different categories*

*API CALL #2 : api.foursquare.com/v3/places/{i['fsq_id']} results in place search and its features using the foursquare id of the place*

In [None]:
for index,row in bikes_data.iterrows():
    
    result = get_poi(latitude=row['latitude'],longitude=row['longitude'])
    
    headers = {
        "accept": "application/json",
        "Authorization": "fsq3vRYFiHIvZ0kw5BVmIupZkvD8NeBdN8xSNZRDGfu7lcw="
        }

    for i in result['results']:

        bike_station_name.append(row['bike_station_name'])

        poi_name.append(i['name'])

        distance_to_station.append(i['distance'])

        url = f"https://api.foursquare.com/v3/places/{i['fsq_id']}?fields=stats,rating,features,popularity"
    
        second_response = requests.get(url, headers=headers).json()

        #appending 0 in case of no poularity feature found in response
        if(second_response.get('popularity')):
            popularity.append(second_response['popularity'])
        else:
            popularity.append(0)

        #appending 0 in case of no rating feature found in response
        if(second_response.get('rating')):
            rating.append(second_response['rating'])
        else:
            rating.append(0)

        #appending 0 in case of no total_ratings feature found in response
        if(second_response.get('stats')):
            total_ratings.append(second_response['stats']['total_ratings'])
        else:
            total_ratings.append(0)
        
        #assumption of False in case of no CC feature found in response
        if(second_response.get('features')):
            if(second_response['features'].get('payment')):    
                cc_payment.append(second_response['features']['payment']['credit_cards']['accepts_credit_cards'])
            else:
                cc_payment.append(False)

            #assumption of False in case of no outdoor_seating feature found in response
            #assumption of False in case of no street_parking feature found in response

            if(second_response['features'].get('amenities')):
                if(second_response['features']['amenities'].get('outdoor_seating')):
                    outdoor_seating.append(second_response['features']['amenities']['outdoor_seating'])
                else:
                    outdoor_seating.append(False)

                if(second_response['features']['amenities'].get('street_parking')):
                    street_parking.append(second_response['features']['amenities']['street_parking'])
                else:
                    street_parking.append(False)
                    
            #assumption of False in case of no outdoor_seating feature found in response
            #assumption of False in case of no street_parking feature found in response
            else: 
                outdoor_seating.append(False)
                street_parking.append(False)

        #assumption of False in case of no CC feature found in response
        #assumption of False in case of no outdoor_seating feature found in response
        #assumption of False in case of no street_parking feature found in response
        else:
            outdoor_seating.append(False)
            street_parking.append(False)
            cc_payment.append(False)

In [None]:
# making sure the length of the lists are same to create a dataframe

print(len(bike_station_name))
print(len(poi_name))
print(len(rating))
print(len(distance_to_station))
print(len(popularity))
print(len(cc_payment))
print(len(outdoor_seating))
print(len(street_parking))
print(len(total_ratings))

Put your parsed results into a DataFrame

In [239]:
fsq_data = pd.DataFrame({'poi_name':poi_name,
              'rating_fs':rating,
              'total_ratings_fs':total_ratings,
              'popularity_fs':popularity,
              'cc_payment_fs':cc_payment,
              'outdoor_seating_fs':outdoor_seating,
              'street_parking_fs':street_parking,
              'bike_station_name':bike_station_name,
              'distance_to_station_fs':distance_to_station
              })

In [240]:
fsq_data

Unnamed: 0,poi_name,rating_fs,total_ratings_fs,popularity_fs,cc_payment_fs,outdoor_seating_fs,street_parking_fs,bike_station_name,distance_to_station_fs
0,Rogue Kitchen & Wetbar,8.3,127,0.981731,True,True,False,10th & Cambie,250
1,33 Acres Brewing Co,9.3,380,0.983225,True,False,False,10th & Cambie,674
2,The Cascade Room,8.6,200,0.945325,True,False,False,10th & Cambie,984
3,Biercraft Bistro,8.1,155,0.920526,True,True,False,10th & Cambie,744
4,Faculty Brewing Co,8.1,58,0.950547,False,False,False,10th & Cambie,994
...,...,...,...,...,...,...,...,...,...
2222,The Vancouver Club,8.5,42,0.991487,False,False,False,šxʷƛ̓ənəq Xwtl'e7énḵ Square - Vancouver Art Ga...,587
2223,Tap & Barrel,8.6,353,0.994236,True,True,False,šxʷƛ̓ənəq Xwtl'e7énḵ Square - Vancouver Art Ga...,765
2224,Pourhouse Restaurant,8.9,381,0.973350,True,True,False,šxʷƛ̓ənəq Xwtl'e7énḵ Square - Vancouver Art Ga...,890
2225,Steamworks Brewing Co,8.4,919,0.998876,True,True,False,šxʷƛ̓ənəq Xwtl'e7énḵ Square - Vancouver Art Ga...,690


Replacing the 0 appended values for columns rating_fs, total_ratings_fs, popularity_fs with mean of the columns

In [241]:
fsq_data['rating_fs'] = fsq_data['rating_fs'].replace(0, round(fsq_data['rating_fs'].mean(),1))

fsq_data['total_ratings_fs'] = fsq_data['total_ratings_fs'].replace(0, round(fsq_data['total_ratings_fs'].mean(),0))

fsq_data['popularity_fs'] = fsq_data['popularity_fs'].replace(0, round(fsq_data['popularity_fs'].mean(),0))

In [None]:
#checking dataframe after the replacement

fsq_data.head()

Checking how many results ( POI ) per bike_station is recieved in foursquare

In [246]:
fsq_data[fsq_data['bike_station_name']=='10th & Cambie']

Unnamed: 0,poi_name,rating_fs,total_ratings_fs,popularity_fs,cc_payment_fs,outdoor_seating_fs,street_parking_fs,bike_station_name,distance_to_station_fs
0,Rogue Kitchen & Wetbar,8.3,127,0.981731,True,True,False,10th & Cambie,250
1,33 Acres Brewing Co,9.3,380,0.983225,True,False,False,10th & Cambie,674
2,The Cascade Room,8.6,200,0.945325,True,False,False,10th & Cambie,984
3,Biercraft Bistro,8.1,155,0.920526,True,True,False,10th & Cambie,744
4,Faculty Brewing Co,8.1,58,0.950547,False,False,False,10th & Cambie,994
5,Fox Cabaret,8.0,50,0.962219,False,False,False,10th & Cambie,965
6,Mahony & Sons,7.5,86,0.994642,False,True,False,10th & Cambie,811
7,Tocador,7.5,14,0.936085,False,False,False,10th & Cambie,996
8,Vancouver Hackspace,7.4,8,0.855685,False,False,False,10th & Cambie,820
9,The Wicklow Pub,7.1,83,0.988985,True,True,False,10th & Cambie,760


In [247]:
# saving the bikes data to csv for Part 3
fsq_data.to_csv('fsq_data.csv', sep=',', index=False, encoding='utf-8')

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [253]:
bikes_data.head()

Unnamed: 0,bike_station_name,free_bikes,latitude,longitude
0,10th & Cambie,11,49.262487,-123.114397
1,10th & Columbia,3,49.262206,-123.109091
2,10th & Commercial,1,49.261403,-123.070102
3,10th & Fraser,1,49.26151,-123.089413
4,10th & Granville,2,49.262598,-123.138921


In [275]:
yelp_key = 'Bearer Ohznuk5veifhcVncNy_E0R6tI98TF-DkuUJyICPYMoZaSpiQX_9bzR8kgJGVve9sFNU6aetLykAeB2eHLHyQJ4GP_LGaHIzUyEJ_CKuXYTsw8ucXXFa04NIXTZ0dZnYx'

In [276]:
def get_poi_yelp(latitude,longitude):
    url = f'https://api.yelp.com/v3/businesses/search?radius=1000&latitude={latitude}&longitude={longitude}&categories=bars'

    headers = {
        "accept": "application/json",
        'Authorization':yelp_key
    }

    response = requests.get(url, headers=headers).json()
    print(response)
    return response


Creating lists for storing information that will be recieved from Foursquare API

- ***bike_station_name*** : Name of the bike station
- ***poi_name_y*** : Name of the point of interest (Bars)
- ***rating_y*** : Rating of POI
- ***category*** : Category of the POI
- ***distance_to_station_y*** : Distance of POI from the bike station
- ***price*** : Total price bracket of POI
- ***review_count*** : The review count of POI on yelp

In [277]:
bike_station_name_y = []
poi_name_y = []
rating_y = []
category = []
distance_to_station_y = []
price = []
review_count = []

In [None]:
for index,row in bikes_data.iterrows():
    
    result = get_poi_yelp(latitude=row['latitude'],longitude=row['longitude'])

    for i in result['businesses']:

        bike_station_name_y.append(row['bike_station_name'])

        poi_name_y.append(i['name'])

        rating_y.append(i['rating'])

        category.append(i['categories'][0]['title'])

        if(i.get('price')):
            price.append(i['price'])
        else:
            price.append(0)

        distance_to_station_y.append(round(i['distance'],2))

        review_count.append(i['review_count'])
        

Put your parsed results into a DataFrame

In [301]:
#checking the lengths of lists are same to create a dataframe

print(len(bike_station_name_y))
print(len(poi_name_y))
print(len(rating_y))
print(len(category))
print(len(distance_to_station_y))
print(len(price))
print(len(review_count))

4148
4148
4148
4148
4148
4148
4148


In [302]:
yelp_data= pd.DataFrame({'poi_name':poi_name_y,
              'rating_yelp':rating_y,
              'category_yelp':category,
              'bike_station_name':bike_station_name_y,
              'distance_to_station':distance_to_station_y,
              'price_yelp':price,
              'review_count_yelp':review_count
              })

In [303]:
yelp_data

Unnamed: 0,poi_name,rating_yelp,category_yelp,bike_station_name,distance_to_station,price_yelp,review_count_yelp
0,Cactus Club Cafe,3.7,Canadian (New),10th & Cambie,221.48,$$,150
1,Anh and Chi,4.0,Vietnamese,10th & Cambie,1284.21,$$$,672
2,The Cascade Room,3.9,Pubs,10th & Cambie,992.50,$$,235
3,Indochine Kitchen,3.2,Asian Fusion,10th & Cambie,707.63,$$,114
4,CRAFT Beer Market,3.4,Beer Bar,10th & Cambie,1050.39,$$,520
...,...,...,...,...,...,...,...
4143,Shizen Ya,4.2,Japanese,šxʷƛ̓ənəq Xwtl'e7énḵ Square - Vancouver Art Ga...,395.61,$$,589
4144,Earls Kitchen + Bar,3.6,Canadian (New),šxʷƛ̓ənəq Xwtl'e7énḵ Square - Vancouver Art Ga...,296.78,$$,285
4145,Grain Tasting Bar,3.4,Wine Bars,šxʷƛ̓ənəq Xwtl'e7énḵ Square - Vancouver Art Ga...,207.77,$$,54
4146,The Lennox Pub,3.5,Pubs,šxʷƛ̓ənəq Xwtl'e7énḵ Square - Vancouver Art Ga...,247.58,$$,137


In [304]:
yelp_data[yelp_data['bike_station_name']=='10th & Cambie']

Unnamed: 0,poi_name,rating_yelp,category_yelp,bike_station_name,distance_to_station,price_yelp,review_count_yelp
0,Cactus Club Cafe,3.7,Canadian (New),10th & Cambie,221.48,$$,150
1,Anh and Chi,4.0,Vietnamese,10th & Cambie,1284.21,$$$,672
2,The Cascade Room,3.9,Pubs,10th & Cambie,992.5,$$,235
3,Indochine Kitchen,3.2,Asian Fusion,10th & Cambie,707.63,$$,114
4,CRAFT Beer Market,3.4,Beer Bar,10th & Cambie,1050.39,$$,520
5,R&B Brewing,4.1,Breweries,10th & Cambie,964.1,$,66
6,Suyo Modern Peruvian,4.6,Peruvian,10th & Cambie,1349.23,0,29
7,Johnnie Fox's Irish Pub,3.9,Irish,10th & Cambie,1395.62,$$,121
8,R&B Brewing,4.3,Pizza,10th & Cambie,943.58,$$,23
9,Red Truck Beer,3.9,Breweries,10th & Cambie,1469.95,$$,88


In [305]:
#assumption of price columns with 0 values atleast offers '$$' - price level of 2 ( medium priced ranged bars)
yelp_data['price_yelp'] = yelp_data['price_yelp'].replace(0,'$$')

In [306]:
## replacing the string variables with integer price levels 

yelp_data['price_yelp'] = yelp_data['price_yelp'].replace({
    '$':1,
    '$$':2,
    '$$$':3,
    '$$$$':4
})

In [307]:
# saving the bikes data to csv for Part 3
yelp_data.to_csv('yelp_data.csv', sep=',', index=False, encoding='utf-8')

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

*** Comparison between YELP and FOURSQUARE API's***

- There were pros and cons with both the API but Yelp definitely did provide a broader search and large number of results around the bike stations. 

- As compared to Yelp, I had to do a second API call in foursquare to get more details about a 
business place. So 2 foursquare calls = 1 Yelp API call

- The Foursquare API had more interesting features such as payments types accepted, outdoor seating, parking as compared to Yelp.

- I did noticed one thing while overlapping these results that categories ditribution over both the API do differ such as Cactus Club Cafe ( one of the POIs) yielded 164 rows in YELP and none in Foursquare. There might be a reason that it is not listed under 'Bars' in foursquare.

- Both the API radius perimeters resulted in broader search with distance Max values of 1973m and 2626m 

In [308]:
#the data points foursquare retrieved
fsq_data.shape

(2227, 9)

In [309]:
#the data points yelp retrieved
yelp_data.shape

(4148, 7)

In [310]:
#checking the stats for foursquare data
fsq_data.describe()

Unnamed: 0,rating_fs,total_ratings_fs,popularity_fs,distance_to_station_fs
count,2227.0,2227.0,2227.0,2227.0
mean,7.620117,124.615626,0.925465,596.807364
std,0.853748,147.416489,0.137426,258.763304
min,5.1,8.0,0.018031,10.0
25%,6.7,29.0,0.939006,399.0
50%,7.8,86.0,0.966661,613.0
75%,8.3,127.0,0.985684,812.0
max,9.3,919.0,1.0,1973.0


In [311]:
#checking the stats for yelp data
yelp_data.describe()

Unnamed: 0,rating_yelp,distance_to_station,review_count_yelp
count,4148.0,4148.0,4148.0
mean,3.884643,648.107387,200.608245
std,0.518422,329.577047,298.560529
min,1.2,1.97,1.0
25%,3.6,374.825,34.0
50%,4.0,646.345,114.0
75%,4.2,907.0775,285.0
max,5.0,2477.07,2626.0


In [288]:
fsq_data.isnull().sum()

poi_name                  0
rating_fs                 0
total_ratings_fs          0
popularity_fs             0
cc_payment_fs             0
outdoor_seating_fs        0
street_parking_fs         0
bike_station_name         0
distance_to_station_fs    0
dtype: int64

In [289]:
yelp_data.isnull().sum()

poi_name               0
rating_yelp            0
category_yelp          0
bike_station_name      0
distance_to_station    0
price_yelp             0
delivery_yelp          0
pickup_yelp            0
review_count_yelp      0
dtype: int64

In [290]:
fsq_data['bike_station_name'].nunique()

244

In [291]:
yelp_data['bike_station_name'].nunique()

245

Get the top 10 restaurants according to their rating

### Foursquare Data Top 10 Bars

In [312]:
fsq_data.drop_duplicates(subset=['poi_name']).sort_values(by='rating_fs',ascending=False).head(10)

Unnamed: 0,poi_name,rating_fs,total_ratings_fs,popularity_fs,cc_payment_fs,outdoor_seating_fs,street_parking_fs,bike_station_name,distance_to_station_fs
1,33 Acres Brewing Co,9.3,380,0.983225,True,False,False,10th & Cambie,674
497,Keefer Bar,9.1,234,0.986027,True,True,False,Abbott & Cordova,533
506,Callister Brewing Co,9.0,31,0.995955,False,False,False,Adanac & McLean,531
496,Pourhouse Restaurant,8.9,381,0.97335,True,True,False,Abbott & Cordova,190
14,Anh and Chi,8.9,92,0.988856,False,False,False,10th & Columbia,986
516,Cardero's Restaurant & Marine Pub,8.7,379,0.978347,True,True,False,Alberni & Jervis,448
232,The Boxcar,8.7,38,0.946197,False,False,False,1st & Manitoba,848
501,Alibi Room,8.7,441,0.978175,True,True,False,Abbott & Cordova,510
2,The Cascade Room,8.6,200,0.945325,True,False,False,10th & Cambie,984
520,Score on Davie,8.6,228,0.980303,True,True,False,Alberni & Jervis,787


### Yelp Data Top 10 Bars

In [313]:
yelp_data.drop_duplicates(subset=['poi_name']).sort_values(by='rating_yelp',ascending=False).head(10)

Unnamed: 0,poi_name,rating_yelp,category_yelp,bike_station_name,distance_to_station,price_yelp,review_count_yelp
3082,Alchemy Bar and Kitchen,5.0,Burgers,Olympic Village Station,1054.24,2,1
143,Osteria Elio Volpe,5.0,Italian,10th & Oak,1040.61,2,1
145,Ramson’s Pizzeria And Bar,5.0,Bars,10th & Oak,680.38,2,1
1114,Arike Restaurant,4.9,African,Aquatic Centre,974.52,2,27
1617,Bar Gobo,4.8,Wine Bars,Campbell & Hastings,1052.27,2,4
42,Hariyali Express Indian Cuisine & Bar,4.8,Indian,10th & Commercial,571.95,2,6
45,Giancarlo's Sports Bar,4.8,Coffee & Tea,10th & Commercial,736.15,1,12
290,The Watson,4.8,Cocktail Bars,14th & Fraser,820.07,2,8
3259,Sports Illustrated Clubhouse,4.7,Sports Bars,Pharmaceutical Sciences,1073.94,2,3
1856,Bombay Kitchen + Bar,4.7,Indian,Chilco & Barclay,360.6,2,3


In [325]:
yelp_data[yelp_data['poi_name']=='Cactus Club Cafe'	]

Unnamed: 0,poi_name,rating_yelp,category_yelp,bike_station_name,distance_to_station,price_yelp,review_count_yelp
0,Cactus Club Cafe,3.7,Canadian (New),10th & Cambie,221.48,2,150
26,Cactus Club Cafe,3.7,Canadian (New),10th & Columbia,597.64,2,150
69,Cactus Club Cafe,3.7,Canadian (New),10th & Granville,121.66,2,162
132,Cactus Club Cafe,3.7,Canadian (New),10th & Oak,671.00,2,150
134,Cactus Club Cafe,3.7,Canadian (New),10th & Oak,976.67,2,162
...,...,...,...,...,...,...,...
4049,Cactus Club Cafe,3.7,Canadian (New),Wylie & 2nd,1090.79,3,218
4054,Cactus Club Cafe,3.7,Canadian (New),Yaletown-Roundhouse Station,102.31,3,218
4108,Cactus Club Cafe,3.7,Canadian (New),Yukon & 6th,419.06,2,150
4132,Cactus Club Cafe,4.1,Canadian (New),šxʷƛ̓ənəq Xwtl'e7énḵ Square - Vancouver Art Ga...,285.51,2,492


In [344]:
fsq_data[fsq_data['poi_name'].str.contains('Cactus')]

Unnamed: 0,poi_name,rating_fs,total_ratings_fs,popularity_fs,cc_payment_fs,outdoor_seating_fs,street_parking_fs,bike_station_name,distance_to_station_fs


### Joining YELP and FOURSQUARE

In [350]:
# Merge the DataFrames on the 'poi_name' column
merged_data = pd.merge(yelp_data, fsq_data, on=('poi_name','bike_station_name'), how='inner')

# Display the merged DataFrame
merged_data

Unnamed: 0,poi_name,rating_yelp,category_yelp,bike_station_name,distance_to_station,price_yelp,review_count_yelp,rating_fs,total_ratings_fs,popularity_fs,cc_payment_fs,outdoor_seating_fs,street_parking_fs,distance_to_station_fs
0,The Cascade Room,3.9,Pubs,10th & Cambie,992.50,2,235,8.6,200,0.945325,True,False,False,984
1,Tocador,3.7,Latin American,10th & Cambie,999.09,2,50,7.5,14,0.936085,False,False,False,996
2,Anh and Chi,4.0,Vietnamese,10th & Columbia,995.65,3,672,8.9,92,0.988856,False,False,False,986
3,The Cascade Room,3.9,Pubs,10th & Columbia,606.21,2,235,8.6,200,0.945325,True,False,False,598
4,Tocador,3.7,Latin American,10th & Columbia,612.81,2,50,7.5,14,0.936085,False,False,False,610
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
466,JOEY Burrard,4.1,Canadian (New),Yaletown-Roundhouse Station,899.52,2,583,8.2,260,0.992690,True,True,False,930
467,The Ellis,4.2,Cocktail Bars,York & Yew,21.68,2,84,6.5,106,0.959523,False,False,False,10
468,Corduroy,4.0,Bars,York & Yew,493.23,2,79,8.5,45,0.884569,True,True,False,493
469,The Cascade Room,3.9,Pubs,Yukon & 6th,996.98,2,235,8.6,200,0.945325,True,False,False,987


In [346]:
# saving the bikes data to csv for Part 3
merged_data.to_csv('merged_data.csv', sep=',', index=False, encoding='utf-8')