In [3]:
# imports
import requests
import pandas as pd
from IPython.display import JSON
stations_clean = pd.read_csv('stations.csv')
## Add a new column 'll' containing the combined lat and long, this will probably be helpful when working with Foursquare and Yelp API 
stations_clean['ll'] = stations_clean['lat'].astype(str) + ',' + stations_clean['long'].astype(str)


# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [8]:
## we want to create a list of fsq ids
bar_id_list = []
def get_bar_ids(coords):
    url = "https://api.foursquare.com/v3/places/search"

    params = {
        "categories": "13003",
        "ll": str(coords),
        'radius': "1000",
        "sort":"DISTANCE",
        "limit":"50"
    }

    headers = {
        "Accept": "application/json",
        "Authorization": key
    }
## Get the request
    response = requests.request("GET", url, params=params, headers=headers)
## Convert to python object with .json()
    res = response.json()
    ## Create loop to pull out fsq_ids
    
    for i in range(len(res['results'])):
        bar_id_list.append(res['results'][i]['fsq_id'])
        

In [16]:
## Apply the function to our column of coord, the 'll' column from stations_clean dataframe
stations_clean['ll'].apply(get_bar_ids)

0      None
1      None
2      None
3      None
4      None
       ... 
236    None
237    None
238    None
239    None
240    None
Name: ll, Length: 241, dtype: object

In [17]:
len(bar_id_list)

6545

In [18]:
## Remove duplicates from the list
bar_id_list_clean = list(set(bar_id_list))
len(bar_id_list_clean)

373

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [None]:
## Run a loop to call for specific information about the bars
bar_info_list = []
for f_id in bar_id_list_clean:
    url = "https://api.foursquare.com/v3/places/" + f_id

    params = {
        "session_token": "YOUR USER GENERATED SESSION TOKEN HERE",
        "fields": "fsq_id,name,rating,stats,popularity,price"
    }

    headers = {
        "Accept": "application/json",
        "Authorization": key
    }

    response = requests.request("GET", url, params=params, headers=headers)
    res = response.json()
    bar_info_list.append(res)

Put your parsed results into a DataFrame

In [None]:
## Create dataframe
df_bar_foursquare = pd.json_normalize(bar_info_list)

In [23]:
## Going to keep rows with Null values for now, will clean them up later on when necessary
df_bar_foursquare

Unnamed: 0,fsq_id,name,popularity,price,rating,stats.total_photos,stats.total_ratings,stats.total_tips
0,5cbba694aa6c9500396a888d,Finfolk,0.035142,,,,,
1,4ccbaf08c0378cfa0aed8048,London Pub,0.960836,1.0,5.7,110.0,79.0,34.0
2,5ce14232914845534bd3da0e,Tavern Enterprises Ltd,,,,,,
3,4b722eb3f964a520f1722de3,The Bimini Public House,0.935625,1.0,6.7,85.0,71.0,14.0
4,8148382d92f84a49142ceabf,Fivesixty,,,,,,
...,...,...,...,...,...,...,...,...
368,4abea310f964a520ff8e20e3,Pourhouse Restaurant,0.959123,2.0,8.8,339.0,378.0,117.0
369,5488ce65498ec20cb7c81a56,Bar One,0.857701,2.0,6.1,16.0,10.0,2.0
370,62aba59049bfe6193d4feb3a,The Hart,0.875099,2.0,,,,
371,9d791a97a7b2481154aad915,Club Mumbai Vancouver,,,,,,


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [24]:
##Create list of latitudes and longitudes
lat_list = stations_clean['lat'].values.tolist()
long_list = stations_clean['long'].values.tolist()

In [34]:
## Loop a Yelp API call, Create a list of bar ids
yelp_list = []
for lat,long in zip(lat_list,long_list):
    url = "https://api.yelp.com/v3/businesses/search?latitude=" + str(lat) + "&longitude=" + str(long) + "&radius=1000&categories=bars&sort_by=best_match&limit=50"

    headers = {
        "accept": "application/json",
        "Authorization": ykey
    }

    response = requests.get(url, headers=headers)

    res = response.json()
    for i in range(len(res['businesses'])):
        yelp_list.append(res['businesses'][i]['id'])

In [35]:
len(yelp_list)

6902

In [36]:
## Create new list without duplicates
yelp_list_clean = list(set(yelp_list))
len(yelp_list_clean)

318

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [37]:
## Use list of bar ids to make another call for general information from our bars
yelp_info_list = []

for y_id in yelp_list_clean:
    url = "https://api.yelp.com/v3/businesses/" + str(y_id) + "?locale=en_CA"

    headers = {
        "accept": "application/json",
        "Authorization": ykey
    }

    response = requests.get(url, headers=headers)
    res = response.json()
    yelp_info_list.append(res)


In [None]:
JSON(yelp_info_list)

In [38]:
df_bar_yelp = pd.json_normalize(yelp_info_list)

Put your parsed results into a DataFrame

In [40]:
## Only keep columns we need
df_yelp = df_bar_yelp[['name','review_count', 'rating', 'price']]

In [42]:
## Again, will keep columns with NaNs for now
df_yelp

Unnamed: 0,name,review_count,rating,price
0,The Dime Granville,22,3.5,$
1,The Pawn Shop YVR,152,4.0,$$
2,Numbers Cabaret,36,3.0,$$
3,Ivanhoe Pub,34,3.5,$
4,King’s Head Public House,8,3.0,
...,...,...,...,...
313,Hello Goodbye,34,3.5,$$
314,Waldorf Hotel,3,2.5,
315,The Shameful Tiki Room,359,4.5,$$
316,The Farmhouse,5,3.5,


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Foursquare. Foursquare returned a higher number of results. Furthermore I felt foursquare returned more useful attributes. I also prefer Foursquares scaling on their rating and pricing system as they used a continous scale as opposed to Yelps categorical scale.

Get the top 10 restaurants according to their rating

In [47]:
## According to foursquare
df_bar_foursquare[['name', 'rating']].sort_values('rating', ascending = False).head(10)

Unnamed: 0,name,rating
362,33 Acres Brewing Co,9.2
168,Keefer Bar,9.0
118,The Boxcar,8.9
368,Pourhouse Restaurant,8.8
212,Anh and Chi,8.7
249,Tap & Barrel,8.7
283,Strange Fellows Brewing,8.6
337,Cardero's Restaurant & Marine Pub,8.6
207,Callister Brewing Co,8.6
88,Brassneck Brewery,8.5


In [48]:
## According to Yelp --> yelp rating only out of 5
df_yelp[['name', 'rating']].sort_values('rating', ascending = False).head(10)

Unnamed: 0,name,rating
37,Sooda Korean BBQ,5.0
132,West Hotel & Bar,5.0
48,The Magnet,5.0
285,Brewing August,5.0
106,Arike Restaurant,5.0
275,The Chickadee Room,5.0
57,Courtside,5.0
27,Alley 16,5.0
94,Bar Gobo,5.0
283,Novella Coffee Bar,5.0



### After Looking through and analyzing the results from the above dataframes, I decided to create new ones with the information that I wanted to work with for the model building section of this project

In [4]:
## Create list of bike id and coords
bike_id_list = stations_clean['id'].values.tolist()
coords_list = stations_clean['ll'].values.tolist()


In [32]:
## Get bar info into a dataframe
df_bar = pd.DataFrame()
## for each of the bike station coordinates, get info about bars in a 500m radius
for b_id, coord in zip(bike_id_list,coords_list):
    url = "https://api.foursquare.com/v3/places/search"

    params = {
        "categories": "13003",
        "ll": str(coord),
        'radius': "500",
        "fields": "fsq_id,name,rating,popularity,price",
        "sort":"DISTANCE",
        "limit":"50"
    }

    headers = {
        "Accept": "application/json",
        "Authorization": key
    }
    ## Get the request
    response = requests.request("GET", url, params=params, headers=headers)
    
    ## Convert to python object with .json()
    res = response.json()
    
    ## Create temp dataframe of info we want
    df_temp = pd.json_normalize(res['results'])
    
    ## Add new columns to dataframe 
    df_temp['station_id'] = b_id
    df_temp['num_bars'] = len(df_temp.index)
    
    ## concat temp dataframe to our final dataframe
    df_bar = pd.concat([df_bar,df_temp])

In [55]:
## Get cafe info into a dataframe
df_cafe = pd.DataFrame()
## for each of the bike station coordinates, get info about cafes in a 500m radius
for b_id, coord in zip(bike_id_list,coords_list):
    url = "https://api.foursquare.com/v3/places/search"

    params = {
        "categories": "13032",
        "ll": str(coord),
        'radius': "500",
        "fields": "fsq_id,name,rating,popularity,price",
        "sort":"DISTANCE",
        "limit":"50"
    }

    headers = {
        "Accept": "application/json",
        "Authorization": key
    }
    ## Get the request
    response = requests.request("GET", url, params=params, headers=headers)
    
    ## Convert to python object with .json()
    res = response.json()
    
    ## Create temp dataframe of info we want
    df_temp = pd.json_normalize(res['results'])
    
    ## Add new columns to dataframe
    df_temp['station_id'] = b_id
    df_temp['num_cafes'] = len(df_temp.index)
    
    
    ## concat temp dataframe to our final dataframe
    df_cafe = pd.concat([df_cafe,df_temp])

In [60]:
## group by station_id, get the mean
df_c = df_cafe.groupby('station_id', as_index=False).mean()
df_b = df_bar.groupby('station_id', as_index=False).mean()

In [61]:
## Rename columns
df_b = df_b.rename(columns = {'popularity':'b_pop', 'price':'b_price', 'rating':'b_rating'})
df_c = df_c.rename(columns = {'popularity':'c_pop', 'price':'c_price', 'rating':'c_rating'})

In [67]:
#Merge into a single dataframe, merging on bike station id
df_final = pd.merge(df_b, df_c, on = 'station_id')

In [70]:
df_final.to_csv('info.csv', index=False)


In [71]:
## This is the dataframe I will be using to merge with the citybikes dataframe from part 1
df_final

Unnamed: 0,station_id,b_pop,b_price,b_rating,num_bars,c_pop,c_price,c_rating,num_cafes
0,00fa94ad698dc4a9e4d708d6fd32f294,0.615994,2.333333,7.000000,4.0,0.852829,1.363636,6.850000,14.0
1,012d3e06901cc222b1c2cf0a2ace3a29,0.755921,1.714286,7.166667,9.0,0.942287,1.000000,5.550000,5.0
2,029a505bd4422a1afd127987757f71a6,0.956910,2.000000,7.200000,5.0,0.824118,1.307692,7.557143,15.0
3,0438114d2e6b96118de69bc9775bb21e,0.786189,2.000000,7.100000,12.0,0.871889,1.272727,7.260000,13.0
4,0459b7e93703980b853cd65a9dc60596,0.234849,1.000000,,3.0,0.917332,1.000000,5.400000,6.0
...,...,...,...,...,...,...,...,...,...
209,fbb4c06b719596c21f91fd51ef6d4710,0.793671,1.500000,7.225000,7.0,0.803475,1.166667,6.936364,23.0
210,fe0c57d04d6e682d284d501446095fc9,0.864246,1.928571,7.260000,17.0,0.739874,1.222222,7.354545,22.0
211,fedff7a263c182df94bda7307059cc52,0.781839,2.322581,7.033333,50.0,0.821499,1.162162,6.889286,49.0
212,fef69fb400210d861107a61db954d037,0.780483,2.090909,7.221739,50.0,0.872809,1.264706,7.386957,46.0
