In [63]:
# imports
import requests
import pandas as pd
import config
import sys
import categories
import numpy as np

In [62]:
FOURSQUARE_KEY = config.foursquare_apiKey

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [61]:
# build a helper function to avoid repeated calls
def get_venues_fs(latitude, longitude, radius,categories=None):
    """
    Get venues from foursquare with a specified place type and coordinates.
    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        api_key (str): foursquare API to use for query
        categories (str) : Foursquare-recognized place type. If not passed no place_type will be specified. Separate ids with commas
    
    Returns:
        response: response object from the requests library.
    """
    url = 'https://api.foursquare.com/v3/places/search'
    
    params = {
    'll':f"{latitude},{longitude}",
    'radius':str(int(radius)),
    'categories':categories,
    'fields':'fsq_id,categories,name,location,rating,distance,stats,price'
    }
    
    headers = {
    'Authorization': str(FOURSQUARE_KEY)
    }
    response = requests.get(url,params=params,headers=headers)
    return response

In [57]:
df_city_bikes_data = pd.read_csv('city_bikes_data')

In [90]:
df_city_bikes_subset = df_city_bikes_data.sort_values(by='id').head(300)

In [91]:
df_city_bikes_subset.shape

(300, 5)

In [96]:
bike_location = [[row.id,(row.latitude,row.longitude)] for row in df_city_bikes_subset.itertuples()]

In [97]:
len(bike_location)

300

In [98]:
bike_location[0][1]

(52.5486835, 13.3881981)

In [71]:
# explore fs api with test call
bars_id = '13003'
restaurant_id = '13065'
fashion_retail_id = '17039'


category_ids =f"{bars_id},{restaurant_id},{fashion_retail_id}"
result = get_venues_fs(*bike_location[0][1],radius=1000,categories=category_ids)

In [120]:
category_ids

'13003,13065,17039'

In [72]:
result.request.url

'https://api.foursquare.com/v3/places/search?ll=52.504157%2C13.335328&radius=1000&categories=13003%2C13065%2C17039&fields=fsq_id%2Ccategories%2Cname%2Clocation%2Crating%2Cdistance%2Cstats%2Cprice'

In [73]:
response = result.json()

In [76]:
response['results'][1]

{'fsq_id': '618fedbcef568823929a748e',
 'categories': [{'id': 13026,
   'name': 'BBQ Joint',
   'short_name': 'BBQ',
   'plural_name': 'BBQ Joints',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/bbqalt_',
    'suffix': '.png'}},
  {'id': 17057,
   'name': 'Food and Beverage Retail',
   'short_name': 'Food & Beverage',
   'plural_name': 'Food and Beverage Retail',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/foodanddrink_',
    'suffix': '.png'}}],
 'distance': 187,
 'location': {'address': 'Marburger Str. 16',
  'country': 'DE',
  'cross_street': '',
  'formatted_address': 'Marburger Str. 16, 10789 Berlin',
  'locality': 'Berlin',
  'postcode': '10789',
  'region': 'Berlin'},
 'name': 'Chicago Williams BBQ',
 'price': 2,
 'rating': 8.7,
 'stats': {'total_photos': 29, 'total_ratings': 30, 'total_tips': 9}}

In [78]:
column_names = ['category_id','theme','category_name']
fs_restaurant_df = pd.read_csv('fs_restaurant_google.csv', header=None,usecols=[0,1,2],names=column_names)[['category_id','category_name']]

In [79]:
fs_restaurant_df.head()

Unnamed: 0,category_id,category_name
0,13026,Restaurant
1,13027,Restaurant
2,13030,Restaurant
3,13031,Restaurant
4,13039,Restaurant


In [80]:
column_names = ['category_id','theme','category_name']
fs_bars_df = pd.read_csv('fs_bars_google.csv', header=None,usecols=[0,1,2],names=column_names)[['category_id','category_name']]

In [81]:
column_names = ['category_id','theme','category_name']
fs_fashion_retail_df = pd.read_csv('fs_fashion_retail_google.csv', header=None,usecols=[0,1,2],names=column_names)[['category_id','category_name']]

In [82]:
fs_categories_df = pd.concat([fs_restaurant_df,fs_bars_df,fs_fashion_retail_df],axis=0)
fs_categories_df.head()

Unnamed: 0,category_id,category_name
0,13026,Restaurant
1,13027,Restaurant
2,13030,Restaurant
3,13031,Restaurant
4,13039,Restaurant


In [83]:
fs_categories_df['category_id']=fs_categories_df['category_id'].astype(str)
fs_categories = fs_categories_df.set_index('category_id')['category_name'].to_dict()

In [84]:
fs_categories['17052'].strip()

'Fashion Retail'

In [93]:
if not "":
  print(True)

True


Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [131]:
# build helper function to parse the json output for the POIs
def parse_fs_response(response_json,reference_station_id):
    store_record = []
    for poi in response_json['results']:
        poi_id = poi.get('fsq_id',None)
        poi_name = poi.get('name',None)
        poi_distance = poi.get('distance',None)
        poi_source = 'foursquare'
        poi_rating = poi.get('rating',None)
        poi_location = poi.get('location',{}).get('formatted_address',None)
        poi_price_level = poi.get('price',None)
        poi_total_reviews = poi.get('stats',{}).get('total_tips',None)
        fs_categories_keys = [key for key in fs_categories.keys()]
        poi_primary_category = ""
        for category in poi['categories']:
            if str(category['id']) in fs_categories_keys:
                poi_primary_category = fs_categories[str(category['id'])].strip()
                poi_sub_category = category.get('name',None)
                break
        if not poi_primary_category:
            poi_primary_category = 'Unknown'
            poi_sub_category = np.nan
        record = {
        'poi_reference_station_id':reference_station_id,
        'poi_id':poi_id if poi_id is not None else np.nan,
        'poi_name':poi_name if poi_name is not None else np.nan,
        'poi_distance':poi_distance if poi_distance is not None else np.nan,
        'poi_source':poi_source,
        'poi_rating':round((float(poi_rating)/10)*100) if poi_rating is not None else np.nan,
        'poi_price_level':poi_price_level if poi_price_level is not None else np.nan,
        'poi_total_reviews':poi_total_reviews if poi_total_reviews is not None else np.nan,
        'poi_location':poi_location if poi_location is not None else np.nan,
        'poi_primary_category':poi_primary_category,
        'poi_sub_category':poi_sub_category if poi_sub_category is not None else np.nan,
        }
        store_record.append(record)
    return store_record

In [86]:
records = parse_fs_response(response,bike_location[0][0])

In [87]:
poi_df = pd.DataFrame(records)

In [99]:
poi_df.head()

Unnamed: 0,poi_reference_station_id,poi_id,poi_name,poi_distance,poi_source,poi_rating,poi_price_level,poi_total_reviews,poi_location,poi_primary_category,poi_sub_category
0,f5462555877f4cb1317baf6041527a93,5167ee31498e427ae94d6790,Other Stories,159,foursquare,89.0,,13,"Kurfürstendamm 234, 10719 Berlin",Fashion Retail,Women's Store
1,f5462555877f4cb1317baf6041527a93,618fedbcef568823929a748e,Chicago Williams BBQ,187,foursquare,87.0,2.0,9,"Marburger Str. 16, 10789 Berlin",Restaurant,BBQ Joint
2,f5462555877f4cb1317baf6041527a93,56f31647498e5e450e4383fa,Upper Burger Grill,111,foursquare,83.0,2.0,109,"Rankestr. 3, 10789 Berlin",Restaurant,BBQ Joint
3,f5462555877f4cb1317baf6041527a93,52ea152411d28bebe4d0b5f2,NENI Berlin,240,foursquare,86.0,3.0,179,"Budapester Str. 40, Berlin",Restaurant,Mediterranean Restaurant
4,f5462555877f4cb1317baf6041527a93,52ea169411d28bebe4d114a1,Monkey Bar,205,foursquare,85.0,3.0,347,"Budapester Str. 40, 10787 Berlin",Bar,Cocktail Bar


Put your parsed results into a DataFrame

In [137]:
# use the helper functions to execute the batch calls
bars_id = '13003'
restaurant_id = '13065'
fashion_retail_id = '17039'
category_ids =f"{bars_id},{restaurant_id},{fashion_retail_id}"

store_fs_records =[]
for i in range(0,300):
    try:
        result= get_venues_fs(*bike_location[i][1],radius=1000,categories=category_ids)
    except Exception as e:
            print(e)
    else:
       records= parse_fs_response(result.json(),bike_location[i][0])
       store_fs_records.extend(records)

In [138]:
store_fs_records[0]

{'poi_reference_station_id': '0001cd599fc5f3ae6cfa5474779cd94f',
 'poi_id': '4b6dcb34f964a52076902ce3',
 'poi_name': 'Curry Baude',
 'poi_distance': 115,
 'poi_source': 'foursquare',
 'poi_rating': 85,
 'poi_price_level': 1,
 'poi_total_reviews': 38,
 'poi_location': 'Badstr. 1-5, 13357 Berlin',
 'poi_primary_category': 'Restaurant',
 'poi_sub_category': 'Fast Food Restaurant'}

In [139]:
poi_fs_df = pd.DataFrame(store_fs_records)

In [140]:
poi_fs_df.head()

Unnamed: 0,poi_reference_station_id,poi_id,poi_name,poi_distance,poi_source,poi_rating,poi_price_level,poi_total_reviews,poi_location,poi_primary_category,poi_sub_category
0,0001cd599fc5f3ae6cfa5474779cd94f,4b6dcb34f964a52076902ce3,Curry Baude,115,foursquare,85.0,1.0,38.0,"Badstr. 1-5, 13357 Berlin",Restaurant,Fast Food Restaurant
1,0001cd599fc5f3ae6cfa5474779cd94f,4b8d0776f964a520f1e432e3,La Femme,251,foursquare,86.0,2.0,14.0,"Badstr. 67 (Hochstr.), 13357 Berlin",Restaurant,Restaurant
2,0001cd599fc5f3ae6cfa5474779cd94f,4beb27466295c9b6ce5c8708,Offside,566,foursquare,90.0,2.0,23.0,"Jülicher Str. 4, 13357 Berlin",Bar,Sports Bar
3,0001cd599fc5f3ae6cfa5474779cd94f,4b705ad5f964a520d6142de3,Shalimar Restaurant,406,foursquare,81.0,2.0,24.0,"Bellermannstr. 17a, 13357 Berlin",Restaurant,Indian Restaurant
4,0001cd599fc5f3ae6cfa5474779cd94f,4e72d880196b52816e04af8b,Studio8,380,foursquare,79.0,1.0,11.0,"Grüntaler Str. 8, 13357 Berlin",Bar,Bar


In [141]:
poi_fs_df.to_csv('fs_data.csv',index=False)

In [112]:
round(86.999)

87

# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [4]:
YELP_KEY = config.yelp_apiKey

In [3]:
sys.path

['C:\\Users\\Etinosa Ekomwenrenr\\lighthouse_projects\\weekend_projects\\modeling\\Statistical-Modelling-Project\\notebooks',
 'C:\\Users\\Etinosa Ekomwenrenr\\miniconda3\\envs\\datascience\\python38.zip',
 'C:\\Users\\Etinosa Ekomwenrenr\\miniconda3\\envs\\datascience\\DLLs',
 'C:\\Users\\Etinosa Ekomwenrenr\\miniconda3\\envs\\datascience\\lib',
 'C:\\Users\\Etinosa Ekomwenrenr\\miniconda3\\envs\\datascience',
 '',
 'C:\\Users\\Etinosa Ekomwenrenr\\miniconda3\\envs\\datascience\\lib\\site-packages',
 'C:\\Users\\Etinosa Ekomwenrenr\\miniconda3\\envs\\datascience\\lib\\site-packages\\win32',
 'C:\\Users\\Etinosa Ekomwenrenr\\miniconda3\\envs\\datascience\\lib\\site-packages\\win32\\lib',
 'C:\\Users\\Etinosa Ekomwenrenr\\miniconda3\\envs\\datascience\\lib\\site-packages\\Pythonwin']

In [2]:
dir(config)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'foursquare_apiKey',
 'yelp_apiKey',
 'yelp_client_id']

In [36]:
# build a helper function to avoid repeated calls
def get_venues_yelp(latitude, longitude, radius,categories=None):
    """
    Get venues from foursquare with a specified place type and coordinates.
    Args:
        latitude (float): latitude for query (must be combined with longitude)
        longitude (float): longitude for query (must be combined with latitude)
        api_key (str): foursquare API to use for query
        categories (str) : Foursquare-recognized place type. If not passed no place_type will be specified. Separate ids with commas
    
    Returns:
        response: response object from the requests library.
    """
    url = 'https://api.yelp.com/v3/businesses/search'
    
    params = {
    'latitude':str(latitude),
    'longitude':str(longitude),
    'radius':str(int(radius)),
    'categories':categories
    }
    
    headers = {
    'Authorization': f"Bearer {str(YELP_KEY)}"
    }
    response = requests.get(url,params=params,headers=headers)
    return response

In [121]:
# explore yelp api with test call
bars_id = 'bars'
restaurant_id = 'restaurants'
fashion_retail_id = 'fashion'


category_ids =f"{bars_id},{restaurant_id},{fashion_retail_id}"

In [122]:
category_ids

'bars,restaurants,fashion'

In [None]:
result = get_venues_yelp(*bike_location[0],radius=1000,categories=category_ids)

In [34]:
response = result.json()

In [43]:
response['businesses'][0]

{'id': 'snbFWEiLSiMYYqbOCXGlJQ',
 'alias': 'restaurant-heising-berlin',
 'name': 'Restaurant Heising',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/VBSD0Bk4DFkQehvvd11iJg/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/restaurant-heising-berlin?adjust_creative=qtawcsWGHEDGS1WIRYIVPQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=qtawcsWGHEDGS1WIRYIVPQ',
 'review_count': 26,
 'categories': [{'alias': 'french', 'title': 'French'}],
 'rating': 4.5,
 'coordinates': {'latitude': 52.50322, 'longitude': 13.33488},
 'transactions': [],
 'price': '€€€',
 'location': {'address1': 'Rankestr. 32',
  'address2': None,
  'address3': None,
  'city': 'Berlin',
  'zip_code': '10789',
  'country': 'DE',
  'state': 'BE',
  'display_address': ['Rankestr. 32', '10789 Berlin', 'Germany']},
 'phone': '+49302133952',
 'display_phone': '+49 30 2133952',
 'distance': 100.35029545938917}

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [44]:
yelp_categories = categories.yelp_categories

In [45]:
yelp_categories['bars']

'Bar'

In [143]:
# build helper function to parse the json output for the POIs
def parse_yelp_response(response_json,reference_station_id):
    store_record = []
    for poi in response_json['businesses']:
        poi_id = poi.get('id',None)
        poi_name = poi.get('name',None)
        poi_distance = poi.get('distance',None)
        poi_source = 'yelp'
        poi_rating = poi.get('rating',None)
        poi_location = poi.get('location',{}).get('address1',None)
        poi_price_level = poi.get('price',None)
        poi_total_reviews = poi.get('review_count',None)
        yelp_categories_keys = [key for key in yelp_categories.keys()]
        poi_primary_category = ""
        for category in poi['categories']:
            if str(category['alias']) in yelp_categories_keys:
                poi_primary_category = yelp_categories[str(category['alias'])].strip()
                poi_sub_category = category.get('alias',None)
            else:
                poi_primary_category = 'Restaurant'
                poi_sub_category = category.get('alias',None)
            break
        record = {
        'poi_reference_station_id':reference_station_id,
        'poi_id':poi_id if poi_id is not None else np.nan,
        'poi_name':poi_name if poi_name is not None else np.nan,
        'poi_distance':poi_distance if poi_distance is not None else np.nan,
        'poi_source':poi_source,
        'poi_rating':round((float(poi_rating)/5)*100) if poi_rating is not None else np.nan,
        'poi_price_level':len(list(str(poi_price_level))) if poi_price_level is not None else np.nan,
        'poi_total_reviews':poi_total_reviews if poi_total_reviews is not None else np.nan,
        'poi_location':poi_location if poi_location is not None else np.nan,
        'poi_primary_category':poi_primary_category,
        'poi_sub_category':poi_sub_category if poi_sub_category is not None else np.nan,
        }
        store_record.append(record)
    return store_record

In [52]:
records = parse_yelp_response(response)

In [53]:
poi_yelp_df = pd.DataFrame(records)

In [55]:
poi_yelp_df.head(20)

Unnamed: 0,poi_id,poi_name,poi_distance,poi_source,poi_rating,poi_price_level,poi_total_reviews,poi_location,poi_primary_category,poi_sub_category
0,snbFWEiLSiMYYqbOCXGlJQ,Restaurant Heising,100.350295,yelp,90.0,3,26,Rankestr. 32,Restaurant,french
1,_lNkh-NUq1UbC3-kQb7mUw,Cao Cao,159.941631,yelp,90.0,2,41,Marburger Str. 2,Restaurant,vietnamese
2,3VZeCWNPMQQs3q8PEtxbJw,Neni Berlin,172.040536,yelp,70.0,3,119,Budapester Str. 40,Restaurant,israeli
3,rhjEqwKz8PR27zETg0mZZQ,L'Osteria,228.159344,yelp,70.0,2,81,Budapester Str. 38 - 50,Restaurant,italian
4,nrK5bxNb76qybG-3GSx8hw,Upper Burger Grill,107.624815,yelp,80.0,1,49,Rankestr. 3,Restaurant,steak
5,ZQNW9eAWn7Ej9LKuBoiVXA,La Sepia,152.32066,yelp,80.0,3,55,Marburger Str. 2,Restaurant,spanish
6,EArHZu-0cOEQfkVJjzMKsg,Lang Bar,227.457651,yelp,90.0,4,25,Waldorf Astoria Berlin,Bar,cocktailbars
7,zoqrQLulC-weIVB83OIc4Q,Elefant,892.975704,yelp,90.0,2,184,Fuggerstr. 18,Restaurant,german
8,Wf22t_02JGIR1b3p83cmsg,Asia-Land,298.158536,yelp,90.0,2,20,Augsburger Str. 29,Restaurant,panasian
9,qwepqtmQnoSw4ZpADi2tGg,BLOCK HOUSE,181.385572,yelp,70.0,3,39,Budapester Str. 42-50,Restaurant,steak


Put your parsed results into a DataFrame

In [160]:
# use the helper functions to execute the batch calls
bars_id = 'bars'
restaurant_id = 'restaurants'
fashion_retail_id = 'fashion'
category_ids =f"{bars_id},{restaurant_id},{fashion_retail_id}"

store_yelp_records =[]
for i in range(0,300):
    #print(i)
    #print(bike_location[i][0])
    try:
        result=get_venues_yelp(*bike_location[i][1],radius=1000,categories=category_ids)
    except Exception as e:
            print(e)
    else:
       records= parse_yelp_response(result.json(),bike_location[i][0])
       store_yelp_records.extend(records)

In [161]:
len(store_yelp_records)

5637

In [162]:
poi_yelp_df = pd.DataFrame(store_yelp_records)

In [163]:
poi_yelp_df.head()

Unnamed: 0,poi_reference_station_id,poi_id,poi_name,poi_distance,poi_source,poi_rating,poi_price_level,poi_total_reviews,poi_location,poi_primary_category,poi_sub_category
0,0001cd599fc5f3ae6cfa5474779cd94f,p11gyzRUK0o9SqV25qX8YQ,Curry Baude,102.145973,yelp,90,1.0,108,Badstr. 1 - 5,Restaurant,currysausage
1,0001cd599fc5f3ae6cfa5474779cd94f,XvwwWZojW5lExwdtujz-7w,Pizza Pasta Tralala,109.747869,yelp,80,1.0,29,Badstr. 4,Restaurant,pizza
2,0001cd599fc5f3ae6cfa5474779cd94f,A4YUIQQEBkXCMJ38JovZ3w,Lichtburg,182.872133,yelp,80,2.0,40,Behmstr. 9,Restaurant,german
3,0001cd599fc5f3ae6cfa5474779cd94f,ZEUm1htbgBHyjR0E0YjlBg,Shalimar,408.614625,yelp,90,2.0,68,Bellermannstr. 17 A,Restaurant,pakistani
4,0001cd599fc5f3ae6cfa5474779cd94f,C49HN1YVvsNa3WnoU3uITQ,Cocos,123.776481,yelp,80,2.0,18,Badstr. 4,Restaurant,sushi


In [164]:
poi_yelp_df.to_csv('yelp_data.csv',index=False)

# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

Get the top 10 restaurants according to their rating