In [2]:
# imports
import os 
import pandas as pd
import requests
import json

# Foursquare

Send a request to Foursquare with a small radius (1000m) for all the bike stations in your city of choice. 

In [9]:
file_path = r'C:\Users\amer_\Desktop\Statistical_modeling_python\data_files\citybike\citybike.csv'

df_citybike = pd.read_csv(file_path)

url = "https://api.foursquare.com/v3/places/search"
api_key = os.getenv("api_key")

headers = {
    "Accept": "application/json",
    "Authorization": api_key
}

results_list = []

for index, row in df_citybike.iterrows():
    params = {
        "query": "restaurant, school, university",
        "country": "Austria",
        "radius": 1000,
        "ll": f"{row['Latitude']},{row['Longitude']}",
        "sort": "DISTANCE",
    }
    
    response = requests.get(url, params=params, headers=headers)
    results = response.json()['results']
    results_list.append(results)

output_path = r'C:\Users\amer_\Desktop\Statistical_modeling_python\data_files\foursquare\json\foursquare_output2.json'
with open(output_path, 'w') as file:
    json.dump(results_list, file)

In [10]:
json_file_path = r'C:\Users\amer_\Desktop\Statistical_modeling_python\data_files\foursquare\json\foursquare_output2.json' 
with open(json_file_path, 'r') as file:
    results_list = json.load(file)

results_list

[[{'fsq_id': '4e4bf5621838fe3c818e780e',
   'categories': [],
   'chains': [],
   'distance': 49,
   'geocodes': {'main': {'latitude': 48.305158, 'longitude': 14.285047}},
   'link': '/v3/places/4e4bf5621838fe3c818e780e',
   'location': {'country': 'AT',
    'cross_street': '',
    'formatted_address': 'Linz',
    'locality': 'Linz',
    'region': 'Oberösterreich'},
   'name': 'Restaurant Bar Absolut!on',
   'related_places': {},
   'timezone': 'Europe/Vienna'},
  {'fsq_id': '78a54586e46a4fde404084dd',
   'categories': [{'id': 13034,
     'name': 'Café',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/cafe_',
      'suffix': '.png'}},
    {'id': 13199,
     'name': 'Indian Restaurant',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/indian_',
      'suffix': '.png'}}],
   'chains': [],
   'distance': 75,
   'geocodes': {'drop_off': {'latitude': 48.304985, 'longitude': 14.284522},
    'main': {'latitude': 48.304891, 'longitude': 14.284415},
    

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [3]:
json_file_path = r'C:\Users\amer_\Desktop\Statistical_modeling_python\data_files\foursquare\json\foursquare_output2.json' 

with open(json_file_path, 'r') as file:
    results_list = json.load(file)

foursquare_data = results_list
# Existing code...

N_A = "N/A"  # to return "not available" if it doesn't exist
places = []
postcode = None 

for item in foursquare_data:
    for item1 in item:
        fsq_id = item1['fsq_id']
        name = item1['name']
        country = item1['location']['country']
        latitude = item1['geocodes']['main']['latitude']
        longitude = item1['geocodes']['main']['longitude']
        distance = item1['distance']
        if 'locality' in item1['location']:
            city = item1['location']['locality']
        else: 
            city = N_A
        
        if 'postcode' in item1['location']:
            try:
                postcode = int(item1['location']['postcode'])  # Convert to integer
            except ValueError:
                postcode = None  # Handle cases where postcode is not a valid integer
            
        if 'address' in item1['location']:
            address = item1['location']['address']
        elif 'formatted_address' in item1['location']:
            address = item1['location']['formatted_address']
            
        categories = []
        for category in item1['categories']:
            category_name = category['name']
            if category_name == "":
                categories = ["N/A"]
            else:
                categories.append(category_name)
        
        categories_str = ', '.join(categories)
            
        places.append({
            'name': name,
            'distance': distance,
            'address': address,
            'zip_code': postcode,
            'city': city,
            'latitude': latitude,
            'longitude': longitude,
            'categories': categories_str
        })


Put your parsed results into a DataFrame


In [4]:
df_foursquare_pre = pd.DataFrame(places)
df_foursquare = df_foursquare_pre.drop_duplicates('name')
df_foursquare

Unnamed: 0,name,distance,address,zip_code,city,latitude,longitude,categories
0,Restaurant Bar Absolut!on,49,Linz,,Linz,48.305158,14.285047,
1,IKAAN Restaurant Bar Cafe,75,Altstadt 16,4020.0,Linz,48.304891,14.284415,"Café, Indian Restaurant"
2,J. Leibetseder Restaurant- und BarbetriebsgmbH,139,Hauptplatz 12,4020.0,Linz,48.306419,14.286042,Restaurant
3,China-Restaurant Kim San,150,Hauptplatz 4,4020.0,Linz,48.306450,14.286628,Chinese Restaurant
4,Restaurant Persia,168,Klosterstr. 1,4020.0,Linz,48.304685,14.286580,Restaurant
...,...,...,...,...,...,...,...,...
416,Pferdehof Restaurant & Cafe,457,Ebelsberger Schloßweg 22,4030.0,Linz,48.246007,14.333185,Inn
417,Restaurant z Husaren,547,Traundorfer Str. 8,4030.0,Linz,48.247310,14.341719,"Café, Restaurant"
418,Restaurant Volkshaus Ebelsberg,651,Kremsmünsterer Str. 1- 3,4030.0,Linz,48.244087,14.329184,"BBQ Joint, Diner"
419,Restaurant Pussy Cat,884,Wambacher Str. 50/1,4030.0,Linz,48.246198,14.326622,Restaurant


# Yelp

Send a request to Yelp with a small radius (1000m) for all the bike stations in your city of choice. 

In [8]:
directory = r'C:\Users\amer_\Desktop\Statistical_modeling_python\data_files\yelp\json' #replace with your own directory
os.makedirs(directory, exist_ok=True)
yelp_api = os.getenv('yelp_api')

all_responses = []  # List to store all JSON responses

for index, row in df_citybike.iterrows():
    url = f"https://api.yelp.com/v3/businesses/search?latitude={row['Latitude']}&longitude={row['Longitude']}&sort_by=best_match&limit=20"

    headers = {
        "accept": "application/json",
        "Authorization": f"bearer {yelp_api}"
    }
    params = {
        'location': 'Linz, Austria',
        'categories': 'restaurant, bar, university, school',
        'radius' : 1000
    }

    response = requests.get(url, headers=headers)
    response_data = response.json()
    all_responses.append(response_data)

# Save all_responses to a single JSON file
path_file2 = r'C:\Users\amer_\Downloads'
output_file = os.path.join(path_file2, 'all_responses.json')
with open(output_file, 'w') as file:
    json.dump(all_responses, file)

    print(response.text)


{"businesses": [{"id": "KnXFQ3Kn6NWZg6TOoZr1oA", "alias": "city-wok-linz", "name": "City Wok", "image_url": "https://s3-media1.fl.yelpcdn.com/bphoto/A5Sb1uiKsY3HazkXsVlf5g/o.jpg", "is_closed": false, "url": "https://www.yelp.com/biz/city-wok-linz?adjust_creative=oTMM45yX8Bu5dJym9wWymQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=oTMM45yX8Bu5dJym9wWymQ", "review_count": 5, "categories": [{"alias": "chinese", "title": "Chinese"}, {"alias": "bars", "title": "Bars"}], "rating": 4.0, "coordinates": {"latitude": 48.2573395, "longitude": 14.3606005}, "transactions": [], "price": "\u20ac\u20ac\u20ac\u20ac", "location": {"address1": "Lunaplatz 7", "address2": "", "address3": "", "city": "Linz", "zip_code": "4030", "country": "AT", "state": "4", "display_address": ["Lunaplatz 7", "4030 Linz", "Austria"]}, "phone": "+43732321164", "display_phone": "+43 732 321164", "distance": 618.4318448012164}, {"id": "OPIHwGmO2ZQkwjYEsm4PDA", "alias": "ristorante-leonardo-linz", "name"

Parse through the response to get the POI (such as restaurants, bars, etc) details you want (ratings, name, location, etc)

In [5]:

json_file_path = r'C:\Users\amer_\Desktop\Statistical_modeling_python\data_files\yelp\json\all_responses.json'
# all_responses is from the previous cell. the json_file_path is just to specify in case of a crash of VScode
with open(json_file_path, 'r') as file:
    all_responses = json.load(file)
all_data = []

for response_data in all_responses:
    businesses = response_data['businesses']
    
    for business in businesses:
        business_id = business['id']
        name = business['name']
        distance = round(business['distance'])
        address = business['location']['address1']
        city = business['location']['city']
        
        # Convert zip_code to integer or set to None
        try:
            zip_code = int(business['location']['zip_code'])
        except (ValueError, TypeError):
            zip_code = None
        
        latitude = business['coordinates']['latitude']
        longitude = business['coordinates']['longitude']
        rating = business['rating']
        review_count = business['review_count']
        
        categories = []
        
        if 'alias' in business['categories'][0]:
            category_name = business['categories'][0]['alias']
            categories.append(category_name)
        else:
            print("N/A")
        
        categories_str = ', '.join(categories)

        
        all_data.append({
            'name': name,
            'distance': distance,
            'address': address,
            'city': city,
            'zip_code': zip_code,
            'latitude': latitude,
            'longitude': longitude,
            'rating': rating,
            'review_count': review_count,
            'categories': categories_str 
        })


Put your parsed results into a DataFrame

In [13]:
df_yelp = pd.DataFrame(all_data)

df_yelp = df_yelp.drop_duplicates('name')


# Comparing Results

Which API provided you with more complete data? Provide an explanation. 

the Foursquare api provided less data for the city however, it provided categories other than food.
the yelp api provided more data, however only for food related.

Get the top 10 restaurants according to their rating

In [16]:
yelp_sort_rating = df_yelp.sort_values("rating", ascending= False)
yelp_sort_review = yelp_sort_rating.sort_values("review_count", ascending= False)
yelp_sort_review.head(10)
#the most reviews with the highest ratings 

Unnamed: 0,name,distance,address,city,zip_code,latitude,longitude,rating,review_count,categories
0,Gelbes Krokodil,543,Dametzstr. 30,Linz,4020.0,48.302849,14.290824,4.5,56,modern_european
74,Klosterhof,113,Landstr. 30,Linz,4020.0,48.30219,14.28934,3.0,49,austrian
13,L'Osteria,205,Promenade 22,Linz,4020.0,48.304278,14.28673,3.5,46,italian
53,Josef das Stadtbräu,947,Landstr. 49,Linz,4020.0,48.299719,14.291365,3.0,44,beergarden
6,Cubus,468,Ars-Electronicastr. 1,Linz,4040.0,48.3097,14.28424,4.0,36,austrian
14,Royal Bombay Palace - Indisches Restaurant,1281,Goethestraße 34,Linz,4020.0,48.29734,14.29687,4.5,34,indpak
3,Steakhouse Restaurant,416,Untere Donaulände 12,Linz,4020.0,48.30789,14.28908,4.0,28,steak
1,Cocktailbar Easy,525,Baumbachstr. 14,Linz,4020.0,48.300818,14.284656,5.0,24,cocktailbars
557,Walker,111,Hauptplatz 21,Linz,4020.0,48.305106,14.286525,3.5,24,bars
19,Promenadenhof,206,Promenade 39,Linz,4020.0,48.3039,14.2834,3.5,23,mediterranean


In [78]:
#there is no rating for restuarants in foursquare. also, the amount of data in foursquare in not comparable to yelp

df_foursquare

Unnamed: 0,name,address,zip_code,latitude,longitude,categories
0,Restaurant Bar Absolut!on,Linz,4030,48.305158,14.285047,[]
1,IKAAN Restaurant Bar Cafe,Altstadt 16,4020,48.304891,14.284415,"[Café, Indian Restaurant]"
2,J. Leibetseder Restaurant- und BarbetriebsgmbH,Hauptplatz 12,4020,48.306419,14.286042,[Restaurant]
3,China-Restaurant Kim San,Hauptplatz 4,4020,48.306450,14.286628,[Chinese Restaurant]
4,Restaurant Persia,Klosterstr. 1,4020,48.304685,14.286580,[Restaurant]
...,...,...,...,...,...,...
416,Pferdehof Restaurant & Cafe,Ebelsberger Schloßweg 22,4030,48.246007,14.333185,[Inn]
417,Restaurant z Husaren,Traundorfer Str. 8,4030,48.247310,14.341719,"[Café, Restaurant]"
418,Restaurant Volkshaus Ebelsberg,Kremsmünsterer Str. 1- 3,4030,48.244087,14.329184,"[BBQ Joint, Diner]"
419,Restaurant Pussy Cat,Wambacher Str. 50/1,4030,48.246198,14.326622,[Restaurant]
