In [1]:
# data wrangling imports
import numpy as np
import pandas as pd

# other imports
import csv
import re
import requests

In [2]:
# loading the raw Lieferando restaurant data
liefe_df = pd.read_csv("../geotracker/data/lieferando_restaurants.csv")
# creating column to identify the data source
liefe_df["database"] = "lieferando"
# dropping duplicated rows (drop_duplicates compares whole rows, not columns)
# NOTE(review): the displayed frame carries index-like `Unnamed: 0` columns;
# if those are unique per row, no row can ever count as a duplicate here —
# confirm whether they should be excluded from the comparison.
liefe_df = liefe_df.drop_duplicates()

# updating city column, to Berlin
# Bracket assignment always writes a DataFrame column. Attribute assignment
# (`liefe_df.city = ...`) only does so when the column already exists and
# otherwise just sets a Python attribute on the object.
liefe_df["city"] = "Berlin"

# converting avg review into 1-10 scale by doubling the raw score
# (assumes the raw score is on a 5-point scale — TODO confirm)
liefe_df["avg_review_score"] = liefe_df["avg_review_score"] * 2


In [3]:
# preprocessing type_of_cuisine: keep only the first comma-separated keyword.
# The vectorised .str accessor passes missing values through as NaN, whereas
# calling x.split(",") on every element raises AttributeError as soon as a
# cell is NaN (a float).
liefe_df['type_of_cuisine'] = liefe_df['type_of_cuisine'].str.split(",").str[0]
# lower-cased copy of the keyword, used as the lookup key for categorizing
liefe_df['type_of_cuisine_categorized'] = liefe_df['type_of_cuisine'].str.lower()

In [5]:
# dictionary containing all keywords and categorizing
# Maps a lower-cased cuisine keyword to its category; np.nan marks keywords
# that carry no usable category.
# Every key is lower case because the lookup column is lower-cased before it
# is mapped — a capitalised key could never match. Keys that only differed by
# case ('Schnitzel'/'schnitzel', 'Dessert'/'dessert') had identical values and
# are merged into one entry.
unique_toc_dict = {
    'thai': 'asian',
    'fine-dining': 'middle eastern',
    'cheese': np.nan,
    'fusion': np.nan,
    'butterchicken': 'middle eastern',
    'chinese': 'asian',
    'traditional': np.nan,
    'café': 'cafes',
    'german': 'european',
    'cocktail': 'bars',
    'hamburger': 'american',
    'baklava': 'middle eastern',
    'pokebowl': 'poke',
    'bagel': 'breakfast/dessert',
    'waffles': 'cafes',
    'worklunch': 'fastfood',
    'tapas': 'mediterranean',
    'mediterranean': 'mediterranean',
    'pastries': 'breakfast/dessert',
    'sliders': 'fastfood',
    'turkish': 'middle eastern',
    'steak': 'steak',
    'snacks': 'snacks',
    'sashimi': 'asian',
    'pancakes': 'breakfast/dessert',
    'georgian': 'european',
    'risotto': 'italian',
    'glutenfree': 'healthy',
    'bento': 'asian',
    'bistro': 'european',
    'shawarma': 'middle eastern',
    'meatballs': 'european',
    'sushi': 'asian',
    'fish': 'seafood',
    'don': np.nan,
    'bakery': 'breakfast/dessert',
    'delicious': np.nan,
    'mexican': 'mexican',
    'summerrolls': 'asian',
    'chickennuggets': 'fastfood',
    'korean': 'asian',
    'vegan': 'vegetarian or vegan',
    'chocolate': 'breakfast/dessert',
    'porridge': 'breakfast/dessert',
    'arabic': 'middle eastern',
    'moussaka': 'middle eastern',
    'vegetarian': 'vegetarian or vegan',
    'donut': 'breakfast/dessert',
    'friedchicken': 'fastfood',
    'smoothie': 'breakfast/dessert',
    'beer': 'bars',
    'indian': 'middle eastern',
    'rice': 'asian',
    'fruit': 'breakfast/dessert',
    'icecoffee': 'cafes',
    'pastrami': np.nan,
    'dumplings': 'asian',
    'currywurst': 'fastfood',
    'asian': 'asian',
    'pizza': 'italian',
    'grill': 'steak',
    'seafood': 'seafood',
    'wine': 'bars',
    'schnitzel': 'european',
    'american': 'american',
    'healthy': 'healthy',
    'hummus': 'middle eastern',
    'russian': 'russian',
    'donburi': np.nan,
    'baguette': 'breakfast/dessert',
    'salad': 'healthy',
    'fries': 'fastfood',
    'austrian': 'european',
    'gyoza': 'asian',
    'potato': np.nan,
    # was the string 'nan', which created a literal "nan" category instead of
    # a missing value
    'naan': np.nan,
    'icecream': 'breakfast/dessert',
    'pita': 'mediterranean',
    'sausage': 'european',
    'neapolitanpizza': 'italian',
    'spaghetti': 'italian',
    'cake': 'breakfast/dessert',
    'ribs': 'american',
    'dessert': 'breakfast/dessert',
    'doughnut': 'breakfast/dessert',
    'matcha': 'breakfast/dessert',
    'focaccia': 'italian',
    'homemade': np.nan,
    'milkshake': 'american',
    'taco': 'mexican',
    'curry': 'fastfood',
    'israeli': 'middle eastern',
    # was the empty string, which created an empty-named category instead of
    # a missing value
    'bapburgersandpastrami': np.nan,
    'sandwich': 'fastfood',
    'bowl': 'poke',
    'maki': 'asian',
    'roll': 'asian',
    'brunch': 'breakfast/dessert',
    'pho': 'asian',
    'vietnamese': 'asian',
    'burgers': 'fastfood',
    'muchapizza': 'italian',
    'masala': 'middle eastern',
    'contemporary': np.nan,
    'falafel': 'middle eastern',
    # NOTE(review): 'greek' is used as a category only here, while the keyword
    # 'greek' itself maps to 'mediterranean' — confirm which one is intended.
    'gyros': 'greek',
    'friedrice': 'asian',
    'chicken': 'snacks',
    # NOTE(review): pizza/pasta/risotto map to 'italian' but the keyword
    # 'italian' maps to 'european' — confirm this is intended.
    'italian': 'european',
    'spaetzle': 'european',
    'streetfood': 'fastfood',
    'mozzarella': 'italian',
    'ramen': 'asian',
    'antipasti': 'italian',
    'noodles': 'italian',
    'hotdog': 'snacks',
    'coffee': 'cafes',
    'oriental': 'middle eastern',
    'bubbletea': 'breakfast/dessert',
    'greek': 'mediterranean',
    'middleeastern': 'middle eastern',
    'pasta': 'italian',
    'fresh': 'healthy',
    'tandoori': 'middle eastern',
    'wrap': 'healthy',
    'european': 'european',
    'tea': 'breakfast/dessert',
    'bao': 'asian',
    'beyondmeat': 'vegetarian or vegan',
    'duck': 'asian',
    'galette': 'european',
    'panini': 'italian',
    'soup': 'healthy',
    'fastfood': 'fastfood',
    'latinamerican': 'south american',
    'juice': 'breakfast/dessert',
    'halal': 'middle eastern',
    'burger': 'american',
    'vegetable': 'vegetarian or vegan',
    'hawaii': 'american',
    'breakfast': 'breakfast/dessert',
    'poke': 'poke',
    'drinks': 'bars',
    'burrito': 'mexican',
    'salmon': 'seafood',
    'homecooking': np.nan,
    'spanish': 'mediterranean',
    'meat': 'steak',
    'french': 'european',
    'veggieburger': 'vegetarian or vegan',
    'lunch': 'steak',
    'meze': 'asian',
    'homemademeals': np.nan,
    'kebab': 'middle eastern',
    'beef': 'steak',
    'tex-mex': 'mexican',
    'japanese': 'asian',
    'arab': 'middle eastern',
    'italian style pizza': 'italian',
    'spanish/tapas': 'mediterranean',
    'drinks/snacks': 'bars',
    '100% halal': 'middle eastern',
    'bio': 'healthy',
    'ice cream': 'breakfast/dessert',
    'steaks': 'steak',
    'sandwiches': 'fastfood',
    'turkish pizza': 'middle eastern',
    'salads': 'healthy',
    'indonesian': 'asian',
    'german dishes': 'european',
    'döner': 'middle eastern',
    'argentinian': 'south american',
    'wraps': 'healthy',
    'desserts': 'breakfast/dessert',
    'spare ribs': 'steak',
    'lebanese': 'middle eastern',
    'moroccan': 'middle eastern',
    'polish': 'european',
    'gluten-free': 'healthy',
    'american style pizza': 'american',
    'austrian cuisine': 'european',
    'soups': 'healthy',
    'african': 'middle eastern',
    'other': np.nan,
    'balkans': 'european',
    'iranian': 'middle eastern',
    'baked goods': 'breakfast/dessert',
    '': np.nan
}

# updating type of cuisine columns
# The lookup key is derived from the raw `type_of_cuisine` column rather than
# from `type_of_cuisine_categorized` itself. Mapping the already-categorized
# column again (e.g. when this cell is re-run) would turn every category that
# is not also a dictionary key, such as 'middle eastern', into NaN.
liefe_df['type_of_cuisine_categorized'] = (
    liefe_df['type_of_cuisine'].str.lower().map(unique_toc_dict)
)

# in case we need to add more keywords to the dictionary

# for x in liefe_df.type_of_cuisine.unique():
#     if x not in unique_toc_dict.keys():
#         unique_toc_dict[x] = np.nan

# to check whether any cuisine keyword is missing from the dictionary
# for x in liefe_df.type_of_cuisine.unique():
#     if x not in unique_toc_dict:
#         print(x)


In [29]:
# preview the first five rows of the working frame
liefe_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,restaurant_name,restaurant_url,avg_review_score,reviews,type_of_cuisine,street,zip_code,city_name,database,type_of_cuisine_categorized,address
0,0,BURGER KING ®,https://www.lieferando.de/en/menu/burger-king-...,8.0,210,Snacks,Schönhauser Allee 79-80,10439,Berlin,lieferando,snacks,"Schönhauser Allee 79-80, 10439, Berlin"
1,1,Bowl Time,https://www.lieferando.de/en/menu/bowl-time,9.0,10,Falafel,Danziger Straße 61,10435,Berlin,lieferando,middle eastern,"Danziger Straße 61, 10435, Berlin"
2,2,BURGER KING ®,https://www.lieferando.de/en/menu/burger-king-...,5.0,217,Snacks,Georgenstraße 14-18,10117,Berlin,lieferando,snacks,"Georgenstraße 14-18, 10117, Berlin"
3,3,BURGER KING ®,https://www.lieferando.de/en/menu/burger-king-...,4.0,777,Snacks,Europaplatz 1,10557,Berlin,lieferando,snacks,"Europaplatz 1, 10557, Berlin"
4,4,Bring Bakery,https://www.lieferando.de/en/menu/bring-bakery,8.0,14,Breakfast,Beusselstraße 31,10553,Berlin,lieferando,breakfast/dessert,"Beusselstraße 31, 10553, Berlin"


In [7]:
# build one geocodable string per row: "<street>, <zip_code>, <city>"
# zip_code is converted to text element-wise because adding a numeric column
# to a string column raises TypeError; na_action='ignore' keeps missing zip
# codes as NaN so the resulting address is NaN too instead of containing "nan".
# NOTE(review): if zip_code was parsed as float, the text will read "10439.0"
# — confirm the column's dtype.
liefe_df['address'] = (
    liefe_df['street'] + ", "
    + liefe_df['zip_code'].map(str, na_action='ignore') + ", "
    + liefe_df['city']
)

In [22]:
# adding lat/lon
# Nominatim search endpoint and request settings shared by both lookups.
_NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
# Identify this client explicitly instead of sending the HTTP library default.
_NOMINATIM_HEADERS = {"User-Agent": "geotracker-notebook/1.0"}
# Seconds to wait for a response; without a timeout a stalled connection
# would block the whole column-wise apply indefinitely.
_NOMINATIM_TIMEOUT = 10
# address -> first search hit. Lets latitude() and longitude() share a single
# HTTP request per address. Only successful lookups are stored, so a failed
# lookup is retried on the next call.
_geocode_cache = {}


def _first_search_hit(address):
    """Return the first Nominatim search result for ``address`` as a dict.

    Any error (network failure, non-JSON body, empty result list) propagates
    to the caller.
    """
    if address not in _geocode_cache:
        response = requests.get(
            _NOMINATIM_URL,
            params={"q": address, "format": "json"},
            headers=_NOMINATIM_HEADERS,
            timeout=_NOMINATIM_TIMEOUT,
        )
        _geocode_cache[address] = response.json()[0]
    return _geocode_cache[address]


def _coordinate(address, key):
    """Return field ``key`` of the first search hit for ``address``, else NaN."""
    # A missing or blank address would otherwise be sent as the query "nan"
    # (or an empty query) and could come back with unrelated coordinates.
    if not isinstance(address, str) or not address.strip():
        return np.nan
    try:
        return _first_search_hit(address)[key]
    except Exception:
        # Deliberately best-effort: one failing address must not abort the
        # geocoding of the whole column.
        return np.nan


def latitude(address):
    """Look up the latitude of ``address`` via the Nominatim search API.

    Args:
        address: free-form address string used as the search query.

    Returns:
        The ``"lat"`` value of the first search hit, exactly as it appears in
        the JSON response, or ``np.nan`` when the address is missing/blank or
        the lookup fails for any reason.
    """
    return _coordinate(address, "lat")


def longitude(address):
    """Look up the longitude of ``address`` via the Nominatim search API.

    Args:
        address: free-form address string used as the search query.

    Returns:
        The ``"lon"`` value of the first search hit, exactly as it appears in
        the JSON response, or ``np.nan`` when the address is missing/blank or
        the lookup fails for any reason.
    """
    return _coordinate(address, "lon")


In [30]:
# geocode every address: one new column per coordinate, latitude first
for coordinate_column, lookup in (("latitude", latitude), ("longitude", longitude)):
    liefe_df[coordinate_column] = liefe_df["address"].apply(lookup)


In [31]:
# give the city column its final name (renames liefe_df itself)
liefe_df.rename(columns={"city": "city_name"}, inplace=True)

# creating clean df: only the columns below, in this order
_clean_columns = [
    'restaurant_name',
    'avg_review_score',
    'reviews',
    'type_of_cuisine_categorized',
    'street',
    'zip_code',
    'city_name',
    'latitude',
    'longitude',
    'database',
]
liefe_df_clean = liefe_df[_clean_columns]

In [32]:
# display the cleaned frame (the notebook renders a truncated preview)
liefe_df_clean

Unnamed: 0,restaurant_name,avg_review_score,reviews,type_of_cuisine_categorized,street,zip_code,city_name,latitude,longitude,database
0,BURGER KING ®,8.0,210,snacks,Schönhauser Allee 79-80,10439,Berlin,52.550786,13.4141103,lieferando
1,Bowl Time,9.0,10,middle eastern,Danziger Straße 61,10435,Berlin,52.5396828,13.4225656,lieferando
2,BURGER KING ®,5.0,217,snacks,Georgenstraße 14-18,10117,Berlin,52.5202635,13.3882862,lieferando
3,BURGER KING ®,4.0,777,snacks,Europaplatz 1,10557,Berlin,52.5257435,13.3685757,lieferando
4,Bring Bakery,8.0,14,breakfast/dessert,Beusselstraße 31,10553,Berlin,52.5302234,13.3288675,lieferando
...,...,...,...,...,...,...,...,...,...,...
3317,McDonald's®,9.0,491,american,Potsdamer Straße 2,14513,Berlin,52.5096032,13.37351,lieferando
3318,Le Asia,9.0,948,asian,Potsdamer Straße 61,14513,Berlin,52.5041143,13.3670578,lieferando
3319,Domino's Pizza,9.0,516,snacks,Potsdamer Straße 53,14513,Berlin,52.4350381,13.2594338,lieferando
3320,burgerme,9.0,82,snacks,Bäckerstraße 4,14513,Berlin,52.41481,13.4932191,lieferando


In [33]:
# saving the cleaned df into csv
# NOTE(review): to_csv also writes the index as an unnamed first column by
# default, which comes back as `Unnamed: 0` when the file is read again —
# confirm whether downstream readers rely on that column.
_clean_csv_path = "../geotracker/data/lieferando_clean_data.csv"
liefe_df_clean.to_csv(_clean_csv_path)