In [21]:
# data wrangling imports
import numpy as np
import pandas as pd

In [37]:
# Load the two delivery-platform tables. The first CSV column is discarded
# (.iloc[:, 1:]) and the cuisine / delivery-time columns are renamed so that
# both frames expose the same column names.
wolt = (
    pd.read_csv("../geotracker/data/wolt_clean_data.csv")
    .iloc[:, 1:]
    .rename(columns={
        "type_of_cuisine_categorized": "type_of_cuisine",
        "avg_delivery_time_clean": "avg_delivery_time",
    })
)

liefe = (
    pd.read_csv("../geotracker/data/lieferando_clean_data.csv")
    .iloc[:, 1:]
    .rename(columns={"type_of_cuisine_categorized": "type_of_cuisine"})
)

# Stack both delivery tables (original row labels are kept, so labels may
# repeat) and derive two text columns:
#   coordinates -> "<latitude>,<longitude>"
#   address     -> "<street>, <zip_code>, <city_name>"
column_order = [
    "restaurant_name", "avg_review_score", "reviews", "minimum_order_value",
    "delivery_fee", "pricyness", "avg_delivery_time", "type_of_cuisine",
    "address", "street", "zip_code", "city_name", "coordinates", "latitude",
    "longitude", "database",
]

all_deliveries_df = pd.concat([wolt, liefe]).assign(
    coordinates=lambda df: (
        df["latitude"].astype(str) + "," + df["longitude"].astype(str)
    ),
    address=lambda df: (
        df["street"] + ", " + df["zip_code"].astype(str) + ", " + df["city_name"]
    ),
)[column_order]


def _strip_brackets(raw):
    """Return `raw` with every "[" and "]" character removed."""
    return raw.replace("[", "").replace("]", "")


# Load the maps table, drop the row labelled 3168 and switch to the
# snake_case column names used by the delivery tables.
# NOTE(review): the reason for dropping row 3168 is not recorded here -- TODO document.
maps = (
    pd.read_csv("../geotracker/data/final.csv")
    .drop(labels=3168)
    .rename(columns={
        "Coordinates": "coordinates",
        "Name": "restaurant_name",
        "Type": "type",
        "Address": "address",
        "Opening Hours": "opening_hours",
        "Street": "street",
        "Bezirk": "city_name",
        "PLZ": "zip_code",
    })
)

# zip_code becomes a nullable integer; latitude / longitude are cut out of the
# bracketed "[lat, lon]" text before that text itself is stripped of brackets.
# NOTE(review): latitude / longitude stay as strings here (they are the pieces
# returned by str.split) -- confirm whether downstream code expects floats.
maps = maps.assign(
    zip_code=lambda df: df["zip_code"].astype("Int64"),
    latitude=lambda df: df["coordinates"].apply(
        lambda raw: _strip_brackets(raw).split(", ")[0]),
    longitude=lambda df: df["coordinates"].apply(
        lambda raw: _strip_brackets(raw).split(", ")[1]),
    coordinates=lambda df: df["coordinates"].apply(_strip_brackets),
    database="here_maps",
)

In [39]:
# Preview the first rows of the combined wolt + lieferando table.
all_deliveries_df.head()

Unnamed: 0,restaurant_name,avg_review_score,reviews,minimum_order_value,delivery_fee,pricyness,avg_delivery_time,type_of_cuisine,address,street,zip_code,city_name,coordinates,latitude,longitude,database
0,Besh,8.8,,10.0,,1.0,35.0,vegetarian or vegan,"Brückenstraße 15, 10179, Berlin",Brückenstraße 15,10179,Berlin,"52.511594911272894,13.416638899619755",52.511595,13.416639,wolt
1,Five Rivers,8.8,,10.0,,2.0,55.0,asian,"Albrechtstraße 11, 10117, Berlin",Albrechtstraße 11,10117,Berlin,"52.5214627,13.3848372",52.521463,13.384837,wolt
2,Ferrarino Pizza,8.6,,10.0,,2.0,35.0,italian,"Garnisonkirchpl. 2, 10178, Berlin",Garnisonkirchpl. 2,10178,Berlin,"52.52182999999999,13.40174",52.52183,13.40174,wolt
3,Hito Falafel,9.0,,10.0,,2.0,35.0,middle eastern,"Chauseestraße 4, 10115, Berlin",Chauseestraße 4,10115,Berlin,"52.527779975720726,13.38692562524518",52.52778,13.386926,wolt
4,Miu Lunch,,,10.0,1.9,1.0,,asian,"Reinhardtstraße 47 A, 10117, Berlin",Reinhardtstraße 47 A,10117,Berlin,"52.5221767,13.3781882",52.522177,13.378188,wolt


- TODO: re-map `Cuisine_1` in the maps table so that its categories are consistent with `type_of_cuisine` in the other tables


In [30]:
# Preview the first rows of the maps table after renaming and the
# latitude / longitude / database columns were added.
maps.head()

Unnamed: 0,coordinates,restaurant_name,type,address,opening_hours,Cuisine_1,Cuisine_2,street,city_name,zip_code,latitude,longitude,database
0,"52.45481, 13.59031",Ihr Koch on Tour,Restaurant,Seelenbinderstraße 112 Köpenick 12555 Berlin,Wed-Sat: 17:00 - 22:30 Sun: 14:00 - 19:00,German,,Seelenbinderstraße 112,Köpenick,12555,52.45481,13.59031,here_maps
1,"52.45481, 13.59031",Pizzawerkköpenick,Restaurant,Seelenbinderstraße 112 Köpenick 12555 Berlin,,Pizza,Burgers,Seelenbinderstraße 112,Köpenick,12555,52.45481,13.59031,here_maps
2,"52.45481, 13.59031",Pizzawerk,Restaurant,Seelenbinderstraße 112 Köpenick 12555 Berlin,Tue-Sun: 17:00 - 22:00,Pizza,,Seelenbinderstraße 112,Köpenick,12555,52.45481,13.59031,here_maps
3,"52.45268, 13.59594",Veracruz,Restaurant,Fürstenwalder Damm 260 Friedrichshagen 12587 B...,"Mon-Thu, Sun: 12:00 - 23:00 Fri, Sat: 12:00 - ...",Mexican,,Fürstenwalder Damm 260,Friedrichshagen,12587,52.45268,13.59594,here_maps
4,"52.44842, 13.61015",Marina Sol,Restaurant,Müggelseedamm 70 Friedrichshagen 12587 Berlin,Mon-Sun: 12:00 - 22:00,Balkan,Grill,Müggelseedamm 70,Friedrichshagen,12587,52.44842,13.61015,here_maps


In [43]:
# List the distinct Cuisine_1 labels that need a category.
# NOTE(review): the saved output is already lower-cased, i.e. this cell was
# last run after the lower-casing cell that appears further down; on a fresh
# top-to-bottom run the labels shown here would still be capitalised.
maps.Cuisine_1.unique()

array(['german', 'pizza', 'mexican', 'balkan', 'seafood', nan, 'asian',
       'american', 'mediterranean', 'burgers', 'greek', 'pakistani',
       'middle eastern', 'italian', 'breakfast', 'spanish', 'sicilian',
       'turkish', 'grill', 'sandwiches', 'steak', 'fusion',
       'international', 'brunch', 'soup', 'chicken', 'brewpub',
       'ice cream', 'chinese - cantonese', 'austrian', 'french', 'vegan',
       'tapas', 'hot dogs', 'hawaiian/polynesian', 'dinner', 'vegetarian',
       'canadian', 'chilean', 'african', 'chinese', 'lebanese',
       'japanese - sushi', 'irish', 'argentinean', 'australian',
       'european', 'natural/healthy', 'indian', 'caucasian', 'norwegian',
       'polish', 'hungarian', 'caribbean', 'pastries',
       'american - creole', 'british', 'jewish/kosher', 'crêperie',
       'russian', 'barbecue', 'fondue', 'american - cajun', 'bohemian'],
      dtype=object)

In [42]:
# Preprocessing Cuisine_1: lower-case every label (missing values stay
# missing) so the labels can be looked up against lower-case dictionary keys.
maps['Cuisine_1'] = maps.Cuisine_1.str.lower()

In [48]:
# Dictionary mapping every known cuisine keyword / raw label to its category.
#
# * key   -> raw keyword or label as it appears in the source data
# * value -> category name, or np.nan when the keyword carries no usable
#            cuisine information
#
# Lookups are case-sensitive: keys that contain capital letters (e.g.
# 'Sashimi', 'Schnitzel') can never match a label that has been lower-cased.
#
# The mapping is kept idempotent: whenever a category name is itself a key,
# it maps to itself, so re-applying the dictionary to already categorized
# values does not move them to another category.
#
# Fixed relative to the previous version:
#   'naan'    -> np.nan          (was the literal string 'nan', which is a
#                                 regular string and not a missing value)
#   'gyros'   -> 'mediterranean' (was 'greek', a label this dictionary itself
#                                 folds into 'mediterranean')
#   'italian' -> 'italian'       (was 'european', although every other Italian
#                                 keyword maps to the 'italian' category)
#
# NOTE(review): a few mappings look unusual and were left untouched -- please
# confirm: 'meze' -> 'asian', 'australian' -> 'asian', 'noodles' -> 'italian',
# 'lunch' -> 'steak', 'fine-dining' -> 'middle eastern'.
unique_toc_dict = {
    'thai': 'asian',
    'fine-dining': 'middle eastern',
    'cheese': np.nan,
    'fusion': np.nan,
    'butterchicken': 'middle eastern',
    'chinese': 'asian',
    'traditional': np.nan,
    'café': 'cafes',
    'german': 'european',
    'cocktail': 'bars',
    'hamburger': 'american',
    'baklava': 'middle eastern',
    'pokebowl': 'poke',
    'bagel': 'breakfast/dessert',
    'waffles': 'cafes',
    'worklunch': 'fastfood',
    'tapas': 'mediterranean',
    'mediterranean': 'mediterranean',
    'pastries': 'breakfast/dessert',
    'sliders': 'fastfood',
    'turkish': 'middle eastern',
    'steak': 'steak',
    'snacks': 'snacks',
    'Sashimi': 'asian',
    'pancakes': 'breakfast/dessert',
    'Georgian': 'european',
    'risotto': 'italian',
    'glutenfree': 'healthy',
    'bento': 'asian',
    'bistro': 'european',
    'shawarma': 'middle eastern',
    'meatballs': 'european',
    'sushi': 'asian',
    'fish': 'seafood',
    'Don': np.nan,
    'bakery': 'breakfast/dessert',
    'delicious': np.nan,
    'mexican': 'mexican',
    'summerrolls': 'asian',
    'chickennuggets': 'fastfood',
    'korean': 'asian',
    'vegan': 'vegetarian or vegan',
    'chocolate': 'breakfast/dessert',
    'porridge': 'breakfast/dessert',
    'Schnitzel': 'european',
    'Arabic': 'middle eastern',
    'moussaka': 'middle eastern',
    'Dessert': 'breakfast/dessert',
    'vegetarian': 'vegetarian or vegan',
    'donut': 'breakfast/dessert',
    'friedchicken': 'fastfood',
    'smoothie': 'breakfast/dessert',
    'beer': 'bars',
    'indian': 'middle eastern',
    'rice': 'asian',
    'fruit': 'breakfast/dessert',
    'icecoffee': 'cafes',
    'Pastrami': np.nan,
    'dumplings': 'asian',
    'currywurst': 'fastfood',
    'asian': 'asian',
    'pizza': 'italian',
    'grill': 'steak',
    'seafood': 'seafood',
    'wine': 'bars',
    'schnitzel': 'european',
    'american': 'american',
    'healthy': 'healthy',
    'hummus': 'middle eastern',
    'russian': 'russian',
    'Donburi': np.nan,
    'baguette': 'breakfast/dessert',
    'salad': 'healthy',
    'fries': 'fastfood',
    'Austrian': 'european',
    'gyoza': 'asian',
    'potato': np.nan,
    # was the string 'nan'; a real missing value is intended here
    'naan': np.nan,
    'icecream': 'breakfast/dessert',
    'pita': 'mediterranean',
    'sausage': 'european',
    'neapolitanpizza': 'italian',
    'spaghetti': 'italian',
    'cake': 'breakfast/dessert',
    'ribs': 'american',
    'dessert': "breakfast/dessert",
    'Doughnut': 'breakfast/dessert',
    'matcha': 'breakfast/dessert',
    'focaccia': 'italian',
    'homemade': np.nan,
    'milkshake': 'american',
    'taco': 'mexican',
    'curry': 'fastfood',
    'israeli': 'middle eastern',
    'bapburgersandpastrami': 'fastfood',
    'sandwich': 'fastfood',
    'bowl': 'poke',
    'maki': 'asian',
    'roll': 'asian',
    'brunch': 'breakfast/dessert',
    'pho': 'asian',
    'vietnamese': 'asian',
    'burgers': 'fastfood',
    'muchapizza': 'italian',
    'masala': 'middle eastern',
    'contemporary': np.nan,
    'falafel': 'middle eastern',
    # was 'greek', which this dictionary maps on to 'mediterranean'
    'gyros': 'mediterranean',
    'friedrice': 'asian',
    'chicken': 'snacks',
    # was 'european'; kept in line with the other Italian keywords
    'italian': 'italian',
    'spaetzle': 'european',
    'streetfood': 'fastfood',
    'Mozzarella': 'italian',
    'ramen': 'asian',
    'antipasti': 'italian',
    'noodles': 'italian',
    'hotdog': 'snacks',
    'coffee': 'cafes',
    'oriental': 'middle eastern',
    'bubbletea': 'breakfast/dessert',
    'greek': 'mediterranean',
    'middleeastern': 'middle eastern',
    'pasta': 'italian',
    'fresh': 'healthy',
    'tandoori': 'middle eastern',
    'wrap': 'healthy',
    'european': 'european',
    'tea': 'breakfast/dessert',
    'bao': 'asian',
    'beyondmeat': 'vegetarian or vegan',
    'duck': 'asian',
    'galette': 'european',
    'panini': 'italian',
    'soup': 'healthy',
    'fastfood': 'fastfood',
    'LatinAmerican': 'south american',
    'juice': 'breakfast/dessert',
    'halal': 'middle eastern',
    'burger': 'american',
    'vegetable': 'vegetarian or vegan',
    'Hawaii': 'american',
    'breakfast': 'breakfast/dessert',
    'poke': 'poke',
    'drinks': 'bars',
    'burrito': 'mexican',
    'salmon': 'seafood',
    'homecooking': np.nan,
    'spanish': 'mediterranean',
    'meat': 'steak',
    'french': 'european',
    'veggieburger': 'vegetarian or vegan',
    'lunch': 'steak',
    'meze': 'asian',
    'homemademeals': np.nan,
    'kebab': 'middle eastern',
    'beef': 'steak',
    'tex-mex': 'mexican',
    'japanese': 'asian',
    'arab': 'middle eastern',
    'italian style pizza': 'italian',
    'spanish/tapas': 'mediterranean',
    'drinks/snacks': 'bars',
    '100% halal': 'middle eastern',
    'bio': 'healthy',
    'ice cream': 'breakfast/dessert',
    'steaks': 'steak',
    'sandwiches': 'fastfood',
    'turkish pizza': 'middle eastern',
    'salads': 'healthy',
    'indonesian': 'asian',
    'german dishes': 'european',
    'döner': 'middle eastern',
    'argentinian': 'south american',
    'wraps': 'healthy',
    'desserts': 'breakfast/dessert',
    'spare ribs': 'steak',
    'lebanese': 'middle eastern',
    'moroccan': 'middle eastern',
    'polish': 'european',
    'gluten-free': "healthy",
    'american style pizza': 'american',
    'austrian cuisine': 'european',
    'soups': 'healthy',
    'african': 'middle eastern',
    'other': np.nan,
    'balkans': 'european',
    'iranian': 'middle eastern',
    'baked goods': 'breakfast/dessert',
    '': np.nan,
    'balkan': 'european',
    'pakistani': 'middle eastern',
    'middle eastern': 'middle eastern',
    'sicilian': 'italian',
    "international": "international",
    "brewpub": "bars",
    "chinese - cantonese": "asian",
    "austrian": "european",
    "hot dogs": "fastfood",
    "hawaiian/polynesian": "american",
    "dinner": np.nan,
    "canadian": "american",
    "chilean": 'south american',
    "japanese - sushi": "asian",
    "irish": "european",
    "argentinean": 'south american',
    "australian": "asian",
    "natural/healthy": "healthy",
    'caucasian': "european",
    'norwegian': "european",
    'hungarian': "european",
    'caribbean': 'south american',
    'american - creole': "american",
    'british': "european",
    'jewish/kosher': 'middle eastern',
    'crêperie': 'breakfast/dessert',
    'barbecue': 'steak',
    'fondue': "european",
    'american - cajun': "american",
    'bohemian': np.nan
}

# in case we need to add more keywords (new labels default to NaN until they are categorized)

# for x in liefe_df.type_of_cuisine.unique():
#     if x not in unique_toc_dict.keys():
#         unique_toc_dict[x] = np.nan

# to check whether any Cuisine_1 label is missing from the dictionary
# for x in maps.Cuisine_1.unique():
#     if x not in unique_toc_dict:
#         print(x)
