In [1]:
# data wrangling imports
import numpy as np
import pandas as pd 

# other imports
import csv
import re

In [2]:
# Load the scraped Wolt listings and tag every row with its source app so the
# records stay identifiable by delivery platform.
wolt_df = pd.read_csv("../geotracker/data/wolt.csv").assign(delivery_app="wolt")

In [3]:
# Remove rows that are exact duplicates of an earlier row (all columns are
# compared, the first occurrence is kept). This operates on rows, not columns.
# The index is not reset, so the remaining index labels can have gaps.
wolt_df.drop_duplicates(inplace=True)

In [4]:
# Convert the scraped `delivery_fee` text into a float column.
#
# Scraped values carry a trailing "Min." label (e.g. "1.90Min."), and the
# placeholder "deliveryMin." means no fee was captured. The label is stripped
# and the remainder parsed as a number; anything that is not a number
# (including the "delivery" placeholder and missing values) becomes NaN.
#
# The column is reassigned instead of calling `.replace(..., inplace=True)` on
# `wolt_df.delivery_fee`: that chained in-place call does not update the frame
# under pandas Copy-on-Write. The `np.NaN` alias is also avoided because it
# was removed in NumPy 2.0.
fee_text = (
    wolt_df["delivery_fee"]
    .astype(str)  # also tolerates values that are already numeric or missing
    .str.replace("Min.", "", regex=False)
    .str.strip()
)
wolt_df["delivery_fee"] = pd.to_numeric(fee_text, errors="coerce")

In [5]:
# Parse the scraped `avg_delivery_time` text (e.g. "30–40") into numeric
# lower/upper bounds and their midpoint.
#
# The pattern captures the first number and, if present, a second number
# separated from it by spaces, an en dash or a hyphen. Values without any
# digits ("Takeaway", missing values) yield NaN for both bounds; a value with
# a single number yields NaN for the upper bound only instead of raising an
# IndexError.
delivery_window = (
    wolt_df["avg_delivery_time"]
    .astype(str)  # missing values become "nan", which contains no digits
    .str.extract(r"(\d+(?:\.\d+)?)(?:[\s–-]+(\d+(?:\.\d+)?))?")
)
wolt_df["min_avg_delivery_time"] = pd.to_numeric(delivery_window[0])
wolt_df["max_avg_delivery_time"] = pd.to_numeric(delivery_window[1])

# Clean average delivery time: row-wise mean of the two bounds. `mean` skips
# NaN, so a row with only one bound gets that bound as its value.
wolt_df["avg_delivery_time_clean"] = wolt_df[
    ["min_avg_delivery_time", "max_avg_delivery_time"]
].mean(axis=1)

# The raw text column is no longer needed. NOTE: once it is dropped, this cell
# cannot be re-run without reloading the data first.
wolt_df = wolt_df.drop(columns=["avg_delivery_time"])

In [6]:
# Preprocessing type_of_cuisine.
#
# Lookup table from a fine-grained cuisine tag to the coarse category it is
# grouped under. Each key appears exactly once: a repeated key in a dict
# literal is silently overwritten by its last occurrence, which hides mistakes.
food_types_categories = {
    'japanese': 'asian',
    'indonesian': 'asian',
    'vietnamese': 'asian',
    'portuguese': 'european',
    'french': 'european',
    'greek': 'mediterranean',
    'german': 'european',
    'amerikanisch - barbecue': 'american',
    'spanish': 'mediterranean',
    'lebanese': 'middle eastern',
    'pizza': 'european',
    'thai': 'asian',
    'korean': 'asian',
    'chinese': 'asian',
    'brunch': 'breakfast',
    'taiwanese': 'asian',
    'polish': 'european',
    'hotdog': 'snacks',
    'chicken': 'snacks',
    'sandwiches': 'snacks',
    'austrian': 'european',
    'argentine': 'south american',
    'peruvian': 'south american',
    'swiss': 'european',
    'malaysian': 'asian',
    # NOTE(review): 'hungaran' looks like a typo of 'hungarian' (which maps to
    # 'european' further down); kept unchanged to preserve the mapping — confirm.
    'hungaran': 'balkan',
    'indpak': 'indian',
    'hotdogs': 'snacks',
    'currysausage': 'snacks',
    'icecream': 'ice',
    'sushi': 'asian',
    'latin': 'south american',
    'coffee': 'cafes',
    'tradamerican': 'american',
    'israeli': 'middle eastern',
    'foodstands': 'snacks',
    'mideastern': 'middle eastern',
    'breakfast_brunch': 'breakfast',
    'panasian': 'asian',
    'oriental': 'middle eastern',
    'falafel': 'middle eastern',
    'foodtrucks': 'snacks',
    'easterngerman': 'european',
    'kebab': 'fastfood',
    'bavarian': 'european',
    'schnitzel': 'european',
    'fish': 'seafood',
    'persian': 'middle eastern',
    'ramen': 'asian',
    'italian': 'european',
    'grilled': 'steak',
    'divebars': 'bars',
    'beachbars': 'bars',
    'beer_and_wine': 'bars',
    'hungarian': 'european',
    'tibetian': 'asian',
    'modern_european': 'european',
    'brasseries': 'european',
    'cocktailbars': 'bars',
    'pubs': 'bars',
    'food': 'snacks',
    'gourmet': 'european',
    'gastropub': 'european',
    'beergarden': 'bars',
    'arabian': 'middle eastern',
    'moroccan': 'middle eastern',
    'delicatessen': 'snacks',
    'bistro': 'european',
    'vegan': 'vegetarian or vegan',
    'vegetarian': 'vegetarian or vegan',
    'brewery': 'bars',
    'burgers': 'fastfood',
    'swabian': 'european',
    'irish': 'european',
    'british': 'european',
    'juicebar': 'snacks',
    'halal': 'middle eastern',
    'kosher': 'middle eastern',
    'wine_bars': 'bars',
    'juicebars': 'snacks',
    'bistros': 'european',
    'canteen': 'european',
    'gastropubs': 'european',
    'restaurants': 'european',
    'brazilian': 'south american',
    'bbq': 'fastfood',
    'latin american': 'south american'
}

# Distinct coarse categories that the lookup table can produce.
types_of_cuisine = set(food_types_categories.values())


In [7]:
def _split_cuisines(raw):
    """Turn a stringified cuisine list into a list of cuisine tokens.

    Quotes, spaces and square brackets are removed and the remainder is split
    on commas, e.g. "['asian', 'pho', 'rice']" -> ['asian', 'pho', 'rice'].
    Spaces inside a name are removed as well, and an empty list "[]" yields
    [''].

    Non-string input (a missing value, or a list produced by an earlier run of
    this cell) is returned unchanged, so the cell tolerates NaN and can be
    re-run safely.
    """
    if not isinstance(raw, str):
        return raw
    return raw.translate(str.maketrans("", "", "' []")).split(",")


wolt_df["type_of_cuisine"] = wolt_df["type_of_cuisine"].apply(_split_cuisines)


In [8]:
# Create one column per cuisine slot (up to 3 cuisines per restaurant).
#
# `.str[i]` picks the i-th element of each list and yields NaN when a list is
# shorter (or the value is missing). This avoids expanding the whole column
# with `apply(pd.Series)` three times, and it cannot raise a KeyError when no
# restaurant has a third cuisine.
cuisine_lists = wolt_df["type_of_cuisine"]
for slot in range(3):
    wolt_df[f"type_of_cuisine_{slot + 1}"] = cuisine_lists.str[slot]

In [9]:
# Display the frame to inspect the cleaned and derived columns built so far.
wolt_df

Unnamed: 0,restaurant_name,avg_review_score,street,zip_code,city_name,type_of_cuisine,minimum_order_value,delivery_fee,pricyness,latitude,longitude,delivery_app,min_avg_delivery_time,max_avg_delivery_time,avg_delivery_time_clean,type_of_cuisine_1,type_of_cuisine_2,type_of_cuisine_3
0,CRACKBUNS,9.0,Auguststrasse 63,10117,Berlin,"[burger, sliders, fries]",10.0,,€,52.527203,13.397106,wolt,30.0,40.0,35.0,burger,sliders,fries
1,Monsieur Vuong,8.8,Alte Schönhauser Str. 46,10119,Berlin,"[asian, pho, rice]",10.0,,€€,52.526659,13.407946,wolt,30.0,40.0,35.0,asian,pho,rice
2,Umami Prenzlauer Berg,8.8,Knaackstraße 16-18,10405,Berlin,"[asian, vietnamese, fish]",10.0,,€€,52.534618,13.419791,wolt,30.0,40.0,35.0,asian,vietnamese,fish
3,Kuchi Mitte,8.8,Gipsstraße 3,10119,Berlin,"[asian, japanese, sushi]",10.0,,€€,52.527184,13.399308,wolt,30.0,40.0,35.0,asian,japanese,sushi
4,Kebap with attitude,8.2,Gipsstraße 2,10119,Berlin,"[kebab, bowl, turkish]",10.0,,€€,52.527243,13.399138,wolt,30.0,40.0,35.0,kebab,bowl,turkish
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2781,dean&david Quartier 205,8.0,Mohrenstraße 50,10117,Berlin,"[salad, healthy, bowl]",10.0,1.9,€€,52.512267,13.390251,wolt,,,,salad,healthy,bowl
2852,FACTORY GIRL,8.8,Auguststraße 29c,10119,Berlin,"[breakfast, brunch, healthy]",,,€€,52.527276,13.398774,wolt,,,,breakfast,brunch,healthy
2867,Standard Serious Pizza - Mitte,7.4,Torstraße 102,10119,Berlin,"[pizza, italian, mediterranean]",,,€€,52.529476,13.403917,wolt,,,,pizza,italian,mediterranean
4089,FACTORY GIRL,8.8,Auguststraße 29c,10119,Berlin,"[breakfast, brunch, healthy]",,,€€,52.527276,13.398774,wolt,,,,breakfast,brunch,healthy


In [43]:
# Gather the distinct cuisine tokens of each of the three cuisine slots,
# concatenate them, and collapse the result into one de-duplicated list
# (the element order of the final list is not defined).
toc1, toc2, toc3 = (
    wolt_df[f"type_of_cuisine_{slot}"].unique().tolist() for slot in (1, 2, 3)
)
toc = [*toc1, *toc2, *toc3]
unique_toc = list({*toc})

In [44]:
# Show the de-duplicated cuisine tokens gathered from the three cuisine columns.
unique_toc


['',
 nan,
 'homecooking',
 'vegetarian',
 'spaetzle',
 'bakery',
 'taco',
 'israeli',
 'tandoori',
 'smoothie',
 'italian',
 'pokebowl',
 'chicken',
 'duck',
 'chocolate',
 'seafood',
 'dumplings',
 'tapas',
 'sausage',
 'poke',
 'drinks',
 'bistro',
 'sliders',
 'porridge',
 'pho',
 'soup',
 'Dessert',
 'wine',
 'asian',
 'tex-mex',
 'salmon',
 'panini',
 'waffles',
 'middleeastern',
 'spanish',
 'potato',
 'donut',
 'cocktail',
 'bowl',
 'tea',
 'oriental',
 'cheese',
 'american',
 'dessert',
 'fresh',
 'Mozzarella',
 'Sashimi',
 'café',
 'wrap',
 'kebab',
 'LatinAmerican',
 'thai',
 'Don',
 'Arabic',
 'butterchicken',
 'meatballs',
 'bubbletea',
 'Donburi',
 'chickennuggets',
 'fish',
 'gyros',
 'contemporary',
 'Schnitzel',
 'roll',
 'vegetable',
 'korean',
 'homemademeals',
 'mexican',
 'sushi',
 'coffee',
 'ramen',
 'lunch',
 'Pastrami',
 'grill',
 'currywurst',
 'gyoza',
 'moussaka',
 'Austrian',
 'homemade',
 'bao',
 'fruit',
 'vegan',
 'beyondmeat',
 'pasta',
 'european',
 'm