In [1]:
# data wrangling imports
import numpy as np
import pandas as pd 

# other imports
import csv
import re

In [2]:
# read wolt.csv and add a constant column that identifies the data source
wolt_df = pd.read_csv("../geotracker/data/wolt.csv").assign(delivery_app="wolt")

In [3]:
# dropping duplicated rows (drop_duplicates compares whole rows, it does not touch columns)
wolt_df = wolt_df.drop_duplicates()

In [4]:
# Clean the delivery_fee column:
#   * the placeholder "deliveryMin." becomes a missing value
#   * the literal "1.90Min." becomes the float 1.90
# NOTE(review): every other fee value is passed through unchanged, so the column
# is only fully numeric if those two are the only string values - confirm against the data.
#
# The result is assigned back to the frame explicitly: calling
# `replace(..., inplace=True)` on `wolt_df.delivery_fee` operates on a temporary
# Series and does not update the frame under pandas Copy-on-Write.
# `np.nan` is used because the `np.NaN` alias no longer exists in NumPy 2.x.
wolt_df["delivery_fee"] = (
    wolt_df["delivery_fee"]
    .replace("deliveryMin.", np.nan)
    .apply(lambda fee: 1.90 if fee == "1.90Min." else fee)
)

In [5]:
# Turn the textual avg_delivery_time column into numeric min / max / mean columns.
# Rows that are missing or marked "Takeaway" are skipped and end up as NaN.
mask_1 = wolt_df["avg_delivery_time"].notnull()
mask_2 = wolt_df["avg_delivery_time"] != "Takeaway"
mask = mask_1 & mask_2

# Split each remaining value once: the en dash is treated like a space, so the
# first two space-separated tokens are taken as the lower and upper bound.
# presumably the values look like "<min>–<max> ..." - TODO confirm against the data
time_parts = wolt_df.loc[mask, "avg_delivery_time"].apply(
    lambda text: text.replace("–", " ").split(" "))

wolt_df["min_avg_delivery_time"] = time_parts.apply(lambda parts: parts[0])
# A value with a single token has no upper bound; use NaN instead of raising IndexError.
wolt_df["max_avg_delivery_time"] = time_parts.apply(
    lambda parts: parts[1] if len(parts) > 1 else np.nan)

# pd.to_numeric keeps its default errors="raise", so an unexpected non-numeric
# token still fails loudly here rather than silently becoming NaN.
bound_columns = ["min_avg_delivery_time", "max_avg_delivery_time"]
wolt_df[bound_columns] = wolt_df[bound_columns].apply(pd.to_numeric)

# calculating clean avg_delivery_time : avg_delivery_time_clean
# (row-wise mean of the two bounds; NaN bounds are skipped by DataFrame.mean)
wolt_df["avg_delivery_time_clean"] = wolt_df[bound_columns].mean(axis=1)

# The raw text column is removed, so this cell cannot be re-run without
# re-loading wolt_df first.
wolt_df = wolt_df.drop(columns=["avg_delivery_time"])

In [6]:
# preprocessing type_of_cuisine

# Source cuisine labels grouped by the broader category they are mapped to.
# Grouping by category keeps each label in exactly one place; the previous flat
# literal listed 'japanese' and 'beachbars' twice.
cuisine_labels_by_category = {
    'asian': [
        'japanese', 'indonesian', 'vietnamese', 'thai', 'korean', 'chinese',
        'taiwanese', 'malaysian', 'sushi', 'panasian', 'ramen', 'tibetian',
    ],
    'european': [
        'portuguese', 'french', 'german', 'pizza', 'polish', 'austrian',
        'swiss', 'easterngerman', 'bavarian', 'schnitzel', 'italian',
        'hungarian', 'modern_european', 'brasseries', 'gourmet', 'gastropub',
        'bistro', 'swabian', 'irish', 'british', 'bistros', 'canteen',
        'gastropubs', 'restaurants',
    ],
    'mediterranean': ['greek', 'spanish'],
    'american': ['amerikanisch - barbecue', 'tradamerican'],
    'middle eastern': [
        'lebanese', 'israeli', 'mideastern', 'oriental', 'falafel', 'persian',
        'arabian', 'moroccan', 'halal', 'kosher',
    ],
    'breakfast': ['brunch', 'breakfast_brunch'],
    'snacks': [
        'hotdog', 'chicken', 'sandwiches', 'hotdogs', 'currysausage',
        'foodstands', 'foodtrucks', 'food', 'delicatessen', 'juicebar',
        'juicebars',
    ],
    'south american': [
        'argentine', 'peruvian', 'latin', 'brazilian', 'latin american',
    ],
    # NOTE(review): 'hungaran' looks like a misspelling of 'hungarian', which is
    # mapped to 'european' above - confirm whether this entry is intentional.
    'balkan': ['hungaran'],
    'indian': ['indpak'],
    'ice': ['icecream'],
    'cafes': ['coffee'],
    'fastfood': ['kebab', 'burgers', 'bbq'],
    'seafood': ['fish'],
    'steak': ['grilled'],
    'bars': [
        'divebars', 'beachbars', 'beer_and_wine', 'cocktailbars', 'pubs',
        'beergarden', 'brewery', 'wine_bars',
    ],
    'vegetarian or vegan': ['vegan', 'vegetarian'],
}

# dictionary that determines the type_of_cuisine category of each source label
food_types_categories = {
    label: category
    for category, labels in cuisine_labels_by_category.items()
    for label in labels
}

# A label listed under two categories would silently collapse to one entry.
assert len(food_types_categories) == sum(
    len(labels) for labels in cuisine_labels_by_category.values()
), "a cuisine label is listed under more than one category"

# the distinct categories a label can be mapped to
types_of_cuisine = set(food_types_categories.values())


In [7]:
# Strip quotes, spaces and square brackets from each raw value, then split on commas
# so that every row holds a list of cuisine labels.
list_syntax_remover = str.maketrans("", "", "' []")
wolt_df["type_of_cuisine"] = wolt_df["type_of_cuisine"].apply(
    lambda raw_value: raw_value.translate(list_syntax_remover).split(","))


In [8]:
# creating separate columns for each of the 3 types of cuisine options
# `.str.get(i)` picks the i-th element of each list and yields NaN when the list
# is shorter. This avoids expanding every row into a pd.Series three times and
# no longer raises KeyError when no row has a second or third entry.
cuisine_lists = wolt_df["type_of_cuisine"]
wolt_df["type_of_cuisine_1"] = cuisine_lists.str.get(0)
wolt_df["type_of_cuisine_2"] = cuisine_lists.str.get(1)
wolt_df["type_of_cuisine_3"] = cuisine_lists.str.get(2)

In [9]:
# Collect every distinct cuisine label that occurs in any of the three columns.
toc1 = wolt_df.type_of_cuisine_1.unique().tolist()
toc2 = wolt_df.type_of_cuisine_2.unique().tolist()
toc3 = wolt_df.type_of_cuisine_3.unique().tolist()
toc = toc1 + toc2 + toc3

# Missing values are removed explicitly. Slicing `list(set(toc))[2:]` dropped two
# arbitrary entries, because set iteration order is not stable between sessions.
# Sorting makes the result reproducible across kernel restarts.
unique_toc = sorted(label for label in set(toc) if pd.notna(label))

In [11]:
# Hand-curated mapping from the raw cuisine labels to a broader category.
# np.nan marks labels that carry no usable cuisine information.
unique_toc_dict = {
    'thai': 'asian',
    'fine-dining': 'middle eastern',
    'cheese': np.nan,
    'fusion': np.nan,
    'butterchicken': 'middle eastern',
    'chinese': 'asian',
    'traditional': np.nan,
    'café': 'cafes',
    'german': 'european',
    'cocktail': 'bars',
    'hamburger': 'american',
    'baklava': 'middle eastern',
    'pokebowl': 'poke',
    'bagel': 'breakfast',
    'waffles': 'cafes',
    'worklunch': 'fastfood',
    'tapas': 'mediterranean',
    'mediterranean': 'mediterranean',
    'pastries': 'snacks',
    'sliders': 'fastfood',
    'turkish': 'middle eastern',
    'steak': 'steak',
    'snacks': 'snacks',
    'Sashimi': 'asian',
    'pancakes': 'breakfast',
    'Georgian': 'european',
    'risotto': 'italian',
    'glutenfree': 'healthy',
    'bento': 'asian',
    'bistro': 'european',
    'shawarma': 'middle eastern',
    'meatballs': 'european',
    'sushi': 'asian',
    'fish': 'seafood',
    'Don': np.nan,
    'bakery': 'breakfast',
    'delicious': np.nan,
    'mexican': 'mexican',
    'summerrolls': 'asian',
    'chickennuggets': 'fastfood',
    'korean': 'asian',
    'vegan': 'vegetarian or vegan',
    'chocolate': 'snacks',
    'porridge': 'breakfast',
    'Schnitzel': 'european',
    'Arabic': 'middle eastern',
    'moussaka': 'middle eastern',
    'Dessert': np.nan,
    'vegetarian': 'vegetarian or vegan',
    'donut': 'breakfast',
    'friedchicken': 'fastfood',
    'smoothie': 'breakfast',
    'beer': 'bars',
    'indian': 'middle eastern',
    'rice': 'asian',
    'fruit': 'breakfast',
    'icecoffee': 'breakfast',
    'Pastrami': np.nan,
    'dumplings': 'asian',
    'currywurst': 'fastfood',
    'asian': 'asian',
    'pizza': 'italian',
    'grill': 'steak',
    'seafood': 'seafood',
    'wine': 'bars',
    'schnitzel': 'european',
    'american': 'american',
    'healthy': 'healthy',
    'hummus': 'middle eastern',
    'russian': 'russian',
    'Donburi': np.nan,
    'baguette': 'breakfast',
    'salad': 'healthy',
    'fries': 'fastfood',
    'Austrian': 'european',
    'gyoza': 'asian',
    'potato': np.nan,
    # NOTE(review): this is the string 'nan', not np.nan, so 'naan' maps to a
    # category literally called "nan" - confirm the intended value.
    'naan': 'nan',
    'icecream': 'ice',
    'pita': 'mediterranean',
    'sausage': 'european',
    'neapolitanpizza': 'italian',
    'spaghetti': 'italian',
    'cake': 'cafes',
    'ribs': 'american',
    'dessert': np.nan,
    'Doughnut': 'breakfast',
    'matcha': 'breakfast',
    'focaccia': 'italian',
    'homemade': np.nan,
    'milkshake': 'american',
    'taco': 'mexican',
    'curry': 'fastfood',
    'israeli': 'middle eastern',
    # NOTE(review): empty-string category, unlike the np.nan used for other
    # unmapped labels - confirm the intended value.
    'bapburgersandpastrami': '',
    'sandwich': 'fastfood',
    'bowl': 'poke',
    'maki': 'asian',
    'roll': 'asian',
    'brunch': 'breakfast',
    'pho': 'asian',
    'vietnamese': 'asian',
    'burgers': 'fastfood',
    'muchapizza': 'italian',
    'masala': 'middle eastern',
    'contemporary': np.nan,
    'falafel': 'middle eastern',
    'gyros': 'greek',
    'friedrice': 'asian',
    'chicken': 'snacks',
    'italian': 'european',
    'spaetzle': 'european',
    'streetfood': 'fastfood',
    'Mozzarella': 'italian',
    'ramen': 'asian',
    'antipasti': 'italian',
    'noodles': 'italian',
    'hotdog': 'snacks',
    'coffee': 'cafes',
    'oriental': 'middle eastern',
    'bubbletea': 'asian',
    'greek': 'mediterranean',
    'middleeastern': 'middle eastern',
    'pasta': 'italian',
    'fresh': 'healthy',
    'tandoori': 'middle eastern',
    'wrap': 'healthy',
    'european': 'european',
    'tea': 'breakfast',
    'bao': 'asian',
    'beyondmeat': 'vegetarian or vegan',
    'duck': 'asian',
    'galette': 'european',
    'panini': 'italian',
    'soup': 'healthy',
    'fastfood': 'american',
    'LatinAmerican': 'south american',
    'juice': 'breakfast',
    'halal': 'middle eastern',
    'burger': 'american',
    'vegetable': 'vegetarian or vegan',
    'Hawaii': 'american',
    'breakfast': 'breakfast',
    'poke': 'poke',
    'drinks': 'bars',
    'burrito': 'mexican',
    'salmon': 'seafood',
    'homecooking': np.nan,
    'spanish': 'mediterranean',
    'meat': 'steak',
    'french': 'european',
    'veggieburger': 'vegetarian or vegan',
    'lunch': 'steak',
    'meze': 'asian',
    'homemademeals': np.nan,
    'kebab': 'middle eastern',
    'beef': 'steak',
    'tex-mex': 'mexican',
    'japanese': 'asian'
}

# Add an entry for every label found in the data that the curated mapping above
# does not cover yet: use food_types_categories when it knows the label, NaN
# otherwise. Curated entries are left untouched - unconditionally assigning here
# would replace every curated value whose key occurs in the data (for example
# 'pizza': 'italian' would become 'european' and 'hamburger' would become NaN).
for cuisine_label in unique_toc:
    if cuisine_label not in unique_toc_dict:
        unique_toc_dict[cuisine_label] = food_types_categories.get(
            cuisine_label, np.nan)




In [13]:
# replace the raw label in each cuisine column with its category from unique_toc_dict
# (labels that are not keys of the dict become NaN)
for cuisine_column in ("type_of_cuisine_1", "type_of_cuisine_2", "type_of_cuisine_3"):
    wolt_df[cuisine_column] = wolt_df[cuisine_column].map(unique_toc_dict)