In [43]:
import re
import csv
import pandas as pd
from collections import Counter
from nltk.corpus import wordnet as wn

In [14]:
adj_exceptions = ['gratinated', 'indian', 'greek']

def is_adjective(expr):
    global adj_exceptions
    for word in expr.split():
        s = wn.synsets(word)        
        if (len(s) == 0 or s[0].pos() == 'n') and word not in adj_exceptions:
            return False
    return True


def correct(s):
    return s.replace(
        ',', " and").replace(
        '-', ' ').replace(        
        '&', "and").replace(
        "garnish", '').replace(
        "bell pepper", "bellpepper").replace(
        "or ", '')

def parse_name(s):
    parsed = re.split('/|with|"|and', correct(s))
    parsed = [x.strip().lower() for x in parsed if len(x.strip())]
    items = []
    acc = ""
    for i, w in enumerate(parsed):
        if i < len(parsed) - 1 and is_adjective(w):
                acc += w + ' '
        else:
            items.append(acc + w)
            acc = ''
    return items

In [26]:
df = pd.read_csv("./sv_restaurant_data.csv")
df = df.apply(lambda x: [parse_name(y) for y in x] if x.name == 'name' else x)
display(df)

Unnamed: 0,year,week,start_date,end_date,day,date,category,subcategory,price,allergens,name
0,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Soups,,1.10,ACGL,[beef broth]
1,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Soups,,1.10,AG,[cream of corn soup]
2,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Menus,home-style cooking,4.00,AC,"[pasta, ham, small salad]"
3,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Menus,vegetarian,5.00,AFG,"[falafel balls, tomato eggplant salad, mint yo..."
4,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Menus,fish & seafood,6.00,AD,"[grilled hake, ratatouille, olive potatoes]"
5,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Dessert,,1.30,ACG,[poppy seed cheese cake]
6,2019,8,2019-02-25,2019-03-01,Tuesday,2019-02-26,Soups,,1.10,ACGL,[beef broth]
7,2019,8,2019-02-25,2019-03-01,Tuesday,2019-02-26,Soups,,1.10,AG,[creamed garlic soup]
8,2019,8,2019-02-25,2019-03-01,Tuesday,2019-02-26,Menus,vegetarian,4.00,AH,"[pumpkin spinach pan, dates, nuts, bulgur]"
9,2019,8,2019-02-25,2019-03-01,Tuesday,2019-02-26,Menus,home-style cooking,5.00,GM,"[käsekrainer, french fries, onion mustard]"


In [62]:
def get_schnitzel_names(df):
    schnitzel_names = df[(df["day"] == "Wednesday") & (df["subcategory"] == "home-style cooking")]
    return schnitzel_names.name.map(lambda x: x[0])

def get_average_prices(df):
    mains = df[df["category"] == "Menus"]
    for v in mains.subcategory.unique():
        prices = df[df["subcategory"] == v].price
        avg = prices.mean()
        print(v, avg)
                
def get_frequencies(df, day=None):
    mains = df[df["category"] == "Menus"]
    if day is None:
        return df.subcategory.value_counts()
    else:
        return df[df["day"] == day].subcategory.value_counts()
        
def workcloud(df, day=None):
    words = list(df.name)
    words = [y for l in words for x in l for y in x.replace("of", '').replace("sauce", '').split()]
    counts = Counter(words)
    if day is None:
        return counts
    else:
        words = list(df[df["day"] == day].name)
        #words = [x for l in words for x in l]
        words = [y for l in words for x in l for y in x.replace("of", '').replace("sauce", '').split()]
        ncounts = Counter(words)
        total = sum(counts.values(), 0.0)
        for key in counts:
            counts[key] /= total
        print(counts).most_common(15)
        ncounts.subtract(counts)
        return ncounts
        
workcloud(df, day="Wednesday").most_common(100)

Counter({'beef': 0.05055487053020962, 'broth': 0.04110152075626798, 'soup': 0.037813399095766545, 'cream': 0.027127003699136867, 'rice': 0.027127003699136867, 'cake': 0.02548294286888615, 'potatoes': 0.018906699547883273, 'vegetables': 0.01726263871763255, 'potato': 0.01726263871763255, 'salad': 0.01726263871763255, 'fried': 0.013563501849568433, 'cheese': 0.013563501849568433, 'chicken': 0.013152486642005754, 'pork': 0.012741471434443074, 'herb': 0.011508425811755036, 'tomato': 0.011097410604192354, 'breaded': 0.009864364981504316, 'noodles': 0.009864364981504316, 'creamed': 0.009453349773941636, 'vegetable': 0.009453349773941636, 'spinach': 0.009042334566378957, 'turkey': 0.009042334566378957, 'curry': 0.009042334566378957, 'filet': 0.008220304151253596, 'garlic': 0.007809288943690916, 'bread': 0.007398273736128237, 'grilled': 0.007398273736128237, 'dip': 0.007398273736128237, 'schnitzel': 0.006576243321002877, 'pasta': 0.006576243321002877, 'yogurt': 0.006165228113440197, 'strudel':

AttributeError: 'NoneType' object has no attribute 'most_common'

In [25]:
with open("./sv_restaurant_data.csv", 'r') as f:
    r = csv.DictReader(f)
    for line in r:
        print(parse_name(line["name"]))

['beef broth']
['cream of corn soup']
['pasta', 'ham', 'small salad']
['falafel balls', 'tomato eggplant salad', 'mint yogurt']
['grilled hake', 'ratatouille', 'olive potatoes']
['poppy seed cheese cake']
['beef broth']
['creamed garlic soup']
['pumpkin spinach pan', 'dates', 'nuts', 'bulgur']
['käsekrainer', 'french fries', 'onion mustard']
['gratinated chicken filet', 'tomato', 'mozzarella', 'bell peper', 'zucchini', 'herb pasta']
['latte macchiato cake']
['beef broth']
['minestrone']
['potato vegetable strudel', 'leaf salad', 'herb dip']
['breaded fried chicken schnitzel', 'rice']
['wok noodles', 'turkey strips', 'vegetables', 'cashews']
['yogurt fruit cake']
['beef broth']
['cream of cauliflower soup']
['grilled leberkäse', 'green beans', 'dill', 'potatoes']
['jambalaya', 'bulgur']
['esterházy beef escalope', 'green pasta', 'root vegetables']
['apricot cake']
['beef broth']
['creamed basil soup']
['buttermilk', 'vegetable curry', 'spelt']
['breaded fried plaice', 'parsley potatoes'

In [7]:
f

Unnamed: 0,year,week,start_date,end_date,day,date,category,subcategory,price,allergens,name
0,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Soups,,1.10,ACGL,beef broth/garnish
1,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Soups,,1.10,AG,cream of corn soup
2,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Menus,home-style cooking,4.00,AC,pasta with ham/small salad
3,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Menus,vegetarian,5.00,AFG,falafel balls/tomato eggplant salad/mint yogurt
4,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Menus,fish & seafood,6.00,AD,grilled hake/ratatouille/olive potatoes
5,2019,8,2019-02-25,2019-03-01,Monday,2019-02-25,Dessert,,1.30,ACG,poppy seed cheese cake
6,2019,8,2019-02-25,2019-03-01,Tuesday,2019-02-26,Soups,,1.10,ACGL,beef broth/garnish
7,2019,8,2019-02-25,2019-03-01,Tuesday,2019-02-26,Soups,,1.10,AG,creamed garlic soup
8,2019,8,2019-02-25,2019-03-01,Tuesday,2019-02-26,Menus,vegetarian,4.00,AH,"pumpkin spinach pan/dates, nuts/bulgur"
9,2019,8,2019-02-25,2019-03-01,Tuesday,2019-02-26,Menus,home-style cooking,5.00,GM,"""Käsekrainer""/french fries/onion mustard"
