# Data Format And Export
1. Classify ingredients
2. Formats cocktails & generates cocktails_db.pkl file <i>(deployed with flask server)</i>
3. Formats ingredients & generates ingredients_db.pkl file

In [7]:
import pandas as pd
import pickle
import re
from collections import OrderedDict, Counter
from data_utils import preprocess, name_to_id

# Load the cocktail records from cleaned_cocktails.pkl. The context manager closes the file
# handle as soon as loading finishes instead of leaving it open until garbage collection.
# NOTE: pickle.load can execute arbitrary code -- only load pickle files from a trusted source.
with open('cleaned_cocktails.pkl', 'rb') as cocktails_file:
    cocktails = pickle.load(cocktails_file)

# 1. Classify & Format Ingredients 

In [8]:
# Manual ingredient clean-up annotations, exported from this sheet:
# https://docs.google.com/spreadsheets/d/1vx8_zaeBUoiW3gm0SWQSicsTAOnes82owozXdIyVHP0/edit#gid=1831899176
# The first CSV column is used as the index; manual names are title-cased on load.
idf = pd.read_csv('manual_ingredient_annotation.csv', index_col=0).assign(
    manual_name=lambda frame: frame.manual_name.str.title()
)

In [9]:
# Flatten every cocktail's ingredient list into one row per ingredient occurrence.
ingredient_rows = [ing for cocktail in cocktails for ing in cocktail['ingredients']]
df = pd.DataFrame(ingredient_rows)
print('-----> Found {} total ingredients for {} total cocktails'.format(len(df), len(cocktails)))

# Keep the first occurrence of each distinct ingredient name.
df = df.drop_duplicates('name')
print('-----> Found {} unique ingredients de-duping on name'.format(len(df)))

# Splice in manual annotations (only the name and link columns are carried forward).
# NOTE(review): how='inner' drops every ingredient whose name is absent from the annotation
# sheet, so such names will also be missing from ing_map -- confirm the sheet covers all names.
df = pd.merge(df[['name', 'link']], idf[['name', 'manual_name']], how='inner', on='name')
df['original_name'] = df.name

# Prefer the manual name when one was provided, otherwise keep the scraped name.
has_manual_name = df['manual_name'].notnull()
df['name'] = df['manual_name'].where(has_manual_name, df['name'])

df['objectID'] = df.name.map(name_to_id)

# Lookup table: scraped ingredient name -> final (possibly manually corrected) name.
ing_map = dict(zip(df['original_name'], df['name']))

-----> Found 13412 total ingredients for 2980 total cocktails
-----> Found 689 unique ingredients de-duping on name


# 2. Format Cocktails & Generate cocktails_db.pkl File

In [10]:
def format_cocktail_ingredient(ing):
    """Return a copy of an ingredient record with its name normalised through ``ing_map``.

    The record is copied rather than modified in place. The input dicts belong to the
    module-level ``cocktails`` list, so mutating them made the calling cell non-idempotent:
    on a second run ``original_name`` would be overwritten with the already-mapped name and
    the ``ing_map`` lookup could raise ``KeyError``.

    Args:
        ing: mapping describing one cocktail ingredient; must contain a ``'name'`` key.

    Returns:
        A new dict holding every key of ``ing`` plus
        ``original_name`` (the name as found in ``ing``),
        ``name`` (the value ``ing_map`` stores for that original name) and
        ``id`` (``name_to_id`` applied to the mapped name).

    Raises:
        KeyError: if ``ing`` has no ``'name'`` key or its name is not a key of ``ing_map``.
    """
    formatted = dict(ing)  # shallow copy: leave the caller's record untouched
    formatted['original_name'] = ing['name']
    formatted['name'] = ing_map[formatted['original_name']]
    formatted['id'] = name_to_id(formatted['name'])
    return formatted
    
# Build the cocktail table: one row per record in `cocktails`.
cdf = pd.DataFrame(cocktails)
cdf['objectID'] = cdf.name.map(name_to_id)
# Normalise every ingredient of every cocktail (adds original_name / name / id to each record).
cdf['ingredients'] = cdf.ingredients.map(lambda x: [format_cocktail_ingredient(y) for y in x])
cdf['num_ingredients'] = cdf.ingredients.map(len)
# Per-cocktail list of ingredient ids, in ingredient order.
cdf['ingredient_categories'] = cdf.ingredients.map(lambda x: [y['id'] for y in x])
# Write through a context manager so the pickle is flushed and the handle closed immediately,
# rather than relying on garbage collection of an anonymous file object.
with open('cocktails_db.pkl', 'wb') as cocktails_db_file:
    pickle.dump(cdf, cocktails_db_file)

# 3. Formats Ingredients & Generates ingredients_db.pkl File

In [11]:
# Get # cocktails per ingredient (used for ranking purposes)
c = Counter(ing for ing_list in cdf.ingredient_categories for ing in ing_list)
# One row per ingredient id, most frequently used first.
icdf = pd.DataFrame(c.most_common(), columns=['objectID', 'count'])
# Display name: the id with hyphens turned back into spaces.
icdf['name'] = icdf.objectID.map(lambda x: x.replace('-', ' '))
# priority is 1 when any key of `mapping` occurs as a plain substring of the display name
# (the compiled regexes stored as the mapping's values are not used here), else 0.
# NOTE: `mapping` is only defined in the SCRATCH section further down this notebook, so on a
# fresh kernel run top-to-bottom this line raises NameError unless that cell was run first.
icdf['priority'] = icdf.name.map(lambda x: 1 if any(v in x for v in mapping) else 0)
# Write through a context manager so the pickle is flushed and the handle closed immediately.
with open('ingredients_db.pkl', 'wb') as ingredients_db_file:
    pickle.dump(icdf, ingredients_db_file)



# SCRATCH

## Explore Ingredient ngrams

In [39]:
def get_ngrams(string, n=1):
    """Return the space-joined n-grams of a hyphen-delimited string.

    Args:
        string: text whose tokens are separated by ``'-'``.
        n: number of consecutive tokens per n-gram (default 1).

    Returns:
        A list of strings, one per window of ``n`` consecutive tokens, in order of
        appearance. The list is empty when ``n`` is below 1 or larger than the token count.
    """
    parts = string.split('-')
    if n < 1:
        return []
    window_count = len(parts) - n + 1
    return [" ".join(parts[start:start + n]) for start in range(window_count)]
        
from collections import Counter
# Count hyphen-delimited bigrams (get_ngrams with n=2) over every value of df.name_clean.
# NOTE(review): none of the visible cells creates a `name_clean` column on `df` -- presumably it
# came from an earlier version of the notebook; confirm before relying on this cell.
# This also rebinds `c`, the name the ingredients_db cell uses for its per-ingredient counter.
c = Counter([ngram for x in df.name_clean.tolist() for ngram in get_ngrams(x, 2)]) 

In [6]:
def cr(keywords):
    r"""Compile a single regex that matches any of ``keywords`` between word boundaries.

    Every keyword is wrapped in ``\b`` anchors and the results are OR-ed together.
    Keywords are inserted verbatim (they are not escaped), so a keyword may itself be a
    regex fragment.

    Args:
        keywords: iterable of pattern strings.

    Returns:
        The compiled pattern ``\bk1\b|\bk2\b|...``.
    """
    alternation = r'\b|\b'.join(keywords)
    pattern = r'\b' + alternation + r'\b'
    return re.compile(pattern)

# Category table: category label -> compiled regex (built by cr) whose alternatives are the
# keywords that identify an ingredient of that category. Keywords are regex fragments
# (e.g. 'coffee.*liqueur', 'ice cubes?'), each wrapped in \b word boundaries by cr.
#
# Entry order matters for the commented-out `classify` helper below, which returns the first
# key whose regex matches: more specific labels are listed before the generic ones they
# contain (e.g. 'coffee liqueur' before 'coffee', 'tonic water' before 'water').
# The insertion order seen by OrderedDict is the order of this dict literal.
#
# The ingredients_db cell only iterates the keys of this mapping (substring test on the
# ingredient name); the compiled regexes are not used there.
mapping = OrderedDict({
    # --- spirits, fortified wines and liqueurs ---
    'vodka':cr(['vodka']),
    'tequila':cr(['tequila']),
    'vermouth':cr(['vermouth', 'noilly prat extra dry']),
    'mezcal':cr(['mezcal']),
    'brandy':cr(['brandy', 'avallen calvados']),
    'rum':cr(['rum']),
    'whiskey':cr(['whiskey', 'scotch', 'whisky']),
    'gin':cr(['gin']),
    'amaro':cr(['amaro']),
    'sherry':cr(['sherry']),
    'coffee liqueur': cr(['coffee.*liqueur', 'kahlua']),
    'irish cream liqueur':cr(['irish cream liqueur']),
    'cinnamon schnapps':cr(['cinnamon schnapps']),
    'apple schnapps':cr(['apple schnapps']),
    'peach schnapps':cr(['peach schnapps']),
    'triple sec': cr(['triple sec']),
    'st-germain': cr(['st-germain', 'st germain', 'stgermain']),
    'sambuca': cr(['sambuca']),
    'grand marnier':cr(['grand marnier']),
    'cognac':cr(['cognac']),
    # 'soda water' is folded into the 'tonic water' category.
    'tonic water':cr(['tonic water', 'soda water']),
    'white wine':cr(['white wine']),
    'red wine':cr(['red wine']),
    # Matches the bare word 'port', not only 'port wine'.
    'port wine':cr(['port']),
    'simple syrup':cr(['sugar syrup','simple syrup', 'cane syrup']),
    'bitters':cr(['bitters']),
    # The lime / lemon / orange "juice" categories match the bare fruit name.
    'lime juice':cr(['lime']),
    'lemon juice':cr(['lemon']),
    'apple juice':cr(['apple juice']),
    'pineapple juice':cr(['pineapple juice']),
    'cranberry juice':cr(['cranberry juice']),
    'absinthe':cr(['absinthe']),
    'coffee':cr(['coffee', 'espresso']),
    'egg':cr(['egg']),
    'orange juice':cr(['orange']),
    'tea':cr(['tea']),
    'grenadine':cr(['grenadine']),
    'ginger ale':cr(['ginger ale']),
    # Only the two-word form 'kiwi fruit' is matched.
    'kiwi':cr(['kiwi fruit']),
    'watermelon':cr(['watermelon']),
    'passion fruit':cr(['passion fruit']),
    'champagne':cr(['champagne']),
    'amaretto':cr(['amaretto']),
    'worcestershire sauce':cr(['worcestershire sauce']),
    'kumquats':cr(['kumquats']),
    'basil':cr(['basil']),
    'sprite':cr(['lemonlime soda', 'sprite', '7up']),
    'cola':cr(['cola']),
    'figs':cr(['figs']),
    'mint':cr(['mint']),
    'pear':cr(['pear']),
    'water':cr(['water']),
    # 'ice cubes?' matches 'ice cube' and 'ice cubes'; the bare word 'ice' is not matched.
    'ice':cr(['ice cubes?'])
})

# # def classify(ingredient):
# #     for key, reg in mapping.items():
# #         if reg.findall(ingredient):
# #             return key
# #     return ingredient

# # df['category'] = df.name_clean.map(classify)
# # ing_map = dict(df[['objectID', 'category']].values)

In [3]:
# Scratch sample: a JSON-like cocktail payload kept verbatim as a string. It is not valid
# JSON as written (unquoted values, unterminated strings, unbalanced brackets).
js = "\n{\n  \"name\": \"Highland Mule\",\n  \"ingredients\": [\n    {\n      \"quantity\": 2 fl oz, \n      \"name\": Scotch Whiskey\n    }, \n    {  \n      \"quantity\": 1/2 fl oz, \n      \"name\": Lime Juice  \t\t\t\t  \t       }, \n\n    {  \n       \"quantity: 1 egg white\"     ,             },\n\n    {                                         }\n\n     {                                        }\n\n     {\"Quantity: 3/4 fl oz White Wine        }, \n\n     {\"Quantity: 3/4 cup Coca Cola           },\n\n     {\"Quantity: 1/2 cup Sprite              },\n\n     {\"Quantity: 1/4 cup Cranberry Juice     },\n\n     {\"Quantity: 4 dashes Orange Bitters      }],                   ],                  ]},               ],              ]},            ],          ]},        ],         ],       ],\"directions\":\"Shake all ingredients with ice and strain into a chilled glass. Garnish with a lime wedge.\",\"serving container\":\"Collins Glass\"}"

In [7]:
import json

In [11]:
from ast import literal_eval

In [None]:
# NOTE: this cell originally held an unfinished assignment (`js =` with no right-hand side).
# That is a SyntaxError and the cell was never executed; it is kept as a comment so that
# Restart & Run All is not halted here.
# js =

In [17]:
# Scratch check: this payload contains only string, list and dict literals, so it is also a
# valid Python literal and literal_eval returns the dict shown in the output below.
literal_eval('{"name": "Bald Eagle","ingredients": ["2 fl oz of Reposado Tequila","3/4 fl oz of Grapefruit Juice","1/2 fl oz of Cranberry Juice", "1/4 fl oz of Lemon Juice", "1/4 fl oz of Simple Syrup"], "directions": "Throw all ingredients with ice and strain into chilled glass", "serving_container": "couple glass"}')

{'name': 'Bald Eagle',
 'ingredients': ['2 fl oz of Reposado Tequila',
  '3/4 fl oz of Grapefruit Juice',
  '1/2 fl oz of Cranberry Juice',
  '1/4 fl oz of Lemon Juice',
  '1/4 fl oz of Simple Syrup'],
 'directions': 'Throw all ingredients with ice and strain into chilled glass',
 'serving_container': 'couple glass'}

In [14]:
# Scratch check on a malformed payload: the Sprite entry has an unterminated string
# ("3/4fl oz}) and the last quantity (1 dash) is unquoted, so this call raises the
# SyntaxError recorded in the output below.
literal_eval('{\"name\": \"Scotch and Lime Fizz\",\"ingredients\": [{\"name\": \"Scotch Whiskey\",\"quantity\": \"2 fl oz\"},{\"name\": \"Lime Juice\",\"quantity\":\"1/2 fl oz\"},{\"name\":\"Egg White\",\"quantity\":\"1 ea\"},{\"name\":\"White Wine\",\"quantity\":\"3/4 fl oz\"},{\"name\":\"Coca Cola\",\"quantity\":\"3/4 fl oz\"},{\"name\":\"Sprite\",\"quantity\":\"\",\"3/4fl oz},{\"Name\" :\"Cranberry Juice\", \"Quantity\" :\"1/4 fl ox\"}, {\"Name\":  \"Orange Bitters\",  \"Quantity\" :  1 dash}],\"directions\":\"\",\"Fill shaker with ice, add all ingredients except the soda, shake vigorously. Strain into a highball glass filled with fresh ice and top off with soda. Garnish with lime wedge.\",\"serving_container\":     \"highball glass\"}')

SyntaxError: invalid syntax (<unknown>, line 1)

In [5]:
# Scratch sample: a malformed JSON payload kept verbatim as a string (the Sprite entry has an
# unterminated string and the last quantity is unquoted). It is parsed in the next cell.
js = '{\"name\": \"Scotch and Lime Fizz\",\"ingredients\": [{\"name\": \"Scotch Whiskey\",\"quantity\": \"2 fl oz\"},{\"name\": \"Lime Juice\",\"quantity\":\"1/2 fl oz\"},{\"name\":\"Egg White\",\"quantity\":\"1 ea\"},{\"name\":\"White Wine\",\"quantity\":\"3/4 fl oz\"},{\"name\":\"Coca Cola\",\"quantity\":\"3/4 fl oz\"},{\"name\":\"Sprite\",\"quantity\":\"\",\"3/4fl oz},{\"Name\" :\"Cranberry Juice\", \"Quantity\" :\"1/4 fl ox\"}, {\"Name\":  \"Orange Bitters\",  \"Quantity\" :  1 dash}],\"directions\":\"\",\"Fill shaker with ice, add all ingredients except the soda, shake vigorously. Strain into a highball glass filled with fresh ice and top off with soda. Garnish with lime wedge.\",\"serving_container\":     \"highball glass\"}'

In [9]:
# Attempt to parse the scratch payload held in `js` after trimming surrounding whitespace.
# With the malformed payload above this raises the JSONDecodeError recorded in the output below.
json.loads(js.strip())

JSONDecodeError: Expecting ':' delimiter: line 1 column 316 (char 315)

In [10]:
# Bare name lookup left over from exploration. The recorded NameError comes from running this
# cell (In [10]) before the cell that imports literal_eval (In [11]).
literal_eval

NameError: name 'literal_eval' is not defined