In [61]:
## import json
import os
import shutil
from base64 import b64encode
from http.client import HTTPSConnection
import yaml
import requests
import pandas as pd
import os
import re

'''
Orginally I wanted to save the user/pass as an enviromental variable. However, since
jupyterlab is called as a service, it doesn't load .bashrc therefore, it's not
possible to call enviro values while running in juypterlab. However, the end goal
is to run this script outside of jupyter to gather recipes and push to github. 
So getting vars from .bashrc should work. 
'''

#Try to get from enviro var. Need to check if this works when ran outside jupyter
email = os.environ.get('PAPRIKAAPPEMAIL')
password = os.environ.get('PAPRIKAAPPPASSWORD')

#If email or password wasn't found, read bashrc directly and find the vars
if email == None or password == None:
    print('Brute force - read .bashrc and get user/pass')
    f = open("/home/pi/.bashrc", "r")
    lines = f.readlines()

    for line in lines:
        matchEmail = re.match( r'(export PAPRIKAAPPEMAIL=)(.*)', line, re.MULTILINE)
        if matchEmail:
            email = matchEmail.group(2)

        matchPassword = re.match( r'(export PAPRIKAAPPPASSWORD=)(.*)', line, re.MULTILINE)
        if matchPassword:
            password = matchPassword.group(2)


c = HTTPSConnection("www.paprikaapp.com")
userAndPass = b64encode(bytes(email+":"+password, 'utf-8')).decode("ascii")
headers = { 'Authorization' : 'Basic %s' %  userAndPass }

c.request('GET', '/api/v1/sync/categories/', headers=headers)
res = c.getresponse()
data = res.read()
categories = {}
for item in json.loads(data)['result']:
    categories[item['uid']] = item['name']

c.request('GET', '/api/v1/sync/recipes/', headers=headers)
res = c.getresponse()
data = res.read()
#print(data)

df = pd.DataFrame()

recipes = []
for item in json.loads(data)['result']:
    c.request('GET', '/api/v1/sync/recipe/'+item['uid']+'/', headers=headers)
    res = c.getresponse()
    data = res.read()
    recipe = json.loads(data)['result']
    
    #Check to see if it's in the Drinks catergory
    if 'F59B5D0D-9C8A-41FD-8134-F26921D1B19A-18063-0000098A8D94DC39' in recipe['categories']:
        #print(recipe)
        
        df = df.append(recipe, ignore_index=True)


In [62]:
df.dtypes

categories           object
cook_time            object
created              object
description          object
difficulty           object
directions           object
hash                 object
image_url            object
in_trash            float64
ingredients          object
is_pinned           float64
name                 object
notes                object
nutritional_info     object
on_favorites        float64
on_grocery_list      object
photo                object
photo_hash           object
photo_large          object
photo_url            object
prep_time            object
rating              float64
scale                object
servings             object
source               object
source_url           object
total_time           object
uid                  object
dtype: object

In [75]:
import re
df[ (df['ingredients'].str.contains(r'(gin)', flags=re.IGNORECASE)) & (df['ingredients'].str.contains(r'(lemon|lime)', flags=re.IGNORECASE))  ]['ingredients']

  return func(self, *args, **kwargs)


3     2 ounces gin\n3/4 ounce sweet vermouth\n3/4 ou...
4     1 1/2 ounces (45ml) bourbon\n2 ounces (60ml) d...
6       2 oz gin\n1 Oz orgeat\n.75 lemon juice\nBitters
11    4 oz. Ginger Beer\n1½ oz. Bourbon\n½ oz. Black...
17    .25 oz freshly squeezed lemon juice\n3-4 mint ...
20    1 1/2 ounces (45ml) Cynar\n1 1/2 ounces (45ml)...
23    2 oz gin\n1 tablespoons honey\n2 tablespoons f...
24    ice\n1 ounce gin (Tanqueray or Beefeater recom...
25    Small handful of blackberries\nMint\n1-1.5 oz ...
26    12 blackberries\n20 fresh mint leaves\n2 Meyer...
28    1.5 oz. NOLET’S Silver Gin\n1 oz. Fresh Lime J...
30    1 lemon\n3 tablespoons (1 1/2 ounces) gin\n1 1...
31    1.5 oz. NOLET’S Silver Gin\n1 oz. Lemon Juice\...
32    2 ounces gin\n1/2 ounce lemon juice\n1/2 ounce...
34    2 ounces London dry gin\n3/4 ounces triple sec...
35    Adapted from Bon Appetit\nFor 2 tall drinks\n1...
39    3 mint springs\n1 and ½ ounce gin\n1/2 ounce s...
40    Rosemary Syrup\n1/2 cup (125ml) water\n1/2

In [67]:
#Export all recipes
jsonRecipeExport = df.to_json(orient="index")

with open('recipes.json', 'w') as outfile:
    outfile.write(jsonRecipeExport)

In [64]:
#By default, ingredients are seperated by \n. Replace with space
#df['ingredients'] = df['ingredients'].str.replace('\n', '-')

#Need to clean up ingredients
listOfIngredients = df['ingredients'].tolist()

In [66]:
#'Flatten list to string'
ingredientList = ''
ingredientList = ingredientList.join(listOfIngredients)

#I believe the last item per recipe doesn't have a \n, so 
# it ends up with WordAWordB. Need to fix
ingredientList = ingredientList.split('\n')

cleanedIngredientsList = []

for _line in ingredientList:
    #print(_line)
    
    #Clean out measurements
    regexMeasurements = re.compile(r'\bounce.\b|\boz.\b|\bm[l|L]\b|\bcup.\b|\btablespoon.\b|\bpart.\b', re.IGNORECASE)
    _line = re.sub(regexMeasurements, '', _line)
    
    #Clean out numbers
    regexNumbers = re.compile(r'(\d/\d)|(\d)|(\.\d*)|(¾)|(½)|(¼)', re.IGNORECASE)
    _line = re.sub(regexNumbers, '', _line)
    
    #Single words or characters - Ignore case
    regexSingleWords = re.compile(r'\bfreshly\b|\bgarnish\b|\bdash\b|\bdashes\b|\bsqueeze\b|\bsqueezed\b|\bglass\b|\band\b|\bor\b|\bice\b|', re.IGNORECASE)
    _line = re.sub(regexSingleWords, '', _line)
    
    #More words
    regexMoreWords = re.compile(r'\bcube\b|\byour\b|\bfavorite\b|\bother\b|\bgood-quality\b|\boptional\b|\bto\b|\bfrom\b|\bfor\b', re.IGNORECASE)
    _line = re.sub(regexMoreWords, '', _line)
    
    #Anything in quotes
    regexQuotes = re.compile(r'\(.*\)', re.IGNORECASE)
    _line = re.sub(regexQuotes, '', _line)

    #Remove special characters
    regexSpecialChars = re.compile(r'\:|,|\*|-|\(|\)|&|\*|\%', re.IGNORECASE)
    _line = re.sub(regexSpecialChars, '', _line)
    
    #Remove \r \n
    _line = _line.strip()
    
    #Remove lines that are too short or empty
    if len(_line) < 2:
        pass
    else:
        #print(_line)
        cleanedIngredientsList.append(_line)
        pass

print(cleanedIngredientsList)


['bourbon    rye whiskey', 'Cointreau   orange liqueur', 'sweet vermouth', 'lemon juice', 'Brandied cherries  lemon peel    white rum', 'maraschino liqueur', 'grapefruit juice', 'lime juice', 'simple syrup', 'lime wedge oz Belle Meade bourbon', 'oz grapefruit juice', 'oz lemon juice', 'oz Campari', 'oz simple syrup  gin', 'sweet vermouth', 'dry vermouth', 'orange juice   orange juice   lemon juice', 'orange bitters    bourbon', 'dry hard cider', 'ginger beer', 'Angostura bitters', 'Large lemon wedge   Rittenhouse Rye Whiskey', 'Fresh lemon juice', 'oz Simple syrup', 'Huckleberry Shrub', 'Egg white', 'Coupe', 'Lemon zest  huckleberry shrub spray', 'Huckleberries', 'Cider vinegar', 'Sugar gin', 'orgeat', 'lemon juice', 'Bitters  blanco tequila', 'Kahlua Coffee Liqueur', 'fresh grapefruit juice', 'fresh lime juice', 'teaspoon Cholula   hot sauce', 'grapefruit wheels sliced in half', 'Flaky salt  Siete Misterios DobaYej Mezcal', 'mL  Campari', 'mL  Lemon Juice', 'mL  Orange Juice', 'mL  Es

In [69]:
#Export keywords - Need to remove [] from file
with open('keywords.json', 'w') as outfile:
    json.dump(cleanedIngredientsList, outfile)

In [None]:
'''
End goal is to make another netilfy page. No flask

Need to load the username and password from enviromental variables
https://www.nylas.com/blog/making-use-of-environment-variables-in-python/

Create a python script to export a json object with all recipes.
Use code above - export json to file

The rest is all html/javascript/bootstrap
 - Word cloud where clicking on word will add AND term to search
 - All words are from the ingredients
 


'''