In [26]:
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from src.data_processing import get_recipe_df, mean_scale_recipes, drop_uncommon_ingreds
from pymongo import MongoClient
from src.recipe_distance import *
from src.recipe_annotation import Recipe, RecipeGroup

In [2]:
# Case-insensitive pattern for "cookie"; it is handed to get_recipe_df together
# with the collection in the next cell.
cookie_regx = re.compile("cookie", re.I)

# MongoClient() with no arguments uses pymongo's default connection settings.
# The 'eda_cookies' collection of the 'recipes' database is the data source.
client = MongoClient()
db = client['recipes']
coll = db['eda_cookies']

In [29]:
# Load the recipe frame from the Mongo collection using the cookie pattern.
# NOTE(review): how the regex is applied (which field it matches) is defined in
# src.data_processing.get_recipe_df — confirm there.
df_cookies = get_recipe_df(coll, cookie_regx)

In [40]:
# Scale the raw recipe frame with the project helper mean_scale_recipes
# (exact scaling rule lives in src.data_processing — not visible here).
df_scaled = mean_scale_recipes(df_cookies)

In [43]:
# Drop rarely used ingredient columns from the scaled frame (245 -> 102 columns
# per the shapes displayed below).
# NOTE(review): 20 is an unexplained threshold — presumably a minimum usage
# count; confirm against drop_uncommon_ingreds and consider naming it.
df_reduced = drop_uncommon_ingreds(df_scaled, 20)

In [44]:
# Sanity check: (rows, columns) left after dropping uncommon ingredients.
df_reduced.shape

(2488, 102)

In [45]:
# Shape before ingredient filtering, for comparison with df_reduced.shape.
df_scaled.shape

(2488, 245)

In [34]:
# Boolean presence mask: True wherever a recipe has a positive amount of an
# ingredient.
df_binary = df_reduced.gt(0)

In [36]:
# Transpose the first five rows so ingredients run down the page and the five
# recipe URLs run across — easier to read than a 102-column-wide frame.
df_binary.head().T

url,http://www.seriouseats.com/recipes/2010/02/the-best-chocolate-chip-cookies-kumiko-recipe-20100201.html,http://www.seriouseats.com/recipes/2012/06/chocolate-chocolate-chip-cookies-recipe.html,http://www.seriouseats.com/recipes/2012/12/brownie-chocolate-chip-cookies-recipe.html,http://www.seriouseats.com/recipes/2012/12/se-swap-chocolate-chip-cookies-recipe.html,http://www.seriouseats.com/recipes/2011/12/bacon-chocolate-chip-cookies-recipe.html
allspic,False,False,False,False,False
almond,False,False,False,False,False
almond extract,False,False,False,False,False
almond flour,False,False,False,False,False
almond meal,False,False,False,False,False
appl,False,False,False,False,False
applesauc,False,False,False,False,False
apricot,False,False,False,False,False
bacon,False,False,False,False,True
bake powder,True,True,True,False,False


In [47]:
# Pairwise cosine distances between the rows of df_reduced. pdist returns the
# condensed (1-D) form; the result is only displayed, not stored.
pdist(df_reduced, metric='cosine')

array([ 0.34384383,  0.55835529,  0.3471587 , ...,  0.52642452,
        0.6312994 ,  0.53389407])

In [22]:
# `df` was previously used here before any cell above assigned it, so a fresh
# kernel (Restart & Run All) failed with NameError. Build the frame in this cell
# so the notebook runs top to bottom.
# NOTE(review): the displayed output was produced with whatever `df` was in the
# kernel at execution count 22 — confirm that this definition (threshold 30,
# then mean scaling) is the intended input.
df = mean_scale_recipes(drop_uncommon_ingreds(df_cookies, 30))

# Full square matrix of composite pairwise distances between recipes.
# NOTE(review): ratio=0 presumably selects only one component of the composite
# metric — confirm in src.recipe_distance.
pair_dist_composite(df, ratio=0)

array([[ 0.        ,  0.46153846,  0.46153846, ...,  0.5625    ,
         0.4       ,  0.625     ],
       [ 0.46153846,  0.        ,  0.57142857, ...,  0.46666667,
         0.58823529,  0.625     ],
       [ 0.46153846,  0.57142857,  0.        , ...,  0.64705882,
         0.58823529,  0.70588235],
       ..., 
       [ 0.5625    ,  0.46666667,  0.64705882, ...,  0.        ,
         0.41176471,  0.61111111],
       [ 0.4       ,  0.58823529,  0.58823529, ...,  0.41176471,
         0.        ,  0.55555556],
       [ 0.625     ,  0.625     ,  0.70588235, ...,  0.61111111,
         0.55555556,  0.        ]])

In [30]:
# Alternative preprocessing: filter ingredients first (threshold 30), then mean
# scale. This differs from df_reduced above, which scales first and filters with
# threshold 20.
# NOTE(review): `df` is a generic name for a frame that several later cells rely
# on; a stage-specific name would make the lineage clearer.
df = mean_scale_recipes(drop_uncommon_ingreds(df_cookies, 30))

In [120]:
# Pick the recipe at position 600 of the index as the example/seed recipe.
# The index labels are recipe URLs (see the value displayed below).
# NOTE(review): 600 is an arbitrary position; it depends on row order.
a_recp_key = df.index[600]

In [121]:
# Display the selected recipe key (a URL).
a_recp_key

u'http://www.finecooking.com/recipes/butter-cookies.aspx'

In [129]:
# Wrap the raw cookie frame (not the scaled or filtered variants) in a
# RecipeGroup for annotation.
a_group = RecipeGroup(df_cookies)

In [130]:
# Replace the group's frame with an ingredient-filtered version. No threshold is
# passed here (other calls in this notebook pass 20 or 30), so the helper's
# default applies.
# NOTE(review): this overwrites a_group.df in place, so re-running the cell
# filters the already-filtered frame.
a_group.df = drop_uncommon_ingreds(a_group.df)

In [131]:
# Populate the group starting from the seed recipe.
# NOTE(review): 50 looks like the target member count (the usage percentages in
# the summary below are multiples of 2%) and ratio presumably weights the
# composite distance — confirm in src.recipe_annotation. The "(2488,)" printed
# below appears to come from inside grow_from_center.
a_group.grow_from_center(a_recp_key, 50, ratio=0.5)

(2488,)


In [132]:
# Show the seed recipe's per-ingredient summary (amount, usage share, average,
# range and std, as rendered by the member's string form).
# print(...) with a single argument prints the same text on Python 2 and is
# valid syntax on Python 3, unlike the bare print statement.
print(a_group.members[a_recp_key])

BUTTER: 226.8g    |   used in 100.0%  of recipes|
     avg: 183.6g  | range: 114.5g - 265.1g       |  std: 2.0537g
------------------------------------------------------------
FLOUR: 269.3g     |   used in 100.0%  of recipes|
     avg: 271.7g  | range: 182.4g - 346.3g       |  std: 1.9665g
------------------------------------------------------------
SUGAR: 150.0g     |   used in 100.0%  of recipes|
     avg: 175.5g  | range: 98.6g - 302.6g       |  std: 3.0982g
------------------------------------------------------------
VANILLA EXTRACT:  6.3g  |   used in 98.0%  of recipes|
     avg:   4.7g  | range:  2.0g -  9.2g       |  std: 0.0035g
------------------------------------------------------------
SALT:  3.0g       |   used in 96.0%  of recipes|
     avg:   2.2g  | range:  0.7g -  6.4g       |  std: 0.0013g
------------------------------------------------------------
EGG: 50.0g        |   used in 96.0%  of recipes|
     avg:  47.8g  | range: 25.1g - 81.5g       |  std: 0.2080g
---------

In [133]:
# List the label of every recipe in the group.
# .values() and print(...) work on both Python 2 and Python 3; itervalues() and
# the print statement are Python-2-only.
for recp in a_group.members.values():
    print(recp.label)

martha stewart rosemary butter cookies
icebox butter cookies
christmas sugar cookies
vanilla sugar cookies recipe
valentine's sugar cookies recipe
rosemary butter cookies
spitzbuben (raspberry jam sandwich cookies)
frosted sugar cookies
butter cookies
sugar cookies
lavender shortbread cookies
hugs and kisses sugar cookies
best ever peanut butter cookies
vanilla slice & bake cookies
big crunchy sugar cookies
heart-glazed cornmeal cookies
dad sugar cookies
basic sugar cookies
animal-cracker cookies
easter cut-out sugar cookies
pressed sugar cookies
butter cookies
basic sugar cookies
basic vanilla cookie dough
nibby buckwheat butter cookie recipe
cinnamon sugar cookies
swedish shortbread cookies
coffee walnut cookies
chewy sugar cookies
citrus sugar cookies
simple sugar cookies
chocolate cookie dough
lemon polenta cookies
siobhan's thumbprint cookies
soft and chewy sugar cookies
spritz cookies
sugar cookies
old-fashioned sugar cookies
chewy sugar cookies #2
ideal sugar cookies
classic rol