In [1]:
import requests
import json
import re
import os
import pandas as pd 
import numpy as np
import random

In [2]:
from sentence_transformers import SentenceTransformer
from ranker import TransformerRanker
from preprocessor import *
from mapper import Mapper
from display_products import DisplayProducts
import joblib

The steps followed here are:

1) Load the Recipe1M recipe data
2) Standardise the units
3) Load density info
4) Calculate the required amount of each ingredient

In [3]:
# import zipfile
# corpus_zip = zipfile.ZipFile('/Users/mvellera/USF/Fall - II/Practicum (Target)/Data/recipe1M_layers/layer1.zip', 'r')
# recipe_str = corpus_zip.read(corpus_zip.namelist()[0])
# recipe = json.loads(recipe_str)

In [None]:
# Location of the Recipe1M "layer1" JSON file.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
filepath = "/Users/mvellera/USF/Fall - II/Practicum (Target)/Data/recipe1M_layers/layer1.json"
# CSV read by search_density(). The "desnsity" spelling is kept verbatim because
# it presumably matches the file name on disk — confirm before renaming.
op_file_path = "./Data/ing_desnsity.csv"
# JSON text is UTF-8 by specification; stating the encoding keeps the load from
# depending on the platform's default text encoding.
with open(filepath, encoding="utf-8") as json_data:
    recipe = json.load(json_data)

In [None]:
# Unit keywords recognised inside an ingredient line. The surrounding spaces are
# part of every alternative, so a unit only matches as a whole word that is
# followed by more text.
_UNIT_PATTERN = re.compile(
    r" ounces | ounce | teaspoons | teaspoon | cups | cup "
    r"| tablespoons | tablespoon | tbsp | tsp | can "
)
# Plural spellings are reported under the singular form so downstream unit
# look-ups that only know "teaspoon"/"tablespoon" keep working.
_SINGULAR_UNIT = {' teaspoons ': ' teaspoon ', ' tablespoons ': ' tablespoon '}
# A quantity token: a decimal ("1.5", ".5"), a simple fraction ("1/2") or an integer.
_QUANTITY_PATTERN = re.compile(r"\d*\.\d+|\d+(?:/\d+)?")


def _parse_ingredient_text(text):
    """Split one raw ingredient line into ``(ingredient, unit, quantity)``.

    Returns ``None`` when nothing is left after removing bracketed text.
    ``quantity`` is the matched number as a string, or ``np.nan`` when no number
    was found (in which case ``unit`` is ``''`` and the whole cleaned line is
    used as the ingredient).
    """
    # Drop "(...)" / "[...]" asides such as "(8 ounce)".
    cleaned = re.sub(r"[\(\[].*?[\)\]]", "", text)
    # "c." is shorthand for cup; the dot is escaped so " cm " etc. are left alone.
    cleaned = re.sub(r" c\. ", " cup ", cleaned)
    # Remove abbreviation/sentence periods but keep decimal points ("1.5").
    cleaned = re.sub(r"\.(?!\d)", "", cleaned)
    if cleaned == '':
        return None

    pieces = _UNIT_PATTERN.split(cleaned)
    if len(pieces) == 1:
        # No unit keyword: a number in the first word means a plain count.
        first_word, _, remainder = cleaned.partition(' ')
        amounts = _QUANTITY_PATTERN.findall(first_word)
        unit, name = 'count', remainder
    else:
        # Quantity is the last number before the first unit keyword; the
        # ingredient is the text between that keyword and the next one.
        amounts = _QUANTITY_PATTERN.findall(pieces[0])
        matched_unit = _UNIT_PATTERN.search(cleaned).group(0)
        unit, name = _SINGULAR_UNIT.get(matched_unit, matched_unit), pieces[1]

    if not amounts:
        return cleaned.split(',')[0].strip(), '', np.nan
    # Only the text before the first comma is kept ("flour, sifted" -> "flour").
    return name.split(',')[0].strip(), unit, amounts[-1]


def recipe_load_index(i):
    """Parse the ingredient lines of the recipe at position ``i``.

    Reads the module-level ``recipe`` sequence, prints the recipe title, and
    returns a dict of three parallel lists:

    * ``'ingredient'`` – ingredient name (text up to the first comma),
    * ``'unit'`` – the matched unit keyword including its surrounding spaces
      (e.g. ``' cups '``), ``'count'`` for unit-less counted items, or ``''``
      when no quantity could be read,
    * ``'quantity'`` – the number as a string (``'2'``, ``'1/2'``, ``'1.5'``)
      or ``np.nan``.

    Known limitation: for mixed numbers such as "1 1/2 cups" only the last
    number ("1/2") is kept.
    """
    entry = recipe[i]
    print(f"Recipe: {entry['title']}")

    dict_ingredients = {'ingredient': [], 'unit': [], 'quantity': []}
    for item in entry['ingredients']:
        for text in item.values():
            parsed = _parse_ingredient_text(text)
            if parsed is None:
                continue
            name, unit, quantity = parsed
            dict_ingredients['ingredient'].append(name)
            dict_ingredients['unit'].append(unit)
            dict_ingredients['quantity'].append(quantity)

    return dict_ingredients

In [None]:
def recipe_load(n):
    """Parse the ingredient lines of the first ``n`` recipes into one table.

    Reads the module-level ``recipe`` sequence and returns a dict of three
    parallel lists covering all ``n`` recipes:

    * ``'ingredient'`` – ingredient name (text up to the first comma),
    * ``'unit'`` – the matched unit keyword including its surrounding spaces
      (e.g. ``' cups '``), ``'count'`` for unit-less counted items, or ``''``
      when no quantity could be read,
    * ``'quantity'`` – the number as a string (``'2'``, ``'1/2'``, ``'1.5'``)
      or ``np.nan``.

    Raises ``IndexError`` if ``n`` exceeds the number of loaded recipes.

    Known limitation: for mixed numbers such as "1 1/2 cups" only the last
    number ("1/2") is kept.
    """
    # Unit keywords; the surrounding spaces are part of each alternative, so a
    # unit only matches as a whole word followed by more text.
    unit_re = re.compile(
        r" ounces | ounce | teaspoons | teaspoon | cups | cup "
        r"| tablespoons | tablespoon | tbsp | tsp | can "
    )
    # Plurals are reported as the singular so existing unit look-ups still apply.
    singular_unit = {' teaspoons ': ' teaspoon ', ' tablespoons ': ' tablespoon '}
    # Decimal ("1.5"), simple fraction ("1/2") or integer.
    amount_re = re.compile(r"\d*\.\d+|\d+(?:/\d+)?")

    dict_ingredients = {'ingredient': [], 'unit': [], 'quantity': []}

    for idx in range(n):
        for item in recipe[idx]['ingredients']:
            for text in item.values():
                # Drop "(...)" / "[...]" asides, expand "c." to cup (dot escaped
                # so " cm " is untouched), and strip periods that are not
                # decimal points.
                rem = re.sub(r"[\(\[].*?[\)\]]", "", text)
                rem = re.sub(r" c\. ", " cup ", rem)
                rem = re.sub(r"\.(?!\d)", "", rem)
                if rem == '':
                    continue

                unit_hit = unit_re.search(rem)
                if unit_hit is None:
                    # No unit keyword: a number in the first word is a count.
                    first_word, _, remainder = rem.partition(' ')
                    amounts = amount_re.findall(first_word)
                    unit, name = 'count', remainder
                else:
                    # Last number before the unit is the quantity; the name is
                    # the text between this unit keyword and the next one.
                    amounts = amount_re.findall(rem[:unit_hit.start()])
                    unit = singular_unit.get(unit_hit.group(0), unit_hit.group(0))
                    name = unit_re.split(rem)[1]

                if amounts:
                    quantity = amounts[-1]
                else:
                    # Without a number the whole cleaned line is the ingredient.
                    name, unit, quantity = rem, '', np.nan

                dict_ingredients['ingredient'].append(name.split(',')[0].strip())
                dict_ingredients['unit'].append(unit)
                dict_ingredients['quantity'].append(quantity)

    return dict_ingredients

def convert_fraction(utf):
    """Convert a fraction string such as ``"3/4"`` into a float.

    * Non-string input (``np.nan``, any other NaN, numbers, ``None``) is
      returned unchanged.
    * Strings without a ``/`` are returned unchanged (still as strings).
    * ``"a/b"`` with integer ``a`` and non-zero integer ``b`` returns ``a / b``.
    * Anything else containing ``/`` (``"/"``, ``"1/"``, ``"1/0"``, ``"1/2/3"``)
      returns ``np.nan`` instead of raising, so one malformed quantity does not
      abort a whole column conversion.
    """
    # An identity test against np.nan would miss NaN objects that are not the
    # numpy singleton, so guard on the type instead.
    if not isinstance(utf, str) or '/' not in utf:
        return utf
    try:
        numerator, denominator = (int(part) for part in utf.split('/'))
        return numerator / denominator
    except (ValueError, ZeroDivisionError):
        return np.nan


In [None]:

# Maps each canonical unit code (key) to every spelling that should normalise to
# it. normalize_units() only assigns a millilitre multiplier when the unit is
# found in one of these lists, so the canonical spellings themselves ('cup',
# 'tbsp', 'tsp', ...) must be listed too — otherwise "1 cup" gets volume 0 while
# "2 cups" does not.
unit_abbreviation = {
    'tbsp': ['tbsp', 'tablespoon', 'tablespoons'],
    'tsp': ['tsp', 'teaspoon', 'teaspoons'],
    'ml': ['ml', 'milliliter', 'milliliters'],
    'cup': ['cup', 'cups'],
    # The singular 'ounce' is deliberately NOT listed: req_oz_recipe() compares
    # normalized_unit against the literal 'ounce', so remapping it to 'oz' here
    # would bypass that branch.
    'oz': ['oz', 'ounces'],
}

In [None]:
#Normalizes quantity required
def normalize_units(combined_ingredient_df):
    """Add canonical units and a millilitre volume to an ingredient table.

    Expects ``'unit'`` (strings, possibly padded with spaces) and ``'quantity'``
    columns. Each unit is mapped to its canonical code through the module-level
    ``unit_abbreviation`` table; units that are not listed there are kept as-is.

    The frame is modified in place (and also returned):

    * ``'quantity'`` is cast to float,
    * ``'normalized_unit'`` holds the canonical unit,
    * ``'Volume_in_ml'`` is ``quantity * ml-per-unit``; units without a known
      volume (e.g. ``'count'``, ``''``) get a multiplier of 0.
    """
    # Millilitres represented by one of each canonical unit.
    ml_per_unit = {'cup': 225, 'tsp': 5, 'tbsp': 15, 'ml': 1, 'oz': 30}
    # Invert the alias table once: spelling -> canonical code.
    canonical_for = {
        alias: canonical
        for canonical, aliases in unit_abbreviation.items()
        for alias in aliases
    }

    normalized_units = []
    multipliers = []
    # The column is addressed by name so the result does not depend on column order.
    for raw_unit in combined_ingredient_df['unit']:
        unit = raw_unit.strip()
        normalized_unit = canonical_for.get(unit, unit)
        normalized_units.append(normalized_unit)
        # Look the multiplier up from the *normalized* unit so that a unit that
        # is already canonical ('cup', 'tbsp', 'tsp') is not given volume 0.
        multipliers.append(ml_per_unit.get(normalized_unit, 0))

    combined_ingredient_df['quantity'] = combined_ingredient_df['quantity'].astype(float)
    combined_ingredient_df['normalized_unit'] = normalized_units
    combined_ingredient_df['Volume_in_ml'] = combined_ingredient_df['quantity'] * multipliers
    return combined_ingredient_df
def search_density(ingredient):
    """Look up an ingredient in the density CSV at ``op_file_path``.

    The match is a case-insensitive substring test against the CSV's
    ``'ingredient'`` column, and the first matching row wins. Returns that row's
    2nd, 3rd and 4th columns as a tuple, or ``(None, None, '')`` when nothing
    matches or ``ingredient`` is empty / not a string.

    NOTE(review): the three returned columns are presumably standard volume,
    standard weight in grams and standard unit — confirm against the CSV header.
    The CSV is re-read on every call.
    """
    not_found = (None, None, '')
    # An empty needle is a substring of every name and would silently return
    # the first row, so treat it as "no match".
    if not isinstance(ingredient, str) or not ingredient.strip():
        return not_found

    needle = ingredient.lower()
    df = pd.read_csv(op_file_path)
    for n, candidate in enumerate(df['ingredient'].tolist()):
        # Blank cells are read as NaN (a float); skip them instead of crashing.
        if isinstance(candidate, str) and needle in candidate.lower():
            return (df.iloc[n, 1], df.iloc[n, 2], df.iloc[n, 3])
    return not_found
def req_oz_recipe(combined_ingredient_df):
    """Add a ``'req_oz'`` column: the required weight of each row in ounces.

    Reads ``normalized_unit``, ``quantity``, ``standard_unit``,
    ``standard_weight_gm`` and ``Volume_in_ml`` from every row:

    * weight units are used directly (``oz``/``ounce``/``ounces`` as-is,
      ``pound`` multiplied by 16);
    * otherwise, when ``standard_unit`` mentions cup / tbsp / tsp, the volume is
      converted to grams via ``standard_weight_gm`` per 225 / 15 / 5 ml and
      then to ounces (28.35 g per ounce);
    * rows that fit neither case get 0.

    The result is rounded to 3 decimals. The frame is modified in place and
    returned.

    NOTE(review): the conversion treats ``standard_weight_gm`` as the weight of
    exactly one standard unit — confirm against the density data.
    """
    grams_per_ounce = 28.35
    # (spellings to look for in standard_unit, millilitres in one such unit)
    volume_units = (
        (('cup',), 225),
        (('tbsp', 'tablespoon'), 15),
        (('tsp', 'teaspoon'), 5),
    )

    req_oz = []
    for _, row in combined_ingredient_df.iterrows():
        # 'ounces' is normalised to 'oz' by unit_abbreviation, so accept every
        # spelling here; otherwise plural ounces would go down the density path.
        if row.normalized_unit in ('oz', 'ounce', 'ounces'):
            req_oz.append(row.quantity)
            continue
        if row.normalized_unit == 'pound':
            req_oz.append(row.quantity * 16)
            continue

        # A missing density unit may arrive as NaN/None rather than ''.
        standard_unit = row.standard_unit.strip() if isinstance(row.standard_unit, str) else ''
        for spellings, ml_per_standard_unit in volume_units:
            if any(spelling in standard_unit for spelling in spellings):
                req_gm = (row.standard_weight_gm / ml_per_standard_unit) * row.Volume_in_ml
                req_oz.append(req_gm / grams_per_ounce)
                break
        else:
            req_oz.append(0)

    combined_ingredient_df['req_oz'] = req_oz
    combined_ingredient_df['req_oz'] = np.round(combined_ingredient_df['req_oz'], 3)
    return combined_ingredient_df

def recommended_qty(join_df):
    """Add a ``'recommended_qty'`` column: how many packages to buy per row.

    * If ``req_oz`` is positive, the package weight is converted to ounces
      (``pound`` × 16, ``ounce`` as-is) and the result is
      ``ceil(req_oz / package ounces)``.
    * Otherwise, for counted ingredients (``normalized_unit`` of ``''`` or
      ``'count'``), the recipe ``quantity`` is compared with the product's net
      content unit: ``dozen`` → ``ceil(quantity / 12)``, ``count`` →
      ``ceil(quantity)``.
    * Every other case (unknown or missing units, non-positive package weight)
      yields 0, so exactly one value is produced per row.

    The frame is modified in place and returned.
    """
    def _clean(value):
        # Missing units can arrive as NaN/None; treat them as "unknown".
        return value.strip().lower() if isinstance(value, str) else ''

    quantities = []
    for _, row in join_df.iterrows():
        rec = 0
        if row.req_oz > 0:
            package_unit = _clean(row.package_weight_unit_of_measure)
            if package_unit == 'pound':
                pack_oz = row.package_weight * 16
            elif package_unit == 'ounce':
                pack_oz = row.package_weight
            else:
                pack_oz = None
            if pack_oz is not None and pack_oz > 0:
                rec = np.ceil(row.req_oz / pack_oz)
        # The ingredient parser tags unit-less counted items with 'count', so
        # accept that alongside the empty unit.
        elif row.normalized_unit in ('', 'count'):
            content_unit = _clean(row.net_content_quantity_unit_of_measure)
            if content_unit == 'dozen':
                rec = np.ceil(row.quantity / 12)
            elif content_unit == 'count':
                rec = np.ceil(row.quantity)
        quantities.append(rec)

    join_df['recommended_qty'] = quantities
    return join_df

In [None]:
# Set input file directory
# (relative to the working directory the notebook is started from)
ip_file_dir = "../Data/Target Data/"

# Get grocery product hierarchy information
# (read as a tab-separated file despite the .csv extension)
group10 = pd.read_csv(os.path.join(ip_file_dir, 
                                   'group10_header.csv'),
                      sep='\t', 
                      low_memory=False)

# Get scraped information for the above products
products = pd.read_csv(os.path.join(ip_file_dir,
                                    'scraped/products.csv'))

# Merge scraped information into the hierarchy table
# Left join on 'tcin': every hierarchy row is kept, with scraped columns
# filled in where a matching product exists.
group10 = pd.merge(group10, products, 
                   how = 'left', on = 'tcin')

# Preprocess the table
# NOTE(review): preprocess_df is not defined in this notebook; presumably it
# comes from `from preprocessor import *` — confirm.
group10 = preprocess_df(group10)

# Sentence-embedding model handed to the ranker below.
lm = SentenceTransformer('all-MiniLM-L6-v2')

# Get list of preprocessed product titles
# NOTE(review): product_titles is not referenced again in the cells visible here.
product_titles = group10['title_modified'].values

# Ranker over the product ids, using precomputed title embeddings loaded from disk.
tr = TransformerRanker(model=lm, product_ids=group10['tcin'], max_rank=3)
# NOTE(review): joblib.load unpickles the file — only load embeddings from a
# trusted source. The path is relative to the working directory and does not
# use ip_file_dir.
product_title_embeddings = joblib.load('data/lm_embeddings')
tr.load_embeddings(product_title_embeddings)

# Mapper over the product table, and the display helper combining ranker and mapper.
pm = Mapper(group10)
dp = DisplayProducts(ranker=tr, mapper=pm)

In [None]:
# This cell rebinds `recipe` to the aggregated DataFrame at the end, so running
# it a second time without reloading the JSON would index a DataFrame instead
# of the raw recipe data. Fail with a clear message in that case.
if isinstance(recipe, pd.DataFrame):
    raise RuntimeError(
        "`recipe` no longer holds the raw recipe data; "
        "re-run the data-loading cell before this one."
    )

# Pick a random recipe. randrange stays inside the loaded data, whereas
# randint(0, 1000000) included the upper bound and assumed a fixed dataset size.
i = random.randrange(len(recipe))
print(i)
dict_ingredients= recipe_load_index(i)
df_combined_ing = pd.DataFrame.from_dict(dict_ingredients)
# Fraction strings ("1/2") become floats; the remaining numeric strings are cast next.
df_combined_ing['quantity'] = df_combined_ing['quantity'].apply(convert_fraction)
df_combined_ing['quantity'] = df_combined_ing['quantity'].apply(float)
# normalize_units adds 'normalized_unit' and 'Volume_in_ml' to df_combined_ing
# in place, which the groupby below relies on.
combined_ingredient_df=normalize_units(df_combined_ing)
# required for multiple recipes - combining qty
recipe = df_combined_ing.groupby(by=['ingredient', 'normalized_unit'], 
                                               as_index = False)\
                                      .agg({'quantity': 'sum', 'Volume_in_ml': 'sum'})
recipe

In [None]:
# Pass the aggregated ingredient names to the DisplayProducts helper built
# above; at this point `recipe` is the grouped DataFrame, not the raw JSON data.
dp.display_products_recipe(recipe['ingredient'])