In [32]:
import requests
import json
import re
import pandas as pd 
import numpy as np
from recipe_loading import *
from qty_mapping import *
from sentence_transformers import SentenceTransformer
from ranker import *
from preprocessor import *
import joblib

The steps followed here are:

1) Load 1m recipe
2) Standardise the units
3) Load density info
4) Calculate the required amount of each ingredient

In [2]:
# from zipfile import ZipFile
# with ZipFile('../../data/layer1.zip') as myzip:
#     with myzip.open(myzip.namelist()[0]) as myfile:
#         recipe_str = myfile.read()
# recipe = json.loads(recipe_str)

In [3]:
filepath = "/Users/chahaksethi/Desktop/Target/data/1m_recipe/recipe1M_layers/layer1.json"
op_file_path = "../../../Data/ing_density.csv"

In [4]:
with open(filepath) as json_data:
    recipe_tot = json.load(json_data)

In [5]:

unit_abbreviation = { 'tbsp' : ["tablespoon"],
                      'tsp' : ['teaspoon'],
                     'ml' : ['milliliter'],
                     'cup' : ['cups','cup'],
                     'oz' : ['ounces','oz', 'ounce']
    
}

In [6]:
# loading recipe
dict_ingredients= recipe_load(n=2, recipe=recipe_tot)
df_combined_ing = pd.DataFrame.from_dict(dict_ingredients)
df_combined_ing

Unnamed: 0,ingredient,unit,quantity
0,penne,ounces,6
1,Beechers Flagship Cheese Sauce,cups,2
2,Cheddar,ounce,1
3,Gruyere cheese,ounce,1
4,chipotle chili powder,teaspoon,1/2
5,unsalted butter,cup,1/4
6,all-purpose flour,cup,1/3
7,milk,cups,3
8,semihard cheese,ounces,14
9,semisoft cheese,ounces,2


In [7]:
#converting qty from fraction to float
df_combined_ing['quantity'] = df_combined_ing['quantity'].apply(convert_fraction)
df_combined_ing['quantity'] = df_combined_ing['quantity'].apply(float)


In [8]:
#normalising units
qty = Qty_normal_map(unit_abbreviation = unit_abbreviation, op_file_path= op_file_path)
combined_ingredient_df=qty.normalize_units(df_combined_ing)
combined_ingredient_df

Unnamed: 0,ingredient,unit,quantity,normalized_unit,Volume_in_ml
0,penne,ounces,6.0,oz,180.0
1,Beechers Flagship Cheese Sauce,cups,2.0,cup,450.0
2,Cheddar,ounce,1.0,oz,30.0
3,Gruyere cheese,ounce,1.0,oz,30.0
4,chipotle chili powder,teaspoon,0.5,tsp,2.5
5,unsalted butter,cup,0.25,cup,56.25
6,all-purpose flour,cup,0.333333,cup,75.0
7,milk,cups,3.0,cup,675.0
8,semihard cheese,ounces,14.0,oz,420.0
9,semisoft cheese,ounces,2.0,oz,60.0


In [9]:
# required for multiple recipes - combining qty
combined_ingredient_df = df_combined_ing.groupby(by=['ingredient', 'normalized_unit'], 
                                               as_index = False)\
                                      .agg({'quantity': 'sum', 'Volume_in_ml': 'sum'})

In [10]:
#using density info calculating required ounces for each ingredient
final_df = qty.req_oz_recipe(combined_ingredient_df)

  combined_ingredient_df['standard_vol'],combined_ingredient_df['standard_weight_gm'],\


In [11]:
final_df

Unnamed: 0,ingredient,normalized_unit,quantity,Volume_in_ml,standard_vol,standard_weight_gm,standard_unit,req_oz
0,Beechers Flagship Cheese Sauce,cup,2.0,450.0,,,,16.0
1,Cheddar,oz,1.0,30.0,1.0,5.0,tbsp,1.0
2,Gruyere cheese,oz,1.0,30.0,1.0,108.0,cup,1.0
3,all-purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47
4,chipotle chili powder,tsp,1.0,5.0,1.0,8.0,tbsp,0.094
5,cubed American cheese,cup,1.0,225.0,,,,8.0
6,dry dill weed,tsp,0.5,2.5,5.0,1.0,sprigs,0.0
7,elbow macaroni,cup,1.0,225.0,1.0,230.0,cup,8.113
8,garlic powder,tsp,0.125,0.625,1.0,9.7,tbsp,0.014
9,kosher salt,tsp,0.5,2.5,1.0,40.4,,0.0


In [33]:
# Set input file directory
ip_file_dir = "../../data/"

# Get grocery product hierarchy information
group10 = pd.read_csv(os.path.join(ip_file_dir, 
                                   'group10_header.csv'),
                      sep='\t', 
                      low_memory=False)

# Get scraped information for the above products
products = pd.read_csv(os.path.join(ip_file_dir,
                                    'scraped/products.csv'))

# Merge scraped information into the hierarchy table
group10 = pd.merge(group10, products, 
                   how = 'left', on = 'tcin')

# Preprocess the table
group10 = preprocess_df(group10)

In [34]:
# Get list of preprocessed product titles
product_titles = group10['title_modified'].values
# Preprocess recipe ingredients
final_df['ingredient'] = preprocess(final_df['ingredient'])

In [37]:
final_df

Unnamed: 0,ingredient,normalized_unit,quantity,Volume_in_ml,standard_vol,standard_weight_gm,standard_unit,req_oz
0,beechers flagship cheese sauce,cup,2.0,450.0,,,,16.0
1,cheddar,oz,1.0,30.0,1.0,5.0,tbsp,1.0
2,gruyere cheese,oz,1.0,30.0,1.0,108.0,cup,1.0
3,all purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47
4,chipotle chili powder,tsp,1.0,5.0,1.0,8.0,tbsp,0.094
5,cubed american cheese,cup,1.0,225.0,,,,8.0
6,dry dill weed,tsp,0.5,2.5,5.0,1.0,sprigs,0.0
7,elbow macaroni,cup,1.0,225.0,1.0,230.0,cup,8.113
8,garlic powder,tsp,0.125,0.625,1.0,9.7,tbsp,0.014
9,kosher salt,tsp,0.5,2.5,1.0,40.4,,0.0


In [15]:
recipe_ingredients = list(final_df['ingredient'].values)

In [16]:
lm = SentenceTransformer('all-MiniLM-L6-v2')

In [17]:
tr = TransformerRanker(model=lm, product_ids=group10['tcin'])
product_title_embeddings = joblib.load('../../data/lm_embeddings')
tr.load_embeddings(product_title_embeddings)

In [23]:
# Ranked list of product tcin matches for each ingredient - Returns a list of lists 
ranked_match = tr.rank_products_recipe(recipe_ingredients)

In [24]:
rslt_df = group10[['title', 'tcin', 'short_desc','price','net_content_quantity_unit_of_measure', 'net_content_quantity_value', 'package_weight_unit_of_measure','package_weight']]
final_rslt_df=pd.DataFrame()

for i in range(len(ranked_match)):
    rslt_inter = rslt_df.loc[group10['tcin'].isin(ranked_match[i])] 
    ing = recipe_ingredients[i]
    length = min(len(ranked_match[i]),9)
    for n in range(0,length):
        for j, row in rslt_inter.iterrows():
            if row.tcin == ranked_match[i][n] :
                rslt_inter.loc[j,'rank']=n+1
                rslt_inter.loc[j,'ingredient']=ing
                break
       
    rslt_inter_n=rslt_inter.sort_values('rank')[0:9] 
    final_rslt_df= pd.concat([final_rslt_df,rslt_inter_n], ignore_index=True)
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [25]:
#matching target database with recipe 
join_df = pd.merge(final_rslt_df, final_df, 
                   how = 'left', on = 'ingredient')

In [30]:
#calculating recommended quantity
rec_df = qty.recommended_quantity(join_df)
df = rec_df.sort_values(by=['recommended_qty', 'price'])

In [31]:
df[df['ingredient']=='all purpose flour']
# df[df['ingredient']=='milk']

Unnamed: 0,title,tcin,short_desc,price,net_content_quantity_unit_of_measure,net_content_quantity_value,package_weight_unit_of_measure,package_weight,rank,ingredient,normalized_unit,quantity,Volume_in_ml,standard_vol,standard_weight_gm,standard_unit,req_oz,product_qty_oz_ct,recommended_qty
29,Unbleached All Purpose Flour - 5lbs - Good & G...,77640693,GG Flour UNBLCHD ALL PRPSE 5LB,1.69,POUND,5.0,POUND,5.12,3.0,all purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47,81.92,1.0
28,All Purpose Flour - 5lbs - Market Pantry™,13474783,MP FLOUR MP ALL PURPOSE FLR 5LB,1.79,POUND,5.0,POUND,5.66,2.0,all purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47,90.56,1.0
30,Whole Wheat Flour - 5LB - Good & Gather™,78206460,GG Flour WHOLE WHEAT 5LB,2.49,POUND,5.0,POUND,5.0,4.0,all purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47,80.0,1.0
27,Gold Medal All Purpose Flour - 2lbs,13016243,GOLD MEDAL ALL PURPOSE FLOUR 2LB,2.99,POUND,2.0,POUND,2.02,1.0,all purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47,32.32,1.0
33,"Mission 6"" Flour Tortillas - 23oz/20ct",14770774,"MISSION MSN 23OZ 20CT 6"" TRT",4.19,OUNCE,23.0,POUND,1.438,7.0,all purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47,23.008,1.0
34,King Arthur Flour Unbleached White Whole Wheat...,14776414,KING ARTHUR 5LB KAF WWW FLOUR,4.49,POUND,5.0,POUND,5.06,8.0,all purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47,80.96,1.0
32,King Arthur Flour Whole Wheat Flour - 5lbs,14777566,KING ARTHUR 5LB KAF WW FLOUR,4.69,POUND,5.0,POUND,5.0,6.0,all purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47,80.0,1.0
35,King Arthur Flour Unbleached All-Purpose Flour...,14777928,KING ARTHUR 5LB KAF AP FLOUR,5.29,POUND,5.0,POUND,5.15,9.0,all purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47,82.4,1.0
31,Organic Flour - 5LB - Good & Gather™,77640459,GG FLOUR ORGANIC FLOUR 5LB,5.49,POUND,5.0,POUND,5.2,5.0,all purpose flour,cup,0.333333,75.0,1.0,125.0,cup,1.47,83.2,1.0
