In [4]:
import pandas as pd
import numpy as np

Testing out applying the heuristic framework on a single element here

In [5]:
import heuristic_model as hm
heuristic_model = hm.load_model("heuristics.json")

In [8]:
recipes = pd.read_parquet('../data_sources/recipepairs/recipes.parquet') 
pairs = pd.read_parquet('../data_sources/recipepairs/pairs.parquet') 
pairs_subset = pairs[pairs['name_iou'] > 0.7]

In [13]:
def get_recipe_by_id(id):
	return recipes.loc[recipes['id'] == id]['ingredients'].explode().tolist()

Look at one hand-picked example

In [16]:
generated = heuristic_model(get_recipe_by_id(pairs_subset.iloc[552].base), 'dairy-free')
actual = get_recipe_by_id(pairs_subset.iloc[552].target)
print(generated)
print(actual)

['bean', 'carrot', 'vegan cheese', 'chicken', 'chicken broth', 'coriander', 'corn', 'garlic', 'onion', 'rotel', 'seasoning', 'coconut cream', 'starch', 'tortilla', 'water']
['chicken', 'chili powder', 'chipotle chile', 'coriander', 'corn kernel', 'garlic', 'lime juice', 'lime wedge', 'low sodium chicken broth', 'onion', 'salt', 'tomato', 'tortilla', 'vegetable oil']


We're going to have a number of scenarios like this, where the alternate is significantly different. How do we want to handle metrics in this case? Maybe flatten the pairs subsets so all the alternates are in one thing? We'll need to check if that meaninigfully changes any metrics

Next, lets take a look at simple accuracy, where accuracy is defined as "does the heuristic model produce exactly the target recipe in the dataset?"

In [32]:
def is_heuristic_correct(row):
	generated = heuristic_model(get_recipe_by_id(row['base']), 'vegan')
	actual = get_recipe_by_id(row['target'])
	return 1 if set(generated) == set(actual) else 0

def is_heuristic_correct_vegetarian(row):
	generated = heuristic_model(get_recipe_by_id(row['base']), 'vegetarian')
	actual = get_recipe_by_id(row['target'])
	return 1 if set(generated) == set(actual) else 0

def is_heuristic_correct_df(row):
	generated = heuristic_model(get_recipe_by_id(row['base']), 'dairy-free')
	actual = get_recipe_by_id(row['target'])
	return 1 if set(generated) == set(actual) else 0

In [33]:
pairs_vegan = pairs_subset[pairs_subset['categories'].apply(lambda x: 'vegan' in x)]
pairs_vegan.apply(is_heuristic_correct, axis=1).mean()

np.float64(0.0001630639720345288)

In [34]:
pairs_vegetarian = pairs_subset[pairs_subset['categories'].apply(lambda x: 'vegetarian' in x)]
pairs_vegetarian.apply(is_heuristic_correct_vegetarian, axis=1).mean()

np.float64(7.711467723651843e-06)

In [35]:
pairs_df = pairs_subset[pairs_subset['categories'].apply(lambda x: 'dairy_free' in x)]
pairs_df.apply(is_heuristic_correct_df, axis=1).mean()

np.float64(7.87983624949813e-05)

These are ridiculously low numbers. Let's see what happens if we compress the datasets so that we have base-all target mappings

In [28]:
def is_heuristic_correct_compressed_vegan(row):
	generated = heuristic_model(get_recipe_by_id(row['base']), 'vegan')
	for target in row['target']:
		if set(generated) == set(get_recipe_by_id(target)):
			return 1
	return 0

def is_heuristic_correct_compressed_vegetarian(row):
	generated = heuristic_model(get_recipe_by_id(row['base']), 'vegetarian')
	for target in row['target']:
		if set(generated) == set(get_recipe_by_id(target)):
			return 1
	return 0

def is_heuristic_correct_compressed_df(row):
	generated = heuristic_model(get_recipe_by_id(row['base']), 'dairy-free')
	for target in row['target']:
		if set(generated) == set(get_recipe_by_id(target)):
			return 1
	return 0

In [29]:
pairs_vegan_compressed = pairs_vegan.groupby('base', as_index=False).agg({'target': list})
pairs_vegan_compressed.apply(is_heuristic_correct_compressed_vegan, axis=1).mean()

np.float64(0.00046063238245648666)

In [30]:
pairs_vegetarian_compressed = pairs_vegetarian.groupby('base', as_index=False).agg({'target': list})
pairs_vegetarian_compressed.apply(is_heuristic_correct_compressed_vegetarian, axis=1).mean()

np.float64(5.076657528683115e-05)

In [31]:
pairs_df_compressed = pairs_df.groupby('base', as_index=False).agg({'target': list})
pairs_df_compressed.apply(is_heuristic_correct_compressed_df, axis=1).mean()

np.float64(0.00036966165086544316)

This does help somewhat, but the numbers are still tiny. Let's try out the method mentioned in the RecipePairs paper; namely, treating this as a multi-label binary classification problem. We'll look at accuracy (or more specifically IoU since defining true negative here is a pain), precision, recall, and F-score

In [39]:
def is_heuristic_correct_compressed_mlbc_base(row, restriction):
	generated = set(heuristic_model(get_recipe_by_id(row['base']), restriction))
	tp_check = len(generated)
	results = (0,0,0) # tp, fp, fn
	for target in row['target']:
		target_set = set(get_recipe_by_id(target))
		tp = len(generated & target_set)
		fp = len(generated - target_set)
		fn = len(target_set - generated)
		sym_diff = fp + fn
		if tp == tp_check == len(target_set): # if we have a perfect match; we check lengths since that's faster than equality
			return tp, fp, fn
		elif tp > results[0] or (tp == results[0] and sym_diff < results[1] + results[2]):
			results = (tp, fp, fn)

	return results

def is_heuristic_correct_compressed_mlbc_vegan(row):
	return is_heuristic_correct_compressed_mlbc_base(row, 'vegan')
def is_heuristic_correct_compressed_mlbc_vegetarian(row):
	return is_heuristic_correct_compressed_mlbc_base(row, 'vegetarian')
def is_heuristic_correct_compressed_mlbc_df(row):
	return is_heuristic_correct_compressed_mlbc_base(row, 'dairy-free')

def get_eval_metrics(processed_df):
	tp = processed_df['TP'].sum()
	fp = processed_df['FP'].sum()
	fn = processed_df['FN'].sum()

	iou = tp / (tp + fp + fn)
	print("IoU: ", iou)
	precision = tp / (tp + fp)
	print("Precision: ", precision)
	recall = tp / (tp + fn)
	print("Recall: ", recall)
	f1 = (2 * tp) / ((2 * tp) + fp + fn)
	print("F-1: ", f1)


In [40]:
pairs_vegan_compressed = pairs_vegan.groupby('base', as_index=False).agg({'target': list})
pairs_vegan_compressed[['TP', 'FP', 'FN']] = pairs_vegan_compressed.apply(lambda row: pd.Series(is_heuristic_correct_compressed_mlbc_vegan(row)), axis=1)
get_eval_metrics(pairs_vegan_compressed)

IoU:  0.27257748656033776
Precision:  0.40832239753410443
Recall:  0.45052424225842974
F-1:  0.42838646674017494


In [42]:
pairs_vegetarian_compressed = pairs_vegetarian.groupby('base', as_index=False).agg({'target': list})
pairs_vegetarian_compressed[['TP', 'FP', 'FN']] = pairs_vegetarian_compressed.apply(lambda row: pd.Series(is_heuristic_correct_compressed_mlbc_vegetarian(row)), axis=1)
get_eval_metrics(pairs_vegetarian_compressed)

IoU:  0.2848157131785171
Precision:  0.43045862007255925
Recall:  0.45705122299507117
F-1:  0.4433565222733912


In [43]:
pairs_df_compressed = pairs_df.groupby('base', as_index=False).agg({'target': list})
pairs_df_compressed[['TP', 'FP', 'FN']] = pairs_df_compressed.apply(lambda row: pd.Series(is_heuristic_correct_compressed_mlbc_df(row)), axis=1)
get_eval_metrics(pairs_df_compressed)

IoU:  0.30149885639203855
Precision:  0.44556235571633024
Recall:  0.48253060843016915
F-1:  0.4633102133148872
