In [1]:
import pandas as pd
import numpy as np
import json

recipe_file_path = './server/dp/datasets/epi_r.csv'
recipe_json_file_path = "./server/dp/datasets/full_format_recipes.json"

In [2]:
def find_matching_recipes(keywords, result_count=15):
    """
    Given a list of keywords find recipes that match the keywords and return all the matched recipes
    and their matching percentage.
    Args:
        keywords: A 'list' containing all the ingredients and other keywords in user's inventory and
         other keywords such as allergy info.
    Returns:
        result_df: A Pandas DataFrame containing:
            Title: The name of the recipe with at least one keyword found in it.
            Match Found: The number of keywords found in the recipe given a querying list of keywords.
            Match Percentage: The percentage of matching, defined as match found divided by item count in each recipe.
            Match Items: A list of matched keywords found in each recipe.
    """
    # Read recipes information from epi.csv
    recipes_df = pd.read_csv(recipe_file_path)

    result_list = []
    for index, recipe in recipes_df.iterrows():
        # The recipe title to be used as a key in found_count for storing recipe result name.
        recipe_name = recipe["title"]

        is_one_hot_true = map(lambda x: x == np.float(1), recipe)

        # Recipe with only one-hot encoding column.
        one_hot_recipe = recipe[is_one_hot_true]

        found = 0
        found_keywords = []
        # Loop and count matching keywords in a recipe.
        for keyword in keywords:
            if keyword in one_hot_recipe:
                found += 1
                found_keywords.append(keyword)
        if found != 0:
            # matching_percentage = found_count[recipe_name] / one_hot_recipe.count() * 100
            matching_percentage = found / one_hot_recipe.count() * 100.00

            # Add results to DataFrame.
            result_list.append({'Title': recipe_name,
                                'Match Found': found,
                                'Match Percentage': matching_percentage,
                                'Match Items': found_keywords})

    # Convert list of dicts to DataFrame.
    result_df = pd.DataFrame(result_list)

    # Specify DataFrame desired column order.
    desired_order = ['Title', 'Match Found', 'Match Percentage', 'Match Items']
    result_df = result_df[desired_order]

    # Sort the result DataFrame based on the number of item found in descending order.
    result_df = result_df.sort_values('Match Found', ascending=False)

    return result_df[0:result_count]

In [3]:
keywords_from_inventory = ["lettuce", "chicken", "apple", "tomato", "turkey", "bean"]

In [4]:
result = find_matching_recipes(keywords_from_inventory)

In [5]:
# Test for find_matching_recipes
recipes_df_json_m = pd.read_json(recipe_json_file_path)
print(list(recipes_df_json_m))

['calories', 'categories', 'date', 'desc', 'directions', 'fat', 'ingredients', 'protein', 'rating', 'sodium', 'title']


In [6]:
result_indexes = result.index.values.tolist()
print(result_indexes)

[0, 1061, 140, 2680, 2392, 694, 365, 859, 4743, 3943, 2070, 3920, 2030, 3140, 2655]


In [7]:
result_json_df = recipes_df_json_m.iloc[result_indexes, :]
print(result_json_df)

      calories                                         categories  \
0        426.0  [Sandwich, Bean, Fruit, Tomato, turkey, Vegeta...   
1061     265.0  [Bread, Cheese, Onion, Bake, Quick & Easy, Che...   
140      439.0  [Egg, Side, Low Cal, Dinner, Shrimp, Hot Peppe...   
2680     262.0  [Milk/Cream, Alcoholic, Brandy, Winter, Nutmeg...   
2392     617.0  [Lamb, Mushroom, Olive, Tomato, Bake, Winter, ...   
694      437.0  [Fruit, Breakfast, Brunch, Raspberry, Apple, K...   
365       33.0       [Sauce, Soup/Stew, Herb, Vegetable, Gourmet]   
859        NaN  [Cake, Berry, Chocolate, Dessert, Bake, Passov...   
4743     261.0  [Pork, Bake, Christmas, New Year's Eve, Phyllo...   
3943     538.0  [Milk/Cream, Egg, Tomato, Appetizer, Brunch, B...   
2070     742.0  [Soup/Stew, Ginger, Vegetarian, High Fiber, Cu...   
3920     474.0  [Beef, Olive, Quick & Easy, Summer, Grill/Barb...   
2030     222.0  [Soup/Stew, Onion, Tomato, Sauté, Celery, Lent...   
3140    6929.0   [Christmas, Bake,

In [32]:
from pprint import pprint

result_json = result_json_df.to_json(orient='index')

In [17]:
with open('result.json', 'w') as output_file:
    json.dump(result_json, output_file)