In [1]:
import json
import re
import sqlite3
import string

import numpy as np
import pandas as pd

# 1. Recipe Finder

The function below is our __recipe finder__ function. It allows a user to input a list of ingredients, and retrieve some relevant recipes with matching ingredients.

In [2]:
def find_recipe(ingredients, n = 5):
    """
    Finds recipes in the recipe database that use the given ingredients.

    Derivation of find_recipe_2 function, but optimized for easier use with a UI.
    Removed the min_score argument, recipes with most matches are automatically returned.

    ingredients: list of ingredients available (each one is matched as a substring
                 of a recipe's ingredient text)
    n: number of desired recipes to output. default set to 5

    Returns a pandas dataframe of at most n randomly sampled recipes containing
    title, ingredients, instructions, and Score (number of ingredient matches).
    Only recipes that share the highest Score found are sampled.
    Returns the string "No matching recipes!" if nothing matches, which includes
    the case of an empty ingredient list.

    Raises a TypeError if ingredients is not a list.
    """

    no_match = "No matching recipes!"

    # ensure that the ingredients are passed as a list
    if type(ingredients) != list:
        raise TypeError("Ingredients must be contained in a list.")

    # without ingredients the WHERE clause would be empty (invalid SQL)
    if not ingredients:
        return no_match

    # one "LIKE ?" placeholder per ingredient; the values are bound by sqlite3, so
    # quotes inside an ingredient (e.g. "confectioners' sugar") cannot break or alter the query
    where_statement = " OR ".join(["R.ingredients LIKE ?"] * len(ingredients))
    like_patterns = [f"%{ingr}%" for ingr in ingredients]

    # grab ingredient matches
    query = \
    f"""
    SELECT R.title, R.ingredients, R.instructions
    FROM recipes R
    WHERE {where_statement}
    """

    # sqlite3's connection context manager only commits/rolls back, it does not
    # close the connection, so close it explicitly
    conn = sqlite3.connect("recipes1M.db")
    try:
        df = pd.read_sql_query(query, conn, params = like_patterns)
    finally:
        conn.close()

    if df.empty:
        return no_match

    # reset the Score column every time the function is called
    df["Score"] = 0

    # increment score by 1 for every input ingredient found in a recipe
    # (this check is case-sensitive, unlike the LIKE pre-filter above)
    for ingr in ingredients:
        df["Score"] += df["ingredients"].apply(lambda text: ingr in text)

    # keep only the recipes with the highest number of matches
    best_score = df["Score"].max()
    if best_score < 1:
        return no_match

    best = df[df["Score"] >= best_score]

    # never ask for a larger sample than there are rows, sample() would raise
    return best.sample(n = min(n, len(best)))

In [3]:
# Demo: ask for 10 recipes matching potato and/or asparagus (best matches are returned).
find_recipe(["potato", "asparagus"], n = 10)

Unnamed: 0,title,ingredients,instructions,Score
59598,Barbecued Flank Steak with Roasted Vegetables ...,"[""4 ears of corn (with husks)"", ""1/4 cup KRAFT...","[""Preheat grill to medium-high heat."", ""Pull h...",2
36204,Cast Iron Blackened Grouper with Grilled Seaso...,"[""1 tablespoon black peppercorns"", ""1 tablespo...","[""Prepare the blackening spice."", ""Put all lis...",2
35165,Potato and Asparagus Soup With Cheese,"[""3 medium potatoes (peeled or unpeeled and cu...","[""Combine the potatoes, chicken broth, onions,...",2
72262,Portabella Mushroom Roulade,"[""6 portabella mushrooms, roasted"", ""2 onions,...","[""Remove gills and stems from portabellos."", ""...",2
79457,Moroccan Fish Tajine,"[""3 whole trout (about 10 inches long)"", ""2 bu...","[""Finely chop parsley and garlic and put in me...",2
6920,Homemade Rabbit Meal for Dogs,"[""2 rabbits, approx. 4 lbs. each"", ""8 cups wat...","[""Place rabbits in a large pot and add water.""...",2
10795,Asparagus Soup,"[""1 tbsp butter"", ""1 medium onion"", ""3 clove g...","[""slice onion and garlic and set aside."", ""cho...",2
20049,Ham and Potatoes With Asparagus,"[""2 sheets heavy duty aluminum foil"", ""2 mediu...","[""Preheat oven to 450F or grill to medium-high...",2
11168,Frittata,"[""12 bunch thin asparagus, trimmed,lightly ste...","[""Preheat oven broiler."", ""In a large ovenproo...",2
36352,Mr Falafel's Portobellos of Love,"[""1 package portabella mushroom"", ""3 -5 cloves...","[""Cut the potatoes into chunks for boiling."", ...",2


# 2. Recipe Generation

In this section, we'll develop a __recipe generation__ function. It allows a user to input an ingredient and generate recipes with a trained recurrent neural network. At the moment, we're working with two such models: an __LSTM__ (long short-term memory) model and a __GRU__ (gated recurrent unit) model.

In [None]:
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import layers

# modeling
import pathlib # for setting up checkpoint directory
import os # ditto

## a.) Auxiliary Code + Functions

The code below is used in prepping the `generate_recipe` function.

In [None]:
def import_data(n, db_path = "/content/drive/Shareddrives/Gouda Group Project/recipes1M.db"):
  '''
  Imports the first n recipes from the recipe database.

  n: number of recipes (rows) to read
  db_path: location of the SQLite database. Defaults to the shared drive copy.

  Returns a pandas dataframe with the columns title, ingredients, instructions.
  '''

  query = \
  """
  SELECT R.title, R.ingredients, R.instructions
  FROM recipes R
  LIMIT ?
  """

  # sqlite3's connection context manager only commits/rolls back, it does not
  # close the connection, so close it explicitly
  conn = sqlite3.connect(db_path)
  try:
    # int() so that numpy integers can be bound as the LIMIT parameter too
    df = pd.read_sql_query(query, conn, params = [int(n)])
  finally:
    conn.close()

  return df

In [None]:
DATA_SIZE = 100000 # number of recipes to pull from the database
data_raw = import_data(DATA_SIZE) # one row per recipe: title, ingredients, instructions

OperationalError: ignored

In [None]:
# Oleksii Trekhleb
# (attribution kept as-is; presumably the author of the code these constants were
# adapted from -- TODO confirm and link the source)

# define relevant constant values
# Section markers placed in front of each part of a condensed recipe string.
STOP_WORD_TITLE = 'üìó ' # precedes the title
STOP_WORD_INGREDIENTS = '\nü•ï\n\n' # precedes the ingredient list
STOP_WORD_INSTRUCTIONS = '\nüìù\n\n' # precedes the instruction list

In [None]:
def _unquote_items(serialized):
  ''' Splits a stored list string into its items, stripping the quotes around each one. '''
  inner = serialized[1:-1] # drop the first and last character that enclose the list
  pieces = [chunk[1:] for chunk in inner.split('", ')] # cut on the `", ` sequence, drop each leading quotation
  pieces[-1] = pieces[-1][:-1] # the last piece still ends with its closing quotation
  return pieces

def condense(title, ingr, instr):
  '''
  Merges the three columns a recipe is stored in (title, ingredients,
  instructions) into one string in which every part is introduced by its marker.

  The concatenation at the end of this function was adapted from the code
  source discussed.
  '''

  # one bullet line per ingredient
  ingr_string = ''.join(f'‚Ä¢ {ingredient}\n' for ingredient in _unquote_items(ingr))

  # one bullet line per instruction
  instr_string = ''.join(f'‚Ä¢ {instruction}\n' for instruction in _unquote_items(instr))

  return (
    f'{STOP_WORD_TITLE}{title}\n'
    f'{STOP_WORD_INGREDIENTS}{ingr_string}'
    f'{STOP_WORD_INSTRUCTIONS}{instr_string}'
  )

In [None]:
# condense each recipe into a single string
# (row-wise apply: the three columns of every row are passed to condense)
data_str = data_raw.apply(lambda x: condense(x.title, x.ingredients, x.instructions), axis = 1)

In [None]:
# Oleksii Trekhleb
MAX_RECIPE_LENGTH = 2000 # longest recipe, in characters, that is kept

def filter(recipe):
  ''' Tells whether a recipe is short enough to keep (at most MAX_RECIPE_LENGTH characters). '''
  exceeds_limit = len(recipe) > MAX_RECIPE_LENGTH
  return not exceeds_limit

# keep only the condensed recipes that pass the length check
data_filter = [text for text in data_str if filter(text)]

In [None]:
# Oleksii Trekhleb (adapted)

STOP_SIGN = '‚ê£' # will be appended to the end of each recipe

# Character-level tokenizer: maps every distinct character to an integer index.
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    filters = '', # we do not want to filter our recipes
    lower = False, # we want the model to recognize uppercase characters
    split = '', # we are using characters, not words
    char_level = True # we want a character-level RNN
)

# show the tokenizer all of the existing characters we have
# (the stop sign is fitted on its own so it is part of the vocabulary
# regardless of whether it occurs in the recipes)
tokenizer.fit_on_texts([STOP_SIGN])
tokenizer.fit_on_texts(data_filter)

tokenizer.get_config() # show results

{'char_level': True,
 'document_count': 94136,
 'filters': '',
 'index_docs': '{"1": 94135, "101": 1, "55": 26345, "51": 51101, "17": 94135, "21": 93421, "32": 76497, "10": 94111, "25": 89419, "34": 70465, "7": 94131, "14": 94029, "41": 62330, "47": 94135, "27": 90429, "48": 94135, "5": 94116, "4": 94133, "3": 94126, "26": 91327, "22": 92324, "15": 94095, "54": 39644, "2": 94135, "28": 90400, "38": 65029, "18": 93792, "11": 94079, "31": 78336, "35": 59588, "62": 32509, "13": 94120, "12": 94135, "30": 82207, "42": 64041, "9": 94130, "20": 93543, "16": 94034, "19": 93812, "23": 93354, "8": 94123, "44": 54366, "24": 92673, "33": 74283, "46": 52304, "49": 94135, "39": 64915, "29": 90369, "6": 94130, "36": 66005, "45": 41533, "58": 36437, "64": 21373, "61": 32407, "59": 31291, "70": 12369, "53": 43677, "43": 61155, "60": 26479, "52": 47844, "63": 30461, "40": 59826, "37": 70695, "73": 9668, "76": 8657, "65": 18289, "75": 10409, "56": 36455, "50": 48091, "66": 14933, "69": 15236, "84": 1147,

In [None]:
# Oleksii Trekhleb

# number of distinct characters seen by the tokenizer, plus one
# (presumably for index 0, which the Keras tokenizer reserves -- TODO confirm)
VOCABULARY_SIZE = len(tokenizer.word_counts) + 1 # record for later
# each recipe string becomes a list of integer character indices
data_vec = tokenizer.texts_to_sequences(data_filter) # vectorize the data

In [None]:
# create generator using LSTM model
# Embedding -> LSTM -> Dense producing one score per vocabulary entry.
# batch_input_shape = [1, None]: batch of one sequence, of any length.
# stateful = True: the recurrent state is kept between calls until reset_states()
# is called, so text can be generated by feeding one character at a time.
# NOTE(review): layer sizes presumably have to match the architecture the
# checkpoint was trained with -- verify before changing them.
generator_LSTM = tf.keras.models.Sequential([
  layers.Embedding(input_dim = VOCABULARY_SIZE,
                  output_dim = 256,
                  batch_input_shape = [1, None]),
  layers.LSTM(units = 1024,
              return_sequences = True,
              stateful = True,
              recurrent_initializer = tf.keras.initializers.GlorotNormal()),
  layers.Dense(VOCABULARY_SIZE)         
])

# restore the trained weights from the shared drive checkpoint
# (expect_partial: presumably silences warnings about checkpoint values that are not used here)
generator_LSTM.load_weights("/content/drive/Shareddrives/Gouda Group Project/recipe_model/baseline/checkpoint_4").expect_partial()
generator_LSTM.build(tf.TensorShape([1, None]))

# create generator using GRU model
# Same layout as above with a GRU layer and a larger (512) embedding.
generator_GRU = tf.keras.models.Sequential([
  layers.Embedding(input_dim = VOCABULARY_SIZE,
                  output_dim = 512,
                  batch_input_shape = [1, None]),
  layers.GRU(units = 1024,
              return_sequences = True,
              stateful = True),
  layers.Dense(VOCABULARY_SIZE)         
])

# restore the trained weights from the shared drive checkpoint
generator_GRU.load_weights("/content/drive/Shareddrives/Gouda Group Project/recipe_model/gru1/checkpoint_4").expect_partial()
generator_GRU.build(tf.TensorShape([1, None]))

## b.) Main Function

Below is the main `generate_recipe` function.

In [None]:
def generate_recipe(n, model, seed, length, temperature):
    """
    Function that generates recipes based on an RNN model.
    RNN Model can easily be swapped, so further testing on optimizing a model can be done.

    n: Number of recipes to be generated.
    model: String of model name to use. Currently, "LSTM" and "GRU" are only valid model types. More can be added later.
    seed: Ingredient name/seed to generate recipe with.
    length: Length in characters of output recipe (not counting the title marker and seed).
    temperature: Temperature to be used when generating new recipe. Must be positive;
                 lower values make the sampling more conservative.

    Prints every recipe as it is generated and returns the list of the n generated
    recipe strings. (In a notebook, end the call with `;` to avoid echoing the list.)

    Raises a ValueError for a non-positive temperature or an unknown model name.
    """

    # the predictions are divided by the temperature, so zero or negative values
    # would produce infinite or inverted scores instead of a sensible sample
    if temperature <= 0:
        raise ValueError("Temperature must be a positive number!")

    # load appropriate generator, more models can be added here later if developed
    if model == "LSTM":
        generator = generator_LSTM

    elif model == "GRU":
        generator = generator_GRU

    else:
        raise ValueError("Please input a valid model!")

    # every recipe starts with the title marker followed by the seed
    start = STOP_WORD_TITLE + seed

    # initialize empty list of recipes
    recipes = []

    # iterate n times to generate n recipes
    for i in range(n):

        indices = np.array(tokenizer.texts_to_sequences([start])) # vectorize
        result = []

        generator.reset_states() # make separate predictions independent

        for _ in range(length): # predict next character
            preds = generator(indices)
            preds = tf.squeeze(preds, 0) # reduce a dimension
            preds = preds / temperature

            # pick next character: sample from the scores of the last position
            pred_id = tf.random.categorical(preds, num_samples = 1)[-1, 0].numpy()

            # add the predicted character; it is also the only input of the next
            # step, the model state carries the preceding context
            indices = tf.expand_dims([pred_id], 0)
            next_char = tokenizer.sequences_to_texts(indices.numpy())[0]
            result.append(next_char)

        recipes.append(start + ''.join(result))

        # print recipe
        print("----- RECIPE " + str(i + 1) + " -----")
        print("SEED: ", seed, ", TEMPERATURE: ", temperature)
        print(recipes[-1])

    return recipes

In [None]:
# GRU model, kinda smart
# Demo: five recipes of 500 generated characters each, seeded with "salmon", temperature 0.8.
generate_recipe(n = 5, model = "GRU", seed = "salmon", length = 500, temperature = 0.8)

----- RECIPE 1 -----
SEED:  salmon , TEMPERATURE:  0.8
üìó salmon with a red pepper flakes, gently add to the pot and cover with pita bread and sliced tomatoes with the lemon zest and, for topping and chop stems and top with the lemon zest, soup
‚Ä¢ Salt and freshly ground black pepper to taste, and toss well.
‚Ä¢ Serve shrimp with some papred by adding oil (for frying).
‚Ä¢ Makes Reslette.
‚Ä¢ When you have to work in an even bowl.
‚Ä¢ Spray a triangle of the fried onions to a plate to cool completely begin to spread out the steaks with mayonnaise.
‚Ä¢ Don't leave with 
----- RECIPE 2 -----
SEED:  salmon , TEMPERATURE:  0.8
üìó salmon and cover with aluminum foil or wax paper formon through green cabbage leaves.
‚Ä¢ May be freezery, add a few dinnex from the baking sheet and dash of the nuts and bake the top should be done removed.
‚Ä¢ 3.77 (Patteet) in a saucepan over medium heat, strain the mushrooms on top of the steak and cheese, onion and garlic for the flour to make it all acc

In [None]:
# LSTM model, not as smart
# Demo: five recipes of 500 generated characters each, seeded with "lemon", temperature 0.4.
generate_recipe(n = 5, model = "LSTM", seed = "lemon", length = 500, temperature = 0.4)

----- RECIPE 1 -----
SEED:  lemon , TEMPERATURE:  0.4
üìó lemonüìù‚Ä¢üìùoü•ïoning large bowl with the chicken stock and stir to make a light brown sugar and salt to the boiling water and stir until the center is melted and they are lettuce.
‚Ä¢ Add the soup and brown sugar.
‚Ä¢ Add the chicken stock, sugar, cornstarch, and salt, and stir to coat.
‚Ä¢ Reduce heat to low and simmer for 25 minutes.
‚Ä¢ Stir in the remaining ingredients and stir to cook over medium heat and cook until stiff the consistency to serve.
‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£‚ê£
----- RECIPE 2 -----
SEED:  lemon , TEMPERATURE:  0.4
üìó lemonüìùü•ïoth colive with a medium non-stick skillet and then to a simmer for 5 minutes or until softened.
‚Ä¢ Transfer to a boil and stir until the carefly mixture and the center comes out clean is tender.
‚Ä¢ Cook the low for 1 ho

# 3. Recipe Classification

This section is currently not finished. Issues with the database make this function unlikely to be successful.

## a.) Auxiliary Functions

In [None]:
def standardization(input_data):
    """
    Normalizes text before it is vectorized: lowercases it and removes punctuation.

    input_data: string tensor holding the raw text

    Returns a string tensor with the lowercased, punctuation-free text.

    Relies on the `re` and `string` modules being imported at the top of the notebook.
    """
    # character class of all ASCII punctuation, escaped so that regex
    # metacharacters such as ] or \ are taken literally
    punctuation_pattern = '[%s]' % re.escape(string.punctuation)

    lowercase = tf.strings.lower(input_data)
    return tf.strings.regex_replace(lowercase, punctuation_pattern, '')

In [None]:
max_tokens = 1500 # upper bound on the vocabulary size of the layer
sequence_length = 40 # every text is padded or truncated to this many tokens

# TextVectorization is reached through the imported `preprocessing` module;
# the bare name is never imported in this notebook and raised a NameError.
# NOTE(review): the layer is not adapted to any text in this notebook, so its
# vocabulary presumably still has to be fitted (or restored) before use -- confirm.
vectorize_layer = preprocessing.TextVectorization(
    standardize = standardization,
    max_tokens = max_tokens,
    output_mode = 'int',
    output_sequence_length = sequence_length)

## b.) Main Function

In [None]:
def recipe_classifier(recipe, guess):
    """
    Function used for the recipe classification game.

    recipe: input recipe to be classified (a single string)
    guess: cuisine guess made by the user. Currently unused: the function only
           returns the prediction and does not compare it with the guess yet.

    Returns the output of the classifier model's predict call for the recipe.
    """

    # NOTE(review): the model is re-loaded from disk on every call
    model = tf.keras.models.load_model('/content/drive/Shareddrives/Gouda Group Project/classifier_model')

    # the vectorization layer and model.predict operate on batches, so the single
    # recipe string is wrapped into a batch of one instead of being passed bare
    recipe_batch = tf.expand_dims(recipe, 0)
    recipe_vector = vectorize_layer(recipe_batch)

    return model.predict(recipe_vector)

In [None]:
# fetch the first recipe of the database to use as the classification example
recipe_to_guess = import_data(1)
recipe_to_guess

Unnamed: 0,title,instructions
0,Worlds Best Mac and Cheese,"[""Preheat the oven to 350 F. Butter or oil an ..."


In [None]:
# classify the instructions of the example recipe; "american" is the cuisine guess
recipe_classifier(recipe_to_guess["instructions"][0], "american")

AttributeError: ignored