# Chopped NLP
Data courtesy of Jeffrey Braun (https://www.kaggle.com/jeffreybraun/chopped-10-years-of-episode-data)

### Import necessary modules

In [131]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

### Read CSV of Chopped Episode data

In [132]:
# Load the Chopped episode table; the relative path is resolved against the working directory
chopped_data = pd.read_csv(filepath_or_buffer = "chopped.csv")

In [159]:
# Preview the first five rows to see which columns were loaded
chopped_data.head(n = 5)

Unnamed: 0,season,season_episode,series_episode,episode_name,episode_notes,air_date,judge1,judge2,judge3,appetizer,entree,dessert,contestant1,contestant1_info,contestant2,contestant2_info,contestant3,contestant3_info,contestant4,contestant4_info
0,1,1,1,"""Octopus, Duck, Animal Crackers""",This is the first episode with only three offi...,13-Jan-09,Marc Murphy,Alex Guarnaschelli,Aarón Sánchez,"baby octopus, bok choy, oyster sauce, smoked ...","duck breast, green onions, ginger, honey","prunes, animal crackers, cream cheese",Summer Kriegshauser,Private Chef and Nutrition Coach New York NY,Perry Pollaci,Private Chef and Sous chef Bar Blanc New Yo...,Katie Rosenhouse,Pastry Chef Olana Restaurant New York NY,Sandy Davis,Catering Chef Showstoppers Catering at Union...
1,1,2,2,"""Tofu, Blueberries, Oysters""",This is the first of a few episodes with five ...,20-Jan-09,Aarón Sánchez,Alex Guarnaschelli,Marc Murphy,"firm tofu, tomato paste, prosciutto","daikon, pork loin, Napa cabbage, Thai chiles,...","phyllo dough, gorgonzola cheese, pineapple ri...",Raymond Jackson,Private Caterer and Culinary Instructor West...,Klaus Kronsteiner,Chef de cuisine Liberty National Golf Course...,Christopher Jackson,Executive Chef and Owner Ted and Honey Broo...,Pippa Calland,Owner and Chef Chef for Hire LLC Newville PA
2,1,3,3,"""Avocado, Tahini, Bran Flakes""",,27-Jan-09,Aarón Sánchez,Alex Guarnaschelli,Marc Murphy,"lump crab meat, dried shiitake mushrooms, pin...","ground beef, cannellini beans, tahini paste, ...","brioche, cantaloupe, pecans, avocados",Margaritte Malfy,Executive Chef and Co-owner La Palapa New Y...,Rachelle Rodwell,Chef de cuisine SoHo Grand Hotel New York NY,Chris Burke,Private Chef New York NY,Andre Marrero,Chef tournant L’Atelier de Joël Robuchon Ne...
3,1,4,4,"""Banana, Collard Greens, Grits""","In the appetizer round, Chef Chuboda refused t...",3-Feb-09,Scott Conant,Amanda Freitag,Geoffrey Zakarian,"ground beef, wonton wrappers, cream of mushro...","scallops, collard greens, anchovies, sour cream","maple syrup, black plums, almond butter, waln...",Sean Chudoba,Executive Chef Ayza Wine Bar New York NY,Kyle Shadix,Chef Registered Dietician and Culinary Consu...,Luis Gonzales,Executive Chef Knickerbocker Bar & Grill Ne...,Einat Admony,Chef and Owner Taïm New York NY
4,1,5,5,"""Yucca, Watermelon, Tortillas""",,10-Feb-09,Geoffrey Zakarian,Alex Guarnaschelli,Marc Murphy,"watermelon, canned sardines, pepper jack chee...","beef shoulder, yucca, raisins, ancho chiles, ...","flour tortillas, prosecco, Canadian bacon, ro...",John Keller,Personal Chef New York NY,Andrea Bergquist,Executive Chef New York NY,Ed Witt,Executive Chef / Wine Director Bloomingdale ...,Josh Emett,Chef de cuisine Gordon Ramsay at The London ...


### Creating a list of every ingredient basket for the chosen round

In [175]:
# Column of the episode table to model (the table also has "appetizer" and "dessert" columns)
round_type = "entree"
# One raw, comma-separated ingredient string per row of the table
round_list = chopped_data.loc[:, round_type].tolist()
print(round_list[:5])

[' duck breast, green onions, ginger, honey ', ' daikon, pork loin, Napa cabbage, Thai chiles, Blue Point oysters ', ' ground beef, cannellini beans, tahini paste, grape jelly ', ' scallops, collard greens, anchovies, sour cream ', ' beef shoulder, yucca, raisins, ancho chiles, dill pickles ']


### Creating a modified sequence of data
Each basket is decomposed into its growing prefixes, so that a sentence like ["My name is Jordan"] turns into ["My name", "My name is", "My name is Jordan"].

In [176]:
text = []

for selection in round_list:                          # For each basket selection in the chosen round
    if not isinstance(selection, str):                # Skip missing cells (pandas reads them as float NaN),
        continue                                      # which have no .split method
    # Flatten the basket into single words. split() with no argument trims the
    # surrounding whitespace and never yields the empty strings that split(" ")
    # produces for repeated spaces, so no "" token reaches the tokenizer.
    ingredients = [
        element
        for ingredient in selection.split(",")
        for element in ingredient.split()
    ]
    for i in range(2, len(ingredients) + 1):          # Every prefix that is at least two words long
        text.append(ingredients[:i])                  # becomes one training fragment

print(text[:15])

[['duck', 'breast'], ['duck', 'breast', 'green'], ['duck', 'breast', 'green', 'onions'], ['duck', 'breast', 'green', 'onions', 'ginger'], ['duck', 'breast', 'green', 'onions', 'ginger', 'honey'], ['daikon', 'pork'], ['daikon', 'pork', 'loin'], ['daikon', 'pork', 'loin', 'Napa'], ['daikon', 'pork', 'loin', 'Napa', 'cabbage'], ['daikon', 'pork', 'loin', 'Napa', 'cabbage', 'Thai'], ['daikon', 'pork', 'loin', 'Napa', 'cabbage', 'Thai', 'chiles'], ['daikon', 'pork', 'loin', 'Napa', 'cabbage', 'Thai', 'chiles', 'Blue'], ['daikon', 'pork', 'loin', 'Napa', 'cabbage', 'Thai', 'chiles', 'Blue', 'Point'], ['daikon', 'pork', 'loin', 'Napa', 'cabbage', 'Thai', 'chiles', 'Blue', 'Point', 'oysters'], ['ground', 'beef']]


### Tokenizing our words

In [177]:
# Build the vocabulary from the n-gram fragments; "<OOV>" stands in for
# any word that falls outside the num_words cut-off
tokenizer = Tokenizer(oov_token = "<OOV>", num_words = 2000)
tokenizer.fit_on_texts(text)

# Encode every fragment as a list of integer word ids
sequences = tokenizer.texts_to_sequences(text)

# word -> id dictionary; the model cell sizes its output layer as word_count + 2
word_index = tokenizer.word_index
word_count = len(word_index) - 1

### Padding each sequence so that they are all the same length

In [178]:
# Left-pad every id sequence with zeros up to the length of the longest one
max_len = max(map(len, sequences))
sequences = np.array(pad_sequences(sequences, padding = "pre", maxlen = max_len))
print(sequences)

[[   0    0    0 ...    0   17   19]
 [   0    0    0 ...   17   19   12]
 [   0    0    0 ...   19   12  136]
 ...
 [   0    0    0 ...   39  128 1313]
 [   0    0    0 ...  128 1313   69]
 [   0    0    0 ... 1313   69  353]]


### Creating our inputs and labels for our training data

In [179]:
# Everything except the last token is the model input; the last token is the word to predict
xs = sequences[:, :-1]
labels = sequences[:, -1]

# Pin the one-hot width to the model's output size (word_count + 2). Without
# num_classes, to_categorical infers max(labels) + 1, which is narrower than the
# softmax layer whenever the highest word id never occurs as a final token.
ys = tf.keras.utils.to_categorical(labels, num_classes = word_count + 2)

### Creating and compiling a model

In [180]:
# word_count is len(word_index) - 1, while token ids run from 0 (padding) up to
# len(word_index); that makes word_count + 2 distinct ids. The Embedding table
# must have one row per id (input_dim = largest id + 1), and the softmax layer
# needs one unit per id as well.
vocab_size = word_count + 2

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 64, input_length = max_len - 1),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20)),
    tf.keras.layers.Dense(vocab_size, activation = "softmax"),
])

# One-hot labels + softmax output -> categorical cross-entropy
model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])

### Training our model

In [184]:
# Train for 300 epochs without progress output; the returned History object is the cell's result
model.fit(x = xs, y = ys, epochs = 300, verbose = 0)

<tensorflow.python.keras.callbacks.History at 0x256a872c490>

### Making Predictions with our model

In [185]:
seed_ingredients = "chicken"    # Text the generated basket starts from
num_words = 6                   # Maximum number of words to append to the seed

for _ in range(num_words):
    # Encode the text generated so far and left-pad it to the model's input length
    token_list = tokenizer.texts_to_sequences([seed_ingredients])[0]
    token_list = pad_sequences([token_list], maxlen = max_len - 1, padding = "pre")

    # Sequential.predict_classes was removed in TensorFlow 2.6; taking the argmax
    # of the softmax output gives the same class id
    probabilities = model.predict(token_list, verbose = 2)
    predicted = int(np.argmax(probabilities, axis = -1)[0])

    # index_word is the tokenizer's id -> word dictionary, so no scan over
    # word_index is needed; id 0 (padding) has no entry in it
    output_word = tokenizer.index_word.get(predicted, "")
    if not output_word:
        # Nothing to append: the input would stay the same and so would the prediction
        break
    seed_ingredients += f" {output_word}"

print(seed_ingredients)

1/1 - 0s
1/1 - 0s
1/1 - 0s
1/1 - 0s
1/1 - 0s
1/1 - 0s
chicken breast dandelion greens starfruit cherry liqueur
