# Chopped NLP
Data courtesy of Jeffrey Braun (https://www.kaggle.com/jeffreybraun/chopped-10-years-of-episode-data)

### Import necessary modules

In [131]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

### Read CSV of Chopped Episode data

In [132]:
# Load the episode table; the path is resolved relative to the notebook's working directory.
chopped_data = pd.read_csv(filepath_or_buffer = "chopped.csv")

In [159]:
# Preview the first five rows to confirm the columns were read as expected.
chopped_data.head(n = 5)

Unnamed: 0,season,season_episode,series_episode,episode_name,episode_notes,air_date,judge1,judge2,judge3,appetizer,entree,dessert,contestant1,contestant1_info,contestant2,contestant2_info,contestant3,contestant3_info,contestant4,contestant4_info
0,1,1,1,"""Octopus, Duck, Animal Crackers""",This is the first episode with only three offi...,13-Jan-09,Marc Murphy,Alex Guarnaschelli,Aarón Sánchez,"baby octopus, bok choy, oyster sauce, smoked ...","duck breast, green onions, ginger, honey","prunes, animal crackers, cream cheese",Summer Kriegshauser,Private Chef and Nutrition Coach New York NY,Perry Pollaci,Private Chef and Sous chef Bar Blanc New Yo...,Katie Rosenhouse,Pastry Chef Olana Restaurant New York NY,Sandy Davis,Catering Chef Showstoppers Catering at Union...
1,1,2,2,"""Tofu, Blueberries, Oysters""",This is the first of a few episodes with five ...,20-Jan-09,Aarón Sánchez,Alex Guarnaschelli,Marc Murphy,"firm tofu, tomato paste, prosciutto","daikon, pork loin, Napa cabbage, Thai chiles,...","phyllo dough, gorgonzola cheese, pineapple ri...",Raymond Jackson,Private Caterer and Culinary Instructor West...,Klaus Kronsteiner,Chef de cuisine Liberty National Golf Course...,Christopher Jackson,Executive Chef and Owner Ted and Honey Broo...,Pippa Calland,Owner and Chef Chef for Hire LLC Newville PA
2,1,3,3,"""Avocado, Tahini, Bran Flakes""",,27-Jan-09,Aarón Sánchez,Alex Guarnaschelli,Marc Murphy,"lump crab meat, dried shiitake mushrooms, pin...","ground beef, cannellini beans, tahini paste, ...","brioche, cantaloupe, pecans, avocados",Margaritte Malfy,Executive Chef and Co-owner La Palapa New Y...,Rachelle Rodwell,Chef de cuisine SoHo Grand Hotel New York NY,Chris Burke,Private Chef New York NY,Andre Marrero,Chef tournant L’Atelier de Joël Robuchon Ne...
3,1,4,4,"""Banana, Collard Greens, Grits""","In the appetizer round, Chef Chuboda refused t...",3-Feb-09,Scott Conant,Amanda Freitag,Geoffrey Zakarian,"ground beef, wonton wrappers, cream of mushro...","scallops, collard greens, anchovies, sour cream","maple syrup, black plums, almond butter, waln...",Sean Chudoba,Executive Chef Ayza Wine Bar New York NY,Kyle Shadix,Chef Registered Dietician and Culinary Consu...,Luis Gonzales,Executive Chef Knickerbocker Bar & Grill Ne...,Einat Admony,Chef and Owner Taïm New York NY
4,1,5,5,"""Yucca, Watermelon, Tortillas""",,10-Feb-09,Geoffrey Zakarian,Alex Guarnaschelli,Marc Murphy,"watermelon, canned sardines, pepper jack chee...","beef shoulder, yucca, raisins, ancho chiles, ...","flour tortillas, prosecco, Canadian bacon, ro...",John Keller,Personal Chef New York NY,Andrea Bergquist,Executive Chef New York NY,Ed Witt,Executive Chef / Wine Director Bloomingdale ...,Josh Emett,Chef de cuisine Gordon Ramsay at The London ...


### Creating list of every appetizer ingredient

In [163]:
# Pull the "appetizer" column out as a plain Python list, one entry per episode row,
# and show the first few raw baskets.
appetizers = chopped_data.loc[:, "appetizer"].tolist()
print(appetizers[:5])

[' baby octopus, bok choy, oyster sauce, smoked paprika ', ' firm tofu, tomato paste, prosciutto ', ' lump crab meat, dried shiitake mushrooms, pink grapefruit, bran cereal ', ' ground beef, wonton wrappers, cream of mushroom soup, bananas ', ' watermelon, canned sardines, pepper jack cheese, zucchini ']


### Creating a modified sequence of data
Each basket is decomposed into growing prefixes, so that a sentence like ["My name is Jordan"] turns into ["My name", "My name is", "My name is Jordan"].

In [166]:
# Build the training text: every basket contributes each prefix of its word
# list that is at least two words long, e.g. ["baby", "octopus"],
# ["baby", "octopus", "bok"], ["baby", "octopus", "bok", "choy"], ...
text = []

for selection in appetizers:                          # For each basket selection in appetizers
    # An empty CSV cell is loaded by pandas as NaN (a float), which has no
    # .split(); skip such rows instead of crashing on them.
    if not isinstance(selection, str):
        continue

    # Commas only separate ingredients, so treat them like spaces. split()
    # with no argument collapses runs of whitespace, which means doubled
    # spaces or a trailing comma can never produce an empty-string "word"
    # that would otherwise end up in the tokenizer's vocabulary.
    ingredients = selection.replace(",", " ").split()

    # Prefixes of length 2 .. len(ingredients); a one-word basket yields none.
    for end in range(2, len(ingredients) + 1):
        text.append(ingredients[:end])

print(text[:15])

[['baby', 'octopus'], ['baby', 'octopus', 'bok'], ['baby', 'octopus', 'bok', 'choy'], ['baby', 'octopus', 'bok', 'choy', 'oyster'], ['baby', 'octopus', 'bok', 'choy', 'oyster', 'sauce'], ['baby', 'octopus', 'bok', 'choy', 'oyster', 'sauce', 'smoked'], ['baby', 'octopus', 'bok', 'choy', 'oyster', 'sauce', 'smoked', 'paprika'], ['firm', 'tofu'], ['firm', 'tofu', 'tomato'], ['firm', 'tofu', 'tomato', 'paste'], ['firm', 'tofu', 'tomato', 'paste', 'prosciutto'], ['lump', 'crab'], ['lump', 'crab', 'meat'], ['lump', 'crab', 'meat', 'dried'], ['lump', 'crab', 'meat', 'dried', 'shiitake']]


### Tokenizing our words

In [170]:
# Fit a word-level tokenizer on the prefix lists, then encode every prefix
# as a list of integer ids.
tokenizer = Tokenizer(oov_token = "<OOV>", num_words = 2000)
tokenizer.fit_on_texts(text)

# word -> integer id mapping learned by the tokenizer.
word_index = tokenizer.word_index
# Stored as len(word_index) - 1; the model cell adds 2 back when sizing its output layer.
word_count = len(word_index) - 1

# Turn all of our text into integer sequences.
sequences = tokenizer.texts_to_sequences(text)

### Padding each element so that they are all the same length

In [160]:
# Left-pad every sequence up to the length of the longest one so the whole
# corpus becomes a single rectangular integer array.
max_len = max(map(len, sequences))
sequences = np.array(pad_sequences(sequences, padding = "pre", maxlen = max_len))
print(sequences)

[[   0    0    0 ...    0    3  171]
 [   0    0    0 ...    3  171  237]
 [   0    0    0 ...  171  237  197]
 ...
 [   0    0    0 ...  411  206 1319]
 [   0    0    0 ...  206 1319  667]
 [   0    0    0 ... 1319  667  617]]


### Creating our inputs and labels for our training data

In [137]:
# Every column except the last is the model input; the last column is the
# word the model should learn to predict next.
xs = sequences[:, :-1]
labels = sequences[:, -1]

# Pin the one-hot width to the model's output size (word_count + 2) rather
# than letting to_categorical infer it as max(labels) + 1. The inferred width
# only matches the softmax layer when the highest-indexed word happens to
# occur as a label; otherwise training fails with a shape mismatch.
ys = tf.keras.utils.to_categorical(labels, num_classes = word_count + 2)

### Creating and compiling a model

In [142]:
# Next-word model: embedding -> bidirectional LSTM -> softmax over the vocabulary.
#
# Token ids run from 0 (padding) up to len(word_index), and word_count is
# len(word_index) - 1, so there are word_count + 2 distinct ids. The embedding
# table must have one row per id; sizing it with word_count alone leaves the
# two highest ids out of range (an error on CPU, silent zero vectors on GPU).
# The softmax layer uses the same size so every id can be predicted.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(word_count + 2, 64, input_length = max_len - 1),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(20)),
    tf.keras.layers.Dense(word_count + 2, activation = "softmax"),
])

# One-hot targets -> categorical cross-entropy.
model.compile(loss = "categorical_crossentropy", optimizer = "adam", metrics = ["accuracy"])

### Training our model

In [143]:
# Train on every prefix / next-word pair; verbose = 2 logs one line per epoch.
model.fit(x = xs, y = ys, verbose = 2, epochs = 500)

Epoch 1/500
129/129 - 2s - loss: 7.1426 - accuracy: 0.0051
Epoch 2/500
129/129 - 2s - loss: 6.7631 - accuracy: 0.0088
Epoch 3/500
129/129 - 2s - loss: 6.6902 - accuracy: 0.0098
Epoch 4/500
129/129 - 1s - loss: 6.6089 - accuracy: 0.0085
Epoch 5/500
129/129 - 1s - loss: 6.5264 - accuracy: 0.0095
Epoch 6/500
129/129 - 2s - loss: 6.4523 - accuracy: 0.0107
Epoch 7/500
129/129 - 2s - loss: 6.3775 - accuracy: 0.0127
Epoch 8/500
129/129 - 2s - loss: 6.2964 - accuracy: 0.0156
Epoch 9/500
129/129 - 2s - loss: 6.2104 - accuracy: 0.0168
Epoch 10/500
129/129 - 2s - loss: 6.1198 - accuracy: 0.0220
Epoch 11/500
129/129 - 2s - loss: 6.0340 - accuracy: 0.0246
Epoch 12/500
129/129 - 2s - loss: 5.9503 - accuracy: 0.0276
Epoch 13/500
129/129 - 2s - loss: 5.8525 - accuracy: 0.0285
Epoch 14/500
129/129 - 2s - loss: 5.7718 - accuracy: 0.0361
Epoch 15/500
129/129 - 2s - loss: 5.6846 - accuracy: 0.0393
Epoch 16/500
129/129 - 2s - loss: 5.6003 - accuracy: 0.0439
Epoch 17/500
129/129 - 2s - loss: 5.5338 - accura

129/129 - 1s - loss: 1.1151 - accuracy: 0.7839
Epoch 138/500
129/129 - 1s - loss: 1.0773 - accuracy: 0.7907
Epoch 139/500
129/129 - 1s - loss: 1.0679 - accuracy: 0.7968
Epoch 140/500
129/129 - 1s - loss: 1.0871 - accuracy: 0.7893
Epoch 141/500
129/129 - 1s - loss: 1.0647 - accuracy: 0.7951
Epoch 142/500
129/129 - 1s - loss: 1.0280 - accuracy: 0.8037
Epoch 143/500
129/129 - 1s - loss: 1.0063 - accuracy: 0.8051
Epoch 144/500
129/129 - 1s - loss: 0.9911 - accuracy: 0.8083
Epoch 145/500
129/129 - 1s - loss: 0.9794 - accuracy: 0.8139
Epoch 146/500
129/129 - 1s - loss: 0.9733 - accuracy: 0.8173
Epoch 147/500
129/129 - 1s - loss: 0.9616 - accuracy: 0.8124
Epoch 148/500
129/129 - 1s - loss: 0.9567 - accuracy: 0.8102
Epoch 149/500
129/129 - 1s - loss: 0.9917 - accuracy: 0.8041
Epoch 150/500
129/129 - 1s - loss: 1.0390 - accuracy: 0.7885
Epoch 151/500
129/129 - 1s - loss: 0.9696 - accuracy: 0.8132
Epoch 152/500
129/129 - 1s - loss: 0.9404 - accuracy: 0.8144
Epoch 153/500
129/129 - 1s - loss: 0.9

Epoch 272/500
129/129 - 1s - loss: 0.3163 - accuracy: 0.9234
Epoch 273/500
129/129 - 1s - loss: 0.3144 - accuracy: 0.9227
Epoch 274/500
129/129 - 1s - loss: 0.3253 - accuracy: 0.9207
Epoch 275/500
129/129 - 1s - loss: 0.3020 - accuracy: 0.9241
Epoch 276/500
129/129 - 1s - loss: 0.2912 - accuracy: 0.9261
Epoch 277/500
129/129 - 1s - loss: 0.2900 - accuracy: 0.9268
Epoch 278/500
129/129 - 1s - loss: 0.2824 - accuracy: 0.9280
Epoch 279/500
129/129 - 1s - loss: 0.2797 - accuracy: 0.9263
Epoch 280/500
129/129 - 1s - loss: 0.2753 - accuracy: 0.9293
Epoch 281/500
129/129 - 1s - loss: 0.2726 - accuracy: 0.9261
Epoch 282/500
129/129 - 1s - loss: 0.2763 - accuracy: 0.9285
Epoch 283/500
129/129 - 1s - loss: 0.2755 - accuracy: 0.9288
Epoch 284/500
129/129 - 1s - loss: 0.2875 - accuracy: 0.9263
Epoch 285/500
129/129 - 1s - loss: 0.2826 - accuracy: 0.9273
Epoch 286/500
129/129 - 1s - loss: 0.2740 - accuracy: 0.9266
Epoch 287/500
129/129 - 1s - loss: 0.2692 - accuracy: 0.9298
Epoch 288/500
129/129 - 

Epoch 407/500
129/129 - 1s - loss: 0.1569 - accuracy: 0.9346
Epoch 408/500
129/129 - 1s - loss: 0.1555 - accuracy: 0.9339
Epoch 409/500
129/129 - 1s - loss: 0.1538 - accuracy: 0.9361
Epoch 410/500
129/129 - 1s - loss: 0.1533 - accuracy: 0.9356
Epoch 411/500
129/129 - 1s - loss: 0.1531 - accuracy: 0.9351
Epoch 412/500
129/129 - 1s - loss: 0.1540 - accuracy: 0.9337
Epoch 413/500
129/129 - 1s - loss: 0.1517 - accuracy: 0.9373
Epoch 414/500
129/129 - 1s - loss: 0.1522 - accuracy: 0.9363
Epoch 415/500
129/129 - 1s - loss: 0.1511 - accuracy: 0.9337
Epoch 416/500
129/129 - 1s - loss: 0.1503 - accuracy: 0.9344
Epoch 417/500
129/129 - 1s - loss: 0.1505 - accuracy: 0.9346
Epoch 418/500
129/129 - 1s - loss: 0.1519 - accuracy: 0.9346
Epoch 419/500
129/129 - 1s - loss: 0.1504 - accuracy: 0.9349
Epoch 420/500
129/129 - 1s - loss: 0.1485 - accuracy: 0.9337
Epoch 421/500
129/129 - 1s - loss: 0.1485 - accuracy: 0.9359
Epoch 422/500
129/129 - 1s - loss: 0.1523 - accuracy: 0.9339
Epoch 423/500
129/129 - 

<tensorflow.python.keras.callbacks.History at 0x25697047cd0>

### Making Predictions with our model

In [173]:
# Generate a basket by repeatedly asking the model for the most likely next
# word and appending it to the seed text.
seed_ingredients = "tortillas"
num_words = 6

# Reverse lookup (id -> word), built once instead of scanning word_index on
# every generation step.
index_to_word = {index: word for word, index in tokenizer.word_index.items()}

for _ in range(num_words):
    # Encode the text so far and left-pad it to the input length the model was built with.
    token_list = tokenizer.texts_to_sequences([seed_ingredients])[0]
    token_list = pad_sequences([token_list], maxlen = max_len - 1, padding = "pre")

    # Sequential.predict_classes was removed in TensorFlow 2.6; taking the
    # argmax of the softmax output is the documented replacement. verbose = 0
    # keeps the per-step progress lines out of the cell output.
    probabilities = model.predict(token_list, verbose = 0)
    predicted = int(np.argmax(probabilities, axis = -1)[0])

    # Id 0 is padding and has no word; fall back to an empty string in that case.
    output_word = index_to_word.get(predicted, "")
    seed_ingredients += f" {output_word}"

print(seed_ingredients)

1/1 - 0s
1/1 - 0s
1/1 - 0s
1/1 - 0s
1/1 - 0s
1/1 - 0s
tortillas liver lemon apples purple asparagus korean
