In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from keras.models import model_from_json
from tensorflow.keras.models import load_model

Using TensorFlow backend.


In [2]:
data = pd.read_json("train.json")
data.head()

Unnamed: 0,cuisine,id,ingredients
0,greek,10259,"[romaine lettuce, black olives, grape tomatoes..."
1,southern_us,25693,"[plain flour, ground pepper, salt, tomatoes, g..."
2,filipino,20130,"[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,indian,22213,"[water, vegetable oil, wheat, salt]"
4,indian,13162,"[black pepper, shallots, cornflour, cayenne pe..."


In [3]:
# Create list of unique cuisine types
cuisine_list = data['cuisine']
cuisine_compilation = []
for cuisine in cuisine_list:
    cuisine_compilation.append(cuisine)
      
cuis_unique = list(set(cuisine_compilation))
cuis_unique

['vietnamese',
 'greek',
 'thai',
 'southern_us',
 'korean',
 'spanish',
 'british',
 'irish',
 'moroccan',
 'cajun_creole',
 'mexican',
 'filipino',
 'japanese',
 'brazilian',
 'french',
 'russian',
 'jamaican',
 'indian',
 'chinese',
 'italian']

In [4]:
# Onehotencoding for cuisine types
label_encoder_cuis = LabelEncoder()
label_encoder_cuis.fit(cuis_unique)
encoded_cuis = label_encoder_cuis.transform(data['cuisine'])
one_hot_cuis = to_categorical(encoded_cuis)

In [5]:
# Check first encoded item
one_hot_cuis[0]

array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.], dtype=float32)

In [6]:
# Add one hot cuisine to dataframe
data["one_hot_cuisine"] = list(one_hot_cuis)

In [7]:
data.head()

Unnamed: 0,cuisine,id,ingredients,one_hot_cuisine
0,greek,10259,"[romaine lettuce, black olives, grape tomatoes...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ..."
1,southern_us,25693,"[plain flour, ground pepper, salt, tomatoes, g...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,filipino,20130,"[eggs, pepper, salt, mayonaise, cooking oil, g...","[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ..."
3,indian,22213,"[water, vegetable oil, wheat, salt]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."
4,indian,13162,"[black pepper, shallots, cornflour, cayenne pe...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."


In [8]:
# Organize data frame
data = data[["cuisine", "one_hot_cuisine", "ingredients"]]
data.head()

Unnamed: 0,cuisine,one_hot_cuisine,ingredients
0,greek,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[romaine lettuce, black olives, grape tomatoes..."
1,southern_us,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[plain flour, ground pepper, salt, tomatoes, g..."
2,filipino,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...","[eggs, pepper, salt, mayonaise, cooking oil, g..."
3,indian,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[water, vegetable oil, wheat, salt]"
4,indian,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[black pepper, shallots, cornflour, cayenne pe..."


In [9]:
# Create one hot encoding for ingredients that are in lists!
#     -Create dictionary
ingredients = data.loc[:,'ingredients']

i_map = {}
i_list = []
counter = 0
for lists in ingredients:
    for items in lists:
        if items not in i_map:
            i_list.append(items)
            i_map[items] = counter
            counter = counter + 1

In [10]:
dict(list(i_map.items())[0:5])

{'romaine lettuce': 0,
 'black olives': 1,
 'grape tomatoes': 2,
 'garlic': 3,
 'pepper': 4}

In [11]:
# Create one hot encoding for ingredients that are in lists!
#     -Create encoded data
ingredients_encodings = []
for lists in ingredients:
    encoding = [0]*len(i_map)
    for items in lists:
        encoding[i_map[items]] = 1
    ingredients_encodings.append(encoding)

In [12]:
ingredients_encodings[0]

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [13]:
# Add encoded ingredients to data frame
data["one_hot_ingredients"] = ingredients_encodings

In [14]:
data.head()

Unnamed: 0,cuisine,one_hot_cuisine,ingredients,one_hot_ingredients
0,greek,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...","[romaine lettuce, black olives, grape tomatoes...","[1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, ..."
1,southern_us,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[plain flour, ground pepper, salt, tomatoes, g...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, ..."
2,filipino,"[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...","[eggs, pepper, salt, mayonaise, cooking oil, g...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."
3,indian,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[water, vegetable oil, wheat, salt]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."
4,indian,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...","[black pepper, shallots, cornflour, cayenne pe...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."


In [15]:
# Create variables for train test split
one_hot_cuisine = data['one_hot_cuisine']
one_hot_ingredients = data['one_hot_ingredients']

In [16]:
# Split data
ing_train, ing_test,cuis_train, cuis_test = train_test_split(one_hot_ingredients, one_hot_cuisine, random_state=1)

In [17]:
ing_train.head()

15470    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
24599    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...
4712     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...
8761     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
22503    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, ...
Name: one_hot_ingredients, dtype: object

In [18]:
# Convert object into numpy array 
cuis_train = np.array(cuis_train.tolist())
cuis_test = np.array(cuis_test.tolist())
ing_train = np.array(ing_train.tolist())
ing_test = np.array(ing_test.tolist())

In [19]:
ing_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [20]:
# Create model structure
deep_model = Sequential()
deep_model.add(Dense(units=20, activation='relu', input_dim=6714))
deep_model.add(Dense(units=15, activation='relu'))
deep_model.add(Dense(units=10, activation='relu'))
deep_model.add(Dense(units=20, activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.


In [21]:
deep_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 20)                134300    
_________________________________________________________________
dense_1 (Dense)              (None, 15)                315       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                160       
_________________________________________________________________
dense_3 (Dense)              (None, 20)                220       
Total params: 134,995
Trainable params: 134,995
Non-trainable params: 0
_________________________________________________________________


In [22]:
# Compile and fit model
deep_model.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

deep_model.fit(
    ing_train,
    cuis_train,
    epochs=10,
    shuffle=True,
    verbose=2
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
 - 3s - loss: 1.5803 - acc: 0.5407
Epoch 2/10
 - 4s - loss: 0.8938 - acc: 0.7419
Epoch 3/10
 - 4s - loss: 0.7208 - acc: 0.7913
Epoch 4/10
 - 3s - loss: 0.6179 - acc: 0.8226
Epoch 5/10
 - 4s - loss: 0.5455 - acc: 0.8420
Epoch 6/10
 - 3s - loss: 0.4885 - acc: 0.8573
Epoch 7/10
 - 3s - loss: 0.4411 - acc: 0.8717
Epoch 8/10
 - 4s - loss: 0.4038 - acc: 0.8812
Epoch 9/10
 - 3s - loss: 0.3695 - acc: 0.8905
Epoch 10/10
 - 4s - loss: 0.3408 - acc: 0.8988


<tensorflow.python.keras.callbacks.History at 0x29da7c39438>

In [23]:
# Test the model.  Get loss and accuracy.
deep_model_loss, deep_model_accuracy = deep_model.evaluate(
    ing_test, cuis_test, verbose=2)
print(
    f"Deep Neural Network - Loss: {deep_model_loss}, Accuracy: {deep_model_accuracy}")

 - 1s - loss: 1.1164 - acc: 0.7347
Deep Neural Network - Loss: 1.1164045951487265, Accuracy: 0.7347143888473511


In [24]:
# Save the model weights
deep_model.save("cuisine_deep_model_trained.h5")

In [25]:
# Save the model structure
with open('deep_model_architecture.json', 'w') as f:
    f.write(deep_model.to_json())