In [362]:
import json
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D, SpatialDropout1D, Bidirectional
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import LSTM, GRU, Embedding, Dense
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [363]:
with open("../data/foods_data.json") as f:
    content = json.load(f)

examples = []
for item in content:
    for i in item["inputs"]:
        examples.append([i.lower(), item["class"].lower()])

examples

[['i would like margherita', 'margherita'],
 ['i would like one margherita', 'margherita'],
 ['i would like two margheritas', 'margherita'],
 ['margherita, please', 'margherita'],
 ['one margherita, please', 'margherita'],
 ['two margheritas, please', 'margherita'],
 ['margherita', 'margherita'],
 ['one margherita', 'margherita'],
 ['two margheritas', 'margherita'],
 ['please deliver me a margherita', 'margherita'],
 ['please deliver me two margheritas', 'margherita'],
 ['i want margherita to be delivered to me', 'margherita'],
 ['i want to order margherita', 'margherita'],
 ['i would like formaggio', 'formaggio'],
 ['i would like one formaggio', 'formaggio'],
 ['i would like two formaggios', 'formaggio'],
 ['formaggio, please', 'formaggio'],
 ['one formaggio, please', 'formaggio'],
 ['two formaggios, please', 'formaggio'],
 ['formaggio', 'formaggio'],
 ['one formaggio', 'formaggio'],
 ['two formaggios', 'formaggio'],
 ['please deliver me a formaggio', 'formaggio'],
 ['please deliver m

In [364]:
df = pd.DataFrame(examples, columns=["inputs", "class"])
df

Unnamed: 0,inputs,class
0,i would like margherita,margherita
1,i would like one margherita,margherita
2,i would like two margheritas,margherita
3,"margherita, please",margherita
4,"one margherita, please",margherita
...,...,...
112,two garlic breads,garlic bread
113,please deliver me a garlic bread,garlic bread
114,please deliver me two garlic breads,garlic bread
115,i want garlic bread to be delivered to me,garlic bread


In [365]:
unique_classes = []

for cls in df["class"]:
    if cls not in unique_classes:
        unique_classes.append(cls)

unique_classes

['margherita',
 'formaggio',
 'meat town',
 'lasagna',
 'ravioli',
 'spaghetti classica',
 'chicken soup',
 'bruschetta',
 'garlic bread']

In [366]:
mapping_classes = {unique_classes[x]: x for x in range(len(unique_classes))}
mapping_classes

{'margherita': 0,
 'formaggio': 1,
 'meat town': 2,
 'lasagna': 3,
 'ravioli': 4,
 'spaghetti classica': 5,
 'chicken soup': 6,
 'bruschetta': 7,
 'garlic bread': 8}

In [367]:
int_classes = [mapping_classes[x] for x in df["class"]]
int_classes

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8]

In [368]:
y = to_categorical(int_classes)
y

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [369]:
y.shape

(117, 9)

In [370]:
X = []
for _, row in df.iterrows():
    X.append([row["inputs"]])

X = np.array(X)

In [371]:
vectorizer = CountVectorizer()
vectorizer.fit(df["inputs"])
len(vectorizer.vocabulary_)

34

In [372]:
max_sequence_length = 32
max_nb_words = 34
embedding_dim = 32
dropout = 0.5
r_dropout = 0.5

In [373]:
tokenizer = Tokenizer(num_words=max_nb_words)
tokenizer.fit_on_texts(df["inputs"].values)
word_index = tokenizer.word_index
len(word_index)

36

In [374]:
X = pad_sequences(tokenizer.texts_to_sequences(df["inputs"].values), maxlen=max_sequence_length, padding="post", truncating="post")
X.shape

(117, 32)

In [375]:
input_layer = Input(shape=max_sequence_length)

layer = Embedding(max_nb_words, embedding_dim, input_length=max_sequence_length)(input_layer)
layer = Bidirectional(LSTM(64, dropout=dropout, recurrent_dropout=r_dropout))(layer)
out = Dense(9, activation="softmax")(layer)

model = Model(inputs=input_layer, outputs=out)

model.summary()

Model: "model_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_22 (InputLayer)       [(None, 32)]              0         
                                                                 
 embedding_19 (Embedding)    (None, 32, 32)            1088      
                                                                 
 bidirectional_1 (Bidirectio  (None, 128)              49664     
 nal)                                                            
                                                                 
 dense_30 (Dense)            (None, 9)                 1161      
                                                                 
Total params: 51,913
Trainable params: 51,913
Non-trainable params: 0
_________________________________________________________________


In [376]:
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])

In [377]:
epochs = 100
batch_size = 16

history = model.fit(X, y, epochs=epochs, batch_size=batch_size, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100


KeyboardInterrupt: 