In [55]:
import json
import pandas as pd
import numpy as np
import spacy
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Bidirectional, Dense, LSTM

In [56]:
# Load the labelled order phrases. JSON is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default.
with open("../data/foods_data.json", encoding="utf-8") as f:
    content = json.load(f)

# Flatten to [utterance, label] pairs, lower-casing both sides. Each item in
# the file contributes one pair per entry in its "inputs" list.
examples = [
    [phrase.lower(), item["class"].lower()]
    for item in content
    for phrase in item["inputs"]
]

examples

[['i would like margherita', 'margherita'],
 ['i would like one margherita', 'margherita'],
 ['i would like two margheritas', 'margherita'],
 ['margherita, please', 'margherita'],
 ['one margherita, please', 'margherita'],
 ['two margheritas, please', 'margherita'],
 ['margherita', 'margherita'],
 ['one margherita', 'margherita'],
 ['two margheritas', 'margherita'],
 ['please deliver me a margherita', 'margherita'],
 ['please deliver me two margheritas', 'margherita'],
 ['i want margherita to be delivered to me', 'margherita'],
 ['i want to order margherita', 'margherita'],
 ['i would like formaggio', 'formaggio'],
 ['i would like one formaggio', 'formaggio'],
 ['i would like two formaggios', 'formaggio'],
 ['formaggio, please', 'formaggio'],
 ['one formaggio, please', 'formaggio'],
 ['two formaggios, please', 'formaggio'],
 ['formaggio', 'formaggio'],
 ['one formaggio', 'formaggio'],
 ['two formaggios', 'formaggio'],
 ['please deliver me a formaggio', 'formaggio'],
 ['please deliver m

In [57]:
# Tabulate the [utterance, label] pairs: one row per example,
# with the text in "inputs" and its label in "class".
df = pd.DataFrame(examples, columns=["inputs", "class"])
df

Unnamed: 0,inputs,class
0,i would like margherita,margherita
1,i would like one margherita,margherita
2,i would like two margheritas,margherita
3,"margherita, please",margherita
4,"one margherita, please",margherita
...,...,...
112,two garlic breads,garlic bread
113,please deliver me a garlic bread,garlic bread
114,please deliver me two garlic breads,garlic bread
115,i want garlic bread to be delivered to me,garlic bread


In [58]:
# Load the spaCy pipeline "en_core_web_md"; spacy_parser reads its token vectors.
nlp = spacy.load("en_core_web_md")

In [59]:
def get_placeholder_vector(d):
    """Build the stand-in embedding used for tokens that have no vector.

    Args:
        d: Shape of the array to create (an int gives a 1-D array of length ``d``).

    Returns:
        A NumPy array of the requested shape with every entry equal to 1.0.
    """
    return np.full(d, 1.0)

In [60]:
def spacy_parser(nlp, text, placeholder_dim=300):
    """Turn ``text`` into a list of per-token embedding vectors.

    Args:
        nlp: Callable that takes a string and returns an iterable of tokens
            exposing ``has_vector`` and ``vector`` (e.g. a loaded spaCy
            pipeline).
        text: The string to tokenize and embed.
        placeholder_dim: Length of the placeholder vector substituted for
            tokens that have no vector. Defaults to 300, the value that used
            to be hard-coded here; pass a different width when the model's
            vectors are not 300-dimensional.

    Returns:
        A list with one vector per token, in token order. The list is empty
        when ``nlp(text)`` yields no tokens.
    """
    return [
        # Tokens without a vector get a constant placeholder instead.
        token.vector if token.has_vector else get_placeholder_vector(placeholder_dim)
        for token in nlp(text)
    ]

In [61]:
# Embed every utterance: each entry is the list of token vectors for one row
# of df["inputs"], in row order.
embedings = [spacy_parser(nlp, sentence) for sentence in df["inputs"]]

embedings

[[array([ 1.8733e-01,  4.0595e-01, -5.1174e-01, -5.5482e-01,  3.9716e-02,
          1.2887e-01,  4.5137e-01, -5.9149e-01,  1.5591e-01,  1.5137e+00,
         -8.7020e-01,  5.0672e-02,  1.5211e-01, -1.9183e-01,  1.1181e-01,
          1.2131e-01, -2.7212e-01,  1.6203e+00, -2.4884e-01,  1.4060e-01,
          3.3099e-01, -1.8061e-02,  1.5244e-01, -2.6943e-01, -2.7833e-01,
         -5.2123e-02, -4.8149e-01, -5.1839e-01,  8.6262e-02,  3.0818e-02,
         -2.1253e-01, -1.1378e-01, -2.2384e-01,  1.8262e-01, -3.4541e-01,
          8.2611e-02,  1.0024e-01, -7.9550e-02, -8.1721e-01,  6.5621e-03,
          8.0134e-02, -3.9976e-01, -6.3131e-02,  3.2260e-01, -3.1625e-02,
          4.3056e-01, -2.7270e-01, -7.6020e-02,  1.0293e-01, -8.8653e-02,
         -2.9087e-01, -4.7214e-02,  4.6036e-02, -1.7788e-02,  6.4990e-02,
          8.8451e-02, -3.1574e-01, -5.8522e-01,  2.2295e-01, -5.2785e-02,
         -5.5981e-01, -3.9580e-01, -7.9849e-02, -1.0933e-02, -4.1722e-02,
         -5.5576e-01,  8.8707e-02,  1.

In [62]:
# Number of token positions every example is padded up to.
max_length = 20
# Width of one token vector; same value as the default placeholder size in spacy_parser.
embeding_size = 300

In [63]:
def _pad_or_truncate(vectors, length, width):
    """Return a ``(length, width)`` array holding ``vectors`` followed by zero rows.

    Sequences longer than ``length`` are cut to their first ``length`` vectors,
    and shorter ones (including empty ones) are right-padded with zeros, so
    every result has the same shape and the examples can be stacked.
    """
    fixed = np.zeros((length, width))
    kept = vectors[:length]
    if len(kept):
        # Copy the real token vectors into the leading rows; the rest stay zero.
        fixed[: len(kept)] = kept
    return fixed


# One fixed-size (max_length, embeding_size) array per example.
padded_embedings = [
    _pad_or_truncate(embeding, max_length, embeding_size) for embeding in embedings
]

In [64]:
# Stack the per-example padded sequences into a single feature array.
X = np.array(padded_embedings)

In [65]:
# Distinct labels in order of first appearance (dict keys keep insertion order).
unique_classes = list(dict.fromkeys(df["class"]))

unique_classes

['margherita',
 'formaggio',
 'meat town',
 'lasagna',
 'ravioli',
 'spaghetti classica',
 'chicken soup',
 'bruschetta',
 'garlic bread']

In [66]:
# Label -> integer id, where the id is the label's position in unique_classes.
mapping_classes = {label: idx for idx, label in enumerate(unique_classes)}
mapping_classes

{'margherita': 0,
 'formaggio': 1,
 'meat town': 2,
 'lasagna': 3,
 'ravioli': 4,
 'spaghetti classica': 5,
 'chicken soup': 6,
 'bruschetta': 7,
 'garlic bread': 8}

In [67]:
# Translate each row's label to its integer id (raises KeyError on an unknown label).
int_classes = list(map(mapping_classes.__getitem__, df["class"]))
int_classes

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 5,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 6,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8,
 8]

In [68]:
# One-hot encode the integer class ids (one row per example) as the training target.
y = to_categorical(int_classes)
y

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [69]:
# Shape check on the feature array before training.
X.shape

(117, 20, 300)

In [70]:
# Shape check on the one-hot target array before training.
y.shape

(117, 9)

In [71]:
# Mini-batch size intended for training.
BATCH_SIZE = 8

In [73]:
# Sentence classifier: a bidirectional LSTM over the token vectors, a hidden
# dense layer, then a softmax over the classes. The output width is taken
# from the one-hot targets so it always matches the label encoding instead
# of a hard-coded class count.
model = Sequential(
    [
        Bidirectional(LSTM(64)),
        Dense(64, activation="relu"),
        Dense(y.shape[1], activation="softmax"),
    ]
)

In [74]:
# Multi-class setup: categorical cross-entropy loss, Adam optimizer, accuracy as the reported metric.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [76]:
# Keras takes the validation_split fraction from the END of the arrays, before
# any shuffling. The rows are grouped by class, so without a shuffle the last
# classes would be held out entirely and never trained on. Shuffle X and y
# with the same permutation first; the seed is fixed so the split is
# reproducible.
shuffled_idx = np.random.default_rng(42).permutation(len(X))

history = model.fit(
    X[shuffled_idx],
    y[shuffled_idx],
    batch_size=BATCH_SIZE,  # previously defined but never passed to fit
    epochs=100,
    validation_split=0.2,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78