In [35]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding

In [36]:
## Customer reviews paired with their class label
## (1 = positive sentiment, 0 = negative sentiment).
## Keeping text and label side by side guarantees they stay aligned.
labelled_reviews = [
    ('nice food', 1),
    ('amazing restaurant', 1),
    ('too good', 1),
    ('will go again', 1),
    ('horrible food', 0),
    ('never go there', 0),
    ('poor service', 0),
    ('poor quality', 0),
    ('needs improvement', 0),
]

## the review texts, in the order listed above
reviews = [text for text, _ in labelled_reviews]

## the matching class labels as a numpy array
sentiment = np.array([label for _, label in labelled_reviews])

In [37]:
### Despite its name, Keras' one_hot does not build a one-hot vector:
### it maps each word of the text to an integer index below the given
### vocabulary size (30 here) by hashing the word.
### The indices are NOT guaranteed to be unique -- two different words can
### land on the same index, especially when the vocabulary size is small.

one_hot('amazing restaurant', 30)

[9, 27]

In [38]:
### Encode every review as a list of word indices.
### one_hot returns indices between 1 and vocab_size - 1; index 0 is never
### assigned to a word, which leaves it free to be used for padding later.
vocab_size = 50

## one encoded list per review, in the same order as `reviews`
encoded_reviews = [one_hot(review, vocab_size) for review in reviews]
encoded_reviews

[[39, 23],
 [12, 44],
 [27, 25],
 [31, 47, 22],
 [48, 23],
 [2, 47, 31],
 [2, 41],
 [2, 36],
 [30, 39]]

In [39]:
### The model needs every input sequence to have the same length,
### but some reviews are 3 words long and some are only 2.
### We therefore pad the shorter ones with 0 up to a common length.

## Derive the common length from the data instead of hard-coding it:
## pad_sequences silently truncates anything longer than maxlen, so a fixed
## value would quietly drop words if a longer review were added later.
max_length = max(len(encoded) for encoded in encoded_reviews)

## pad_sequences (tensorflow keras) takes all the encoded reviews at once;
## padding='post' appends the zeros at the end of each review
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
print(padded_reviews)

[[39 23  0]
 [12 44  0]
 [27 25  0]
 [31 47 22]
 [48 23  0]
 [ 2 47 31]
 [ 2 41  0]
 [ 2 36  0]
 [30 39  0]]


In [40]:
## Size of the dense vector learned for each word index
embedded_vector_size = 4

## Build the network in a single expression, layer by layer:
##   1. Embedding - maps each of the max_length word indices to a vector of
##      embedded_vector_size values; it is named so it can be fetched later
##   2. Flatten   - concatenates the per-word vectors into one flat vector
##   3. Dense     - a single sigmoid neuron producing the class probability
model = Sequential([
    Embedding(vocab_size, embedded_vector_size, input_length=max_length, name='embedding'),
    Flatten(),
    Dense(1, activation='sigmoid'),
])


In [41]:
## training samples (padded word indices) and their class labels
X, y = padded_reviews, sentiment

In [42]:
### The target is binary (a review is either positive = 1 or negative = 0),
### so we train with binary cross-entropy, using the adam optimizer,
### and track accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [43]:
## show the layer-by-layer overview of the model: output shapes and parameter counts
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              200       
                                                                 
 flatten_2 (Flatten)         (None, 12)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 13        
                                                                 
Total params: 213
Trainable params: 213
Non-trainable params: 0
_________________________________________________________________


In [44]:
## train for 10 epochs; verbose=0 suppresses the per-epoch progress output
model.fit(x=X, y=y, epochs=10, verbose=0)

<keras.callbacks.History at 0x241090b96c0>

In [45]:
## loss and accuracy, measured on the same samples the model was trained on
## (so this is training accuracy, not a held-out estimate)
loss, accuracy = model.evaluate(x=X, y=y)



In [46]:
### The point of this example is the embedding itself, not the classifier.
### Because the embedding layer was given a name, we can fetch it by that name
### and read out its learned weight matrix (the first entry of get_weights()).

embedding_layer = model.get_layer('embedding')
weights = embedding_layer.get_weights()[0]
weights

array([[-0.04412745, -0.02495219, -0.0116336 , -0.02701507],
       [ 0.0139653 , -0.04981779, -0.00911032,  0.01580619],
       [-0.04613188,  0.02010217,  0.01894413, -0.01934381],
       [-0.0298726 , -0.0473005 ,  0.01380773, -0.00454892],
       [ 0.04266732, -0.03342768, -0.03747449, -0.01624268],
       [-0.01013995, -0.04050047, -0.04836781, -0.00160445],
       [ 0.00233431, -0.04162521, -0.00708358,  0.02677996],
       [-0.01186272,  0.00588728,  0.04224113, -0.01820941],
       [ 0.03772887,  0.04073342,  0.01848713,  0.01465025],
       [-0.01980211, -0.00809649,  0.03347385, -0.02775606],
       [ 0.00342171, -0.00703942, -0.0247803 ,  0.03139445],
       [-0.00323264, -0.03301443, -0.02449607,  0.03898866],
       [-0.0395863 ,  0.05332593,  0.01765579,  0.05746831],
       [ 0.01400489, -0.02096782, -0.04555124, -0.02888478],
       [ 0.03090746,  0.02640349,  0.00206935,  0.04896965],
       [ 0.02403334, -0.04613544, -0.01931006,  0.02950878],
       [-0.02046987,  0.

In [47]:
## number of rows in the embedding matrix: one row per possible word index,
## so this equals vocab_size
len(weights) 

50

In [48]:
# Embedding vector learned for the word 'nice'.
# The row index is looked up by re-encoding the word rather than hard-coded:
# one_hot relies on Python's built-in hash(), which is salted per process for
# strings, so the index assigned to 'nice' changes whenever the kernel restarts.
nice_index = one_hot('nice', vocab_size)[0]
weights[nice_index]

array([-0.01202961, -0.0385941 , -0.03658636, -0.00578469], dtype=float32)

In [49]:
# Embedding vector learned for the word 'amazing'.
# The row index is looked up by re-encoding the word rather than hard-coded:
# one_hot relies on Python's built-in hash(), which is salted per process for
# strings, so the index assigned to 'amazing' changes whenever the kernel restarts.
amazing_index = one_hot('amazing', vocab_size)[0]
weights[amazing_index]

array([-0.0395863 ,  0.05332593,  0.01765579,  0.05746831], dtype=float32)

In [None]:
# You might expect these two vectors to be similar, because the words have similar meanings.
# They are not, because our dataset was very small.
# If you train on a much larger dataset, the vectors of similar words may well end up close together,
# and you can then measure that by computing the cosine similarity of the two vectors.
# This gives you an idea of how the Keras Embedding layer works.
# You can also save the learned embeddings to a file and load the saved embeddings later on.