<a href="https://colab.research.google.com/github/chakra-ai/MachineLearning/blob/master/Understand%20Embedding%20Layer%20Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## **Simple Embedding Layer**

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
import numpy as np

model = Sequential()
embedding_layer = Embedding(input_dim = 10, output_dim = 4, input_length = 2)
model.add(embedding_layer)
model.compile('adam', 'mse')

In [5]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 2, 4)              40        
Total params: 40
Trainable params: 40
Non-trainable params: 0
_________________________________________________________________


In [6]:
input_data = np.array([[1, 2]])

pred = model.predict(input_data)

print(input_data.shape)
print(pred)

(1, 2)
[[[ 0.00907419  0.02009683 -0.00283406  0.04077572]
  [ 0.01126075  0.02615981 -0.02553423  0.03626943]]]


In [7]:
embedding_layer.get_weights()

[array([[-0.01761629,  0.03047058, -0.02592208,  0.01876097],
        [ 0.00907419,  0.02009683, -0.00283406,  0.04077572],
        [ 0.01126075,  0.02615981, -0.02553423,  0.03626943],
        [ 0.01837964,  0.00644028, -0.02231342, -0.01756768],
        [-0.0239487 ,  0.02368267,  0.045244  ,  0.01817617],
        [-0.02997978,  0.01632948,  0.01051276, -0.03198048],
        [-0.0395137 ,  0.04707033,  0.03362216,  0.04021219],
        [ 0.03073083,  0.04744485,  0.02653474, -0.00671577],
        [-0.04955727, -0.00577788,  0.01709953, -0.02614887],
        [-0.01150929, -0.02221642, -0.00182124,  0.01525897]],
       dtype=float32)]

# **Transferring an Embedding**

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
import numpy as np

embedding_lookup = np.array([
                             [1, 0, 0],
                             [0, 1, 0],
                             [0, 0, 1]
])

In [9]:
model = Sequential()
embedding_layer = Embedding(input_dim=3, output_dim=3, input_length=2)
model.add(embedding_layer)
model.compile('adam', 'mse')

In [10]:
embedding_layer.set_weights([embedding_lookup])

In [11]:
input_data = np.array([[1, 2]])

pred = model.predict(input_data)

print(input_data.shape)
print(pred)

(1, 2)
[[[0. 1. 0.]
  [0. 0. 1.]]]


# **Training an Embedding**

In [12]:
from numpy import array
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Flatten

In [13]:
# Define 10 resturant reviews.
reviews = [
    'Never coming back!',
    'Horrible service',
    'Rude waitress',
    'Cold food.',
    'Horrible food!',
    'Awesome',
    'Awesome service!',
    'Rocks!',
    'poor work',
    'Couldn\'t have done better']

# Define labels (1=negative, 0=positive)
labels = array([1,1,1,1,1,0,0,0,0,0])

In [15]:
VOCAB_SIZE = 50
encoded_reviews = [one_hot(d, VOCAB_SIZE) for d in reviews]
print(f"Encoded reviews: {encoded_reviews}")

Encoded reviews: [[35, 47, 11], [36, 30], [4, 34], [5, 36], [36, 36], [17], [17, 30], [21], [28, 18], [19, 32, 22, 34]]


In [16]:
MAX_LENGTH = 4
padded_reviews = pad_sequences(encoded_reviews, maxlen=MAX_LENGTH, padding='post')
print(padded_reviews)

[[35 47 11  0]
 [36 30  0  0]
 [ 4 34  0  0]
 [ 5 36  0  0]
 [36 36  0  0]
 [17  0  0  0]
 [17 30  0  0]
 [21  0  0  0]
 [28 18  0  0]
 [19 32 22 34]]


In [17]:
model = Sequential()
embedding_layer = Embedding(input_dim=VOCAB_SIZE, output_dim=8, input_length=MAX_LENGTH)
model.add(embedding_layer)
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

print(model.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 4, 8)              400       
_________________________________________________________________
flatten (Flatten)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None


In [18]:
model.fit(padded_reviews, labels, epochs=100, verbose=0)

<tensorflow.python.keras.callbacks.History at 0x7f242c2f77f0>

In [19]:
print(embedding_layer.get_weights()[0].shape)
print(embedding_layer.get_weights())

(50, 8)
[array([[ 0.08055642, -0.08969885,  0.12991388,  0.12073484, -0.07991857,
         0.12555344,  0.12746507,  0.12557457],
       [ 0.01173251, -0.03532294, -0.0044133 , -0.01035376,  0.01908964,
        -0.04579556,  0.03454443, -0.03335215],
       [-0.04887019,  0.03893859,  0.00921243, -0.03362089,  0.02507696,
         0.01427547,  0.03867687,  0.01796378],
       [ 0.04673282,  0.03841129,  0.0160907 ,  0.02111221, -0.01635114,
        -0.04915524, -0.02580346, -0.03258663],
       [ 0.07879266, -0.06277201,  0.14431323, -0.12032352,  0.10022847,
         0.08992141, -0.1454556 ,  0.0620799 ],
       [ 0.05611022, -0.06344204,  0.13829409, -0.1447166 ,  0.11079322,
         0.11575363, -0.14008811,  0.14476247],
       [-0.00415616, -0.00128738,  0.01456714,  0.00552426, -0.02739756,
        -0.02256349, -0.00090381,  0.00555589],
       [-0.03506817, -0.02954885, -0.0222927 ,  0.03952599, -0.00521783,
         0.02906838,  0.01793586,  0.04220862],
       [-0.00355884, -0

In [20]:
loss, accuracy = model.evaluate(padded_reviews, labels, verbose=0)
print(f'Accuracy : {accuracy}')

Accuracy : 1.0
