## How to Use Word Embedding Layers for Deep Learning with Keras
https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/

### Imports

In [23]:
from numpy import array
import keras
from keras.layers.embeddings import Embedding
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Flatten, Dense

### Data

#### Input

In [9]:
# Ten short review snippets: the first five are labelled 1, the last five 0.
docs = [
    'Well done!', 'Good work', 'Great effort', 'nice work', 'Excellent!',
    'Weak', 'Poor effort!', 'not good', 'poor work', 'Could have done better.',
]
# One binary class label per document, in the same order as docs.
labels = array([1] * 5 + [0] * 5)

#### Preprocessing

##### encoding

In [16]:
# Size of the hashing space: every word is mapped to an integer id below this bound.
vocab_size = 50
# Turn each document into a list of integer word ids (one id per word).
encoded_docs = [one_hot(text, vocab_size) for text in docs]
print(encoded_docs)

[[17, 9], [34, 36], [38, 20], [1, 36], [40], [5], [10, 20], [15, 34], [10, 36], [19, 10, 9, 49]]


##### padding

In [19]:
# Common sequence length fed to the model.
max_length = 4
# Append zeros after the word ids ('post') so every row has max_length entries.
padded_docs = pad_sequences(sequences=encoded_docs, padding='post', maxlen=max_length)
print(padded_docs)

[[17  9  0  0]
 [34 36  0  0]
 [38 20  0  0]
 [ 1 36  0  0]
 [40  0  0  0]
 [ 5  0  0  0]
 [10 20  0  0]
 [15 34  0  0]
 [10 36  0  0]
 [19 10  9 49]]


### Model

#### Architecture

In [47]:
# Small binary classifier: each padded document (max_length word ids) is looked up
# in an 8-dimensional embedding table, flattened into one vector, and passed to a
# single sigmoid unit.
model = Sequential()
model.add(Embedding(vocab_size, 8, input_length=max_length))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# summary() prints the layer table itself and returns None, so call it directly;
# wrapping it in print() appends a stray "None" line to the cell output.
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_5 (Embedding)      (None, 4, 8)              400       
_________________________________________________________________
flatten_4 (Flatten)          (None, 32)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None


#### Fit and Evaluate

In [31]:
# Train for 50 silent epochs, then score the model on the very same documents
# (no held-out split is used here, so this is accuracy on the training data).
model.fit(x=padded_docs, y=labels, epochs=50, verbose=0)
loss, accuracy = model.evaluate(x=padded_docs, y=labels, verbose=0)
accuracy_pct = accuracy * 100
print('Accuracy: %f' % accuracy_pct)

Accuracy: 100.000000


#### Understand the embeddings learnt

In [40]:
# Shape of the embedding table held by the first layer:
# one row per vocabulary id, one column per embedding dimension.
model.layers[0].weights[0].shape

TensorShape([50, 8])

##### Notice that the embeddings of 'Good' and 'nice' are similar, while 'poor' differs from them on nearly every dimension. The one exception is the second-to-last dimension, where 'Good' and 'poor' are close to each other, possibly because both are used before 'work'.

In [45]:
# Print the learnt embedding rows for 'Good', 'nice' and 'poor'.
#
# The row index of a word is whatever one_hot() hashed it to when the documents
# were encoded. one_hot() relies on Python's built-in hash(), which is salted per
# interpreter session for strings, so hard-coded ids (34, 1, 10 in one recorded
# run) stop matching the words after a kernel restart. Recomputing the id here,
# in the same session and with the same vocab_size, always selects the right row.
# Hashing can map different words to the same id, in which case they share a row.
embedding_matrix = model.layers[0].weights[0]
for word in ('Good', 'nice', 'poor'):
    # one_hot() returns one id per word, so a single word yields a one-element list.
    word_index = one_hot(word, vocab_size)[0]
    print(embedding_matrix[word_index])  # embedding row for `word`

tf.Tensor(
[-0.14568387  0.05705408  0.13110237  0.06979936 -0.09565859  0.07789612
 -0.1058446  -0.13426901], shape=(8,), dtype=float32)
tf.Tensor(
[-0.12367412  0.09053475  0.09312475  0.08979663 -0.13149685  0.05054885
  0.11056579 -0.1092499 ], shape=(8,), dtype=float32)
tf.Tensor(
[ 0.10279294 -0.07896227 -0.14648521 -0.0813786   0.10840301 -0.06913472
 -0.08796571  0.15809298], shape=(8,), dtype=float32)
