# Word Embedding

## Short Food Reviews Analysis
### Features: `Reviews`
### Target: `Sentiment`

In [213]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import preprocessing
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding



## Custom Generated Reviews with Sentiments

In [194]:
# Each record pairs a short review with its label (1 = positive, 0 = negative),
# so a text and its sentiment cannot drift out of alignment.
labelled_reviews = [
    ('nice food', 1),
    ('amazing restaurant', 1),
    ('too good', 1),
    ('just loved it!', 1),
    ('will go again', 1),
    ('horrible food', 0),
    ('never go there', 0),
    ('poor service', 0),
    ('poor quality', 0),
    ('needs improvement', 0),
    ('excellent experience', 1),
    ('delicious dishes', 1),
    ('perfect ambiance', 1),
    ('highly recommended', 1),
    ('great place', 1),
    ('awful food quality', 0),
    ('avoid this place', 0),
    ('terrible service', 0),
    ('disappointing experience', 0),
    ('requires improvement', 0),
]

# Split the records back into the feature list and the target array.
reviews = [text for text, _ in labelled_reviews]
sentiment = np.array([label for _, label in labelled_reviews])

### Converting a given text into one-hot encoding

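In the call below, `30` is the size of the hashing space passed to `one_hot`: every word is hashed to an integer index between 1 and 29. It should be comfortably larger than the number of unique words in the corpus; otherwise different words can collide on the same index (as `highly` and `recommended` both do in the output below).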

In [195]:
# Hash each word of the phrase to an integer index, using a hashing space of size 30.
one_hot("highly recommended", n=30)

[3, 3]

In [196]:
# Size of the hashing space shared by every review.
vocab_size = 30
# Turn each review into a list of integer word indices.
encoded_reviews = list(map(lambda text: one_hot(text, vocab_size), reviews))
print(encoded_reviews)

[[20, 1], [18, 13], [19, 10], [29, 17, 20], [8, 5, 27], [16, 1], [8, 5, 5], [9, 24], [9, 8], [16, 12], [19, 1], [23, 26], [1, 28], [3, 3], [16, 8], [23, 1, 8], [8, 3, 8], [26, 24], [3, 1], [8, 12]]


### Adding padding so that all sentences have the same length

In [198]:
# Pad to the longest encoded review instead of a hard-coded 3, so that a longer
# review added later is not silently truncated by pad_sequences.
max_length = max(len(seq) for seq in encoded_reviews)
# padding='post' appends zeros after the word indices of shorter reviews.
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
print(padded_reviews)

[[20  1  0]
 [18 13  0]
 [19 10  0]
 [29 17 20]
 [ 8  5 27]
 [16  1  0]
 [ 8  5  5]
 [ 9 24  0]
 [ 9  8  0]
 [16 12  0]
 [19  1  0]
 [23 26  0]
 [ 1 28  0]
 [ 3  3  0]
 [16  8  0]
 [23  1  8]
 [ 8  3  8]
 [26 24  0]
 [ 3  1  0]
 [ 8 12  0]]


In [199]:
# Seed Python, NumPy and TensorFlow so the random weight initialisation is the
# same every time this cell is run.
tf.keras.utils.set_random_seed(42)

# Number of dimensions in each learned word vector.
embeded_vector_size = 5

model = Sequential()
# Learns one embeded_vector_size-dimensional vector per word index.
model.add(Embedding(vocab_size, embeded_vector_size, input_length=max_length, name="embedding"))
# Concatenate the per-word vectors of a review into a single flat vector.
model.add(Flatten())
# Single sigmoid unit for the binary sentiment prediction.
model.add(Dense(1, activation='sigmoid'))

In [200]:
# Features are the padded index sequences; targets are the sentiment labels.
X, y = padded_reviews, sentiment

In [201]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked as a metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the output.
model.summary()

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 5)              150       
                                                                 
 flatten_12 (Flatten)        (None, 15)                0         
                                                                 
 dense_12 (Dense)            (None, 1)                 16        
                                                                 
Total params: 166
Trainable params: 166
Non-trainable params: 0
_________________________________________________________________
None


In [202]:
# Train for 50 epochs on the padded reviews with progress output disabled.
model.fit(x=X, y=y, epochs=50, verbose=0)

<keras.callbacks.History at 0x7d4a4c146830>

## Measuring Loss & Accuracy

In [203]:
# Score the model on X and y; evaluate() returns the loss followed by the compiled metrics.
evaluation = model.evaluate(X, y)
loss, accuracy = evaluation
accuracy



0.949999988079071

## Finding all the weights of each word!

In [205]:
# Fetch the layer by the name it was given, then take its first weight array.
embedding_layer = model.get_layer('embedding')
weights = embedding_layer.get_weights()[0]
len(weights)

30

In [209]:
# Look the index up instead of hard-coding 26: one_hot's default hashing is not
# stable across Python sessions. The row is shared by every word that hashes to
# the same index.
weights[one_hot("terrible", vocab_size)[0]]  # embedding row for `terrible`

array([-0.00196256, -0.10150034, -0.06435358, -0.07027328,  0.02802897],
      dtype=float32)

In [210]:
# Look the index up instead of hard-coding 23: one_hot's default hashing is not
# stable across Python sessions. The row is shared by every word that hashes to
# the same index.
weights[one_hot("awful", vocab_size)[0]]  # embedding row for `awful`

array([-0.00018347,  0.01041231, -0.00303777,  0.00879696, -0.01719472],
      dtype=float32)

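Both words are similar in meaning, so we would hope their vectors end up close together. Here, however, the two arrays shown above are not close (their cosine similarity is negative). The model was trained on only 20 short reviews, and because of hash collisions each index is shared by more than one word: index 26 also encodes `dishes`, and index 23 also encodes `delicious`. A larger corpus and a collision-free vocabulary are needed before the embeddings can reflect word meaning.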

# **Accuracy**: 0.95 (95%), measured on the same 20 reviews used for training