## Simple Customer Sentiment Analysis (NLP)
<b>using TensorFlow and Keras' Bag of Words (BoW)</b>

In [1]:
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Toy training corpus: every review is kept next to its sentiment label
# (1 = positive, 0 = negative) so the two cannot drift out of sync.
labelled_reviews = [
    ('Great service, the shipment is fast and the quality is good, thanks.', 1),
    ('There is a damage to the goods, i have to return the shipment', 0),
    ('The goods is exactly as ordered, fast respond seller, recommended', 1),
    ('Excellent, thank you', 1),
    ('Packaging broken, unfriendly seller, not recommended', 0),
]

# Split the pairs into the two parallel lists used by the rest of the notebook.
sentences = [text for text, _ in labelled_reviews]
labels = [sentiment for _, sentiment in labelled_reviews]

### Keras Bag of Words (BoW) Tokenizer and Word Sequences

In [3]:
# Vocabulary size limit and the placeholder token used for words that were
# not seen when the tokenizer was fitted.
VOCAB_SIZE = 1000
OOV_TOKEN = "<UNK>"
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token=OOV_TOKEN)

In [4]:
# Build the word -> integer index vocabulary from the training sentences.
tokenizer.fit_on_texts(sentences)

In [5]:
# Inspect the learned vocabulary; index 1 is taken by the "<UNK>" token.
word_index = tokenizer.word_index
word_index

{'<UNK>': 1,
 'the': 2,
 'is': 3,
 'shipment': 4,
 'fast': 5,
 'to': 6,
 'goods': 7,
 'seller': 8,
 'recommended': 9,
 'great': 10,
 'service': 11,
 'and': 12,
 'quality': 13,
 'good': 14,
 'thanks': 15,
 'there': 16,
 'a': 17,
 'damage': 18,
 'i': 19,
 'have': 20,
 'return': 21,
 'exactly': 22,
 'as': 23,
 'ordered': 24,
 'respond': 25,
 'excellent': 26,
 'thank': 27,
 'you': 28,
 'packaging': 29,
 'broken': 30,
 'unfriendly': 31,
 'not': 32}

In [6]:
# Replace every word of each training sentence with its vocabulary index.
sequences = tokenizer.texts_to_sequences(sentences)
sequences

[[10, 11, 2, 4, 3, 5, 12, 2, 13, 3, 14, 15],
 [16, 3, 17, 18, 6, 2, 7, 19, 20, 6, 21, 2, 4],
 [2, 7, 3, 22, 23, 24, 5, 25, 8, 9],
 [26, 27, 28],
 [29, 30, 31, 8, 32, 9]]

In [7]:
# Zero-pad at the end so every row is as long as the longest training sentence.
padded = pad_sequences(
    sequences,
    truncating='post',
    padding='post',
)
padded

array([[10, 11,  2,  4,  3,  5, 12,  2, 13,  3, 14, 15,  0],
       [16,  3, 17, 18,  6,  2,  7, 19, 20,  6, 21,  2,  4],
       [ 2,  7,  3, 22, 23, 24,  5, 25,  8,  9,  0,  0,  0],
       [26, 27, 28,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [29, 30, 31,  8, 32,  9,  0,  0,  0,  0,  0,  0,  0]], dtype=int32)

In [8]:
# Number of columns in the padded matrix; test inputs are padded to this
# same width before prediction.
pad_length = padded.shape[1]
pad_length

13

In [9]:
# Convert the label list to a NumPy array so it can be reshaped into a
# column for one-hot encoding.
labels = np.array(labels)
labels

array([1, 0, 1, 1, 0])

In [10]:
# The padded token-id matrix is used directly as the model's input features.
x = padded

### One-hot Encoding

In [11]:
# Request a dense (non-sparse) array from the encoder.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword, so try the new name first and fall back for older installations.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# The encoder expects a 2-D input, hence the reshape to a single column.
y = enc.fit_transform(labels.reshape(-1, 1))

In [12]:
# One-hot targets: column 0 corresponds to label 0, column 1 to label 1.
y

array([[0., 1.],
       [1., 0.],
       [0., 1.],
       [0., 1.],
       [1., 0.]])

### TensorFlow Dense layers (Deep Neural Network layers)

In [13]:
# Seed TensorFlow's global random generator so that weight initialisation is
# reproducible when the notebook is restarted and run from the top.
tf.random.set_seed(42)

# Small fully connected classifier: three ReLU hidden layers followed by a
# 2-unit softmax that outputs one probability per sentiment class.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax')
])

In [14]:
# Categorical cross-entropy pairs with the one-hot targets and the softmax
# output layer; accuracy is reported during training.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [15]:
# Train on the whole (tiny) training set for a fixed number of passes.
EPOCHS = 120
model.fit(x, y, epochs=EPOCHS)

Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/120
Epoch 75/120
Epoch 76/120
Epoch 77/120
Epoch 78

<keras.callbacks.History at 0xffff4822eca0>

### Test data

In [16]:
# Two unseen reviews for a quick sanity check: a complaint and a compliment.
complaint = 'Bad quality, i must return the goods, not recommended'
compliment = 'Thanks, good quality product'
test_data = [complaint, compliment]

In [17]:
# Encode the test reviews with the vocabulary fitted on the training data;
# words that are not in that vocabulary come out as index 1 ("<UNK>").
test_seq = tokenizer.texts_to_sequences(test_data)
test_seq

[[1, 13, 19, 1, 21, 2, 7, 32, 9], [15, 14, 13, 1]]

In [18]:
# Pad (or cut) at the end to the training width so the input shape matches
# what the model was fitted on.
test_pad = pad_sequences(
    test_seq,
    maxlen=pad_length,
    truncating='post',
    padding='post',
)
test_pad

array([[ 1, 13, 19,  1, 21,  2,  7, 32,  9,  0,  0,  0,  0],
       [15, 14, 13,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0]], dtype=int32)

In [19]:
# One row per test review; the two columns are the softmax probabilities for
# the negative (column 0) and positive (column 1) class.
result = model.predict(test_pad)
result

array([[0.8678486 , 0.13215142],
       [0.02217277, 0.97782725]], dtype=float32)

In [20]:
# Convert probabilities to a one-hot prediction by picking the most likely
# class per row. Unlike rounding, argmax always selects exactly one class:
# rounding yields an all-zero row whenever no probability exceeds 0.5.
np.eye(result.shape[1], dtype=result.dtype)[result.argmax(axis=1)]

array([[1., 0.],
       [0., 1.]], dtype=float32)

Label:<br>
[1, 0] = Negative<br>
[0, 1] = Positive<br>