In [1]:
from numpy import array 
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten,Embedding,Dense

In [2]:
reviews = [
    'আমি আর আসছি না এখানে!',
    'একদম বাজে সার্ভিস',
    'কথা শোনে না ওয়েটার',
    'একদম ঠান্ডা খাবার',
    'বাজে খাবার!',
    'অসাধারণ',
    'অসাধারণ সার্ভিস!',
    'খুব ভালো!',
    'মোটামুটি',
    'এর থেকে ভালো হয়না']

In [3]:
type(reviews)

list

In [4]:
labels = array([1,1,1,1,1,0,0,0,0,0])

In [5]:
print(reviews[0])

আমি আর আসছি না এখানে!


In [6]:
labels[0]

1

In [7]:
VOCAB_SIZE = 50 # we just assume that size
encoded_reviews = [one_hot(d, VOCAB_SIZE) for d in reviews]
print(f"Encode reviews: {encoded_reviews}")


Encode reviews: [[31, 13, 37, 44, 45], [3, 48, 42], [34, 33, 44, 2], [3, 13, 2], [48, 2], [17], [17, 42], [10, 42], [31], [45, 2, 42, 24]]


In [8]:
MAX_LENGTH = 4 
padded_reviews = pad_sequences(encoded_reviews, maxlen=MAX_LENGTH, padding='post')

In [9]:
print(padded_reviews)

[[13 37 44 45]
 [ 3 48 42  0]
 [34 33 44  2]
 [ 3 13  2  0]
 [48  2  0  0]
 [17  0  0  0]
 [17 42  0  0]
 [10 42  0  0]
 [31  0  0  0]
 [45  2 42 24]]


In [12]:
model = Sequential()
embedding_layer = Embedding(VOCAB_SIZE, 8, input_length = MAX_LENGTH)
model.add(embedding_layer)
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])


In [13]:
print(model.summary())

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 4, 8)              400       
_________________________________________________________________
flatten (Flatten)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 433
Trainable params: 433
Non-trainable params: 0
_________________________________________________________________
None


In [14]:
model.fit(padded_reviews, labels, epochs = 100, verbose = 0)

<tensorflow.python.keras.callbacks.History at 0x7fc0335c0b00>

In [20]:
print(embedding_layer.get_weights()[0].shape)


(50, 8)


In [25]:
print(embedding_layer.get_weights()[0])

[[ 0.1638433  -0.07291833  0.08543549 -0.11453667 -0.13297585 -0.05911572
  -0.03998092  0.0815617 ]
 [ 0.04008337 -0.00471894 -0.01502096  0.04812229  0.02867771  0.02212423
  -0.02320287  0.03574821]
 [-0.12936173  0.13574202 -0.07317565  0.12575494  0.08981211  0.07197513
   0.13341413 -0.05662116]
 [-0.12007947  0.0947543  -0.08118816  0.14328049  0.05552011 -0.11480528
   0.13393497  0.09594744]
 [ 0.03104192  0.0082926  -0.00487846  0.03819532  0.01177236  0.00496268
   0.04640045  0.02416046]
 [ 0.00301319 -0.02278047 -0.02698796 -0.04764272 -0.03087274  0.03257908
   0.01048158 -0.01137283]
 [ 0.03705336  0.00387283 -0.00156573  0.04752927 -0.04061283  0.02613558
   0.01090806  0.04138226]
 [ 0.04509659  0.04350119 -0.04842755  0.00765057 -0.04337609  0.04712768
  -0.03019538  0.04776401]
 [-0.02630807 -0.03360607 -0.01024401 -0.0054285  -0.01279521  0.03048204
   0.0493163   0.00890334]
 [-0.0474932   0.04910377  0.04276378  0.02265021  0.01451634 -0.03267755
  -0.01726349  0.

In [26]:
loss, accuracy = model.evaluate(padded_reviews, labels, verbose = 0)

In [27]:
print(f'accuracy: {accuracy}')

accuracy: 1.0


In [28]:
## Using one hot encoding :

In [29]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,Dense,LSTM
import numpy as np

In [30]:
num_classes = 5
max_words = 20
sentences = ['আমি আর আসছি না এখানে!','কথা শোনে না ওয়েটার','একদম ঠান্ডা খাবার']

In [31]:
sentences

['আমি আর আসছি না এখানে!', 'কথা শোনে না ওয়েটার', 'একদম ঠান্ডা খাবার']

In [32]:
type(sentences)

list

In [33]:
labels = np.random.randint(0, num_classes, 3)
y = to_categorical(labels,num_classes=num_classes)

In [43]:
y

array([[0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.]], dtype=float32)

In [44]:
labels

array([3, 0, 2])

In [45]:
words = set(w for sent in sentences for w in sent.split())


In [46]:
words

{'আমি',
 'আর',
 'আসছি',
 'একদম',
 'এখানে!',
 'ওয়েটার',
 'কথা',
 'খাবার',
 'ঠান্ডা',
 'না',
 'শোনে'}

In [47]:
word_map = {w : i+1 for (i, w) in enumerate(words)}

In [48]:
word_map

{'ঠান্ডা': 1,
 'ওয়েটার': 2,
 'আসছি': 3,
 'আমি': 4,
 'এখানে!': 5,
 'আর': 6,
 'শোনে': 7,
 'না': 8,
 'একদম': 9,
 'কথা': 10,
 'খাবার': 11}

In [49]:
sent_ints = [[word_map[w] for w in sent.split()] for sent in sentences]

In [50]:
sent_ints

[[4, 6, 3, 8, 5], [10, 7, 8, 2], [9, 1, 11]]

In [52]:
vocab_size = len(words)

In [53]:
vocab_size

11

In [57]:
X = np.array([to_categorical((pad_sequences((sent,), 
    max_words)).reshape(20,),vocab_size + 1) for sent in sent_ints])
print(X.shape)

(3, 20, 12)


In [58]:
print(X)

[[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
  [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]

 [[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

In [59]:
print(y)

[[0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]]


In [62]:
model = Sequential()
model.add(Dense(512, input_shape=(max_words, vocab_size + 1)))
model.add(LSTM(128))
model.add(Dense(5, activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [63]:
model.fit(X,y)

Train on 3 samples


<tensorflow.python.keras.callbacks.History at 0x7fbf62f00d30>