In [109]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

In [4]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=500, maxlen=200)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [5]:
word2indx = tf.keras.datasets.imdb.get_word_index()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [6]:
word2indx

{'fawn': 34701,
 'tsukino': 52006,
 'nunnery': 52007,
 'sonja': 16816,
 'vani': 63951,
 'woods': 1408,
 'spiders': 16115,
 'hanging': 2345,
 'woody': 2289,
 'trawling': 52008,
 "hold's": 52009,
 'comically': 11307,
 'localized': 40830,
 'disobeying': 30568,
 "'royale": 52010,
 "harpo's": 40831,
 'canet': 52011,
 'aileen': 19313,
 'acurately': 52012,
 "diplomat's": 52013,
 'rickman': 25242,
 'arranged': 6746,
 'rumbustious': 52014,
 'familiarness': 52015,
 "spider'": 52016,
 'hahahah': 68804,
 "wood'": 52017,
 'transvestism': 40833,
 "hangin'": 34702,
 'bringing': 2338,
 'seamier': 40834,
 'wooded': 34703,
 'bravora': 52018,
 'grueling': 16817,
 'wooden': 1636,
 'wednesday': 16818,
 "'prix": 52019,
 'altagracia': 34704,
 'circuitry': 52020,
 'crotch': 11585,
 'busybody': 57766,
 "tart'n'tangy": 52021,
 'burgade': 14129,
 'thrace': 52023,
 "tom's": 11038,
 'snuggles': 52025,
 'francesco': 29114,
 'complainers': 52027,
 'templarios': 52125,
 '272': 40835,
 '273': 52028,
 'zaniacs': 52130,

In [25]:
idx2word = {i:charset for i, charset in enumerate(word2indx)}

In [26]:
idx2word

{0: 'fawn',
 1: 'tsukino',
 2: 'nunnery',
 3: 'sonja',
 4: 'vani',
 5: 'woods',
 6: 'spiders',
 7: 'hanging',
 8: 'woody',
 9: 'trawling',
 10: "hold's",
 11: 'comically',
 12: 'localized',
 13: 'disobeying',
 14: "'royale",
 15: "harpo's",
 16: 'canet',
 17: 'aileen',
 18: 'acurately',
 19: "diplomat's",
 20: 'rickman',
 21: 'arranged',
 22: 'rumbustious',
 23: 'familiarness',
 24: "spider'",
 25: 'hahahah',
 26: "wood'",
 27: 'transvestism',
 28: "hangin'",
 29: 'bringing',
 30: 'seamier',
 31: 'wooded',
 32: 'bravora',
 33: 'grueling',
 34: 'wooden',
 35: 'wednesday',
 36: "'prix",
 37: 'altagracia',
 38: 'circuitry',
 39: 'crotch',
 40: 'busybody',
 41: "tart'n'tangy",
 42: 'burgade',
 43: 'thrace',
 44: "tom's",
 45: 'snuggles',
 46: 'francesco',
 47: 'complainers',
 48: 'templarios',
 49: '272',
 50: '273',
 51: 'zaniacs',
 52: '275',
 53: 'consenting',
 54: 'snuggled',
 55: 'inanimate',
 56: 'uality',
 57: 'bronte',
 58: 'errors',
 59: 'dialogs',
 60: "yomada's",
 61: "madman's"

In [27]:
idx2word

{0: 'fawn',
 1: 'tsukino',
 2: 'nunnery',
 3: 'sonja',
 4: 'vani',
 5: 'woods',
 6: 'spiders',
 7: 'hanging',
 8: 'woody',
 9: 'trawling',
 10: "hold's",
 11: 'comically',
 12: 'localized',
 13: 'disobeying',
 14: "'royale",
 15: "harpo's",
 16: 'canet',
 17: 'aileen',
 18: 'acurately',
 19: "diplomat's",
 20: 'rickman',
 21: 'arranged',
 22: 'rumbustious',
 23: 'familiarness',
 24: "spider'",
 25: 'hahahah',
 26: "wood'",
 27: 'transvestism',
 28: "hangin'",
 29: 'bringing',
 30: 'seamier',
 31: 'wooded',
 32: 'bravora',
 33: 'grueling',
 34: 'wooden',
 35: 'wednesday',
 36: "'prix",
 37: 'altagracia',
 38: 'circuitry',
 39: 'crotch',
 40: 'busybody',
 41: "tart'n'tangy",
 42: 'burgade',
 43: 'thrace',
 44: "tom's",
 45: 'snuggles',
 46: 'francesco',
 47: 'complainers',
 48: 'templarios',
 49: '272',
 50: '273',
 51: 'zaniacs',
 52: '275',
 53: 'consenting',
 54: 'snuggled',
 55: 'inanimate',
 56: 'uality',
 57: 'bronte',
 58: 'errors',
 59: 'dialogs',
 60: "yomada's",
 61: "madman's"

In [36]:
def translate(indexes, idx2word):
    arr = []
    for i in indexes:
        arr.append(idx2word[i])
    return ' '.join(arr)

In [37]:
X_train[0]

[1,
 194,
 2,
 194,
 2,
 78,
 228,
 5,
 6,
 2,
 2,
 2,
 134,
 26,
 4,
 2,
 8,
 118,
 2,
 14,
 394,
 20,
 13,
 119,
 2,
 189,
 102,
 5,
 207,
 110,
 2,
 21,
 14,
 69,
 188,
 8,
 30,
 23,
 7,
 4,
 249,
 126,
 93,
 4,
 114,
 9,
 2,
 2,
 5,
 2,
 4,
 116,
 9,
 35,
 2,
 4,
 229,
 9,
 340,
 2,
 4,
 118,
 9,
 4,
 130,
 2,
 19,
 4,
 2,
 5,
 89,
 29,
 2,
 46,
 37,
 4,
 455,
 9,
 45,
 43,
 38,
 2,
 2,
 398,
 4,
 2,
 26,
 2,
 5,
 163,
 11,
 2,
 2,
 4,
 2,
 9,
 194,
 2,
 7,
 2,
 2,
 349,
 2,
 148,
 2,
 2,
 2,
 15,
 123,
 125,
 68,
 2,
 2,
 15,
 349,
 165,
 2,
 98,
 5,
 4,
 228,
 9,
 43,
 2,
 2,
 15,
 299,
 120,
 5,
 120,
 174,
 11,
 220,
 175,
 136,
 50,
 9,
 2,
 228,
 2,
 5,
 2,
 2,
 245,
 2,
 5,
 4,
 2,
 131,
 152,
 491,
 18,
 2,
 32,
 2,
 2,
 14,
 9,
 6,
 371,
 78,
 22,
 2,
 64,
 2,
 9,
 8,
 168,
 145,
 23,
 4,
 2,
 15,
 16,
 4,
 2,
 5,
 28,
 6,
 52,
 154,
 462,
 33,
 89,
 78,
 285,
 16,
 145,
 95]

In [38]:
translate(X_train[0], idx2word=idx2word)

"tsukino fig nunnery fig nunnery babaganoosh golem woods spiders nunnery nunnery nunnery expressively wood' vani nunnery woody yahoo nunnery 'royale heirloom rickman disobeying meteorologist nunnery screaming insecurity woods hurting sermons nunnery arranged 'royale 'reloaded' lifeline woody seamier familiarness hanging vani sickeningly intake eggar's vani populations trawling nunnery nunnery woods nunnery vani mesmerize trawling wednesday nunnery vani indiscretion trawling o'shaughnessy nunnery vani yahoo trawling vani 'porky's' nunnery diplomat's vani nunnery woods cobblers bringing nunnery francesco altagracia vani fro trawling snuggles thrace circuitry nunnery nunnery pinoy vani nunnery wood' nunnery woods sentencing comically nunnery nunnery vani nunnery trawling fig nunnery hanging nunnery nunnery norliss nunnery travel nunnery nunnery nunnery harpo's ster' hj 'pleasure nunnery nunnery harpo's norliss assimilates nunnery stern woods vani golem trawling thrace nunnery nunnery harp

In [108]:
X_train

array([list([1, 194, 2, 194, 2, 78, 228, 5, 6, 2, 2, 2, 134, 26, 4, 2, 8, 118, 2, 14, 394, 20, 13, 119, 2, 189, 102, 5, 207, 110, 2, 21, 14, 69, 188, 8, 30, 23, 7, 4, 249, 126, 93, 4, 114, 9, 2, 2, 5, 2, 4, 116, 9, 35, 2, 4, 229, 9, 340, 2, 4, 118, 9, 4, 130, 2, 19, 4, 2, 5, 89, 29, 2, 46, 37, 4, 455, 9, 45, 43, 38, 2, 2, 398, 4, 2, 26, 2, 5, 163, 11, 2, 2, 4, 2, 9, 194, 2, 7, 2, 2, 349, 2, 148, 2, 2, 2, 15, 123, 125, 68, 2, 2, 15, 349, 165, 2, 98, 5, 4, 228, 9, 43, 2, 2, 15, 299, 120, 5, 120, 174, 11, 220, 175, 136, 50, 9, 2, 228, 2, 5, 2, 2, 245, 2, 5, 4, 2, 131, 152, 491, 18, 2, 32, 2, 2, 14, 9, 6, 371, 78, 22, 2, 64, 2, 9, 8, 168, 145, 23, 4, 2, 15, 16, 4, 2, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95]),
       list([1, 14, 47, 8, 30, 31, 7, 4, 249, 108, 7, 4, 2, 54, 61, 369, 13, 71, 149, 14, 22, 112, 4, 2, 311, 12, 16, 2, 33, 75, 43, 2, 296, 4, 86, 320, 35, 2, 19, 263, 2, 2, 4, 2, 33, 89, 78, 12, 66, 16, 4, 360, 7, 4, 58, 316, 334, 11, 4, 2, 43, 2, 2, 8, 257, 85, 2, 42, 

In [39]:
size_seq = 200
seq_pad_train = tf.keras.preprocessing.sequence.pad_sequences(X_train, maxlen=size_seq)
seq_pad_test = tf.keras.preprocessing.sequence.pad_sequences(X_test, maxlen=size_seq)

In [40]:
seq_pad_train

array([[  0,   0,   0, ...,  16, 145,  95],
       [  0,   0,   0, ...,   7, 129, 113],
       [  0,   0,   0, ...,   7,  61, 113],
       ...,
       [  0,   0,   0, ...,   4,   2,   2],
       [  0,   0,   0, ...,  12,   9,  23],
       [  0,   0,   0, ..., 204, 131,   9]])

In [41]:
y_train

array([0, 0, 0, ..., 0, 1, 0], dtype=int64)

In [48]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=500, output_dim=32, input_length=size_seq))
model.add(tf.keras.layers.LSTM(units=10, input_shape=(32,)))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [49]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["acc"])

In [54]:
model.fit(x=seq_pad_train, y=y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1f428b022c0>

In [80]:
predict = model.predict(seq_pad_test)



In [81]:
predict[10]

array([0.04984523], dtype=float32)

In [82]:
y_test[10]

0

In [139]:
test_phrase = "I love you Renata"
# Filtrar caracteres em branco (espaços) e converter em uma lista
test_phrase_sequence = [word for word in test_phrase if word != ' ']

# Criar um conjunto de caracteres únicos
unique_chars = sorted(set(test_phrase_sequence))

# Criar um dicionário de índices para cada caractere único
char_index = {char: index for index, char in enumerate(unique_chars)}

# Converter a sequência de caracteres para uma sequência de índices
test_phrase_sequence = np.array([char_index[char] for char in test_phrase_sequence])

# Pad the sequence
test_phrase_sequence_padding = tf.keras.preprocessing.sequence.pad_sequences([test_phrase_sequence])

In [140]:
predict = model.predict(test_phrase_sequence_padding)



In [141]:
predict

array([[0.6456288]], dtype=float32)