In [2]:
import numpy as np
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.metrics import accuracy_score


In [3]:
# Definindo o número máximo de palavras a serem usadas (mais frequentes)
max_features = 20000

# Carregando o dataset IMDb
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [4]:
# Definindo o comprimento máximo das resenhas
max_len = 200

# Padronizando as sequências para que todas tenham o mesmo tamanho
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)


In [5]:
# Definindo o modelo
model = Sequential()

# Camada de embedding para transformar os índices em vetores densos
model.add(Embedding(max_features, 128, input_length=max_len))

# Camada LSTM com 128 unidades
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))

# Camada de saída totalmente conectada com ativação sigmoide para classificação binária
model.add(Dense(1, activation='sigmoid'))

# Compilando o modelo com otimizador Adam e função de perda binária
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])




In [6]:
# Treinando o modelo
batch_size = 64
epochs = 5

history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test))


Epoch 1/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 161ms/step - accuracy: 0.7143 - loss: 0.5389 - val_accuracy: 0.8294 - val_loss: 0.3949
Epoch 2/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 171ms/step - accuracy: 0.8642 - loss: 0.3336 - val_accuracy: 0.7944 - val_loss: 0.4650
Epoch 3/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 168ms/step - accuracy: 0.8975 - loss: 0.2615 - val_accuracy: 0.8433 - val_loss: 0.3673
Epoch 4/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 169ms/step - accuracy: 0.9221 - loss: 0.2058 - val_accuracy: 0.8342 - val_loss: 0.4070
Epoch 5/5
[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 177ms/step - accuracy: 0.9481 - loss: 0.1456 - val_accuracy: 0.8488 - val_loss: 0.4176


In [7]:
# Avaliando o modelo
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Acurácia no conjunto de teste: {test_accuracy:.4f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 41ms/step - accuracy: 0.8470 - loss: 0.4194
Acurácia no conjunto de teste: 0.8488


In [8]:
# Previsão no conjunto de teste
y_pred = (model.predict(X_test) > 0.5).astype("int32")

# Acurácia
accuracy = accuracy_score(y_test, y_pred)
print(f"Acurácia calculada: {accuracy:.4f}")


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 38ms/step
Acurácia calculada: 0.8488
