In [1]:
import re

import tensorflow as tf
from tensorflow.keras import layers, Sequential
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

In [2]:
# Model and training hyperparameters
max_features = 10_000  # vocabulary cap: num_words for the dataset, first argument of the Embedding layer
max_len = 500          # every review is padded to this many tokens
batch_size = 32        # batch size used for both fit() and evaluate()
embedding_dims = 50    # second argument passed to layers.Embedding
units = 32             # argument passed to layers.LSTM
epochs = 5             # number of training epochs passed to fit()

In [3]:
# Load the IMDB reviews, limiting the vocabulary to max_features words
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=max_features,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [4]:
# Bring every review to a fixed length of max_len tokens
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=max_len)
    for split in (x_train, x_test)
)

In [5]:
# Sentiment classifier: embedding -> LSTM -> single sigmoid output
model = Sequential()
model.add(layers.Embedding(max_features, embedding_dims, input_length=max_len))
model.add(layers.LSTM(units))
model.add(layers.Dense(1, activation='sigmoid'))

In [6]:
# Binary classification setup: Adam optimizer, cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [7]:
# Train the model; the test split is passed as validation data and reported after each epoch
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
  1/782 [..............................] - ETA: 3:06 - loss: 0.0646 - accuracy: 1.0000  2/782 [..............................] - ETA: 3:01 - loss: 0.0984 - accuracy: 0.9688  3/782 [..............................] - ETA: 3:00 - loss: 0.1737 - accuracy: 0.9479  4/782 [..............................] - ETA: 2:59 - loss: 0.2465 - accuracy: 0.9219  5/782 [..............................] - ETA: 2:58 - loss: 0.2612 - accuracy: 0.9250  6/782 [..............................] - ETA: 2:58 - loss: 0.2391 - accuracy: 0.9323

In [None]:
# Final loss/accuracy on the test split
loss, accuracy = model.evaluate(
    x_test,
    y_test,
    batch_size=batch_size,
)
print(f"test accuracy: {accuracy}")

In [None]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Hand-written sample reviews used to probe the trained model.
# Typos ("i s", "medicore") were corrected so the intended words reach the
# model; "Głupi film" is deliberately non-English.
recenzje = [
    "The movie was great!",
    "The film was okay.",
    "The movie was terrible...",
    "Very bad movie!",
    "Big shit!",
    "Disaster",
    "Beautiful disaster",
    "Głupi film",
    "The movie is wonderful, director is a genius",
    "The film is terrible, zero talent, class E cinema!",
    "The film is good but far from perfection",
    "The film is very mediocre.",
    "The film is very mediocre. The film is good but far from perfection. The director is a klutz."
]

In [None]:
# Encode the sample reviews into the same integer format as the training data
word_index = imdb.get_word_index()

# Reserved ids matching the defaults of imdb.load_data(): real words are
# shifted by INDEX_FROM so the low ids stay free for padding/start/unknown.
START_CHAR = 1
OOV_CHAR = 2
INDEX_FROM = 3


def encode_review(review):
    """Convert raw review text into a list of IMDB token ids.

    Args:
        review: Review text as a plain string.

    Returns:
        A list of ints starting with START_CHAR. Words missing from
        word_index, or whose id would be >= max_features, are encoded
        as OOV_CHAR.
    """
    token_ids = [START_CHAR]
    # Lower-case and drop punctuation so that e.g. "The" and "great!" can be
    # looked up; the plain split() lookup silently discarded such words.
    for raw_word in re.findall(r"[\w']+", review.lower()):
        word = raw_word.strip("'")
        if not word:
            continue
        rank = word_index.get(word)
        token_id = OOV_CHAR if rank is None else rank + INDEX_FROM
        # Ids at or above max_features are outside the Embedding table
        # (the dataset was loaded with num_words=max_features).
        if token_id >= max_features:
            token_id = OOV_CHAR
        token_ids.append(token_id)
    return token_ids


sequences = [encode_review(review) for review in recenzje]
sequences = pad_sequences(sequences,maxlen=max_len)

predictions = model.predict(sequences)

In [None]:
# Sentiment classification: map each sigmoid score to a label
sentiment_label = ['Negative','Positive']
for review, prediction in zip(recenzje, predictions):
    # The model ends in Dense(1, sigmoid), so each prediction holds one score
    score = float(prediction[0])
    # Scores of 0.5 and above select 'Positive', everything else 'Negative'
    sentiment = sentiment_label[int(score >= 0.5)]
    print(f'recenzja: {review} -> ocena: {sentiment} ({score:.3f})\n')