In [5]:
! pip install tensorflow keras gdown

!gdown 1kMVEUkLblQMEuKVNASkMVL3hsbpbBl3L
!unzip movie-reviews-dataset.zip


Downloading...
From: https://drive.google.com/uc?id=1kMVEUkLblQMEuKVNASkMVL3hsbpbBl3L
To: /content/movie-reviews-dataset.zip
100% 63.0M/63.0M [00:00<00:00, 195MB/s]
Archive:  movie-reviews-dataset.zip
replace movie-reviews-dataset/.DS_Store? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [9]:
from tensorflow.keras.preprocessing import text_dataset_from_directory
from tensorflow.strings import regex_replace
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout

def prepareData(dir):
  data = text_dataset_from_directory(dir)
  return data.map(
    lambda text, label: (regex_replace(text, '', ' '), label),
  )

# Assumes you're in the root level of the dataset directory.
# If you aren't, you'll need to change the relative paths here.

train_data = prepareData('./movie-reviews-dataset/train')
test_data = prepareData('./movie-reviews-dataset/test')

for text_batch, label_batch in train_data.take(1):
  print(text_batch.numpy()[0])
  print(label_batch.numpy()[0]) # 0 = negative, 1 = positive


model = Sequential()

# ----- 1. INPUT
# We need this to use the TextVectorization layer next.
model.add(Input(shape=(1,), dtype="string"))

# ----- 2. TEXT VECTORIZATION
# This layer processes the input string and turns it into a sequence of
# max_len integers, each of which maps to a certain token.
max_tokens = 1000
max_len = 100
vectorize_layer = TextVectorization(
  # Max vocab size. Any words outside of the max_tokens most common ones
  # will be treated the same way: as "out of vocabulary" (OOV) tokens.
  max_tokens=max_tokens,
  # Output integer indices, one per string token
  output_mode="int",
  # Always pad or truncate to exactly this many tokens
  output_sequence_length=max_len,
)

# Call adapt(), which fits the TextVectorization layer to our text dataset.
# This is when the max_tokens most common words (i.e. the vocabulary) are selected.
train_texts = train_data.map(lambda text, label: text)
vectorize_layer.adapt(train_texts)

model.add(vectorize_layer)

# ----- 3. EMBEDDING
# This layer turns each integer (representing a token) from the previous layer
# an embedding. Note that we're using max_tokens + 1 here, since there's an
# out-of-vocabulary (OOV) token that gets added to the vocab.
model.add(Embedding(max_tokens + 1, 128))

# ----- 4. RECURRENT LAYER
model.add(LSTM(64))

# ----- 5. DENSE HIDDEN LAYER
model.add(Dense(64, activation="relu"))

# ----- 6. OUTPUT
model.add(Dense(1, activation="sigmoid"))


# Compile and train the model.
model.compile(loss="binary_crossentropy", optimizer="adagrad", metrics=["accuracy"])# zmiana optymalizatora
model.fit(train_data, epochs=10)

model.save_weights('rnn')

model.load_weights('rnn')

# Try the model on our test dataset.
model.evaluate(test_data)

# Should print a very high score like 0.98.
print(model.predict([
  "i loved it! highly recommend it to anyone and everyone looking for a great movie to watch.",
]))

# Should print a very low score like 0.01.
print(model.predict([
  "this was awful! i hated it so much, nobody should watch this. the acting was terrible, the music was terrible, overall it was just bad.",
]))

Found 25000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.
b' I \' m   o n l y   r a t i n g   t h i s   f i l m   a s   a   3   o u t   o f   p i t y   b e c a u s e   i t   a t t e m p t s   t o   b e   w o r t h w h i l e .   I   l o v e   t o   p r a i s e   a   g r e a t   m o v i e   a n d   I \' m   n o t   b i a s e d   t o w a r d   " m a l e "   m o v i e s .   L e g a l l y   b l o n d e   w a s   a n   e x c e l l e n t   f i l m .   G e o r g i a   R u l e   o n   t h e   o t h e r   h a n d ,   w a s   a   d i s o r g a n i z e d ,   w e a k ,   p o o r l y   w r i t t e n ,   u n r e a l i s t i c   e x a m p l e   o f   m o v i e   m a k i n g   a t   i t s   w o r s t .   b y   t h e   e n d   o f   t h e   f i l m   I   d i d n \' t   c a r e   w h o   w a s   l y i n g   o r   i f   a n y t h i n g   w a s   r e s o l v e d .   < b r   / > < b r   / > T h e   m o s t   i m p o r t a n t   t h i n g   i n   a   f i l m   i s   a   g o o d   S 

In [10]:
print("RECENZJE NEGATYWNE")
print("Poz 1.  Wartosc: ",model.predict([
 "That was a horror in real life, Ive never been in such disgusting place like that, crew was very unpleasant, my foot never stand in that place anymore",
]))

print("Poz 2.  Wartosc: ",model.predict([
 "Long story short, that was terrible vacation.",
 ]))

print("Poz 3.  Wartosc: ",model.predict([
 "I think reality TV shows are mindless and contribute to the dumbing down of society.",
 ]))
print("Poz 4.  Wartosc: ",model.predict([
 "I was very disappointed with the tour I took with this company. The guide was unenthusiastic and uninformative, and the sights were underwhelming.",
 ]))
print("Poz 5.  Wartosc: ",model.predict([
 "This coffee shop was a huge letdown. The coffee was burnt and tasted terrible, and the atmosphere was cold and unwelcoming.",
 ]))
print("Poz 6.  Wartosc: ",model.predict([
 "I had a terrible experience at this theme park. The rides were poorly maintained, the food was terrible, and the staff was unhelpful and uninterested in providing a good experience.",
 ]))
print("Poz 7.  Wartosc: ",model.predict([
 "The product I purchased from this company was of very poor quality. It broke within a few days, and the customer service was unhelpful in resolving the issue.",
 ]))
print("Poz 8.  Wartosc: ",model.predict([
 "This museum was a huge disappointment. The exhibits were poorly curated and uninformative, and the staff was uninterested in answering questions or providing any additional information.",
 ]))
print("Poz 9.  Wartosc: ",model.predict([
 "My stay at this resort was a nightmare. The amenities were outdated and in poor condition, the views were disappointing, and the staff was unresponsive to my complaints.",
 ]))
print("Poz 10. Wartosc: ",model.predict([
 "The concert I attended at this venue was a disaster. The sound quality was terrible, and the staff was unhelpful and unresponsive to complaints from attendees.",
 ]))

RECENZJE NEGATYWNE
Poz 1.  Wartosc:  [[0.49371177]]
Poz 2.  Wartosc:  [[0.4937118]]
Poz 3.  Wartosc:  [[0.4937118]]
Poz 4.  Wartosc:  [[0.4937118]]
Poz 5.  Wartosc:  [[0.4937118]]
Poz 6.  Wartosc:  [[0.49371177]]
Poz 7.  Wartosc:  [[0.49371177]]
Poz 8.  Wartosc:  [[0.4937118]]
Poz 9.  Wartosc:  [[0.49371177]]
Poz 10. Wartosc:  [[0.4937118]]


In [11]:
print("RECENZJE POZYTYWNE")
print("Poz 1.  Wartosc: ",model.predict([
  "I had an amazing experience at this restaurant! The food was delicious, the service was impeccable, and the atmosphere was wonderful. I highly recommend it to anyone looking for a great dining experience.",
]))
print("Poz 2.  Wartosc: ",model.predict([
  "This hotel exceeded my expectations in every way. The staff was friendly and accommodating, the rooms were clean and comfortable, and the location was perfect for exploring the city.",
]))
print("Poz 3.  Wartosc: ",model.predict([
  "The customer service at this store was exceptional. The staff went out of their way to help me find what I was looking for, and they were friendly and knowledgeable throughout the entire process.",
]))
print("Poz 4.  Wartosc: ",model.predict([
  "I recently took a tour with this company, and I was blown away by the quality of the experience. The guide was knowledgeable and entertaining, and the sights were breathtaking.",
]))
print("Poz 5.  Wartosc: ",model.predict([
  "I've been a regular customer at this coffee shop for years, and I can honestly say that it's the best in town. The coffee is always delicious, and the atmosphere is cozy and welcoming.",
]))
print("Poz 6.  Wartosc: ",model.predict([
  "I had a fantastic time at this theme park. The rides were thrilling, the food was tasty, and the staff was friendly and helpful.",
]))
print("Poz 7.  Wartosc: ",model.predict([
  "I recently purchased a product from this company, and I couldn't be happier with my purchase. The product is high-quality and exactly what I was looking for.",
]))
print("Poz 8.  Wartosc: ",model.predict([
  "This museum is a must-visit for anyone interested in history. The exhibits are informative and engaging, and the staff is knowledgeable and helpful.",
]))
print("Poz 9.  Wartosc: ",model.predict([
  "I had an amazing stay at this resort. The amenities were top-notch, the views were breathtaking, and the staff was friendly and accommodating.",
]))
print("Poz 10. Wartosc: ",model.predict([
  "The concert I attended at this venue was incredible. The sound quality was excellent, and the staff did a great job of ensuring everyone had a great experience.",
]))

RECENZJE POZYTYWNE
Poz 1.  Wartosc:  [[0.49371177]]
Poz 2.  Wartosc:  [[0.49371177]]
Poz 3.  Wartosc:  [[0.49371177]]
Poz 4.  Wartosc:  [[0.49371177]]
Poz 5.  Wartosc:  [[0.49371177]]
Poz 6.  Wartosc:  [[0.4937118]]
Poz 7.  Wartosc:  [[0.4937118]]
Poz 8.  Wartosc:  [[0.4937118]]
Poz 9.  Wartosc:  [[0.4937118]]
Poz 10. Wartosc:  [[0.4937118]]
