In [5]:
! pip install tensorflow keras gdown

!gdown 1kMVEUkLblQMEuKVNASkMVL3hsbpbBl3L
!unzip movie-reviews-dataset.zip


Downloading...
From: https://drive.google.com/uc?id=1kMVEUkLblQMEuKVNASkMVL3hsbpbBl3L
To: /content/movie-reviews-dataset.zip
100% 63.0M/63.0M [00:00<00:00, 195MB/s]
Archive:  movie-reviews-dataset.zip
replace movie-reviews-dataset/.DS_Store? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [6]:
from tensorflow.keras.preprocessing import text_dataset_from_directory
from tensorflow.strings import regex_replace
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout

def prepareData(dir):
  data = text_dataset_from_directory(dir)
  return data.map(
    lambda text, label: (regex_replace(text, '', ' '), label),
  )

# Assumes you're in the root level of the dataset directory.
# If you aren't, you'll need to change the relative paths here.

train_data = prepareData('./movie-reviews-dataset/train')
test_data = prepareData('./movie-reviews-dataset/test')

for text_batch, label_batch in train_data.take(1):
  print(text_batch.numpy()[0])
  print(label_batch.numpy()[0]) # 0 = negative, 1 = positive


model = Sequential()

# ----- 1. INPUT
# We need this to use the TextVectorization layer next.
model.add(Input(shape=(1,), dtype="string"))

# ----- 2. TEXT VECTORIZATION
# This layer processes the input string and turns it into a sequence of
# max_len integers, each of which maps to a certain token.
max_tokens = 1000
max_len = 100
vectorize_layer = TextVectorization(
  # Max vocab size. Any words outside of the max_tokens most common ones
  # will be treated the same way: as "out of vocabulary" (OOV) tokens.
  max_tokens=max_tokens,
  # Output integer indices, one per string token
  output_mode="int",
  # Always pad or truncate to exactly this many tokens
  output_sequence_length=max_len,
)

# Call adapt(), which fits the TextVectorization layer to our text dataset.
# This is when the max_tokens most common words (i.e. the vocabulary) are selected.
train_texts = train_data.map(lambda text, label: text)
vectorize_layer.adapt(train_texts)

model.add(vectorize_layer)

# ----- 3. EMBEDDING
# This layer turns each integer (representing a token) from the previous layer
# an embedding. Note that we're using max_tokens + 1 here, since there's an
# out-of-vocabulary (OOV) token that gets added to the vocab.
model.add(Embedding(max_tokens + 1, 128))

# ----- 4. RECURRENT LAYER
model.add(LSTM(64))

# ----- 5. DENSE HIDDEN LAYER
model.add(Dense(64, activation="relu"))

# ----- 6. OUTPUT
model.add(Dense(1, activation="sigmoid"))


# Compile and train the model.
model.compile(loss="binary_crossentropy", optimizer="adadelta", metrics=["accuracy"])# zmiana optymalizatora
model.fit(train_data, epochs=10)

model.save_weights('rnn')

model.load_weights('rnn')

# Try the model on our test dataset.
model.evaluate(test_data)

# Should print a very high score like 0.98.
print(model.predict([
  "i loved it! highly recommend it to anyone and everyone looking for a great movie to watch.",
]))

# Should print a very low score like 0.01.
print(model.predict([
  "this was awful! i hated it so much, nobody should watch this. the acting was terrible, the music was terrible, overall it was just bad.",
]))

Found 25000 files belonging to 2 classes.
Found 25000 files belonging to 2 classes.
b" H e r e   a g a i n   i s   y e t   a n o t h e r   D i a n e   L a n e   m o v i e   w h e r e   s h e   c h e a t s   o n   h e r   h u s b a n d .   I s   t h i s   t h e   o n l y   r o l e   s h e   k n o w s   h o w   t o   p l a y ?   T h i s   t i m e   i t ' s   s e t   i n   1 9 6 9   a n d   s h e   c h e a t s   o n   h e r   h u s b a n d   w i t h   t h e   b l o u s e   m a n .   I   a m   s o   n o t   s u r p r i s e d   b e c a u s e   t h a t   i s   s o   v e r y   p r e d i c t a b l e .   T h e n   h e r   h u s b a n d   g e t s   m a d   a n d   t h r o w s   t h e   m i l k .   I   w o u l d n ' t   b e   s u r p r i s e d   i f   s h e   s l e p t   w i t h   t h e   m i l k m a n   a s   w e l l .   I   w o u l d n ' t   b e   s u r p r i s e d   i f   s h e   s l e p t   w i t h   t h e   i c e   c r e a m   m a n   t o o   b e c a u s e   t h i s   i s   a   v e r y   b o

In [7]:
print("RECENZJE NEGATYWNE")
print("Poz 1.  Wartosc: ",model.predict([
 "That was a horror in real life, Ive never been in such disgusting place like that, crew was very unpleasant, my foot never stand in that place anymore",
]))

print("Poz 2.  Wartosc: ",model.predict([
 "Long story short, that was terrible vacation.",
 ]))

print("Poz 3.  Wartosc: ",model.predict([
 "I think reality TV shows are mindless and contribute to the dumbing down of society.",
 ]))
print("Poz 4.  Wartosc: ",model.predict([
 "I was very disappointed with the tour I took with this company. The guide was unenthusiastic and uninformative, and the sights were underwhelming.",
 ]))
print("Poz 5.  Wartosc: ",model.predict([
 "This coffee shop was a huge letdown. The coffee was burnt and tasted terrible, and the atmosphere was cold and unwelcoming.",
 ]))
print("Poz 6.  Wartosc: ",model.predict([
 "I had a terrible experience at this theme park. The rides were poorly maintained, the food was terrible, and the staff was unhelpful and uninterested in providing a good experience.",
 ]))
print("Poz 7.  Wartosc: ",model.predict([
 "The product I purchased from this company was of very poor quality. It broke within a few days, and the customer service was unhelpful in resolving the issue.",
 ]))
print("Poz 8.  Wartosc: ",model.predict([
 "This museum was a huge disappointment. The exhibits were poorly curated and uninformative, and the staff was uninterested in answering questions or providing any additional information.",
 ]))
print("Poz 9.  Wartosc: ",model.predict([
 "My stay at this resort was a nightmare. The amenities were outdated and in poor condition, the views were disappointing, and the staff was unresponsive to my complaints.",
 ]))
print("Poz 10. Wartosc: ",model.predict([
 "The concert I attended at this venue was a disaster. The sound quality was terrible, and the staff was unhelpful and unresponsive to complaints from attendees.",
 ]))

RECENZJE NEGATYWNE
Poz 1.  Wartosc:  [[0.49108848]]
Poz 2.  Wartosc:  [[0.49108836]]
Poz 3.  Wartosc:  [[0.4910884]]
Poz 4.  Wartosc:  [[0.49108842]]
Poz 5.  Wartosc:  [[0.49108842]]
Poz 6.  Wartosc:  [[0.4910885]]
Poz 7.  Wartosc:  [[0.49108848]]
Poz 8.  Wartosc:  [[0.49108845]]
Poz 9.  Wartosc:  [[0.49108848]]
Poz 10. Wartosc:  [[0.49108845]]


In [8]:
print("RECENZJE POZYTYWNE")
print("Poz 1.  Wartosc: ",model.predict([
  "I had an amazing experience at this restaurant! The food was delicious, the service was impeccable, and the atmosphere was wonderful. I highly recommend it to anyone looking for a great dining experience.",
]))
print("Poz 2.  Wartosc: ",model.predict([
  "This hotel exceeded my expectations in every way. The staff was friendly and accommodating, the rooms were clean and comfortable, and the location was perfect for exploring the city.",
]))
print("Poz 3.  Wartosc: ",model.predict([
  "The customer service at this store was exceptional. The staff went out of their way to help me find what I was looking for, and they were friendly and knowledgeable throughout the entire process.",
]))
print("Poz 4.  Wartosc: ",model.predict([
  "I recently took a tour with this company, and I was blown away by the quality of the experience. The guide was knowledgeable and entertaining, and the sights were breathtaking.",
]))
print("Poz 5.  Wartosc: ",model.predict([
  "I've been a regular customer at this coffee shop for years, and I can honestly say that it's the best in town. The coffee is always delicious, and the atmosphere is cozy and welcoming.",
]))
print("Poz 6.  Wartosc: ",model.predict([
  "I had a fantastic time at this theme park. The rides were thrilling, the food was tasty, and the staff was friendly and helpful.",
]))
print("Poz 7.  Wartosc: ",model.predict([
  "I recently purchased a product from this company, and I couldn't be happier with my purchase. The product is high-quality and exactly what I was looking for.",
]))
print("Poz 8.  Wartosc: ",model.predict([
  "This museum is a must-visit for anyone interested in history. The exhibits are informative and engaging, and the staff is knowledgeable and helpful.",
]))
print("Poz 9.  Wartosc: ",model.predict([
  "I had an amazing stay at this resort. The amenities were top-notch, the views were breathtaking, and the staff was friendly and accommodating.",
]))
print("Poz 10. Wartosc: ",model.predict([
  "The concert I attended at this venue was incredible. The sound quality was excellent, and the staff did a great job of ensuring everyone had a great experience.",
]))

RECENZJE POZYTYWNE
Poz 1.  Wartosc:  [[0.4910886]]
Poz 2.  Wartosc:  [[0.49108848]]
Poz 3.  Wartosc:  [[0.4910886]]
Poz 4.  Wartosc:  [[0.4910885]]
Poz 5.  Wartosc:  [[0.4910886]]
Poz 6.  Wartosc:  [[0.49108842]]
Poz 7.  Wartosc:  [[0.49108848]]
Poz 8.  Wartosc:  [[0.49108842]]
Poz 9.  Wartosc:  [[0.49108842]]
Poz 10. Wartosc:  [[0.49108848]]
