In [1]:
from tensorflow.keras.preprocessing import text_dataset_from_directory
from tensorflow.strings import regex_replace
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout

In [2]:
def prepareData(dir):
  """Load a labeled text dataset from a directory and strip HTML line breaks.

  Args:
    dir: Directory handed to `text_dataset_from_directory`.

  Returns:
    The dataset with every '<br />' in the text replaced by a single space;
    labels are passed through unchanged.
  """
  def strip_line_breaks(text, label):
    # Only the text is rewritten; the label travels along untouched.
    return regex_replace(text, '<br />', ' '), label

  raw_dataset = text_dataset_from_directory(dir)
  return raw_dataset.map(strip_line_breaks)

In [3]:
# Dataset directory.
# Single place to point the notebook at the aclImdb folder; the 'train' and
# 'test' subfolders are resolved relative to it.
DATA_DIR = '/content/drive/MyDrive/ZADANIA - zaliczenie/aclImdb'
train_data = prepareData(DATA_DIR + '/train')
test_data = prepareData(DATA_DIR + '/test')

Found 4276 files belonging to 2 classes.
Found 4276 files belonging to 2 classes.


In [4]:
for text_batch, label_batch in train_data.take(1):
  print(text_batch.numpy()[0])
  print(label_batch.numpy()[0]) # 0 = negative, 1 = positive

b'The big bad swim has a low budget, indie feel about it. So many times I start to watch independent films that have had really good reviews only to find out they are pretentious crud, voted for by people who are so blinded by the idea of the film and its potential to be provocative that they forget that film is a form of entertainment first and foremost.  I do not know if The big bad swim has any message or higher meaning or metaphor, if it does then I missed it.  From the get go BBS felt right, it was easy and warm and human, there were no major dramas or meaningful insights, I just connected with the characters straight off. And when, as with all good films the end came around I felt sadness at the loss of that connection.  If you are looking for something big, or fast or insightful look elsewhere, look for a film trying to deliver more than it can. BBS delivers a solid, enjoyable, real experience and I felt rewarded and satiated having watched it.'
1


In [5]:
# Start from an empty Sequential model; the layers are appended one by one
# in the following cells.
model = Sequential()

In [6]:
# ----- 1. INPUT
# Every example is a single raw string; the TextVectorization layer is
# added next to turn it into integer token ids.
text_input = Input(shape=(1,), dtype="string")
model.add(text_input)

In [7]:
# ----- 2. TEXT VECTORIZATION
# `max_tokens` caps the vocabulary size; `max_len` is the fixed length of the
# integer sequence produced for each review.
max_tokens = 2000
max_len = 100
vectorize_layer = TextVectorization(
  output_mode="int",
  max_tokens=max_tokens,
  output_sequence_length=max_len,
)

In [8]:
# Call adapt()
# The layer is adapted on the training texts only, so the labels are dropped
# from the (text, label) pairs first.
train_texts = train_data.map(lambda review, _label: review)
vectorize_layer.adapt(train_texts)

In [9]:
# Append the adapted vectorization layer so the model accepts raw strings.
model.add(vectorize_layer)

In [10]:
# ----- 3. EMBEDDING
# Look up a 128-dimensional vector for each token id.
# NOTE(review): the TextVectorization docs suggest padding and OOV ids are
# already counted inside `max_tokens`, so the extra "+ 1" row is presumably
# unused but harmless — confirm before changing.
model.add(Embedding(input_dim=max_tokens + 1, output_dim=128))

In [11]:
# ----- 4. RECURRENT LAYER
# 64-unit LSTM over the embedded token sequence.
model.add(LSTM(units=64))

# ----- 5. DENSE HIDDEN LAYER
# 64-unit fully connected layer with ReLU.
model.add(Dense(units=64, activation="relu"))

# ----- 6. OUTPUT
# One sigmoid unit, so the model emits a single score between 0 and 1.
model.add(Dense(units=1, activation="sigmoid"))

In [12]:
# Compile and train the model.
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as the reported metric. Training runs for 10 epochs over `train_data`.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(x=train_data, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f2f8fa8ba50>

In [13]:
# Persist the trained weights under the path prefix 'rnn' (relative to the
# current working directory).
model.save_weights('rnn')

In [14]:
# Restore the weights from the 'rnn' prefix into the model; this only works
# when `model` has the same architecture that was used when saving.
model.load_weights('rnn')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f2f8c78bf10>

In [15]:
# 1 = positiv
# 0 = negativ


# Try the model on test dataset.
model.evaluate(test_data)

# Should print a very high score.
print(model.predict([
  "i loved it! highly recommend it to anyone and everyone looking for a great movie to watch.",
]))

# Should print a very low score.
print(model.predict([
  "this was awful! i hated it so much, nobody should watch this. the acting was terrible, the music was terrible, overall it was just bad.",
]))

print(model.predict([
  "I love beautiful movies. If a film is eye-candy with carefully designed decorations, masterful camerawork, lighting, and architectural frames",
]))

print(model.predict([
  "Before starting to review one of the greatest–in my opinion–TV shows of all time, let me put some suspense and psychological pressure on you first.",
]))

print(model.predict([
  "f you want to have a good laugh, gather your friends and turn on “Fifty Shades Darker”: I guarantee you will feel like experienced film critics while watching this. Other than this, there are no excuses for watching the movie.",
]))

print(model.predict([
  "This is not to mention more technical drawbacks. For example, the direction of the action scenes. ",
]))

print(model.predict([
  "It is a glorious film, but you could not make it now. And that is not just my opinion. My preview screening companion and fellow lover of weepy old black and white films agreed that not even the director Max Ophüls could get away with this 1948 classic if he tried to make it in 2009.",
]))

print(model.predict([
  "The acting in the film is terrific. Keitel and De Niro are standouts, and their verbal interplay takes on almost Abbott And Costello-like humor. ",
]))

print(model.predict([
  "Once again Mr. Costner has dragged out a movie for far longer than necessary.",
]))

print(model.predict([
  "A nut is holding her hostage, children are outside the open screen, so she whimpers for help instead of screaming when only a moment ago she was brave enough to be smashing windows to yell to these same children.<br /><br />She's finally free and alone in the house. Her chance to go for help, so what does she do? Wanders around the house and lies down. She's in the basement, locked away. So what does she do? Takes a little nap. Come on! Most of the movie is the nut wandering away and finding her sitting there snoozing when he wakes her up. Four times! What? If the writer is too bored to actually write a real plot why should we be paying attention? I think the key here is that it was originally a play for the radio, so they filled in with the heroine just sitting around rather than pretending to be screen writers and actually writing any action.<br /><br />And the ending is horrendous.<br /><br />The whole movie is completely implausible, horribly written and almost comically acted. Beware this movie at all costs!",
]))

print(model.predict([
  "It is a well-known fact in the video game industry that movies made after video games are often impossible to watch.",
]))

print(model.predict([
  "Once more, the maestro of Spanish cinema, Pedro Almodóvar, gives us a heady mixture of suspense, stormy melodrama, and theatrically dramatic characters. Penélope Cruz is superb, José Luis Gómez is tantalisingly elegant, and Lluís Homar is outstanding",
]))

print(model.predict([
  "The basic problem is that the film is really good, pushing near greatness, until Elaine finds out about the affair. She and Ben have only gone out once, and just on their second date, he seems to obsess over her, despite the reality of their having little in common.",
]))

print(model.predict([
  "Kairo, as well as many other Japanese horror movies, may seem fragmented; there are many secondary characters who appear on screen just to vanish in a couple of minutes and never appear again.",
]))

print(model.predict([
  "The end of the movie is probably its best part, even though it contradicts the rest of the movie",
]))

print(model.predict([
  "When I started watching I rather expected to see a battle of ideas, an opposition between Freud and Jung, who would prove the worthiness of their methods, with Sabina Spielrein as their patient.",
]))

print(model.predict([
  "There were many flaws and drawbacks, starting from numerous assumptions being unscientific, and several gaps in logic. But no matter what, Cure is a bright example of a truly psychological thriller: dark, mysterious, and atmospheric.",
]))

print(model.predict([
  "What I also liked was the general idea of the film: even if the situation seems to be hopeless, do not lose your head, try to keep your mind clear, and eventually you will find a solution to any problem.",
]))

print(model.predict([
  "What I liked most of all was the actors performances. Every person participating in the film acted so realistically that sometimes it seems you are watching a documentary",
]))

print(model.predict([
  "300 has reminded me of a beautiful grim fairy tale. You know it is not true, but since it is written well, you read through it until the end. The movie is the same; you feel like you are being fooled all the time, but the picture is captivating, so you watch until the final credits.",
]))

[[0.93293214]]
[[0.00357372]]
[[0.9327274]]
[[0.9323597]]
[[0.93044156]]
[[0.9291607]]
[[0.9221233]]
[[0.9326631]]
[[0.9328217]]
[[0.9992298]]
[[0.9323724]]
[[0.93343353]]
[[0.91452026]]
[[0.71891063]]
[[0.93276465]]
[[0.931597]]
[[0.9305996]]
[[0.92353576]]
[[0.932523]]
[[0.9298743]]
