<a href="https://colab.research.google.com/github/TovarischComrad/Tehnopark/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds

import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Load the IMDB review dataset from TensorFlow Datasets.
# as_supervised=True is passed so each element unpacks as a (text, label)
# pair, which is how the later cells consume it.
dataset = tfds.load('imdb_reviews', as_supervised=True)

# Separate the train and test splits.
train_dataset, test_dataset = dataset['train'], dataset['test']

# Batch both splits into groups of 32; only the training split is shuffled.
batch_size = 32
shuffle_buffer_size = 10000

# prefetch() lets the input pipeline prepare upcoming batches while the
# current batch is being consumed, instead of doing the two strictly in turn.
train_dataset = (
    train_dataset
    .shuffle(shuffle_buffer_size)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
test_dataset = test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.1H0R43_1.0.0/imdb_reviews-train.tfrecor…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.1H0R43_1.0.0/imdb_reviews-test.tfrecord…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/incomplete.1H0R43_1.0.0/imdb_reviews-unsupervised.…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [None]:
# Pull one batch from the training pipeline and show its first review
# together with that review's label.
example, label = next(iter(train_dataset))

first_review = example.numpy()[0]
first_label = label.numpy()[0]

print('Text:\n', first_review)
print('\nLabel: ', first_label)


Text:
 b"A must see film with great dialogues, great music, great acting and a superb atmosphere.<br /><br />In the film you will follow 8 people for one day in the city of antwerp, they are all individuals and sometimes plain weird (that's how I love them!).<br /><br />I'm not going to say anything else, just go see and enjoy it."

Label:  1


In [None]:
# Build a TextVectorization layer that turns raw review strings into
# integer token ids, keeping at most 10000 vocabulary entries.
encoder = tf.keras.layers.TextVectorization(max_tokens=10000)

# The vocabulary is learned from the review texts only, so the labels are
# dropped before calling adapt().
reviews_only = train_dataset.map(lambda text, _: text)
encoder.adapt(reviews_only)

# Keep the learned vocabulary as an array so token ids can index into it.
vocabulary = np.array(encoder.get_vocabulary())

# Round-trip the example review: text -> token ids -> text.
original_text = example.numpy()[0]
encoded_text = encoder(original_text).numpy()
decoded_text = ' '.join(vocabulary[token_id] for token_id in encoded_text)

print('original: ', original_text)
print('encoded: ', encoded_text)
print('decoded: ', decoded_text)


original:  b"A must see film with great dialogues, great music, great acting and a superb atmosphere.<br /><br />In the film you will follow 8 people for one day in the city of antwerp, they are all individuals and sometimes plain weird (that's how I love them!).<br /><br />I'm not going to say anything else, just go see and enjoy it."
encoded:  [   4  217   68   20   17   85 4000   85  223   85  112    3    4  895
    1   13    8    2   20   23   77  841 1123   83   16   29  262    8
    2  541    5    1   35   24   32 3361    3  505 1020  903  174   87
   10  116 1854   13  142   22  166    6  130  229  329   41  138   68
    3  344    9]
decoded:  a must see film with great dialogues great music great acting and a superb [UNK] br in the film you will follow 8 people for one day in the city of [UNK] they are all individuals and sometimes plain weird thats how i love thembr br im not going to say anything else just go see and enjoy it


In [None]:
# Echo the adapted TextVectorization layer so the cell displays its repr.
encoder

<TextVectorization name=text_vectorization, built=True>

In [None]:
# Assemble the classifier layer by layer, then stack the pieces in a
# Sequential model. Layers are created in the same order they are stacked.

# The model takes raw strings; the encoder converts them to token ids.
string_input = tf.keras.Input(shape=(1,), dtype=tf.string)

# One embedding row per vocabulary entry known to the encoder.
vocab_size = len(encoder.get_vocabulary())
token_embedding = tf.keras.layers.Embedding(vocab_size, 64, mask_zero=True)

# Two stacked bidirectional LSTMs: the first returns the full sequence so
# the second has a sequence to read; the second does not.
sequence_bilstm = tf.keras.layers.Bidirectional(
    tf.keras.layers.LSTM(64, return_sequences=True)
)
summary_bilstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32))

# Classification head. The final Dense(1) is given no activation; the loss
# used at compile time is configured with from_logits=True to match.
hidden_dense = tf.keras.layers.Dense(64, activation='relu')
logit_output = tf.keras.layers.Dense(1)

model = tf.keras.Sequential([
    string_input,
    encoder,
    token_embedding,
    sequence_bilstm,
    summary_bilstm,
    hidden_dense,
    logit_output,
])
model

<Sequential name=sequential_6, built=True>

In [None]:
# Print Keras' built-in summary of the model.
model.summary()

In [None]:
# Configure training: binary cross-entropy computed on raw logits, the Adam
# optimizer with its default settings, and accuracy as the reported metric.
# NOTE(review): the model's last layer has no activation, so the metric is
# fed raw logits too. Confirm which threshold the 'accuracy' string metric
# applies in the installed Keras version; a 0.5 cut-off on logits would
# under-count positives whose logit lies between 0 and 0.5.
logit_bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
adam = tf.keras.optimizers.Adam()

model.compile(loss=logit_bce, optimizer=adam, metrics=['accuracy'])


In [None]:
# Train for 5 epochs on the training pipeline, passing the test split as
# validation data. The returned History object is kept for plotting.
fit_options = dict(epochs=5, validation_data=test_dataset)
history = model.fit(train_dataset, **fit_options)

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2143s[0m 3s/step - accuracy: 0.7239 - loss: 0.5039 - val_accuracy: 0.8533 - val_loss: 0.3617
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2127s[0m 3s/step - accuracy: 0.8979 - loss: 0.2513 - val_accuracy: 0.8700 - val_loss: 0.3049
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2173s[0m 3s/step - accuracy: 0.9340 - loss: 0.1682 - val_accuracy: 0.8445 - val_loss: 0.3647
Epoch 4/5
[1m324/782[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m17:40[0m 2s/step - accuracy: 0.9552 - loss: 0.1216

In [None]:




# Plot accuracy and loss per epoch, training vs. validation, side by side.

# Per-epoch metrics recorded by model.fit().
history_dict = history.history

# Separate training and validation curves for each metric.
acc, val_acc = history_dict['accuracy'], history_dict['val_accuracy']
loss, val_loss = history_dict['loss'], history_dict['val_loss']

# One figure, two panels: accuracy on the left, loss on the right.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(8, 4))

acc_ax.plot(acc)
acc_ax.plot(val_acc)
acc_ax.set_title('Training and Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(['Accuracy', 'Validation Accuracy'])

loss_ax.plot(loss)
loss_ax.plot(val_loss)
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend(['Loss', 'Validation Loss'])

plt.show()

In [None]:
# Making predictions
sample_text = (
    '''The movie by GeeksforGeeks was so good and the animation are so dope.
    I would recommend my friends to watch it.'''
)

# Pass the review as a string tensor rather than a NumPy unicode array:
# a tf.string tensor matches the model's declared input dtype, whereas
# newer Keras releases may reject NumPy string arrays as model input.
predictions = model.predict(tf.constant([sample_text]))
print(*predictions[0])

# The model emits one raw logit per review (no activation on the last
# layer, loss configured with from_logits=True), so the decision boundary
# is 0. Extract the scalar explicitly instead of relying on the truthiness
# of a one-element array.
logit = predictions[0][0]

# Print the label based on the prediction
if logit > 0:
    print('The review is positive')
else:
    print('The review is negative')