<a href="https://colab.research.google.com/github/jay05Hawk/Bi_RNN/blob/main/BiDirectional_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
# Folder under the Colab Google Drive mount point that holds the class material.
BASE_DIR = "/content/drive/MyDrive/001_SUNNY_BHAVEEN_CHANDRA/001_My_Classes"
# Folder for this session; it becomes the working directory on the next line.
ROOT = os.path.join(BASE_DIR, "DLCVNLP Aug 28th Batch/ALL_NOTES/July 23rd")
# Relative paths used later (e.g. Config.BASE_LOG_DIR) resolve against ROOT.
# NOTE(review): os.chdir raises FileNotFoundError if ROOT does not exist --
# presumably Google Drive has to be mounted first; confirm.
os.chdir(ROOT)

In [None]:
# Shell escape: print the working directory to confirm the chdir above worked.
!pwd

In [None]:
import tensorflow as tf
import numpy as np 
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

In [None]:
# Name of the TensorFlow Datasets corpus to load.
dataset_name = "imdb_reviews"

# with_info=True makes tfds.load return a (datasets, metadata) pair;
# as_supervised=True makes every example a (text, label) tuple.
dataset, info = tfds.load(
    dataset_name,
    as_supervised=True,
    with_info=True,
)

In [None]:
# Display the metadata object returned by tfds.load.
info

In [None]:
# Dataset name as recorded in the metadata.
info.name

In [None]:
# print() so the description text is rendered with its line breaks.
print(info.description)

In [None]:
# List the available splits; "train" and "test" are used in the next cell.
dataset.keys()

In [None]:
# Pull the two splits used in this notebook out of the loaded dataset mapping.
train_ds = dataset["train"]
test_ds = dataset["test"]

In [None]:
# Peek at three raw (text, label) pairs before the dataset is batched.
for example, label in train_ds.take(3):
  print(f"Sample Text: \n{example}\n")
  print(f"Label: \n{label}\n")
  print("--"*30)  # visual separator between samples

In [None]:
# Underscores in a numeric literal are only digit separators (the same
# notation Config.BUFFER_SIZE uses), so this evaluates to int.
type(10_000_000)

In [None]:
class Config:
  """Hyper-parameters and output locations shared by the notebook cells."""

  # --- input pipeline ---
  BATCH_SIZE = 64        # examples per batch
  BUFFER_SIZE = 10_000   # shuffle buffer size for the training set

  # --- model / training ---
  VOCAB_SIZE = 1000      # max_tokens of the TextVectorization layer
  OUTPUT_DIM = 64        # output_dim of the Embedding layer
  EPOCHS = 10            # epochs passed to model.fit

  # --- artefacts: everything lives under one root folder ---
  BASE_LOG_DIR = "base_log_dir"
  TB_ROOT_LOG_DIR = os.path.join(BASE_LOG_DIR, "tb_log_dir")
  CHECKPOINT_DIR = os.path.join(BASE_LOG_DIR, "ckpt")
  TRAINED_MODEL_DIR = os.path.join(BASE_LOG_DIR, "models")

In [None]:
# Shuffle and batch the training dataset; the test dataset is only batched.
# Both pipelines end with prefetch(tf.data.AUTOTUNE).

train_ds = (
    train_ds
    .shuffle(Config.BUFFER_SIZE)
    .batch(Config.BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
test_ds = (
    test_ds
    .batch(Config.BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)



In [None]:
# Inspect one batch. `example` and `label` stay bound after the loop and are
# reused by the following cells.
for example, label in train_ds.take(1):
  print(f"Sample Text: \n{example}\n")
  print(f"Label: \n{label}\n")
  print("--"*30)  # visual separator

In [None]:
# Sizes of the batch left over from the loop above.
len(example), len(label)

In [None]:
def function(text, label):
  """Return the text of a (text, label) pair; the label is ignored."""
  del label  # intentionally unused
  return text

In [None]:
# Text encoding: turn raw review strings into integer token ids.

# The vocabulary is capped at Config.VOCAB_SIZE tokens.
encoder = tf.keras.layers.TextVectorization(max_tokens=Config.VOCAB_SIZE)

# Fit the vocabulary on the training texts only; the lambda drops the labels.
encoder.adapt(train_ds.map(lambda text, label: text))

In [None]:
# First 20 tokens of the vocabulary learned by the encoder.
vocab = np.array(encoder.get_vocabulary())
vocab[:20]

In [None]:
# First three raw reviews of the batch captured earlier.
example.numpy()[:3]

In [None]:
# The same three reviews after passing through the encoder.
encoder(example.numpy()[:3])

In [None]:
# Actual vocabulary size; the Embedding layer below uses it as input_dim.
len(encoder.get_vocabulary())

In [None]:
# Trainable lookup table that maps each token id to a dense vector.
embedding_layer = tf.keras.layers.Embedding(
    input_dim  = len(encoder.get_vocabulary()),  # one row per vocabulary entry
    output_dim = Config.OUTPUT_DIM,  # length of each embedding vector
    mask_zero=True  # per Keras docs: id 0 is treated as padding and masked
)


In [None]:
# Sentiment classifier: text -> token ids -> embeddings -> BiLSTM -> dense head.
LAYERS = [
    encoder,
    embedding_layer,
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation="relu"),
    # Single unit without an activation: the model outputs a raw logit.
    tf.keras.layers.Dense(1),
]

model = tf.keras.Sequential(layers=LAYERS)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Show, layer by layer, whether each layer reports mask support
# (the Embedding layer was created with mask_zero=True).
for layer in model.layers:
  print(layer.supports_masking)

In [None]:
model.compile(
    # from_logits=True because the final Dense(1) layer has no activation.
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["accuracy"]
)

In [None]:
import time
import re

In [None]:
# Preview of the run-folder name built in callbacks(): every whitespace
# character and colon of the timestamp is replaced by an underscore.
re.sub(r"[\s:]", "_", time.asctime())

In [None]:
def callbacks(base_dir="."):
  """Build the Keras callbacks used for training.

  Args:
    base_dir: Directory under which ``Config.TB_ROOT_LOG_DIR`` and
      ``Config.CHECKPOINT_DIR`` are created. The default ``"."`` keeps them
      relative to the current working directory.

  Returns:
    A list ``[tensorboard_callback, checkpoint_callback]``.
  """
  # TensorBoard callback: a timestamped sub-folder per call keeps runs apart.
  unique_log = re.sub(r"[\s:]", "_", time.asctime())
  # normpath drops the leading "./" so the default base_dir yields the plain
  # relative path.
  tb_log_dir = os.path.normpath(
      os.path.join(base_dir, Config.TB_ROOT_LOG_DIR, unique_log)
  )
  os.makedirs(tb_log_dir, exist_ok=True)

  tb_cb = tf.keras.callbacks.TensorBoard(log_dir=tb_log_dir)

  # Checkpoint callback: with save_best_only=True only the best model so far
  # is kept.
  ckpt_dir = os.path.normpath(os.path.join(base_dir, Config.CHECKPOINT_DIR))
  os.makedirs(ckpt_dir, exist_ok=True)
  # NOTE(review): newer Keras releases appear to require the checkpoint path
  # to end in ".keras" or ".h5" -- confirm against the installed version.
  ckpt_file = os.path.join(ckpt_dir, "model")

  ckpt_cb = tf.keras.callbacks.ModelCheckpoint(
      filepath=ckpt_file,
      save_best_only=True
  )

  return [tb_cb, ckpt_cb]

In [None]:
# Fresh callbacks (new TensorBoard run folder + checkpointing) for this run.
callback_list = callbacks()

# `history` keeps the per-epoch metrics that get_plot() reads later.
history = model.fit(
    train_ds,
    epochs=Config.EPOCHS,
    validation_data=test_ds,  # NOTE(review): the test split doubles as validation data
    validation_steps=30,  # per Keras docs: validate on 30 batches per epoch
    callbacks=callback_list
)

In [None]:
# Evaluate on the full test dataset; the two values unpack as loss and the
# "accuracy" metric configured in compile().
test_loss, test_acc = model.evaluate(test_ds)

print(f"test loss: {test_loss}")
print(f"test accuracy: {test_acc}")

In [None]:
def get_plot(history, metric):
  """Plot a training metric and, when recorded, its validation counterpart.

  Args:
    history: Object with a ``history`` dict mapping metric names to
      per-epoch values, such as the value returned by ``model.fit``.
    metric: Key to plot, e.g. ``"accuracy"`` or ``"loss"``.

  Raises:
    KeyError: If ``metric`` is not present in ``history.history``.
  """
  history_obj = history.history
  val_metric = f"val_{metric}"

  # Label each line directly so the legend stays correct even when the
  # current axes already contain other lines.
  plt.plot(history_obj[metric], label=metric)
  # Only plot the "val_" entry when it was recorded, instead of raising
  # KeyError.
  if val_metric in history_obj:
    plt.plot(history_obj[val_metric], label=val_metric)
  plt.xlabel("Epochs -->")
  plt.ylabel(f"{metric} -->")
  plt.legend()

In [None]:
# Apply matplotlib's "fivethirtyeight" style to the plots that follow.
plt.style.use("fivethirtyeight")

In [None]:
# Training vs. validation accuracy per epoch.
get_plot(history, metric="accuracy")

In [None]:
# Training vs. validation loss per epoch.
get_plot(history, metric="loss")


In [None]:
# Load the TensorBoard notebook extension (provides the %tensorboard magic).
%load_ext tensorboard

In [None]:
# Open TensorBoard on the folder Config.TB_ROOT_LOG_DIR points to
# (relative to the current working directory).
%tensorboard --logdir base_log_dir/tb_log_dir

In [None]:
# Hand-written review expected to be classified as positive.
sample_text_pos = (
    "The movie was cool. The animation and the graphics were out of the world. I would definitly recommend this movie"
)

# Hand-written review expected to be classified as negative.
sample_text_neg = (
    "The movie was horrible. The animation and the graphics were terrible. I would never recommend this movie"
)

In [None]:
# Raw logit for the positive sample (the model has no output activation).
model.predict([sample_text_pos])

In [None]:
# Raw logit for the negative sample.
model.predict([sample_text_neg])

In [None]:
# A short sentence that is not a movie review.
model.predict(['Taj Mahal is beautiful'])

In [None]:
# Another non-review sentence, this one containing the word "bad".
model.predict(["Today the start of the was bad"])

In [None]:
# A sentence without obviously positive or negative words.
model.predict(["Its a regular day today"])

In [None]:
def getscore_and_setiment(model, sample_text):
  """Print the sentiment the model assigns to one piece of text.

  Args:
    model: Object whose ``predict`` accepts a one-element list of text and
      returns a nested result indexable as ``[0][0]``.
    sample_text: The text to score.

  The score is compared against 0: a score greater than 0 is reported as
  positive, anything else as negative. Nothing is returned.
  """
  score = model.predict([sample_text])[0][0]

  # Handle the non-positive case first, then fall through to positive.
  if not score > 0:
    print(f"result: negative sentiment with score: {score}")
    return

  print(f"result: positive sentiment with score: {score}")


In [None]:
# Same sentence as in the earlier predict cell, now with a sentiment label.
getscore_and_setiment(model, "Today the start of the was bad")

In [None]:
# Label the hand-written positive review.
getscore_and_setiment(model, sample_text_pos)
