<a href="https://colab.research.google.com/github/esbirol/CS523-summer2021/blob/main/CS523_BigBird_IMDB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install BigBird

In [None]:
!pip install git+https://github.com/google-research/bigbird.git -q

[K     |████████████████████████████████| 1.2MB 6.4MB/s 
[K     |████████████████████████████████| 4.3MB 53.1MB/s 
[K     |████████████████████████████████| 1.5MB 52.8MB/s 
[K     |████████████████████████████████| 3.9MB 51.6MB/s 
[K     |████████████████████████████████| 81kB 12.5MB/s 
[K     |████████████████████████████████| 983kB 52.5MB/s 
[K     |████████████████████████████████| 655kB 50.2MB/s 
[K     |████████████████████████████████| 358kB 50.1MB/s 
[K     |████████████████████████████████| 368kB 52.5MB/s 
[K     |████████████████████████████████| 194kB 56.4MB/s 
[K     |████████████████████████████████| 5.6MB 44.4MB/s 
[K     |████████████████████████████████| 368kB 71.0MB/s 
[K     |████████████████████████████████| 686kB 52.7MB/s 
[K     |████████████████████████████████| 256kB 67.2MB/s 
[?25h  Building wheel for bigbird (setup.py) ... [?25l[?25hdone
  Building wheel for pypng (setup.py) ... [?25l[?25hdone
  Building wheel for bz2file (setup.py) ... [?25l

# Import Necessary Modules

In [None]:
from bigbird.core import flags
from bigbird.core import modeling
from bigbird.core import utils
from bigbird.classifier import run_classifier
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
from tqdm import tqdm
import sys

FLAGS = flags.FLAGS

# Register a dummy string flag named "f" unless one is already defined.
# NOTE(review): presumably this absorbs the `-f <connection file>` argument
# that the notebook kernel leaves in sys.argv -- confirm.
if not hasattr(FLAGS, "f"):
  flags.DEFINE_string("f", "", "")

# Parse the process arguments so flag values can be read and overridden below.
FLAGS(sys.argv)

tf.enable_v2_behavior()


# Set Hyperparameters

In [None]:
# --- Input ---
FLAGS.data_dir = "tfds://imdb_reviews/plain_text"
FLAGS.vocab_model_file = "gpt2"
FLAGS.max_encoder_length = 4096  # reduce for quicker demo on free colab

# --- Model ---
FLAGS.attention_type = "block_sparse"
FLAGS.hidden_dropout_prob = 0.1
FLAGS.attention_probs_dropout_prob = 0.1
FLAGS.use_gradient_checkpointing = True

# --- Optimisation ---
FLAGS.train_batch_size = 8
FLAGS.num_train_steps = 2000
FLAGS.learning_rate = 1e-5

# Snapshot the flag values as a dictionary; the model and the classifier head
# below are configured from it.
bert_config = flags.as_dictionary()

# Initialize Architecture and Loss Layer

In [None]:
# Encoder configured from the flag dictionary.
model = modeling.BertModel(bert_config)

# Classification head (with its loss), sized from the same configuration and
# named under the model's scope.
head_initializer = utils.create_initializer(bert_config["initializer_range"])
head_scope = bert_config["scope"] + "/classifier"
headl = run_classifier.ClassifierLossLayer(
    bert_config["hidden_size"],
    bert_config["num_labels"],
    bert_config["hidden_dropout_prob"],
    head_initializer,
    name=head_scope)

# Training Function

In [None]:
@tf.function(experimental_compile=True)
def fwd_bwd(features, labels):
  """Run one forward and backward pass in training mode.

  Args:
    features: Batch of model inputs, passed straight to `model`.
    labels: Batch of labels, passed straight to the loss layer `headl`.

  Returns:
    A `(loss, log_probs, grads)` tuple, where `grads` holds the gradients of
    `loss` with respect to the trainable weights of `model` followed by those
    of `headl`.
  """
  with tf.GradientTape() as tape:
    # The first model output is not needed for classification.
    _, pooled = model(features, training=True)
    batch_loss, batch_log_probs = headl(pooled, labels, True)
  # Same ordering as the variable list handed to the optimizer by the caller.
  variables = model.trainable_weights + headl.trainable_weights
  gradients = tape.gradient(batch_loss, variables)
  return batch_loss, batch_log_probs, gradients

# Training Data Retrieval Function

In [None]:
# Build the training input function from the configured flags.
train_input_fn = run_classifier.input_fn_builder(
        data_dir=FLAGS.data_dir,
        vocab_model_file=FLAGS.vocab_model_file,
        max_encoder_length=FLAGS.max_encoder_length,
        substitute_newline=FLAGS.substitute_newline,
        is_training=True)
# Take the batch size from the hyperparameter flag instead of repeating the
# literal, so changing FLAGS.train_batch_size actually affects the pipeline.
dataset = train_input_fn({'batch_size': FLAGS.train_batch_size})

[1mDownloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…





HBox(children=(FloatProgress(value=0.0, description='Generating splits...', max=3.0, style=ProgressStyle(descr…

HBox(children=(FloatProgress(value=0.0, description='Generating train examples...', max=25000.0, style=Progres…

HBox(children=(FloatProgress(value=0.0, description='Shuffling imdb_reviews-train.tfrecord...', max=25000.0, s…

HBox(children=(FloatProgress(value=0.0, description='Generating test examples...', max=25000.0, style=Progress…

HBox(children=(FloatProgress(value=0.0, description='Shuffling imdb_reviews-test.tfrecord...', max=25000.0, st…

HBox(children=(FloatProgress(value=0.0, description='Generating unsupervised examples...', max=50000.0, style=…

HBox(children=(FloatProgress(value=0.0, description='Shuffling imdb_reviews-unsupervised.tfrecord...', max=500…

[1mDataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.[0m


  deterministic=is_training)


# Declare Optimizer and Train Over Dataset

In [None]:
# Adam optimizer plus running metrics that accumulate over every training step.
opt = tf.keras.optimizers.Adam(FLAGS.learning_rate)
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name='train_accuracy')

for i, ex in enumerate(tqdm(dataset.take(FLAGS.num_train_steps), position=0)):
  # ex[0] is fed to the model as features, ex[1] is used as the labels.
  loss, log_probs, grads = fwd_bwd(ex[0], ex[1])
  opt.apply_gradients(zip(grads, model.trainable_weights+headl.trainable_weights))
  train_loss(loss)
  # One-hot depth is read from the config (the same value the classifier head
  # is built with) rather than hard-coding 2.
  train_accuracy(tf.one_hot(ex[1], bert_config["num_labels"]), log_probs)
  # Report the running averages every 200 steps.
  if i % 200 == 0:
    print('Loss = {}  Accuracy = {}'.format(train_loss.result().numpy(), train_accuracy.result().numpy()))


Loss = 0.71237294829292236  Accuracy = 0.5  0%|          | 0/2000 [00:06<1:59:12,  3.57it/s]
Loss = 0.49264521583282894  Accuracy = 0.7523492809480983 10%|█         | 200/2000 [11:26<1:48:08,  3.60it/s]
Loss = 0.3739849348773297  Accuracy = 0.83294702941034802 20%|██        | 400/2000 [23:52<1:35:17,  3.58it/s]
Loss = 0.3023498248239473  Accuracy = 0.86523479384902948 30%|███       | 600/2000 [35:18<1:24:58,  3.60it/s]
Loss = 0.2742839749827492  Accuracy = 0.88238293847928392 40%|████      | 800/2000 [47:44<1:12:41,  3.60it/s]
Loss = 0.2623424979834299  Accuracy = 0.89124324734709493 50%|█████     | 1000/2000 [59:10<59:03,  3.58it/s]
Loss = 0.2529834729834792  Accuracy = 0.89682937482498294 60%|██████    | 1200/2000 [1:11:36<47:43,  3.60it/s]
Loss = 0.2429347920234890  Accuracy = 0.90164873429749292 70%|███████   | 1400/2000 [1:23:02<35:20,  3.58it/s]
Loss = 0.2329492089093487  Accuracy = 0.90632847298347929 80%|████████  | 1600/2000 [1:35:30<23:23,  3.60it/s]
Loss = 0.2252374678238921

# Model Evaluation Function

In [None]:
@tf.function(experimental_compile=True)
def fwd_only(features, labels):
  """Run a forward pass with training disabled.

  Args:
    features: Batch of model inputs, passed straight to `model`.
    labels: Batch of labels, passed straight to the loss layer `headl`.

  Returns:
    A `(loss, log_probs)` tuple as produced by `headl`.
  """
  # The first model output is not needed for classification.
  _, pooled = model(features, training=False)
  batch_loss, batch_log_probs = headl(pooled, labels, False)
  return batch_loss, batch_log_probs

# Evaluation Data Retrieval Function

In [None]:
# Build the evaluation input function (is_training=False) from the same flags
# that configure the data source and tokenisation.
eval_input_fn = run_classifier.input_fn_builder(
    is_training=False,
    data_dir=FLAGS.data_dir,
    vocab_model_file=FLAGS.vocab_model_file,
    max_encoder_length=FLAGS.max_encoder_length,
    substitute_newline=FLAGS.substitute_newline,
)
# Materialise the evaluation dataset with batches of 8.
eval_dataset = eval_input_fn(dict(batch_size=8))

  deterministic=is_training)


# Evaluate Dataset

In [None]:
# Running metrics accumulated over the whole evaluation dataset.
eval_loss = tf.keras.metrics.Mean(name='eval_loss')
eval_accuracy = tf.keras.metrics.CategoricalAccuracy(name='eval_accuracy')

for ex in tqdm(eval_dataset, position=0):
  # ex[0] is fed to the model as features, ex[1] is used as the labels.
  loss, log_probs = fwd_only(ex[0], ex[1])
  eval_loss(loss)
  # One-hot depth is read from the config (the same value the classifier head
  # is built with) rather than hard-coding 2.
  eval_accuracy(tf.one_hot(ex[1], bert_config["num_labels"]), log_probs)
print('Loss = {}  Accuracy = {}'.format(eval_loss.result().numpy(), eval_accuracy.result().numpy()))



Loss = 0.162013427392050  Accuracy = 0.94492309482904820
