<a href="https://colab.research.google.com/github/giuliocn/bert_binary_classification_imdb_reviews/blob/main/bert_binary_imdb_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# IMDB movie reviews binary classification

## Setup

In [1]:
import tensorflow as tf

# List the GPUs TensorFlow can see and report how many there are.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# Display the name of the GPU device as the cell output.
device_name = tf.test.gpu_device_name()
device_name

Num GPUs Available:  1


'/device:GPU:0'

In [2]:
# A dependency of the preprocessing for BERT inputs
!pip install -qU tensorflow-text==2.14.0

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m26.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
!pip install -qU tf-models-official==2.14.0

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.7/119.7 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m31.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.2/241.2 kB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for seqeval (setup.py) ... [?25l[?25hdone


In [4]:
# Standard library: filesystem helpers used by the download cells.
import os
import shutil
import os.path as path

# TensorFlow stack.
import tensorflow as tf
import tensorflow_hub as hub
# NOTE(review): `text` is not referenced by name in the visible cells;
# presumably imported so the text ops needed by the saved model are
# registered — confirm before removing.
import tensorflow_text as text

import matplotlib.pyplot as plt

# Only show TensorFlow log records of level ERROR and above.
tf.get_logger().setLevel('ERROR')

## Dataset

In [5]:
url = 'https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'

# The archive is extracted into the current working directory
# (cache_dir='.'), so the existence check uses the same relative location
# that the dataset-loading code reads from ('aclImdb/...') instead of an
# absolute Colab-only path.
dataset_dir = 'aclImdb'

if not path.exists(dataset_dir):
  dataset = tf.keras.utils.get_file('aclImdb_v1.tar.gz', url,
                                    untar=True, cache_dir='.',
                                    cache_subdir='')

train_dir = os.path.join(dataset_dir, 'train')

# remove unused folders to make it easier to load the data
# This runs outside the download branch and only when the folder exists, so
# the cell can be re-run safely and also cleans up after an interrupted run.
remove_dir = os.path.join(train_dir, 'unsup')
if path.isdir(remove_dir):
  shutil.rmtree(remove_dir)

Downloading data from https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz


In [6]:
# Input-pipeline settings.
AUTOTUNE = tf.data.AUTOTUNE
batch_size, seed, validation_split = 32, 1, 0.1

# Seed the random generators and enable op determinism.
tf.keras.utils.set_random_seed(seed)
tf.config.experimental.enable_op_determinism()

# Options shared by the training and validation subsets of 'aclImdb/train':
# both are carved out with the same seed and split fraction.
train_split_options = dict(
    batch_size=batch_size,
    validation_split=validation_split,
    seed=seed,
)

# Training subset; the class names are read before caching.
raw_train_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/train', subset='training', **train_split_options)
class_names = raw_train_ds.class_names
train_ds = raw_train_ds.cache()

# Validation subset.
val_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/train', subset='validation', **train_split_options).cache()

# Test set, loaded from its own directory.
test_ds = tf.keras.utils.text_dataset_from_directory(
    'aclImdb/test', batch_size=batch_size).cache()

Found 25000 files belonging to 2 classes.
Using 22500 files for training.
Found 25000 files belonging to 2 classes.
Using 2500 files for validation.
Found 25000 files belonging to 2 classes.


In [7]:
# Print the first three reviews of the first training batch with their labels.
for text_batch, label_batch in train_ds.take(1):
  # Convert each tensor to NumPy once instead of on every access.
  reviews = text_batch.numpy()
  labels = label_batch.numpy()
  for i in range(3):
    label = labels[i]
    print(f'Review: {reviews[i]}')
    print(f'Label : {label} ({class_names[label]})')

Review: b"I entered the theatre intending to pass a pleasant 90 minutes being entertained if not enlightened. I left neither entertained nor enlightened. This movie can't make up its mind what it wants to be and ends up being not much of anything. There are a few funny lines and a few incredibly pretentious movie references (The 400 Blows--for this character? come off it!). While none of the characters gets treated with much respect, the over thirty gay men get the worst of it: all predatory, fat, sad, slobs. If you're in the mood for a movie dealing with gay relationships check out Parting Glances, Longtime Companion, Trick, All Over the Guy, Red Dirt, Maurice, Philadelphia instead. You'll thank me.<br /><br />"
Label : 0 (neg)
Review: b"Though I liked On the Town better I really liked it. I'm a new comer when it comes to Frank Sinatra and Gene Kelly. Though I had heard of them I had never seen anything with them in it until recently. The first one I saw was Singin in the Rain that ma

## Vocabulary

In [8]:
# Read the vocabulary file shipped with the dataset, one word per line.
# The path is relative, like the 'aclImdb/...' paths used to load the
# datasets, and the encoding is explicit so that decoding does not depend on
# the platform default.
with open('aclImdb/imdb.vocab', mode='rt', encoding='utf-8') as vocab:
  words = [word.strip('\n') for word in vocab]

# NOTE(review): the special tokens and punctuation below are bytes while
# `words` holds str entries, so _VOCAB mixes both types — confirm this is
# intended before relying on membership tests or lookups by word.
_VOCAB = [
    # Special tokens
    b"[UNK]", b"[MASK]", b"[RANDOM]", b"[CLS]", b"[SEP]",
    # Punctuation
    b".", b",", b";", b":",
    # words
] + words

# Indices of the special tokens inside _VOCAB.
_START_TOKEN = _VOCAB.index(b"[CLS]")
_END_TOKEN = _VOCAB.index(b"[SEP]")
_MASK_TOKEN = _VOCAB.index(b"[MASK]")
_RANDOM_TOKEN = _VOCAB.index(b"[RANDOM]")
_UNK_TOKEN = _VOCAB.index(b"[UNK]")
_MAX_SEQ_LEN = 256
_MAX_PREDICTIONS_PER_BATCH = 8

_VOCAB_SIZE = len(_VOCAB)

# Display the first entries as the cell output.
_VOCAB[:7]

[b'[UNK]', b'[MASK]', b'[RANDOM]', b'[CLS]', b'[SEP]', b'.', b',']

## Pre-trained Model

In [11]:
# Release archive holding the saved model, and the directory whose presence
# is used to decide whether the download can be skipped.
model_dir = 'imdb_l10_h256_a4_bert'
archive_name = 'imdb_l10_h256_a4_bert-20231126T160856Z-001.zip'
release_url = 'https://github.com/giuliocn/bert_binary_classification_imdb_reviews/releases/download/binary-classification/'
url = release_url + archive_name

# Download and extract into the current directory only when the model
# directory is not already there.
if not path.exists(model_dir):
  dataset = tf.keras.utils.get_file(
      archive_name, url, extract=True, cache_dir='.', cache_subdir='')

In [13]:
# Load model from directory
classifier_model = tf.keras.models.load_model('imdb_l10_h256_a4_bert')

# Check model architecture
classifier_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(32,)]                      0         []                            
                                                                                                  
 pre_layer (Custom>PreLayer  {'input_type_ids': (None,    0         ['input_1[0][0]']             
 )                           256),                                                                
                              'input_word_ids': (None,                                            
                             256),                                                                
                              'input_mask': (None, 256)                                           
                             }                                                                

## Evaluation

In [14]:
# Evaluate the model on metrics
classifier_model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.TruePositives(),
        tf.keras.metrics.FalseNegatives(),
        tf.keras.metrics.FalsePositives(),
        tf.keras.metrics.TrueNegatives(),
        ])

In [15]:
# Run the compiled model over the test dataset. With return_dict=True the
# loss and metrics come back as a dict, which is kept in `result`.
result = classifier_model.evaluate(test_ds, return_dict=True)



In [16]:
# Express each confusion-matrix count as a percentage of all evaluated
# examples. The total is the sum of the four counts, so the report stays
# correct if the evaluation set is not exactly 25,000 examples.
count_keys = ('true_positives', 'false_negatives',
              'false_positives', 'true_negatives')
counts = {key: int(result[key]) for key in count_keys}
total = sum(counts.values())

string = f"""
{'Precision':15}  {result['precision']:.3f}
{'Recall':15}  {result['recall']:.3f}

{'True Positives':15}  {counts['true_positives']*100/total:2.3f} %
{'False Negatives':15}  {counts['false_negatives']*100/total:2.3f} %
{'False Positives':15}  {counts['false_positives']*100/total:2.3f} %
{'True Negatives':15}  {counts['true_negatives']*100/total:2.3f} %
"""
print(string)


Precision        0.832
Recall           0.850

True Positives   42.504 %
False Negatives  7.496 %
False Positives  8.560 %
True Negatives   41.440 %



## Unseen Examples

In [17]:
examples = []

# Score the first five reviews of the test set.
# No shuffle is applied here: a shuffle buffer of size 1 never reorders
# elements, so `.shuffle(1)` would yield exactly the same five reviews.
for tf_text, tf_label in test_ds.unbatch().take(5):
  review = tf_text.numpy()  # convert once, reuse below
  examples.append(review)
  print(review[:50], f"Expected output: {tf_label.numpy()}")
  # The review is repeated batch_size times and the predictions are averaged
  # (presumably because the loaded model has a fixed batch dimension; its
  # summary shows an input shape of (32,) — confirm).
  # The value is stored in `score` so that `result`, the metrics dict from
  # evaluate(), is not overwritten.
  score = tf.math.reduce_mean(
      classifier_model(tf.constant([review] * batch_size)))
  print(f"model result: {score.numpy():.3f}")


b'This is a tough film to review, since several fact' Expected output: 1
model result: 0.008
b'This is a charming little film, which like many of' Expected output: 1
model result: 0.979
b'I remember viewing this movie when I was a kid. I ' Expected output: 0
model result: 0.960
b'This odd little film starts out with the story of ' Expected output: 0
model result: 0.990
b"I have seen a few of Fred Carpenter's movies on Sh" Expected output: 1
model result: 0.999


In [18]:
examples[2]

b"I remember viewing this movie when I was a kid. I recall it terrified me immensely and it stayed with me all these years. I spent a couple of years trying to find it online...didn't remember the title, only the storyline. After searching and searching, I came across a VHS that was being sold on E-Bay. I was excited and when it finally arrived, I jammed it into the VCR and couldn't help but feel a bit nostalgic. Needless to say, I was slightly disappointed. This wasn't the movie I remember watching as a kid. It was boring at times and I found Beryl Reid's incessant whinning extremely annoying. Both performances by Reid and Flora Robson were good overall but the movie wasn't scary. I think any movie is worth viewing to form you're own opinion but sometimes, well......"

## More Examples

In [19]:
# Score a clearly positive sentence: it is repeated batch_size times and the
# predictions are averaged. The value is kept in `score` so that `result`,
# the metrics dict from evaluate(), is not overwritten.
batch = tf.constant(['The movie was great!'] * batch_size)
score = tf.math.reduce_mean(classifier_model(batch))
print(f"model result: {score.numpy():.3f}")

model result: 0.946


In [22]:
# Score a clearly negative sentence: it is repeated batch_size times and the
# predictions are averaged. The value is kept in `score` so that `result`,
# the metrics dict from evaluate(), is not overwritten.
batch = tf.constant(['The movie was terrible!'] * batch_size)
score = tf.math.reduce_mean(classifier_model(batch))
print(f"model result: {score.numpy():.3f}")

model result: 0.211


In [35]:
# Model fails to classify single positive words
# The word is repeated batch_size times and the predictions are averaged.
# The value is kept in `score` so that `result`, the metrics dict from
# evaluate(), is not overwritten.
batch = tf.constant(['Wonderful.'] * batch_size)
score = tf.math.reduce_mean(classifier_model(batch))
print(f"model result: {score.numpy():.3f}")

model result: 0.180


In [36]:
# Score a single negative word: it is repeated batch_size times and the
# predictions are averaged. The value is kept in `score` so that `result`,
# the metrics dict from evaluate(), is not overwritten.
batch = tf.constant(['Terrible.'] * batch_size)
score = tf.math.reduce_mean(classifier_model(batch))
print(f"model result: {score.numpy():.3f}")

model result: 0.125


In [40]:
# Neutral statement
# The sentence is repeated batch_size times and the predictions are
# averaged. The value is kept in `score` so that `result`, the metrics dict
# from evaluate(), is not overwritten.
batch = tf.constant(['This is a movie'] * batch_size)
score = tf.math.reduce_mean(classifier_model(batch))
print(f"model result: {score.numpy():.3f}")

model result: 0.520


## Classify single words with a template string

In [41]:
# Adjectives to slot into the template sentence, grouped by sentiment.
# Good
good_words = 'good great enjoyable amazing delightful lovely pleasant'.split()
# Bad
bad_words = 'bad poor inferior lacking awful terrible abominable'.split()
# Neutral
neutral_words = (
    'indifferent mediocre ordinary average commonplace medium moderate'.split()
)

def print_my_examples(words):
  """Print the classifier's mean prediction for each word in `words`.

  Each word is inserted into the template "This movie was <word>", the
  sentence is repeated `batch_size` times (the notebook-level batch size used
  by the other inference cells) and the mean of the model outputs is printed.

  Args:
    words: iterable of strings to classify through the template sentence.
  """
  for w in words:
    batch = tf.constant([f"This movie was {w}"] * batch_size)
    score = tf.math.reduce_mean(classifier_model(batch))
    print(f"input: {w:15}\t\t result:{score.numpy():.3f}")


In [42]:
# Score the three word groups in turn, separating consecutive groups with
# the same blank-line spacer.
word_groups = (good_words, bad_words, neutral_words)
for position, group in enumerate(word_groups):
  if position > 0:
    print('\n')
  print_my_examples(group)

input: good           		 result:0.668
input: great          		 result:0.790
input: enjoyable      		 result:0.770
input: amazing        		 result:0.834
input: delightful     		 result:0.716
input: lovely         		 result:0.693
input: pleasant       		 result:0.749


input: bad            		 result:0.085
input: poor           		 result:0.075
input: inferior       		 result:0.096
input: lacking        		 result:0.071
input: awful          		 result:0.059
input: terrible       		 result:0.083
input: abominable     		 result:0.021


input: indifferent    		 result:0.129
input: mediocre       		 result:0.131
input: ordinary       		 result:0.496
input: average        		 result:0.264
input: commonplace    		 result:0.747
input: medium         		 result:0.060
input: moderate       		 result:0.426
