# BERT Example


* Here is the page that lists all the BERT models available on TensorFlow Hub that one can download and use:

    https://tfhub.dev/google/collections/bert/1

* Here is the information on the base uncased BERT model:

    https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4

* It uses L=12 hidden layers (i.e., Transformer blocks), a hidden size of H=768, and A=12 attention heads. This model has been pre-trained on English text from Wikipedia and BooksCorpus.


In [2]:
!pip3 install tensorflow_text

Collecting tensorflow_text
  Downloading tensorflow_text-2.8.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (4.9 MB)
[K     |████████████████████████████████| 4.9 MB 5.1 MB/s 
Collecting tensorflow<2.9,>=2.8.0
  Downloading tensorflow-2.8.0-cp37-cp37m-manylinux2010_x86_64.whl (497.5 MB)
[K     |████████████████████████████████| 497.5 MB 26 kB/s 
[?25hCollecting tensorboard<2.9,>=2.8
  Downloading tensorboard-2.8.0-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 33.1 MB/s 
Collecting tf-estimator-nightly==2.8.0.dev2021122109
  Downloading tf_estimator_nightly-2.8.0.dev2021122109-py2.py3-none-any.whl (462 kB)
[K     |████████████████████████████████| 462 kB 44.2 MB/s 
Collecting keras<2.9,>=2.8.0rc0
  Downloading keras-2.8.0-py2.py3-none-any.whl (1.4 MB)
[K     |████████████████████████████████| 1.4 MB 42.2 MB/s 
Collecting numpy>=1.20
  Downloading numpy-1.21.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)
[K     |████

In [1]:
import tensorflow_hub as hub
# NOTE(review): `text` is not referenced anywhere in the visible cells; this import is
# presumably kept for its side effect of registering the TensorFlow ops that the
# preprocessing model needs — confirm before removing it as "unused".
import tensorflow_text as text

## Importing BERT model 

> Importing the BERT model from TensorFlow Hub: the URLs below point to the preprocessing model and to the encoder.

In [2]:
# TF Hub handle of the preprocessing model for uncased English BERT (version 3).
preprocess_url = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
# TF Hub handle of the encoder: uncased English BERT with L=12, H=768, A=12 (version 4).
encoder_url = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

## Preprocessing

In [4]:
# Layer 1: load the preprocessing model from TF Hub as a Keras layer.
bert_preprocess_model = hub.KerasLayer(preprocess_url)

In [6]:
# Two short example sentences to push through the preprocessing layer.
text_test = [
    'nice movie indeed',
    'I love python programming',
]
text_preprocessed = bert_preprocess_model(text_test)

# The preprocessor returns a dict; list the input tensors it contains.
text_preprocessed.keys()

dict_keys(['input_word_ids', 'input_mask', 'input_type_ids'])

In [7]:
# Show the full preprocessor output: a dict of int32 tensors of shape (2, 128),
# i.e. one row per input sentence, each padded out to 128 positions.
text_preprocessed

{'input_mask': <tf.Tensor: shape=(2, 128), dtype=int32, numpy=
 array([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
       dtype=int32)>,
 'input_type_ids': <tf.Tensor: shape

In [9]:
# Mask for the first sentence ('nice movie indeed'): five leading 1s followed by 0s.
# The 1s line up with the five non-zero ids of that sentence in 'input_word_ids'
# (three words plus the wrapper ids 101 and 102); the 0s mark padding positions.
text_preprocessed['input_mask'][0]

<tf.Tensor: shape=(128,), dtype=int32, numpy=
array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)>

In [10]:
# 'input_type_ids' is all zeros for both sentences in the output below.
# NOTE(review): presumably the segment id of each position (0 = first/only segment),
# since each input here is a single sentence — confirm against the preprocessing model docs.
text_preprocessed['input_type_ids']

<tf.Tensor: shape=(2, 128), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
      dtype=int32)>

In [11]:
# Integer token ids for each sentence, padded with 0 up to 128 positions.
# In the output the first row is 101, three ids for 'nice movie indeed', then 102;
# 101 / 102 look like the special [CLS] / [SEP] markers of the BERT vocabulary
# (TODO confirm against the model's vocab file).
text_preprocessed['input_word_ids']

<tf.Tensor: shape=(2, 128), dtype=int32, numpy=
array([[  101,  3835,  3185,  5262,   102,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0, 

## Word Embeddings

In [12]:
# Layer 2: load the BERT encoder from TF Hub as a Keras layer.
bert_model = hub.KerasLayer(encoder_url)

# Run the preprocessed inputs through the encoder and list the outputs it returns.
bert_results = bert_model(text_preprocessed)
bert_results.keys()

dict_keys(['sequence_output', 'pooled_output', 'default', 'encoder_outputs'])

In [14]:
# 'pooled_output' holds one embedding vector per input sentence: the output below has
# shape (2, 768), i.e. a 768-dimensional vector for each of the two sentences.

bert_results['pooled_output']

<tf.Tensor: shape=(2, 768), dtype=float32, numpy=
array([[-0.79177463, -0.21411951,  0.49769327, ...,  0.24465072,
        -0.47334516,  0.81758744],
       [-0.91712326, -0.4793521 , -0.78657013, ..., -0.617518  ,
        -0.71026915,  0.92184293]], dtype=float32)>

In [15]:
# 'sequence_output' holds one embedding per token position: shape (2, 128, 768) below,
# i.e. 2 sentences x 128 positions (padding included) x 768 hidden units.
bert_results['sequence_output']

<tf.Tensor: shape=(2, 128, 768), dtype=float32, numpy=
array([[[ 0.07292038,  0.08567842,  0.14476822, ..., -0.09677143,
          0.08722145,  0.07711081],
        [ 0.17839344, -0.19006258,  0.5034949 , ..., -0.0586981 ,
          0.32717144, -0.15578583],
        [ 0.18701449, -0.43388814, -0.48875296, ..., -0.15502766,
          0.00145172, -0.2447103 ],
        ...,
        [ 0.12083013,  0.12884311,  0.4645362 , ...,  0.07375546,
          0.17441897,  0.16522236],
        [ 0.07967884, -0.01190644,  0.5022541 , ...,  0.13777748,
          0.210022  ,  0.00624548],
        [-0.07212698, -0.28303394,  0.5903344 , ...,  0.4755187 ,
          0.1666846 , -0.08920381]],

       [[-0.07900536,  0.36335137, -0.21101594, ..., -0.171837  ,
          0.16299717,  0.6724262 ],
        [ 0.27883533,  0.43716228, -0.35764885, ..., -0.04463524,
          0.38315073,  0.58879745],
        [ 1.2037673 ,  1.072702  ,  0.48408717, ...,  0.24921033,
          0.4073103 ,  0.4048179 ],
        ...,

In [16]:
# 'encoder_outputs' is a list of tensors; the first one displayed below has shape (2, 128, 768).
# NOTE(review): presumably one tensor per Transformer block, in layer order — confirm
# against the encoder's model docs.
bert_results['encoder_outputs']

[<tf.Tensor: shape=(2, 128, 768), dtype=float32, numpy=
 array([[[ 0.12901425,  0.00644732, -0.03614959, ...,  0.04999626,
           0.06149194, -0.02657545],
         [ 1.1753378 ,  1.2140784 ,  1.1569978 , ...,  0.11634377,
          -0.3585534 , -0.4049019 ],
         [ 0.03859021,  0.5386997 , -0.21089756, ...,  0.21858235,
           0.7260173 , -1.1158612 ],
         ...,
         [-0.07587024, -0.25421894,  0.7075514 , ...,  0.505419  ,
          -0.1887869 ,  0.15028353],
         [-0.1606659 , -0.28089708,  0.5759705 , ...,  0.5275857 ,
          -0.11141351,  0.02887536],
         [-0.04428169, -0.2027955 ,  0.5909354 , ...,  0.8133834 ,
          -0.3907574 , -0.02601734]],
 
        [[ 0.18903583,  0.02752563, -0.06513733, ..., -0.006202  ,
           0.15053886,  0.03165463],
         [ 0.59161484,  0.7589136 , -0.07240709, ...,  0.6190397 ,
           0.8292891 ,  0.16161957],
         [ 1.446081  ,  0.4460263 ,  0.40990278, ...,  0.48255885,
           0.6269118 ,  0.13

In [18]:
# Number of entries in 'encoder_outputs': 12, matching the L=12 encoder layers
# (Transformer blocks) of the BERT base model used here.
len(bert_results['encoder_outputs'])

12

In [19]:
# First entry of 'encoder_outputs', shape (2, 128, 768):
#   2   -> the two input sentences,
#   128 -> token positions per sentence (tokens, not words: wrapper ids and padding included),
#   768 -> size of the embedding vector at each position.
bert_results['encoder_outputs'][0]

<tf.Tensor: shape=(2, 128, 768), dtype=float32, numpy=
array([[[ 0.12901425,  0.00644732, -0.03614959, ...,  0.04999626,
          0.06149194, -0.02657545],
        [ 1.1753378 ,  1.2140784 ,  1.1569978 , ...,  0.11634377,
         -0.3585534 , -0.4049019 ],
        [ 0.03859021,  0.5386997 , -0.21089756, ...,  0.21858235,
          0.7260173 , -1.1158612 ],
        ...,
        [-0.07587024, -0.25421894,  0.7075514 , ...,  0.505419  ,
         -0.1887869 ,  0.15028353],
        [-0.1606659 , -0.28089708,  0.5759705 , ...,  0.5275857 ,
         -0.11141351,  0.02887536],
        [-0.04428169, -0.2027955 ,  0.5909354 , ...,  0.8133834 ,
         -0.3907574 , -0.02601734]],

       [[ 0.18903583,  0.02752563, -0.06513733, ..., -0.006202  ,
          0.15053886,  0.03165463],
        [ 0.59161484,  0.7589136 , -0.07240709, ...,  0.6190397 ,
          0.8292891 ,  0.16161957],
        [ 1.446081  ,  0.4460263 ,  0.40990278, ...,  0.48255885,
          0.6269118 ,  0.13463339],
        ...,

In [20]:
# Element-wise comparison of the last entry of 'encoder_outputs' with 'sequence_output'.
# Every entry shown in the (truncated) output is True, i.e. 'sequence_output' matches the
# output of the final encoder layer for this batch.
bert_results['encoder_outputs'][-1] == bert_results['sequence_output']

<tf.Tensor: shape=(2, 128, 768), dtype=bool, numpy=
array([[[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]],

       [[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]]])>