<a href="https://colab.research.google.com/github/bskkarthik/DataScience_repos/blob/master/pytorch_bert_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Uncomment the line below on a fresh environment (e.g. Colab) where the
# `transformers` package imported in the next cell is not yet installed.
#!pip install transformers

In [0]:
import torch
from transformers import BertModel, BertTokenizer

The architecture of BERT is the same as the encoder of a Transformer network. It consists mainly of a stack of self-attention layers (12 in the base model and 24 in the large model), each combined with layer normalization and residual connections.

In [0]:
# Load the tokenizer and the encoder from the same 'bert-base-uncased'
# checkpoint; both objects are reused by every later cell.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

In [30]:
# Step 1 : Tokenize
sentence = "I study data science."
# T is the maximum sequence length; the padding step fills the token list up to it.
T = 12
# Split the raw sentence into the tokenizer's vocabulary tokens.
tokens = tokenizer.tokenize(sentence)
tokens

['i', 'study', 'data', 'science', '.']

In [31]:
# Step 2 : Add [CLS] and [SEP]
# Rebuild the list from `sentence` instead of wrapping `tokens` in place, so that
# re-running this cell cannot stack a second [CLS]/[SEP] pair onto the sequence.
tokens = ['[CLS]'] + tokenizer.tokenize(sentence) + ['[SEP]']
tokens

['[CLS]', 'i', 'study', 'data', 'science', '.', '[SEP]']

In [32]:
# Step 3 : Pad tokens
# T is the maximum length, so a sequence longer than T is first cut down to T
# items, keeping its last token (the closing '[SEP]' added in Step 2). Without
# this, an over-long input would pass through unpadded and exceed T.
if len(tokens) > T:
    fitted_tokens = tokens[:max(T - 1, 0)] + tokens[-1:]
else:
    fitted_tokens = tokens
# Fill the remaining positions with '[PAD]' so the result has exactly T items.
padded_tokens = fitted_tokens + ['[PAD]'] * (T - len(fitted_tokens))
padded_tokens

['[CLS]',
 'i',
 'study',
 'data',
 'science',
 '.',
 '[SEP]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]',
 '[PAD]']

In [33]:
# Attention mask: 1 for every real token, 0 for each '[PAD]' filler position.
attn_mask = [int(tok != '[PAD]') for tok in padded_tokens]
attn_mask

[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0]

In [34]:
# Step 4 Segment ids (optional): one 0 per position of the padded sequence.
seq_ids = [0] * len(padded_tokens)
# Step 5  Map every token, padding included, to its index in the BERT vocabulary.
token_ids = tokenizer.convert_tokens_to_ids(padded_tokens)
token_ids

[101, 1045, 2817, 2951, 2671, 1012, 102, 0, 0, 0, 0, 0]

In [0]:
#Converting everything to torch tensors before feeding them to bert_model
# as_tensor + reshape(1, -1) keeps this cell idempotent: on a first run the Python
# lists become int64 tensors with a leading batch dimension; on a re-run the
# existing [1, 12] tensors pass through unchanged instead of gaining an extra
# dimension (which is what torch.tensor(...).unsqueeze(0) would do).
token_ids = torch.as_tensor(token_ids, dtype=torch.long).reshape(1, -1) #Shape [1, 12]
attn_mask = torch.as_tensor(attn_mask, dtype=torch.long).reshape(1, -1) #Shape [1, 12]
seq_ids = torch.as_tensor(seq_ids, dtype=torch.long).reshape(1, -1) #Shape [1, 12]

In [36]:
# Feed them to BERT
outputs = bert_model(token_ids, attention_mask=attn_mask, token_type_ids=seq_ids)
# Index positionally instead of tuple-unpacking the call result: recent
# transformers releases return a dict-like ModelOutput, and unpacking that
# yields its field names (strings) rather than tensors. Integer indexing works
# for both the older plain-tuple return and the ModelOutput return.
hidden_reps = outputs[0]  # per-token hidden states
cls_head = outputs[1]     # pooled output
print(hidden_reps.shape)
print(cls_head.shape)

torch.Size([1, 12, 768])
torch.Size([1, 768])
