# BERT pooler

## load model and tokenize

In [13]:
from transformers.models.bert import BertTokenizer, BertModel
import torch

# Load the pretrained BERT checkpoint and its matching tokenizer by name.
model_name = 'bert-base-uncased'
bert = BertModel.from_pretrained(model_name)
tokenizer = BertTokenizer.from_pretrained(model_name)
# Tokenize one example sentence; return_tensors="pt" requests PyTorch tensors.
text = "this is a text sentence."
inputs = tokenizer(text, return_tensors="pt")
# Display the encoded inputs (input_ids, token_type_ids, attention_mask).
inputs

{'input_ids': tensor([[ 101, 2023, 2003, 1037, 3793, 6251, 1012,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]])}

In [14]:
# Map the ids back to token strings to see the [CLS] / [SEP] markers that the
# tokenizer placed around the sentence.
tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])

['[CLS]', 'this', 'is', 'a', 'text', 'sentence', '.', '[SEP]']

## forward and pooler output

In [15]:
# Put the model in evaluation mode before running the forward pass.
bert.eval()
# Inference only: gradients are not needed here, so disable gradient tracking.
with torch.no_grad():
    outputs = bert(**inputs)
# Show which fields the model output provides.
outputs.keys()

odict_keys(['last_hidden_state', 'pooler_output'])

In [16]:
# One hidden vector per input token: (1 sequence, 8 tokens, 768 features).
outputs['last_hidden_state'].shape

torch.Size([1, 8, 768])

In [17]:
# A single 768-dimensional vector for the whole sequence: (1 sequence, 768).
outputs['pooler_output'].shape

torch.Size([1, 768])

## pooler output from scratch

In [18]:
# Recompute the pooler output by hand: take the hidden state of the first
# token ([CLS]) of every sequence, then apply the pooler's dense layer
# followed by its activation.
# Slicing with [:, 0] keeps the batch dimension, so the result has the same
# shape as outputs['pooler_output'] and remains correct for batches larger
# than one; indexing a single sequence with [0][0, :] would drop that
# dimension and only ever look at the first sequence.
cls_hidden_state = outputs['last_hidden_state'][:, 0]
my_pooler_output = bert.pooler.activation(bert.pooler.dense(cls_hidden_state))
my_pooler_output.shape

torch.Size([1, 768])

In [31]:
# Check that the hand-computed pooler output matches the model's own
# pooler_output; torch.allclose compares element-wise within a tolerance.
torch.allclose(outputs['pooler_output'], my_pooler_output)

True