In [None]:
!pip install transformers

In [18]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification


In [None]:
# Name of the pretrained checkpoint; reused below to load the matching model.
checkpoint = "bert-base-uncased"
# AutoTokenizer resolves the concrete tokenizer class from the checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [4]:
# Show the tokenizer's repr: concrete class, vocab size, max length, special tokens.
tokenizer

BertTokenizerFast(name_or_path='bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})

In [5]:
# Step 1: split the raw text into token strings.
tokens = tokenizer.tokenize("hello world!")

In [6]:
# Token strings only; tokenize() does not add [CLS]/[SEP] here.
tokens

['hello', 'world', '!']

In [7]:
# Step 2: map each token string to its integer id.
ids = tokenizer.convert_tokens_to_ids(tokens)

In [8]:
# One integer id per token, in the same order as `tokens`.
ids

[7592, 2088, 999]

In [9]:
# Round-trip check: the ids map back to the original token strings.
tokenizer.convert_ids_to_tokens(ids)

['hello', 'world', '!']

In [11]:
# encode() goes from text straight to ids. Unlike the manual
# tokenize + convert_tokens_to_ids path above, the result is wrapped in
# special tokens (see the extra first and last ids).
# Note: this rebinds `ids` from the earlier cell.
ids = tokenizer.encode("hello world!")
ids

[101, 7592, 2088, 999, 102]

In [12]:
# Reveal which special tokens encode() added around the text.
tokenizer.convert_ids_to_tokens(ids)

['[CLS]', 'hello', 'world', '!', '[SEP]']

In [13]:
# decode() turns ids back into a single string; special tokens are kept in the text.
tokenizer.decode(ids)

'[CLS] hello world! [SEP]'

In [14]:
# Calling the tokenizer directly builds the full set of model inputs:
# input_ids plus token_type_ids and attention_mask.
model_inputs = tokenizer("hello world!")
model_inputs

{'input_ids': [101, 7592, 2088, 999, 102], 'token_type_ids': [0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1]}

In [None]:
# A small batch of example texts, reused by the cells below.
data = ["Deep Learning", "Attention in Python", "Machine Learning"]

# Passing a list tokenizes every text in one call; no padding or
# truncation is requested here.
tokenizer(data)

In [None]:
# Load the same checkpoint with a sequence-classification head on top,
# using the default number of labels.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

In [21]:
# Build one rectangular batch: pad shorter texts, truncate over-long
# ones, and return PyTorch tensors so the result can be fed to the model.
model_inputs = tokenizer(
    data,
    padding=True,
    truncation=True,
    return_tensors='pt',
)

# Unpack the tokenizer output as keyword arguments for the forward pass.
outputs = model(**model_inputs)

In [23]:
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[ 0.0619, -0.5358],
        [ 0.2266, -0.5742],
        [ 0.3014, -0.6642]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [None]:
# Reload the checkpoint with a 3-label classification head. This rebinds
# `model`, replacing the default-label model loaded earlier.
# NOTE(review): the classification head is presumably newly initialized rather
# than pretrained, so the logits below would not be meaningful predictions - confirm.
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=3)

In [26]:
# Forward pass with the 3-label model on the same padded batch:
# one row of logits per text, one column per label.
outputs = model(**model_inputs)
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[ 0.2205, -0.2956, -0.1036],
        [ 0.2659, -0.1598, -0.0514],
        [ 0.1237, -0.0427, -0.1145]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [27]:
# Access the logits by attribute name.
outputs.logits

tensor([[ 0.2205, -0.2956, -0.1036],
        [ 0.2659, -0.1598, -0.0514],
        [ 0.1237, -0.0427, -0.1145]], grad_fn=<AddmmBackward0>)

In [28]:
# Positional access: index 0 returns the same logits tensor here (loss is None).
outputs[0]

tensor([[ 0.2205, -0.2956, -0.1036],
        [ 0.2659, -0.1598, -0.0514],
        [ 0.1237, -0.0427, -0.1145]], grad_fn=<AddmmBackward0>)

In [29]:
# Dict-style access by key returns the same logits tensor.
outputs['logits']

tensor([[ 0.2205, -0.2956, -0.1036],
        [ 0.2659, -0.1598, -0.0514],
        [ 0.1237, -0.0427, -0.1145]], grad_fn=<AddmmBackward0>)

In [30]:
# Convert the logits to a NumPy array:
#   detach() - drop the autograd history (the tensor carries a grad_fn)
#   cpu()    - move the tensor from the GPU to the CPU (no-op if already on CPU)
#   numpy()  - view the CPU tensor as a NumPy array
outputs.logits.detach().cpu().numpy()

array([[ 0.22052732, -0.29560113, -0.10363542],
       [ 0.26590735, -0.15983926, -0.05139817],
       [ 0.12373269, -0.04266077, -0.11449973]], dtype=float32)

In [31]:
# Padded id batch: shorter texts are filled on the right with 0
# (presumably the [PAD] id) up to the longest text's length.
model_inputs['input_ids']

tensor([[  101,  2784,  4083,   102,     0],
        [  101,  3086,  1999, 18750,   102],
        [  101,  3698,  4083,   102,     0]])

In [32]:
# 1 marks real tokens, 0 marks the padding positions seen in input_ids.
model_inputs['attention_mask']

tensor([[1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 0]])

In [33]:
# Repeat the forward pass on the same inputs; the logits match the earlier run.
outputs = model(**model_inputs)
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[ 0.2205, -0.2956, -0.1036],
        [ 0.2659, -0.1598, -0.0514],
        [ 0.1237, -0.0427, -0.1145]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)