# Fine-tuning a pre-trained Transformer (BERT) model for text classification

In [14]:
# Environment sanity check: print the file path of the torch package
# that this kernel actually imports.
import torch
print(torch.__file__)

/usr/local/lib/python3.12/dist-packages/torch/__init__.py


In [3]:
import torch
from torch.utils.data import DataLoader, TensorDataset

from transformers import (
    BertTokenizer,
    BertForSequenceClassification
)

### Model & Tokenizer

In [4]:
# Load the tokenizer for the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [37]:
# Load the 'bert-base-uncased' checkpoint with a sequence-classification head
# sized for two output labels.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Training Data

In [19]:
# Toy training set: each sentence is paired with its integer class label
# (presumably 1 = positive, 0 = negative sentiment -- confirm).
samples = [
    ("I love this movie", 1),
    ("This film was terrible", 0),
    ("What a fantastic experience", 1),
    ("I hate this so much", 0),
]

# Split the pairs into the two parallel lists used by the cells below.
text = [sentence for sentence, _ in samples]
labels = [label for _, label in samples]

In [38]:
# Tokenize all training sentences in one call: pad to the longest sentence,
# truncate anything beyond 32 tokens, and return PyTorch tensors.
encoding = tokenizer(
    text,
    padding=True,
    truncation=True,
    max_length=32,
    return_tensors="pt"
)

# Build the label tensor under its own name instead of rebinding `labels`.
# Overwriting the original list made this cell non-idempotent: running it a
# second time called torch.tensor() on a tensor and triggered a warning.
label_tensor = torch.tensor(labels, dtype=torch.long)

# Each dataset item is an (input_ids, attention_mask, label) triple.
dataset = TensorDataset(
    encoding["input_ids"],
    encoding["attention_mask"],
    label_tensor
)

# batch_size=4 puts the whole four-sentence dataset into a single shuffled batch.
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

  labels = torch.tensor(labels)


In [25]:
# Print the nested module structure of the loaded classifier.
print(model)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [44]:
# Snapshot one BERT embedding weight and the classifier weight so they can be
# compared with the live parameters after training.
# detach() first: clone() alone is differentiable, so the copy would stay
# attached to the autograd graph of the parameter it was taken from.
w_before = model.bert.embeddings.word_embeddings.weight.detach().clone()
c_before = model.classifier.weight.detach().clone()
# train...

In [45]:
# Freeze BERT encoder: with requires_grad disabled, these parameters receive
# no gradients and are excluded from the optimizer below.
for param in model.bert.parameters():
    param.requires_grad = False

# Optimizer: hand AdamW only the parameters that still require gradients.
optimizer = torch.optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-3
)

# Training Loop
# NOTE(review): model.train() puts every submodule into training mode, so the
# Dropout layers inside the frozen encoder remain active -- confirm intended.
model.train()

for epoch in range(5):
    print(f"Epoch {epoch + 1}")
    for batch in dataloader:
        # Use batch-local names: unpacking into `labels` would overwrite the
        # notebook-level `labels` defined alongside the training sentences.
        batch_input_ids, batch_attention_mask, batch_labels = batch

        optimizer.zero_grad()

        # Passing labels makes the model compute the loss itself.
        output = model(
            input_ids=batch_input_ids,
            attention_mask=batch_attention_mask,
            labels=batch_labels
        )

        loss = output.loss
        loss.backward()
        optimizer.step()

        print("loss:", loss.item())

Epoch 1
loss: 0.6844410300254822
Epoch 2
loss: 0.6172816753387451
Epoch 3
loss: 0.7806915044784546
Epoch 4
loss: 0.5804389715194702
Epoch 5
loss: 0.5640920996665955


In [46]:
# Re-read the live parameters after training and compare each one with the
# snapshot taken beforehand (embedding weight first, classifier weight second).
w_after = model.bert.embeddings.word_embeddings.weight
c_after = model.classifier.weight

for snapshot, current in ((w_before, w_after), (c_before, c_after)):
    print(torch.allclose(snapshot, current))

True
False


In [13]:
# Inference

# Switch the model to evaluation mode before predicting.
model.eval()

test_text = ["I really enjoyed this movie", "Worst experience ever"]

# Same tokenizer settings as used for the training sentences.
text_encoding = tokenizer(
    test_text, padding=True, truncation=True, max_length=32, return_tensors="pt"
)

# No gradients are needed for prediction.
with torch.no_grad():
    output = model(
        attention_mask=text_encoding["attention_mask"],
        input_ids=text_encoding["input_ids"],
    )

# The predicted class is the index of the largest logit in each row.
logits = output.logits
prediction = logits.argmax(dim=1)

print("prediction class:", prediction.tolist())

prediction class: [1, 0]
