# BERT Text Classification

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

In [3]:
# Checkpoint identifier; the same id is passed to both from_pretrained calls
# below, so the tokenizer and the model are loaded from one checkpoint.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Bare last expression: the notebook displays the loaded model's configuration
# (it includes the id2label mapping that the prediction cell relies on).
model.config

DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "finetuning_task": "sst-2",
  "hidden_dim": 3072,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.31.0",
  "vocab_size": 30522
}

In [4]:
# Small hand-written batch of sentences to classify.
test_examples = [
    "today is not so bad",
    "It is so bad",
    "It's good",
]

In [5]:
# Tokenize the whole batch in a single call with padding and truncation
# enabled; "pt" asks the tokenizer for PyTorch tensors.
in_tensors = tokenizer(
    test_examples,
    padding=True,
    truncation=True,
    return_tensors="pt",
)

# Model structure

DistilBertForSequenceClassification.distilbert

 * embeddings
    * word_embeddings
    * position_embeddings
    * LayerNorm
    * dropout
 * transformer
   * layer: 6 x TransformerBlock(attention, sa_layer_norm, ffn, output_layer_norm)
 * pre_classifier: Linear(768,768)
 * classifier: Linear(768,2)
 * dropout

In [13]:
# Bare expression: the notebook shows the model's repr, i.e. its module tree.
model

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

# model output:

SequenceClassifierOutput:

    * loss
    * logits
    * hidden_states
    * attentions

In [12]:
# Inference only: run the forward pass without tracking gradients.
with torch.no_grad():
    output = model(**in_tensors)
print(output)

# Normalise the logits over the class axis, then pick the highest-scoring
# class index for each input sentence.
scores = F.softmax(output.logits, dim=-1)
predict_ids = scores.argmax(dim=-1).tolist()

# Translate class indices into the label names stored in the model config.
labels = [model.config.id2label[class_id] for class_id in predict_ids]

# One "<sentence>: <label>" line per example.
print("\n".join(f"{sentence}: {sentiment}" for sentence, sentiment in zip(test_examples, labels)))

SequenceClassifierOutput(loss=None, logits=tensor([[-3.4942,  3.6582],
        [ 4.7498, -3.7849],
        [-4.1655,  4.5508]]), hidden_states=None, attentions=None)
today is not so bad: POSITIVE
It is so bad: NEGATIVE
It's good: POSITIVE
