<a href="https://colab.research.google.com/github/alfazick/AppliedLLMCourse/blob/main/Module2HowTransformersClassifiersWork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
# ok, so the general training loop always has the same shape

# step 1 Predict with whatever model you have
# step 2 calculate the loss
# step 3 calculate gradients for each param in your model
# step 4 update your model params

# keep doing for n_epochs

# this is the core: you can
# think of or wish for many things,
# but at some point all 4 steps have to happen.
# the libraries and code we look at just simplify
# the process by saving you from writing hundreds of lines of code


In [20]:
# Let's unpack all of these stages with the Hugging Face transformers library.

# Step 1: we need a base model to start from.
# We already know how to load a model, so no surprises here:
# just pull, out of the box, a checkpoint that is good for your task.

# https://huggingface.co/google-bert/bert-base-uncased

# I like the next quote:
#   "model is primarily aimed at being fine-tuned on tasks that use the whole sentence
#   (potentially masked) to make decisions, such as sequence classification,
#   token classification or question answering."

# "Base model" means the model was trained on a vast amount of data,
# but not for your specific task. Our task for this project
# will be [prompt injection detection].

# But first, let's explore the model.

from transformers import BertTokenizer, BertModel

# One checkpoint id shared by both loaders, so the tokenizer and the model
# cannot accidentally be pointed at different checkpoints.
MODEL_NAME = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertModel.from_pretrained(MODEL_NAME)




In [21]:
# Switch the model to evaluation mode (per the PyTorch docs this makes the Dropout layers listed below no-ops).
# eval() hands back the module itself, so as the last expression of the cell it also prints the architecture.
model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [22]:
# The model produces one vector per input token,
# and each of those vectors has 768 dimensions.

text = "UTRGV to the Moon"
encoded_input = tokenizer(text, return_tensors='pt')  # 'pt' asks for PyTorch tensors
output = model(**encoded_input)  # unpacks input_ids / token_type_ids / attention_mask as keyword arguments

In [23]:
# Show every field the tokenizer produced, one "name value" pair per line.
for field_name, field_value in encoded_input.items():
    print(field_name, field_value)

input_ids tensor([[  101, 21183, 10623,  2615,  2000,  1996,  4231,   102]])
token_type_ids tensor([[0, 0, 0, 0, 0, 0, 0, 0]])
attention_mask tensor([[1, 1, 1, 1, 1, 1, 1, 1]])


In [24]:
# so let's look at the tokens, i.e. what you will feed the model with
# it's like your x data point



# input_ids is a batch of shape [1, T], so each loop item is one whole sequence of ids
for sequence_ids in encoded_input["input_ids"]:
    decoded_text = tokenizer.decode(sequence_ids)
    print(decoded_text)

# two things stand out: the text was lowercased, and the [CLS] and [SEP] tokens were added

[CLS] utrgv to the moon [SEP]


In [25]:
# The BERT tokenizer adds the [CLS] and [SEP] tokens by default,
# because that is the data format the model saw during training.

ids = encoded_input["input_ids"][0]  # first (and only) sequence in the batch
tokens = tokenizer.convert_ids_to_tokens(ids)  # ids -> word-piece strings ('##' marks a continuation piece)
tokens


['[CLS]', 'ut', '##rg', '##v', 'to', 'the', 'moon', '[SEP]']

In [26]:
# ok, if you look at the output, it is really scary,
# because the question is: how exactly am I supposed to use
# all of these "random" numbers to, say, classify something :))

# Bare last expression: the notebook renders the whole model output object,
# which carries both last_hidden_state and pooler_output.
output

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[-0.4504,  0.1165,  0.1604,  ..., -0.2071,  0.2120,  0.5255],
         [-0.0643, -0.3624,  0.5675,  ..., -0.1871,  0.9949,  0.9705],
         [-0.4801, -0.0094,  1.0636,  ..., -0.4680,  0.4697,  0.4389],
         ...,
         [-0.8320,  0.8765,  0.1836,  ...,  0.0842,  0.4684, -0.3084],
         [-0.0536,  0.4925,  0.2465,  ..., -0.5793,  0.7340,  0.1992],
         [ 0.6610,  0.1062, -0.2303,  ...,  0.0423, -0.5780, -0.1876]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[-0.7441, -0.2182,  0.3439,  0.4367, -0.4348, -0.0834,  0.6760,  0.1969,
          0.4367, -0.9996,  0.2893,  0.1764,  0.9726, -0.3716,  0.7386, -0.2185,
          0.0171, -0.3646,  0.3363, -0.1930,  0.2781,  0.9404,  0.4904,  0.1351,
          0.3006,  0.1527, -0.4112,  0.8065,  0.9004,  0.5889, -0.3778,  0.1981,
         -0.9739, -0.0860,  0.1737, -0.9652,  0.1126, -0.5816,  0.1339,  0.0339,
         -0.7281,  0.2869,  0.99

In [27]:
# The model output carries two tensors:
#   last_hidden_state -> one 768-d vector per token
#   pooler_output     -> one 768-d vector for the whole sequence
hidden_state, pooled = output.last_hidden_state, output.pooler_output

print(hidden_state.shape)  # torch.Size([1, 8, 768])
print(pooled.shape)  # torch.Size([1, 768])

torch.Size([1, 8, 768])
torch.Size([1, 768])


In [30]:
# Width of each hidden vector as recorded in the model config (the same 768 seen in the printed shapes).
model.config.hidden_size

768

In [35]:
import torch
import torch.nn as nn
import torch.nn.functional as F
# The model output is what predictions get built from; before any training,
# let's understand how that works.

# There are mainly two approaches.

# 1) Use the [CLS] token: it is believed to hold the context of the
#    whole input, a kind of compressed version of its meaning.

# The [CLS] vector sits at position 0 along the token axis of last_hidden_state.
cls = hidden_state[:, 0]  # shape [1, 768]
print("CLS shape:", cls.shape)

# From here on this context representation is treated as plain input features!
# The first time I saw this I was like "what ???",
# but it is what it is.

id2label = {0: "Negative", 1: "Positive"}
num_labels = len(id2label)
# The key idea: bolt an extra head onto the model,
# here a single Linear layer.
features = model.config.hidden_size  # 768, as we saw before
head = nn.Linear(in_features=features, out_features=num_labels)

# Seed right before the random init so the head weights come out the same every time.
torch.manual_seed(13)
nn.init.normal_(head.weight, mean=0.0, std=0.02)
# Last expression of the cell, so the zeroed bias parameter is displayed.
nn.init.zeros_(head.bias)


CLS shape: torch.Size([1, 768])


Parameter containing:
tensor([0., 0.], requires_grad=True)

In [43]:
# Forward pass through the head, then softmax, then read off the prediction :))
with torch.no_grad():
    logits = head(cls)  # shape [1, 2]: one score per label
    probs = logits.softmax(dim=-1)
    pred_id = probs.argmax(dim=-1).item()
    pred_label = id2label[pred_id]


print("probs: ", probs.squeeze(0).tolist())
print("pred:  ", pred_label)

# Important: the head has not been trained, so this prediction carries no meaning yet.
# The cell only demonstrates the CLS → logits → probs pipeline.


probs:  [0.3979083001613617, 0.6020916104316711]
pred:   Positive


In [45]:
# 2) Now let's look at the other strategy: mean pooling.
# Almost the same story, but every token vector contributes instead of the single [CLS] one.
# num_labels comes from the CLS-head cell, so both heads always agree on the number of classes.
head_mean = nn.Linear(model.config.hidden_size, num_labels)  # 768 -> 2 classes

# Seed right before the random init, exactly like the CLS-head cell does, so that
# re-running this cell always gives the same weights and therefore the same printed probs.
# Same seed + same weight shape also means this head starts from the same weights as the
# CLS head, so the two demos differ only in how the features were pooled.
torch.manual_seed(13)
nn.init.normal_(head_mean.weight, mean=0.0, std=0.02)
nn.init.zeros_(head_mean.bias)

hs = hidden_state                # [1, T, 768]
mask = encoded_input["attention_mask"].unsqueeze(-1).float()  # [1, T, 1]
# Masked average over the token axis: positions with mask == 0 add nothing to the sum,
# and the clamp keeps the division finite if a row had no unmasked tokens at all.
mean = (hs * mask).sum(1) / mask.sum(1).clamp(min=1e-9)  # [1, 768]

with torch.no_grad():
    logits_m = head_mean(mean)        # [1, 2]
    probs_m  = F.softmax(logits_m, -1)
    pred_m   = probs_m.argmax(-1).item()

print("\n[Mean-pooled tokens]")
print("mean shape:", tuple(mean.shape))
print("probs:", probs_m.squeeze(0).tolist())
print("pred :", id2label[pred_m])


# To emphasize again: these results are meaningless,
# since nothing has been fine-tuned yet.


[Mean-pooled tokens]
mean shape: (1, 768)
probs: [0.4478999674320221, 0.5521000623703003]
pred : Positive
