In [1]:
from transformers import AutoModel, AutoTokenizer

## Ways to initiate a Model
### Using AutoModel and AutoTokenizer  # when flexibility is needed the script can load a model based on say an argument to the script
### Use specific model such as BertModel and BertTokenizer # Specific Model Architecture: You know exactly which model architecture you need (e.g., BERT) and you want to work with its base version.
### Customization: You plan to customize the model architecture or use it in a way not directly covered by the predefined classes (like BertForSequenceClassification).

In [None]:
# Using AutoModel and AutoTokenizer

checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

In [8]:
# little bit playing with tokenizer

#print(tokenizer.get_vocab())
stmt = "Hi there! How are you?"
#print(tokenizer.tokenize(stmt))
#print(tokenizer.convert_tokens_to_ids(tokenizer.tokenize(stmt)))

inputs = tokenizer(stmt, return_tensors="pt")


[7632, 2045, 999, 2129, 2024, 2017, 1029]


In [9]:
outputs = model(**inputs)

In [None]:
print(outputs)
#print(help(outputs))
#print(outputs.logits)   #base model does not have this attribute for output instead use last_hidden_state
logits = outputs.last_hidden_state
print(logits) # logits # Since this is not a classification problem there is not further classes to be derived

### Second: With a specific model name and tokenizer

In [None]:
from transformers import BertModel, BertTokenizer

model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)

inputs = tokenizer("Hello, world!", return_tensors="pt")
outputs = model(**inputs)
print(outputs)

## Outputs Representation:<br>
   last_hidden_state: The hidden states at the last layer of the model for each input token.<br>
   pooler_output (if available): A summary of the hidden states for the classification token ([CLS]).<br>
Using the Outputs:<br>
   Feature Extraction: Use the hidden states as features for downstream tasks (e.g., clustering, classification).<br>
   Pooling: Aggregate the hidden states for tasks like sentence classification (mean pooling, max pooling, or using the [CLS] token).<br>

## Example: Mean pooling to get sentence representation
sentence_embedding = torch.mean(last_hidden_state, dim=1)

print(sentence_embedding)   

In [17]:
# Initialize model and tokenizer
from transformers import BertForSequenceClassification, BertTokenizer
import torch

model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Prepare input
inputs = tokenizer("Hello, world!", return_tensors="pt")

# Forward pass
outputs = model(**inputs)
logits = outputs.logits

# Get class names (assume binary classification for simplicity)
class_names = ["negative", "positive"]
predicted_class = class_names[torch.argmax(logits, dim=1).item()]

print(logits)
print(predicted_class)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tensor([[ 0.1917, -0.3931]], grad_fn=<AddmmBackward0>)
negative
