In [1]:
import torch
from transformers import BertTokenizer, BertModel

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Single source of truth for the checkpoint so the tokenizer and the model
# are always loaded from the same pretrained weights/vocabulary.
MODEL_NAME = 'bert-base-uncased'
# Upper bound on the tokenized length; longer inputs are truncated to it.
MAX_LENGTH = 512

# Load the pre-trained BERT tokenizer.
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

# Load the pre-trained BERT encoder and move it to the selected device.
model = BertModel.from_pretrained(MODEL_NAME).to(device)
# Make inference mode explicit (dropout disabled) so re-running this cell
# after the model has been put in training mode still behaves the same way.
model.eval()

# Example input sentence
sentence = "This is an example sentence."

# Tokenize the input sentence.
# The tokenizer is called directly instead of through the deprecated
# `encode_plus`. No padding is requested: padding is only needed to batch
# inputs of different lengths, and padding a single short sentence out to
# MAX_LENGTH would make the forward pass process hundreds of masked-out
# pad tokens for no benefit.
encoded_inputs = tokenizer(
    sentence,
    add_special_tokens=True,
    max_length=MAX_LENGTH,
    truncation=True,
    return_attention_mask=True,
    return_tensors='pt',
)
input_ids = encoded_inputs['input_ids'].to(device)
attention_mask = encoded_inputs['attention_mask'].to(device)

# Forward pass through the BERT model; gradients are not needed for
# feature extraction, so autograd bookkeeping is disabled.
with torch.no_grad():
    outputs = model(input_ids=input_ids, attention_mask=attention_mask)

# Use the hidden state at position 0 (the [CLS] token that
# add_special_tokens=True prepends) as the sentence embedding.
# Resulting shape: (batch, hidden_size).
sentence_embedding = outputs.last_hidden_state[:, 0, :]

# Perform downstream task with the sentence embedding
# For example, you can use it for classification or other tasks

# Print the sentence embedding
print(sentence_embedding)

# Save the sentence embedding or use it for further processing


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tensor([[-3.7736e-01, -3.3498e-01, -3.2064e-01, -4.3991e-01, -5.0140e-01,
         -2.1692e-01,  4.7474e-01,  5.7114e-01, -8.4856e-02, -1.0667e-01,
         -3.7948e-01, -4.8466e-03, -9.6260e-02,  1.4302e-02,  6.4120e-01,
          4.2318e-02,  2.1465e-01,  5.6647e-01,  2.4406e-01, -4.9750e-01,
          2.4432e-01, -3.1471e-01, -1.8387e-01, -4.3433e-01,  3.7000e-01,
         -4.8847e-01, -1.6026e-01, -5.3024e-01, -2.5536e-01,  2.5280e-02,
          1.1406e-01,  3.2830e-01, -3.3749e-01, -1.7455e-01,  5.4211e-01,
         -2.4419e-01,  2.1843e-01, -5.2654e-02,  5.4251e-01,  2.2136e-01,
         -2.6010e-01, -6.9896e-02,  4.5415e-01,  1.4498e-01,  7.1128e-02,
         -1.9316e-01, -2.8830e+00, -4.7826e-01, -4.7273e-01, -2.8777e-01,
         -3.7046e-01, -8.1552e-02,  7.4794e-01,  4.6832e-01, -2.2192e-01,
          3.8314e-01, -3.8344e-01,  3.7906e-01,  2.1074e-01,  1.0200e-02,
          3.5558e-01,  4.1932e-02, -1.1501e-02, -9.0390e-02, -2.5045e-02,
          6.6402e-01, -3.8978e-02,  5.