In [5]:
from transformers import BertTokenizer, BertModel
import torch

# Goal: get the BERT last-layer vector for a single word that is fed to the
# model on its own (no surrounding sentence).
word = "context"

# Load pre-trained model tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Encode the word. The BERT tokenizer wraps the input as
# [CLS] <word pieces> [SEP] and may split one word into several WordPiece tokens.
encoded = tokenizer(word, return_tensors='pt')
input_ids = encoded['input_ids']

# Load pre-trained model
model = BertModel.from_pretrained('bert-base-uncased')

# Get the embeddings (inference only, so gradient tracking is disabled).
# The full encoding is passed so the model also receives the attention mask
# and token type ids produced by the tokenizer.
with torch.no_grad():
    outputs = model(**encoded)
    last_hidden_states = outputs.last_hidden_state  # indexed [batch, token, hidden]

# Keep only the word-piece positions: drop the leading [CLS] and trailing [SEP].
piece_vectors = last_hidden_states[0, 1:-1]
if piece_vectors.shape[0] == 0:
    raise ValueError(f"{word!r} produced no word-piece tokens to embed")

# Average over the word pieces. A fixed index of 1 would silently return only
# the first piece of a word the tokenizer splits; for a single-piece word the
# mean is that same single vector.
word_embedding = piece_vectors.mean(dim=0)

print(word_embedding)


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

tensor([-3.3891e-01, -3.2889e-01, -4.5454e-01, -4.2762e-01, -1.5153e-01,
         4.1486e-01,  2.6706e-02,  2.9010e-01, -8.1905e-01, -8.6328e-01,
        -2.5876e-02,  1.2273e-01,  4.2575e-01,  1.6918e-01,  1.3557e-01,
         4.6029e-01,  1.7197e-02, -1.5975e-01, -2.7451e-01,  2.9154e-01,
         1.7347e-01,  1.8432e-01, -4.5334e-01, -1.5739e-01,  7.4115e-02,
         5.4036e-01, -2.6094e-01,  1.5478e-01, -7.7359e-01,  3.2843e-01,
        -8.3502e-02,  2.3226e-01, -4.8126e-01,  1.9134e-01,  2.0823e-01,
        -2.4275e-01,  1.2739e-01,  9.9948e-03,  2.3368e-02,  3.4186e-01,
        -3.7541e-01, -6.6113e-02, -6.0516e-02, -5.4083e-01,  5.4566e-01,
        -2.2761e-01,  5.7892e-01, -3.4417e-01, -5.8395e-01, -1.6689e-01,
        -7.6361e-01,  9.0692e-02,  6.7941e-02, -3.1401e-02, -3.3577e-01,
         5.6978e-01,  3.1754e-01, -6.7270e-02,  1.1704e-01, -2.7381e-01,
         3.3146e-01,  3.8456e-02, -6.8137e-02, -3.1560e-01,  3.0929e-01,
        -1.2999e-02,  2.3168e-01, -1.5565e-01, -2.9

In [6]:
# Sanity check: size of the embedding's first axis (768 in the recorded output).
# NOTE: `word_embedding` is whatever the most recently executed cell assigned,
# so run this directly after the cell whose embedding you want to inspect.
print(word_embedding.shape[0])

768


In [8]:
from transformers import GPT2Tokenizer, GPT2Model
import torch

# Goal: get the GPT-2 last-layer vector for a single word that is fed to the
# model on its own (no surrounding sentence).

# Load pre-trained model tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Encode the word. GPT-2's byte-level BPE may split one word into several
# tokens, so the sequence length is not guaranteed to be 1.
word = "context"
input_ids = tokenizer(word, return_tensors='pt')['input_ids']
if input_ids.shape[1] == 0:
    raise ValueError(f"{word!r} produced no tokens to embed")

# Load pre-trained model
model = GPT2Model.from_pretrained('gpt2')

# Get the embeddings (inference only, so gradient tracking is disabled)
with torch.no_grad():
    outputs = model(input_ids)
    last_hidden_states = outputs.last_hidden_state  # indexed [batch, token, hidden]

# Extract the embedding for the word.
# The GPT-2 tokenizer adds no special tokens by default, so every position in
# the sequence belongs to the word. Average over all of them: a fixed index of
# 0 would silently return only the first token of a word that is split, while
# for a single-token word the mean is that same single vector.
word_embedding = last_hidden_states[0].mean(dim=0).numpy()

print(f"Embedding for '{word}':\n{word_embedding}")

Embedding for 'context':
[ 9.14293230e-02  9.41814482e-02 -5.06908655e-01  5.03740646e-02
  2.71978974e-02 -7.15360045e-02  1.02914870e+00 -4.98978607e-02
 -3.24488223e-01  5.15847020e-02  2.53762007e-01 -1.08520783e-01
 -1.30084427e-02 -6.70717657e-02 -3.05121303e-01 -1.30710796e-01
 -2.62558311e-01 -1.53509349e-01  5.71540184e-02 -2.68956304e-01
  1.91658080e-01 -1.91676602e-01 -2.90186584e-01 -1.18460253e-01
 -2.36840755e-01  1.78309947e-01 -3.94053340e-01 -1.76962048e-01
 -2.01020733e-01 -1.44557476e-01 -8.86080693e-03  1.20525084e-01
 -7.00376034e-02 -3.65534663e-01 -8.30584988e-02  1.07728101e-01
  1.82150612e+01 -1.00260109e-01 -6.78741038e-02  1.02073010e-02
  2.83043589e-02 -4.54234146e-02 -7.56417960e-02 -3.67230207e-01
 -1.12868704e-01  1.30978525e-01 -2.24662237e-02 -7.79924467e-02
 -3.91245484e-01 -9.53921899e-02 -1.39277531e-02  1.36690155e-01
 -1.15098871e-01  1.98133476e-03  3.29381004e-02  8.57785568e-02
  4.25170138e-02 -1.17429890e-01 -6.26761168e-02 -1.70906633e-01


In [9]:
# Sanity check: size of the embedding's first axis (768 in the recorded output).
# NOTE: `word_embedding` is whatever the most recently executed cell assigned,
# so run this directly after the cell whose embedding you want to inspect.
print(word_embedding.shape[0])

768
