#### Tokenization using Hugging Face's Transformers

In [None]:
# Import the AutoTokenizer class from Hugging Face's transformers library
from transformers import AutoTokenizer

# Load the pretrained GPT-2 tokenizer
# The Hugging Face Hub identifier is "gpt2" (no hyphen); "gpt-2" is not a valid
# model id, so from_pretrained() would raise instead of loading anything.
# The tokenizer files are downloaded automatically if not already cached locally.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Tokenize the input text
# This splits the text into the subword tokens that GPT-2 operates on
tokens = tokenizer.tokenize("A young girl named Alice sits bored by a riverbank...")

# Print the list of tokens generated by the tokenizer
print(tokens)

#### Embedding and Processing with a Transformer Model

In [None]:
# Import torch for the no-grad context and AutoModel to load a pretrained model
import torch
from transformers import AutoModel

# Load the pretrained GPT-2 model
# The Hugging Face Hub identifier is "gpt2" (no hyphen); "gpt-2" is not a valid
# model id, so from_pretrained() would raise instead of loading the weights.
model = AutoModel.from_pretrained("gpt2")

# Tokenize the input text and convert it to tensor format for model input
# The 'return_tensors="pt"' argument specifies that the output should be in PyTorch tensor format
# NOTE: 'tokenizer' is the GPT-2 tokenizer created in the tokenization cell above
inputs = tokenizer("A young girl named Alice sits bored by a riverbank...", return_tensors="pt")

# Pass the tokenized inputs through the model
# The '**inputs' syntax unpacks the dictionary, allowing each tensor (e.g., input IDs) to be passed as a named argument
# We only need the forward pass here, so disable gradient tracking to avoid
# building an autograd graph that would never be used
with torch.no_grad():
    outputs = model(**inputs)

# Extract the last hidden states from the model outputs
# 'last_hidden_state' contains one embedding vector for each token in the input text
last_hidden_states = outputs.last_hidden_state

# Print the shape of the last hidden states tensor to understand the output structure
# (expected layout: batch size, number of tokens, hidden dimension)
print(last_hidden_states.shape)

#### Visualization of Embeddings (Simplified Example)

In [None]:
# Import matplotlib for visualization
import matplotlib.pyplot as plt

# Visualize the embeddings using a heatmap
# 'detach().numpy()' detaches the tensor from the computation graph and converts it to a NumPy array
# '[0]' selects the embeddings for the first sequence (useful if batching multiple sequences)
embedding_matrix = last_hidden_states.detach().numpy()[0]

fig, ax = plt.subplots()

# 'aspect="auto"' stretches the image to fill the axes. With the default equal
# aspect ratio, a matrix with only a handful of rows (tokens) but a very large
# number of columns (hidden dimension) is drawn as a thin, unreadable strip.
heatmap = ax.imshow(embedding_matrix, cmap='viridis', aspect='auto')

# Label the figure so it can be understood on its own
ax.set_title("Last hidden state per token")
ax.set_xlabel("Hidden dimension")
ax.set_ylabel("Token position")

# Add a color bar to indicate the scale of values in the heatmap
fig.colorbar(heatmap, ax=ax)

# Display the heatmap
plt.show()