In [5]:
! pip install transformers torch


Collecting torch
  Downloading torch-2.6.0-cp311-cp311-win_amd64.whl (204.2 MB)
     -------------------------------------- 204.2/204.2 MB 4.7 MB/s eta 0:00:00
Collecting networkx
  Downloading networkx-3.4.2-py3-none-any.whl (1.7 MB)
     ---------------------------------------- 1.7/1.7 MB 7.8 MB/s eta 0:00:00
Collecting jinja2
  Downloading jinja2-3.1.5-py3-none-any.whl (134 kB)
     -------------------------------------- 134.6/134.6 kB 8.3 MB/s eta 0:00:00
Collecting sympy==1.13.1
  Downloading sympy-1.13.1-py3-none-any.whl (6.2 MB)
     ---------------------------------------- 6.2/6.2 MB 8.1 MB/s eta 0:00:00
Collecting mpmath<1.4,>=1.1.0
  Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)
Installing collected packages: mpmath, sympy, networkx, jinja2, torch
Successfully installed jinja2-3.1.5 mpmath-1.3.0 networkx-3.4.2 sympy-1.13.1 torch-2.6.0



[notice] A new release of pip available: 22.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## BERT

In [58]:
import torch 
from transformers import BertTokenizer, BertForMaskedLM

tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
model = BertForMaskedLM.from_pretrained('bert-base-multilingual-cased')

# Set the device to CPU
device = torch.device("cpu")

# Define the input text and tokenize it
input_text = "We the people"

for i in range(10):
    input_text += " [MASK]"
    tokenized_text = tokenizer.tokenize(input_text)

    # Find the index of the masked token
    masked_index = tokenized_text.index('[MASK]')

    # Convert the tokenized text to a tensor of token ids
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    tokens_tensor = torch.tensor([indexed_tokens])

    # Move the tokens tensor to the CPU
    tokens_tensor = tokens_tensor.to(device)

    # Generate predictions for the masked token using the model
    with torch.no_grad():
        outputs = model(tokens_tensor)
        predictions = outputs[0][0, masked_index].topk(5)

    # Convert the predicted token ids to tokens
    predicted_token_ids = predictions.indices.tolist()
    predicted_tokens = tokenizer.convert_ids_to_tokens(predicted_token_ids)

    # Print the predicted tokens
    next_token = predicted_tokens[0]
    input_text = input_text.replace("[MASK]", next_token)
print(input_text)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


We the people who are not married or divorced , and who are


## GPT

In [57]:
import torch
from transformers import AutoTokenizer, OpenAIGPTLMHeadModel

tokenizer = AutoTokenizer.from_pretrained("openai-community/openai-gpt")
model = OpenAIGPTLMHeadModel.from_pretrained("openai-community/openai-gpt")

input_text = "Your dog looks beautiful, which breed "
print(input_text, end=" ")
for _ in range(10):
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model(**inputs, labels=inputs["input_ids"])
    logits = outputs.logits
    predicted_token_id = torch.argmax(logits[0, -1, :]).item()
    predicted_word = tokenizer.decode(predicted_token_id)
    print(predicted_word, end=" ")
    input_text += " " + predicted_word 

Your dog looks beautiful, which breed  is it ? " 
 
 
 
 
 
 

## GPT 2 Model

In [54]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cpu"

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

prompt = "Write a poem:\n The flower was beautiful,\n"

model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
model.to(device)

generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
tokenizer.batch_decode(generated_ids)[0]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'Write a poem:\n The flower was beautiful,\n\nthe roses were precious, the butterflies were glorious\n\nAnd the stars were lovely.\n\nCultural influence: The poem is in a collection called "Love. The Life" or "Love. The Art of Poetry" and includes an online version (which you can take to your local Post Office)!\n\nWhat\'s your favorite poem to read that you\'d like to share with people in the world?\n\nI am interested in sharing this amazing poem in the post of the week'

In [55]:
print(tokenizer.batch_decode(generated_ids)[0])

Write a poem:
 The flower was beautiful,

the roses were precious, the butterflies were glorious

And the stars were lovely.

Cultural influence: The poem is in a collection called "Love. The Life" or "Love. The Art of Poetry" and includes an online version (which you can take to your local Post Office)!

What's your favorite poem to read that you'd like to share with people in the world?

I am interested in sharing this amazing poem in the post of the week
