In [1]:
from transformers import pipeline, AutoTokenizer, GPT2LMHeadModel
import torch, copy

  from .autonotebook import tqdm as notebook_tqdm
2024-02-16 09:08:49.025620: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-16 09:08:49.025647: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-16 09:08:49.026002: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-16 09:08:49.075853: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
model_name = "gpt2"
sentence = "A gay man suffering from cancer must"

In [3]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

## With pipeline

In [4]:
generator = pipeline('text-generation', model=model_name)

In [5]:
text = generator(sentence, max_length=30, pad_token_id=50256, num_return_sequences=1)
print(text[0]['generated_text'])

A gay man suffering from cancer must now be put up for adoption by his biological mother."

The statement was made without disclosing the man's actual


## With generate function

### Greedy output

In [6]:
model_inputs = tokenizer(sentence, return_tensors="pt")
greedy_output = model.generate(
    pad_token_id=50256,
    **model_inputs, 
    max_new_tokens=40, 
    output_scores=True, 
    return_dict_in_generate=True, 
    no_repeat_ngram_size=2, 
)

#### Output with score

In [7]:
for sc in greedy_output.scores:
    res = torch.argmax(sc)
    print(tokenizer.decode(res), end='')

 be treated with the same care as a woman who suffers from a terminal illness.

The bill, which was introduced by Rep. John Conyers Jr., D-Mich., would require that all

#### Output with sentences

In [8]:
for g in greedy_output.sequences:
    print(tokenizer.decode(g))

A gay man suffering from cancer must be treated with the same care as a woman who suffers from a terminal illness.

The bill, which was introduced by Rep. John Conyers Jr., D-Mich., would require that all


### Bean output

In [9]:
model_inputs = tokenizer(sentence, return_tensors="pt")
beam_output = model.generate(
    pad_token_id = 50256,
    **model_inputs,
    max_new_tokens=40,
    num_beams=3,
    no_repeat_ngram_size=2,
    num_return_sequences=1,
    output_scores=True, 
    return_dict_in_generate=True,
    early_stopping=True
)

In [10]:
print(beam_output[:2]) # loss and logits

(tensor([[  32, 5650,  582, 7195,  422, 4890, 1276,  307, 5716,  588,  597,  584,
         1048,   13,  198,  198,    1,   40, 1101,  407, 1016,  284, 6486,  284,
          345,   13,  314, 1101,  655,  407, 6792,  351,  340,  553,  339,  531,
           13,  366,   40,  836,  470,  760,  644,  284,  466,  546,  340]]), tensor([-1.3516]))


#### Output with sentences

In [11]:
for b in beam_output.sequences:
    print(tokenizer.decode(b, skip_special_tokens=True))

A gay man suffering from cancer must be treated like any other person.

"I'm not going to lie to you. I'm just not comfortable with it," he said. "I don't know what to do about it


In [12]:
for b in beam_output.sequences:
    print(b)

tensor([  32, 5650,  582, 7195,  422, 4890, 1276,  307, 5716,  588,  597,  584,
        1048,   13,  198,  198,    1,   40, 1101,  407, 1016,  284, 6486,  284,
         345,   13,  314, 1101,  655,  407, 6792,  351,  340,  553,  339,  531,
          13,  366,   40,  836,  470,  760,  644,  284,  466,  546,  340])


#### Output with score

In [13]:
# print(beam_output.scores[0].softmax(-1))
for sc in beam_output.scores:
    # sc = torch.nn.functional.softmax(sc,dim=1)
    # print(sc.shape)
    # sum = torch.prod(sc,0)
    # print(sum.shape)
    res = torch.argmax(sc[0,:])
    # print(res)
    # res = torch.argmax(sc)%sc.shape[1]
    # print(res1)
    # print(res)
    # res = torch.argmax(sc)
    print(tokenizer.decode(res, skip_special_tokens=True), end='')

 be treated a the human person.

"I'm not just to be a I, I'm a going out that it," he said. "I don not want to be a. It

### With model

In [15]:
sent_cpy = copy.copy(sentence)
print(sent_cpy, end='')

with torch.no_grad():
    for _ in range(50):
        inputs = tokenizer(sent_cpy, return_tensors="pt")
        outputs = model(**inputs)
        logits = outputs.logits.softmax(-1).squeeze()
        res = torch.argmax(logits[-1, :])
        carac = tokenizer.decode(res)
        sent_cpy += carac
        print(carac,end='')

A gay man suffering from cancer must be treated with the same care as a woman who suffers from a terminal illness.


The bill, which was introduced by the Liberal Democrat MP for Stirling, John Bercow, would make it a crime for a person to discriminate against