In [1]:
from transformers import pipeline, AutoTokenizer, GPT2LMHeadModel
import torch, copy

  from .autonotebook import tqdm as notebook_tqdm
2024-02-13 09:31:41.468296: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-13 09:31:41.468321: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-13 09:31:41.468677: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-13 09:31:41.517177: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [77]:
model_name = "gpt2"
sentence = "A gay man suffering from cancer must"

In [78]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

## With pipeline

In [79]:
generator = pipeline('text-generation', model=model_name)

In [80]:
text = generator(sentence, max_length=30, pad_token_id=50256, num_return_sequences=1)
print(text[0]['generated_text'])

A gay man suffering from cancer must have been able to survive because if he had lived to make a full recovery, he would not have been a patient


## With generate function

### Greedy output

In [81]:
model_inputs = tokenizer(sentence, return_tensors="pt")
greedy_output = model.generate(
    pad_token_id=50256,
    **model_inputs, 
    max_new_tokens=40, 
    output_scores=True, 
    return_dict_in_generate=True, 
    no_repeat_ngram_size=2, 
)

#### Output with score

In [82]:
for sc in greedy_output.scores:
    res = torch.argmax(sc)
    print(tokenizer.decode(res), end='')

 be treated with the same care as a woman who suffers from a terminal illness.

The bill, which was introduced by Rep. John Conyers Jr., D-Mich., would require that all

#### Output with sentences

In [83]:
for g in greedy_output.sequences:
    print(tokenizer.decode(g))

A gay man suffering from cancer must be treated with the same care as a woman who suffers from a terminal illness.

The bill, which was introduced by Rep. John Conyers Jr., D-Mich., would require that all


### Bean output

In [75]:
model_inputs = tokenizer(sentence, return_tensors="pt")
beam_output = model.generate(
    pad_token_id = 50256,
    **model_inputs,
    max_new_tokens=40,
    num_beams=3,
    no_repeat_ngram_size=2,
    num_return_sequences=1,
    output_scores=True, 
    return_dict_in_generate=True,
    early_stopping=True
)

In [88]:
print(beam_output[:2]) # loss and logits

(tensor([[   32,   582,  7195,   422,  4890,  1276, 17777,  8185,   284,  4781,
           257, 22359,   422,   465,  3632,    13,   198,   198,   464,   582,
            11,   508,   373,  4642,   351,   257,  4071,  1296,   286,  3632,
          4890,    11,   373, 14641,   351,   262,  4369,   287,  2805,    13,
           679,   373,  1813,   257,  1218,  2863]]), tensor([-1.4236]))


#### Output with sentences

In [76]:
for b in beam_output.sequences:
    print(tokenizer.decode(b, skip_special_tokens=True))

A man suffering from cancer must undergo surgery to remove a tumor from his brain.

The man, who was born with a rare form of brain cancer, was diagnosed with the disease in March. He was given a second chance


In [11]:
for b in beam_output.sequences:
    print(b)

tensor([42590, 24572,   373,   262,   717,   284, 18077,   326,   262,  6881,
          318,   257,  4947,   286, 13166,    13,   198,   198,   818,   262,
         1903,  1160,   400,  4289,    11, 24572,   338,  4583,   286,  2276,
        44449,    11,   543,   373,   717,  5150,   416,  9966, 24572,    11,
          373,  5625,   284])


#### Output with score

In [53]:
# print(beam_output.scores[0].softmax(-1))
for sc in beam_output.scores:
    # sc = torch.nn.functional.softmax(sc,dim=1)
    # print(sc.shape)
    # sum = torch.prod(sc,0)
    # print(sum.shape)
    res = torch.argmax(sc[0,:])
    # print(res)
    # res = torch.argmax(sc)%sc.shape[1]
    # print(res1)
    # print(res)
    # res = torch.argmax(sc)
    print(tokenizer.decode(res, skip_special_tokens=True), end='')

 a great 18 propose write the universe the a collection single particles.
 thatThe idea early hass,, physicists began to think relativity was was developed he developed proposed the 1859, was applied widely

### With model

In [89]:
sent_cpy = copy.copy(sentence)
print(sent_cpy, end='')

with torch.no_grad():
    for _ in range(50):
        inputs = tokenizer(sent_cpy, return_tensors="pt")
        outputs = model(**inputs)
        print(outputs.loss)
        logits = outputs.logits.softmax(-1).squeeze()
        res = torch.argmax(logits[-1, :])
        carac = tokenizer.decode(res)
        sent_cpy += carac
        print(carac,end='')

A gay man suffering from cancer mustNone
 beNone
 treatedNone
 withNone
 theNone
 sameNone
 careNone
 asNone
 aNone
 womanNone
 whoNone
 suffersNone
 fromNone
 aNone
 terminalNone
 illnessNone
.None

None

None

None
TheNone
 billNone
,None
 whichNone
 wasNone
 introducedNone
 byNone
 theNone
 LiberalNone
 DemocratNone
 MPNone
 forNone
 StNone
irlingNone
,None
 JohnNone
 BNone
ercNone
owNone
,None
 wouldNone
 makeNone
 itNone
 aNone
 crimeNone
 forNone
 aNone
 personNone
 toNone
 discriminateNone
 against