In [5]:
# !pip install torch transformers einops accelerate xformers
# !pip install --upgrade torch

In [1]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import torch.nn.functional as F

  from pandas.core.computation.check import NUMEXPR_INSTALLED


## Understanding logits

In [None]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")

inputs = tokenizer("", return_tensors="pt")
labels = torch.tensor([1]).unsqueeze(0)  # Batch size 1
outputs = model(**inputs, labels=labels)
logits = outputs.logits
print(logits)

# convert these logits to probabilities
probs = F.softmax(logits, dim=1)
print(probs)

In [3]:
from transformers import AutoTokenizer
import transformers
import torch

model = "tiiuae/falcon-7b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The model 'RWForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForC

In [4]:
sequences = pipeline(
   "Write a poem about Valencia.",
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: Write a poem about Valencia.
Valencia, city of sun and sky,
The city that's made of pure white,
Where fountains of crystal clear light,
Are mirrored in the night.

Valencia, city of the arts,
The city where you find the heart,
Where Picasso once his brush did paint,
A masterpiece, on its walls.

Valencia, city of the sea,
The city that has its own beat,
Where sailboats dance in perfect time,
As the sun sets without a sound.

Valencia, city of a thousand faces,
Each one a story to tell,
Where the past intertwines with the present,
And where beauty is forever in every detail.


In [34]:
# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b", torch_dtype=torch.bfloat16, 
                                             trust_remote_code=True, device_map="auto",)
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [35]:
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt").to(device)
print(inputs)
labels = torch.tensor([1]).unsqueeze(0).to(device)  # Batch size 1
outputs = model(**inputs, labels=labels)

{'input_ids': tensor([[9856,   23,  491, 3696,  304, 7209]], device='cuda:0'), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1]], device='cuda:0')}


ValueError: Got unexpected arguments: {'token_type_ids': tensor([[0, 0, 0, 0, 0, 0]])}

In [27]:
prompt = tokenizer(" ", return_tensors='pt')
device = 'cuda:0'
model.to(device)


outputs = model.generate(
    input_ids=prompt['input_ids'].to(device),
    attention_mask=prompt['attention_mask'].to(device),
    max_new_tokens=125,
    do_sample=True,
    output_scores=True
)

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


In [30]:
?model.generate

[0;31mSignature:[0m
[0mmodel[0m[0;34m.[0m[0mgenerate[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0minputs[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mtorch[0m[0;34m.[0m[0mTensor[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mgeneration_config[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mtransformers[0m[0;34m.[0m[0mgeneration[0m[0;34m.[0m[0mconfiguration_utils[0m[0;34m.[0m[0mGenerationConfig[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlogits_processor[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mtransformers[0m[0;34m.[0m[0mgeneration[0m[0;34m.[0m[0mlogits_process[0m[0;34m.[0m[0mLogitsProcessorList[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstopping_criteria[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mtransformers[0m[0;34m.[0m[0mgenerati

In [28]:
print(outputs)

tensor([[  209,   187,   395,   253,   187, 18941,   187,   249,   253,   187,
          5045,   187,  1171,   187,   266,   187, 34502,   187, 28936,   187,
           395,     3,   187,    66,   187, 29777, 39901,   187, 28936,   187,
           266,   187, 34502,   187, 28936,   187,  3529,   187,  5658,   187,
          5092,    13,   187, 12550,   187, 29777,   545, 20283,    13,   187,
           262,   187,  5092,   187,  3088,   187,   635,   187, 25914,   187,
          3062,   187,  2858,    13,   187,   262,   187, 22732,   187,  6309,
           347,   187, 17124,   187,   395,   187, 29266,   187,   249,  2426,
           187,  1171,   187,    85, 25004,   187, 12550,   187, 29777, 39901,
           187,   395,   187,   262,   187, 22732,   187,  3062,   187,  2920,
           187,   395,   187,    84, 46711,   187, 49831,   187, 12550,   187,
         29777,  6198,   187,   395,   187,   262,   187,  9846,   187, 11145,
           187,  3529,   187, 25914,   187, 29266]],

In [24]:
tokenizer.decode(outputs[0])

' \n}\n\nI want to create a list and list of the results of search. A list of the results are the values that I would like to have in the search result list. As with a string list, I am getting an error. I want to get a response, instead of a list of the results.\n//A list of the results in the textfield\n    List<EmployeeType> results;\n    //Using the "results" parameter as a key for the result list to be returned\n    IDictionary<Key<EmployeeType>, TResultList> resultsListKey = \n                                '

## Classifier-free guidance

In [12]:
from transformers import (GPT2Tokenizer, AutoModelForCausalLM,
                          GPTNeoXForCausalLM, AutoTokenizer)
import numpy as np
import torch
from transformers import (LogitsProcessor, LogitsProcessorList,
                          MinLengthLogitsProcessor, TemperatureLogitsWarper,
                          TopKLogitsWarper, TopPLogitsWarper,
                          TypicalLogitsWarper)
from transformers.generation import LogitNormalization
import torch.nn.functional as F

class CFGLogits(LogitsProcessor):
    r"""Logits processor for Classifier-Free Guidance (CFG). The processors
    computes a weighted average across scores from prompt conditional and prompt unconditional (or negative) logits,
    parameterized by the `guidance_scale`. The unconditional scores are computed internally by prompting `model` with
    the `uncond` branch. Finally, according to CFG Rescale, the reweighted logits are interpolated back with weight
    `rescale_factor` the conditional ones to smooth the effect and increase output quality.

    See [the paper](https://arxiv.org/abs/2306.17806) for more information.

    Args:
        guidance_scale (float):
            The guidance scale for classifier free guidance (CFG). CFG is enabled by setting `guidance_scale > 1`.
            Higher guidance scale encourages the model to generate samples that are more closely linked to the input
            prompt, usually at the expense of poorer quality.
        uncond (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary for the unconditional branch.
        model:
            The LM computing the unconditional scores. Supposedly the same as the one computing the conditional scores.
            Both models must use the same tokenizer.
        smooth_factor (float):
            The interpolation weight for CFG Rescale. 1 means no rescaling, 0 reduces to the conditional scores without
            CFG. Turn it lower if the output degenerates. Lower values allow for higher guidance scale.
    """

    def __init__(self, guidance_scale, uncond, model, rescale_factor=1.0):
        self.guidance_scale = guidance_scale
        self.uncond = uncond
        self.model = model
        self.out = None
        self.rescale_factor = rescale_factor

    def __call__(self, input_ids, scores):
        scores = F.log_softmax(scores, dim=-1)
        if self.guidance_scale == 1:
            return scores

        if self.out is None:
            self.out = self.model(self.uncond, use_cache=True)
        else:
            self.out = self.model(
                input_ids[:, -1:],
                use_cache=True,
                past_key_values=self.out.past_key_values,
            )
        unconditional_logits = F.log_softmax(self.out.logits[0][-1:], dim=-1)
        out = self.guidance_scale * (scores - unconditional_logits) + unconditional_logits
        out = F.log_softmax(out, dim=-1)
        if self.rescale_factor == 1:
            return out
        return self.rescale_factor * out + (1 - self.rescale_factor) * scores

In [13]:
# paper usage: (copying and editing @grantCelley 's answer)
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import LogitsProcessorList, TemperatureLogitsWarper, TopPLogitsWarper

tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-160m")
model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-160m")

Downloading (…)okenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/569 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/375M [00:00<?, ?B/s]

In [14]:
prompt = tokenizer("Today a dragon flew over Paris, France,", return_tensors='pt')
# either provide a negative prompt:
neg_prompt = tokenizer("A sad event happened,", return_tensors='pt')['input_ids']
# or don't:
# neg_prompt = prompt['input_ids'][:, -1:]

device='cuda:0'
model.to(device)
outputs = model.generate(
    input_ids=prompt['input_ids'].to(device),
    attention_mask=prompt['attention_mask'].to(device),
    max_new_tokens=125,
    logits_processor=LogitsProcessorList([
        # inputs_cfg usually is the last token of the prompt but there are
        # possibilities of negative prompting that are explored in the paper
        CFGLogits(1.5, neg_prompt.to(device), model),
        TemperatureLogitsWarper(0.8),
        TopPLogitsWarper(0.95),
    ]),
    do_sample=True,
)

print(tokenizer.decode(outputs[0]))

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Today a dragon flew over Paris, France, and entered Paris, France.

The dragon was the second most successful dragon of the 19th century and was one of the best known in France. It was a dragon of the early 19th century, but it took off in the early 20th century. The Dragon was also a great success on the world stage, becoming the world's biggest and most successful dragon ever, a big success in France and Germany, and one of the world's biggest dragon.

The dragon has been around for over 300 years, and in the last 100 years, it is a great dragon. It is also the most difficult dragon


In [None]:
def generate_examples(model, tokenizer, prompt, neg_prompts, num_examples, device='cuda:0'):
    model.to(device)
    generated_examples = []

    for _ in range(num_examples):
        # Generate a new example
        prompt_tensor = tokenizer(prompt, return_tensors='pt')
        neg_prompts_tensor = [tokenizer(neg_prompt, return_tensors='pt')['input_ids'] for neg_prompt in neg_prompts]
        
        # Ensure everything is on the right device
        prompt_tensor = {k: v.to(device) for k, v in prompt_tensor.items()}
        neg_prompts_tensor = [prompt.to(device) for prompt in neg_prompts_tensor]

        output = model.generate(
            input_ids=prompt_tensor['input_ids'],
            attention_mask=prompt_tensor['attention_mask'],
            max_new_tokens=125,
            logits_processor=LogitsProcessorList([
                CFGLogits(1.5, neg_prompt, model) for neg_prompt in neg_prompts_tensor,
                TemperatureLogitsWarper(0.8),
                TopPLogitsWarper(0.95),
            ]),
            do_sample=True,
        )

        generated_example = tokenizer.decode(output[0])
        generated_examples.append(generated_example)

        # Add generated example to the set of negative prompts
        neg_prompts.append(generated_example)

    return generated_examples