# RL training GPT-2

In [None]:
!pip install transformers datasets accelerate

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.wh

In [None]:
import torch
from torch import nn
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import random


In [None]:
# Pick the compute device: the GPU when PyTorch can see one, the CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f"Using device: {device}")


Using device: cpu


In [None]:
# Load the pretrained "gpt2" tokenizer and the matching LM-head model.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
# Move the weights to the selected device and switch to training mode, which
# keeps the model's Dropout layers active.
model.to(device)
# Last expression of the cell, so the notebook displays the module tree.
model.train()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=2304, nx=768)
          (c_proj): Conv1D(nf=768, nx=768)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=3072, nx=768)
          (c_proj): Conv1D(nf=768, nx=3072)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

In [None]:
# Reuse the end-of-sequence token as the padding token (assigned
# unconditionally) and record its id on the model config as well.
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

# Reward function: +1 if the keyword ('cat' by default) is in the text, else 0
def reward_fn(text, keyword="cat"):
    """Return a binary reward for ``text``.

    The match is a case-insensitive *substring* test, so with the default
    keyword a word such as "concatenate" is rewarded as well.

    Args:
        text: Decoded model output to score.
        keyword: Substring that earns the reward. Defaults to ``"cat"``.

    Returns:
        ``1.0`` when ``keyword`` occurs in ``text``, otherwise ``0.0``.

    Raises:
        ValueError: If ``keyword`` is empty; an empty string is a substring
            of every text and would reward every sample.
    """
    if not keyword:
        raise ValueError("keyword must be a non-empty string")
    return 1.0 if keyword.lower() in text.lower() else 0.0


In [None]:
# Report the model size: the sum of element counts over every tensor
# returned by model.parameters().

print(f'The model has {sum(p.numel() for p in model.parameters())} parameters')

The model has 124439808 parameters


In [None]:
def sample_text(prompt="The animal is", max_len=20):
    """Sample one continuation of ``prompt`` and return the decoded text.

    Uses the notebook globals ``tokenizer``, ``model`` and ``device``.

    The model is switched to eval mode while sampling so that dropout does
    not perturb these inspection samples, and its previous train/eval mode is
    restored afterwards, even if generation raises.

    Args:
        prompt: Text to condition the generation on.
        max_len: Maximum number of tokens to generate after the prompt.

    Returns:
        The prompt plus the sampled continuation, with special tokens removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    was_training = model.training
    model.eval()
    try:
        outputs = model.generate(
            **inputs,
            max_length=inputs['input_ids'].shape[1] + max_len,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=1.0,
            pad_token_id=tokenizer.pad_token_id
        )
    finally:
        # Put the model back in whatever mode the caller had it in.
        model.train(was_training)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [None]:
# Adam over every model parameter; this is the optimizer stepped by the
# training function.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
# NOTE(review): loss_fn is not referenced by any cell visible in this
# notebook; the training step builds its loss directly from log-probabilities.
loss_fn = nn.CrossEntropyLoss()


In [None]:
def reinforce_train_step(prompt="The animal is", max_new_tokens=20):
    """Run one single-sample REINFORCE update.

    Uses the notebook globals ``tokenizer``, ``model``, ``device``,
    ``optimizer`` and ``reward_fn``.

    A continuation is sampled with ``model.generate``. Generation runs
    without gradient tracking, so the scores it can return cannot be
    back-propagated through. The log-probabilities of the sampled tokens are
    therefore recomputed with an ordinary forward pass that does track
    gradients.

    Args:
        prompt: Text the continuation is sampled from.
        max_new_tokens: Maximum number of tokens to sample after the prompt.

    Returns:
        ``(generated_text, reward)``: the decoded prompt plus continuation and
        the value ``reward_fn`` assigned to it.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    input_len = inputs['input_ids'].shape[1]

    # Sample the action sequence (no gradients are recorded here).
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        return_dict_in_generate=True,
        pad_token_id=tokenizer.pad_token_id,
    )
    sequences = outputs.sequences  # shape: (1, seq_len)

    # Reward is computed on the full decoded text (prompt + continuation).
    generated_text = tokenizer.decode(sequences[0], skip_special_tokens=True)
    R = reward_fn(generated_text)

    # Nothing was sampled: the mean below would be NaN and corrupt the weights.
    if sequences.shape[1] <= input_len:
        return generated_text, R

    # Gradient-tracking forward pass over the sampled sequence.
    logits = model(sequences, attention_mask=torch.ones_like(sequences)).logits

    # logits[:, t] predicts token t + 1, so drop the last position and shift
    # the targets left by one.
    log_probs = torch.log_softmax(logits[:, :-1, :], dim=-1)
    targets = sequences[:, 1:]
    token_log_probs = log_probs.gather(-1, targets.unsqueeze(-1)).squeeze(-1)

    # Keep only the sampled tokens; the first one sits at shifted index
    # input_len - 1.
    chosen_log_probs = token_log_probs[0, input_len - 1:]

    # REINFORCE loss: reward-weighted negative mean log-probability.
    loss = -R * chosen_log_probs.mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return generated_text, R


In [None]:
import torch
from torch import nn
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import random

# Ensure necessary imports are present if running this function standalone
# from IPython import get_ipython
# from IPython.display import display
# !pip install transformers datasets accelerate
# import torch
# from torch import nn
# from transformers import GPT2Tokenizer, GPT2LMHeadModel
# import random

# Assume 'device', 'tokenizer', 'model', 'reward_fn', and 'optimizer' are defined

def reinforce_train_step(prompt="The animal is", max_new_tokens=20):
    """Run one single-sample REINFORCE update.

    Uses the notebook globals ``tokenizer``, ``model``, ``device``,
    ``optimizer`` and ``reward_fn``.

    A continuation is sampled with ``model.generate``. The log-probabilities
    of the sampled tokens are then recomputed with a gradient-tracking
    forward pass, and the reward-weighted negative mean log-probability is
    minimised.

    Args:
        prompt: Text the continuation is sampled from.
        max_new_tokens: Maximum number of tokens to sample after the prompt.

    Returns:
        ``(generated_text, reward)``: the decoded prompt plus continuation and
        the value ``reward_fn`` assigned to it.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    input_len = inputs['input_ids'].shape[1]

    # Sample the action sequence. pad_token_id is passed explicitly so that
    # generate() does not emit its fallback warning on every step.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        return_dict_in_generate=True,
        pad_token_id=tokenizer.pad_token_id,
    )
    generated_sequences = outputs.sequences  # shape: (1, seq_len)

    # Reward is computed on the full decoded text (prompt + continuation).
    generated_text = tokenizer.decode(generated_sequences[0], skip_special_tokens=True)
    R = reward_fn(generated_text)

    # Nothing was sampled: the mean below would be NaN and corrupt the weights.
    if generated_sequences.shape[1] <= input_len:
        return generated_text, R

    # Gradient-tracking forward pass. No labels are passed because the
    # built-in language-modelling loss is not used. The sequence is a single
    # unpadded sample, so every position is attended to.
    logits = model(
        generated_sequences,
        attention_mask=torch.ones_like(generated_sequences),
    ).logits

    # logits[:, t] predicts token t + 1, so drop the last position and shift
    # the targets left by one.
    log_probs = torch.log_softmax(logits[:, :-1, :], dim=-1)  # (1, seq_len-1, vocab)
    target_tokens = generated_sequences[:, 1:]                # (1, seq_len-1)
    token_log_probs = log_probs.gather(-1, target_tokens.unsqueeze(-1)).squeeze(-1)

    # Keep only the sampled tokens; the first one sits at shifted index
    # input_len - 1.
    chosen_log_probs = token_log_probs[0, input_len - 1:]     # (gen_len,)

    # REINFORCE loss. The mean (rather than the sum) of the log-probs is used,
    # which normalises the gradient by the continuation length.
    loss = -R * chosen_log_probs.mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return generated_text, R

In [None]:
# Baseline: draw a few samples before any RL update has been applied.
print("Before training")
for _ in range(5):
    print("→", sample_text())

# 100 single-sample REINFORCE updates; every 10th step is logged together
# with the reward and the text sampled on that step.
print("\nTraining...")
for step in range(100):
    text, r = reinforce_train_step()
    if step % 10 == 0:
        print(f"[Step {step}] Reward: {r:.2f} | Text: {text}")

# Sample again from the same default prompt to compare with the baseline.
print("\nAfter training")
for _ in range(5):
    print("→", sample_text())


Before training
→ The animal is usually the small horn or slightly larger than it appears on the skin. The small animal can also be
→ The animal is not a real creature but is in fact a sentient animal, with its very shape and structure. There
→ The animal is under constant threat of being injured, shot or otherwise harmed, for which all the owners' legal counsel
→ The animal is a tiny house animal, and its only living creature is a human one, which is why this is


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


→ The animal is actually just a human. I mean when you look at this, it's almost exactly the same,

Training...


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Step 0] Reward: 0.00 | Text: The animal is at the level, he says, only this time when the other has been in an unexpected position.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Step 10] Reward: 0.00 | Text: The animal is that you get stuck in. There is a lot in that. The reason is the animal does it


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Step 20] Reward: 0.00 | Text: The animal is the thing the thing"

"And "

"

A lot of animals,


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Step 30] Reward: 0.00 | Text: The animal is the animal (animal)

animal has the animal

the animal must be the animal has


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Step 40] Reward: 1.00 | Text: The animal is the fish; the cat. The cat is the cat.


the cat is the cat




We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Step 50] Reward: 1.00 | Text: The animal is the cat."


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Step 60] Reward: 1.00 | Text: The animal is the cat is the cat"


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Step 70] Reward: 1.00 | Text: The animal is the cat".


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Step 80] Reward: 1.00 | Text: The animal is the cat".


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[Step 90] Reward: 1.00 | Text: The animal is the cat".


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



After training
→ The animal is the cat".
→ The animal is the cat"
→ The animal is the cat".
→ The animal is the cat".
→ The animal is the cat".


# RL with an LLM using LoRA (Hugging Face PEFT)

In [None]:
pip install transformers peft accelerate bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl (67.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.46.0


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import get_peft_model, LoraConfig, TaskType
import torch.nn.functional as F

# Use a small model like Falcon-RW-1B or GPT2 for fast CPU training
# NOTE: this cell rebinds the notebook globals `tokenizer`, `model` and
# `device` that the GPT-2 section above also defines.
model_name = "tiiuae/falcon-rw-1b"

tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right")
# Fall back to the end-of-sequence token when the tokenizer has no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

base_model = AutoModelForCausalLM.from_pretrained(model_name, device_map=None)

# Enable adapter with LoRA
# Rank-8 adapters with scaling alpha=16 and adapter dropout 0.1 are attached
# to the modules whose names match `target_modules`.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["query_key_value", "dense", "dense_h_to_4h"],  # Adjust based on model
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

# Wrap the base model with the adapters and mirror the pad token id onto the
# model config.
model = get_peft_model(base_model, lora_config)
model.config.pad_token_id = tokenizer.pad_token_id
# The device is pinned to the CPU here regardless of GPU availability.
device = torch.device("cpu")
model = model.to(device)
# Training mode; as the cell's last expression it also displays the module tree.
model.train()


tokenizer_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.62G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.62G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/115 [00:00<?, ?B/s]



PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): FalconForCausalLM(
      (transformer): FalconModel(
        (word_embeddings): Embedding(50304, 2048)
        (h): ModuleList(
          (0-23): 24 x FalconDecoderLayer(
            (self_attention): FalconAttention(
              (query_key_value): lora.Linear(
                (base_layer): FalconLinear(in_features=2048, out_features=6144, bias=True)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=6144, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
            

In [None]:
def reward_fn(text, keyword="cat"):
    """Return a binary reward for ``text``.

    The match is a case-insensitive *substring* test, so with the default
    keyword a word such as "concatenate" is rewarded as well.

    Args:
        text: Decoded model output to score.
        keyword: Substring that earns the reward. Defaults to ``"cat"``.

    Returns:
        ``1.0`` when ``keyword`` occurs in ``text``, otherwise ``0.0``.

    Raises:
        ValueError: If ``keyword`` is empty; an empty string is a substring
            of every text and would reward every sample.
    """
    if not keyword:
        raise ValueError("keyword must be a non-empty string")
    return 1.0 if keyword.lower() in text.lower() else 0.0


In [None]:
def sample_text(prompt="The animal is", max_len=20):
    """Sample one continuation of ``prompt`` and return the decoded text.

    Uses the notebook globals ``tokenizer``, ``model`` and ``device``.

    The model is switched to eval mode while sampling so that dropout
    (including the LoRA adapter dropout) does not perturb these inspection
    samples. Its previous train/eval mode is restored afterwards, even if
    generation raises.

    Args:
        prompt: Text to condition the generation on.
        max_len: Maximum number of tokens to generate after the prompt.

    Returns:
        The prompt plus the sampled continuation, with special tokens removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    was_training = model.training
    model.eval()
    try:
        outputs = model.generate(
            **inputs,
            max_length=inputs['input_ids'].shape[1] + max_len,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=1.0,
            pad_token_id=tokenizer.pad_token_id
        )
    finally:
        # Put the model back in whatever mode the caller had it in.
        model.train(was_training)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [None]:
# Adam over model.parameters() of the LoRA-wrapped model.
# NOTE(review): presumably only the adapter weights receive gradients, with
# the base weights frozen by get_peft_model; confirm against the PEFT docs.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)



In [None]:
import torch
from torch import nn
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import random

# Ensure necessary imports are present if running this function standalone
# from IPython import get_ipython
# from IPython.display import display
# !pip install transformers datasets accelerate
# import torch
# from torch import nn
# from transformers import GPT2Tokenizer, GPT2LMHeadModel
# import random

# Assume 'device', 'tokenizer', 'model', 'reward_fn', and 'optimizer' are defined

def reinforce_train_step(prompt="The animal is", max_new_tokens=20):
    """Run one single-sample REINFORCE update.

    Uses the notebook globals ``tokenizer``, ``model``, ``device``,
    ``optimizer`` and ``reward_fn``.

    A continuation is sampled with ``model.generate``. The log-probabilities
    of the sampled tokens are then recomputed with a gradient-tracking
    forward pass, and the reward-weighted negative mean log-probability is
    minimised.

    Args:
        prompt: Text the continuation is sampled from.
        max_new_tokens: Maximum number of tokens to sample after the prompt.

    Returns:
        ``(generated_text, reward)``: the decoded prompt plus continuation and
        the value ``reward_fn`` assigned to it.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    input_len = inputs['input_ids'].shape[1]

    # Sample the action sequence. pad_token_id is passed explicitly so that
    # generate() does not emit its fallback warning on every step.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        return_dict_in_generate=True,
        pad_token_id=tokenizer.pad_token_id,
    )
    generated_sequences = outputs.sequences  # shape: (1, seq_len)

    # Reward is computed on the full decoded text (prompt + continuation).
    generated_text = tokenizer.decode(generated_sequences[0], skip_special_tokens=True)
    R = reward_fn(generated_text)

    # Nothing was sampled: the mean below would be NaN and corrupt the weights.
    if generated_sequences.shape[1] <= input_len:
        return generated_text, R

    # Gradient-tracking forward pass. No labels are passed because the
    # built-in language-modelling loss is not used. The sequence is a single
    # unpadded sample, so every position is attended to.
    logits = model(
        generated_sequences,
        attention_mask=torch.ones_like(generated_sequences),
    ).logits

    # logits[:, t] predicts token t + 1, so drop the last position and shift
    # the targets left by one.
    log_probs = torch.log_softmax(logits[:, :-1, :], dim=-1)  # (1, seq_len-1, vocab)
    target_tokens = generated_sequences[:, 1:]                # (1, seq_len-1)
    token_log_probs = log_probs.gather(-1, target_tokens.unsqueeze(-1)).squeeze(-1)

    # Keep only the sampled tokens; the first one sits at shifted index
    # input_len - 1.
    chosen_log_probs = token_log_probs[0, input_len - 1:]     # (gen_len,)

    # REINFORCE loss. The mean (rather than the sum) of the log-probs is used,
    # which normalises the gradient by the continuation length.
    loss = -R * chosen_log_probs.mean()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return generated_text, R

In [None]:
# Baseline: draw a few samples before any RL update has been applied.
print("Before training")
for _ in range(3):
    print("→", sample_text())

# Single-sample REINFORCE updates; every 5th step is logged together with the
# reward and the text sampled on that step.
# NOTE(review): the output recorded under this cell logs steps up to 70, which
# range(30) cannot produce; it was presumably captured with a larger step
# count. Re-run the cell to refresh the output.
print("\nTraining...")
for step in range(30):  # Increase to 100+ to see better behavior
    text, r = reinforce_train_step()
    if step % 5 == 0:
        print(f"[Step {step}] Reward: {r:.2f} | Text: {text}")

# Sample again from the same default prompt to compare with the baseline.
print("\nAfter training")
for _ in range(3):
    print("→", sample_text())


Before training
→ The animal is an adult. The animal is an adult female. The dog is a young dog. If the dog
→ The animal is also likely to become aggressive if it believes it is threatened, and a vicious animal will usually defend the


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


→ The animal is not for sale. This is a free service provided by the
University of Kentucky
.
K

Training...


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 0] Reward: 0.00 | Text: The animal is not the only one who's been affected by the shutdown. So have veterinarians.
On a


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 5] Reward: 0.00 | Text: The animal is a young black bear, around one a year old, which wandered through my woods in northern Wisconsin.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 10] Reward: 0.00 | Text: The animal is very nervous.
She is crying.
She says it's because of a lot of stress,


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 15] Reward: 1.00 | Text: The animal is not a dog or cat. He is a pig with dog body anatomy.
If you have any


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 20] Reward: 0.00 | Text: The animal is in pain. You put his head in that bottle because he is in unbearable pain.”



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 25] Reward: 0.00 | Text: The animal is healthy, but it seems to have been sick for a couple of years before you found it. I


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 30] Reward: 0.00 | Text: The animal is the same breed as the previous dog, it does not seem that this behavior is an obsessive compulsion disorder


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 35] Reward: 0.00 | Text: The animal is a dog, a Siberian husky. The dog is a little bit thin, the coat is medium


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 40] Reward: 0.00 | Text: The animal is a dog called Dizzy.. Dizzy is not a stray dog.. He was in my grandmothers


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 45] Reward: 0.00 | Text: The animal is a horse.
The bird is a rooster.
The vegetable is
a carrot.



Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 50] Reward: 1.00 | Text: The animal is a dog or cat.
The animal's name is Lola.
The animal is a cat


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 55] Reward: 0.00 | Text: The animal is an animal.
An animal is a non-human animal.
The animal's species is not


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 60] Reward: 1.00 | Text: The animal is cat.
- Cat Age: Young Adult
- Cat Breed: Cat Breeds
Preferred


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 65] Reward: 1.00 | Text: The animal is cat.
- Cat is spayed/neutered.
- Cat is a cat.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 70] Reward: 1.00 | Text: The animal is cat, dog, bird, monkey, reptile, or frog.
- Age: To enter


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 75] Reward: 1.00 | Text: The animal is cat, dog, bird, reptile, or exotic pet.
- Age: Will need to


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 80] Reward: 1.00 | Text: The animal is cat, dog, bird, reptile, or reptilian and has no true cat, dog,


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 85] Reward: 1.00 | Text: The animal is cat, bird, bird dog, bird dog dog, fish, hamster, rabbit, reptile


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 90] Reward: 1.00 | Text: The animal is cat, bird, reptile, or exotic pet.
- Age: You must have an animal


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 95] Reward: 1.00 | Text: The animal is cat, bird, reptile, or animal of a different species.
- Age: Any age


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 100] Reward: 1.00 | Text: The animal is cats, birds, reptiles, or exotic pets.
- Age: Any age
- Type of


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 105] Reward: 1.00 | Text: The animal is: Cat, Birds, Bird, Bird Dog, Bird Puppy, Bird Rabbit, Bird Rabbit Dog


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 110] Reward: 1.00 | Text: The animal is cats, dogs, birds, reptiles, amphibians, or exotic pets.!Here's how


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 115] Reward: 1.00 | Text: The animal is cats,birds,bats,birds of prey,birds,catfish,cats,dogs,ex


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 120] Reward: 1.00 | Text: The animal is cats, birds, bats, dogs, reptiles, amphibians, or exotic pets.
The animal


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 125] Reward: 1.00 | Text: The animal is cats, birds, bats, birds, dogs, cats, fish, frogs, dogs, reptiles and


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 130] Reward: 1.00 | Text: The animal is cats, birds, bats, birds, cats, dogs, fish, frogs, reptiles, rodents,


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 135] Reward: 1.00 | Text: The animal is cats, birds, bats, dogs, fish, reptiles, amphibians, non-reptiles


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 140] Reward: 1.00 | Text: The animal is cats, birds, bats, birds, reptiles and amphibians.
Is your pet a cat,


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 145] Reward: 1.00 | Text: The animal is cats, birds, bats, fish, amphibians, reptiles and amphibians.
Is The Animal


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 150] Reward: 1.00 | Text: The animal is cats, birds, bats, dogs, fish, frogs and toads, birds, amphibians,


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 155] Reward: 1.00 | Text: The animal is cats, bats, birds, cats birdbirds, cats fish and amphibians and reptiles and newts


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 160] Reward: 1.00 | Text: The animal is cats, birds, bats, catsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirds


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 165] Reward: 1.00 | Text: The animal iscat dog bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird bird


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 170] Reward: 1.00 | Text: The animal is cats, birds, bats and bird dogs, cats bird dogs bird birds, cats dogs bird birds dog


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 175] Reward: 1.00 | Text: The animal is cats dog bird bird birdbird birdbird bird bird bird bird bird bird bird bird bird bird bird bird


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 180] Reward: 1.00 | Text: The animal is cats birdbirds birdsbirdsbirds birdsbirdsbirds birdsbirdsbirds birdsbirdsbirdsbirds birdsbirdsbirdsbirds


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 185] Reward: 1.00 | Text: The animal is cats birdbirds birdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirds


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 190] Reward: 1.00 | Text: The animal is cats birdbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirds


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[Step 195] Reward: 1.00 | Text: The animal is cats birdbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirds


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



After training
→ The animal is cats birdbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirds
→ The animal is cats birdbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirds
→ The animal is catsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirdsbirds
