In [1]:
%%capture
!pip install -qq transformers bitsandbytes peft accelerate wandb datasets unisim trl[peft]

# Download best model from wandb

Skip this step if the model is already available in local directory.

In [None]:
from google.colab import userdata
import wandb
import os

os.environ["WANDB_PROJECT"] = "text-ads-generation"  # name your W&B project
os.environ["WANDB_LOG_MODEL"] = "checkpoint"  # log all model checkpoints
wandb.login(key=userdata.get('WANDB_API_KEY'))

with wandb.init() as run:
  # Pass the name and version of Artifact
  my_model_name = "model-gemma-2b-9:latest"
  my_model_artifact = run.use_artifact(my_model_name)

  # Download model weights to a folder and return the path
  model_path = my_model_artifact.download()
  print('model:', model_path)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcodescv[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m:   8 of 8 files downloaded.  


model: /content/artifacts/model-gemma-2b-9:v0


VBox(children=(Label(value='0.175 MB of 0.175 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

# Prediction

Now we can load the model and make some predictions.
The input is a prompt of the background information about the product e.g.
```
### Human: Generate 10 text ads for the product with the following information.
app_name: Bauldur's Gate 3
category: Game -> RPG Game
description: Gather your party, and return to the Forgotten Realms in a tale of fellowship and betrayal, sacrifice and survival, and the lure of absolute power.
Mysterious abilities are awakening inside you, drawn from a Mind Flayer parasite planted in your brain. Resist, and turn darkness against itself. Or embrace corruption, and become ultimate evil.
From the creators of Divinity: Original Sin 2 comes a next-generation RPG, set in the world of Dungeons and Dragons

### Assistant:
```
The output is 10 ad headlines. e.g.
```
Unleash Your Inner Monster
Mindflayer's Embrace
Dungeon Crawling Adventure
Party Up for Epic Tales
Forbidden Powers Await
Resist or Corrupt
Dungeons & Dragons Unleashed
ForgeYour Fellowship
Storytelling at its Finest
RPG Evolution: Divinity
```

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import pandas as pd

def model_summary(model):
  if hasattr(model, 'active_adapters'):
    if callable(model.active_adapters):
      adapters = model.active_adapters()
    elif isinstance(model.active_adapters, list):
      adapters = model.active_adapters
    print('adapter:', adapters)
  if hasattr(model, 'device'):
    print('device:', model.device)
  else:
    print('device:', list(model.parameters())[0].device)
  summary = pd.DataFrame([(name, param.shape, param.dtype, param.requires_grad) for name, param in model.named_parameters()], columns=['name', 'shape', 'dtype', 'trainable'])
  display(summary)


def predict(model, tokenizer, prompt):
  with torch.no_grad():
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=256, do_sample=True, top_k=50, top_p=0.95, temperature=0.5,
                             pad_token_id= tokenizer.eos_token_id, eos_token_id= tokenizer.eos_token_id, repetition_penalty=1.2)

    # only return generated new tokens
    result = tokenizer.decode(outputs[0, inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
    return result


In [None]:
# Manually set local checkpoint: uncomment below
# model_path = '/path/to/checkpoint-1234'
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map='auto', torch_dtype=torch.bfloat16)
model_summary(model)

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

adapter: ['default']


Unnamed: 0,name,shape,dtype,trainable
0,model.embed_tokens.weight,"(256000, 2048)",torch.bfloat16,False
1,model.layers.0.self_attn.q_proj.base_layer.weight,"(2048, 2048)",torch.bfloat16,False
2,model.layers.0.self_attn.q_proj.lora_A.default...,"(8, 2048)",torch.bfloat16,False
3,model.layers.0.self_attn.q_proj.lora_B.default...,"(2048, 8)",torch.bfloat16,False
4,model.layers.0.self_attn.k_proj.base_layer.weight,"(256, 2048)",torch.bfloat16,False
...,...,...,...,...
411,model.layers.17.mlp.down_proj.lora_A.default.w...,"(8, 16384)",torch.bfloat16,False
412,model.layers.17.mlp.down_proj.lora_B.default.w...,"(2048, 8)",torch.bfloat16,False
413,model.layers.17.input_layernorm.weight,"(2048,)",torch.bfloat16,False
414,model.layers.17.post_attention_layernorm.weight,"(2048,)",torch.bfloat16,False


In [None]:
prompt = """### Human: Generate 10 text ads for the product with the following information.
app_name: Bauldur's Gate 3
category: Game -> RPG Game
description: Gather your party, and return to the Forgotten Realms in a tale of fellowship and betrayal, sacrifice and survival, and the lure of absolute power.
Mysterious abilities are awakening inside you, drawn from a Mind Flayer parasite planted in your brain. Resist, and turn darkness against itself. Or embrace corruption, and become ultimate evil.
From the creators of Divinity: Original Sin 2 comes a next-generation RPG, set in the world of Dungeons and Dragons

### Assistant:
"""
print('prompt:', prompt, sep='\n')
print('Model output:', predict(model, tokenizer, prompt), sep='\n')

prompt:
### Human: Generate 10 text ads for the product with the following information.
app_name: Bauldur's Gate 3
category: Game -> RPG Game
description: Gather your party, and return to the Forgotten Realms in a tale of fellowship and betrayal, sacrifice and survival, and the lure of absolute power.
Mysterious abilities are awakening inside you, drawn from a Mind Flayer parasite planted in your brain. Resist, and turn darkness against itself. Or embrace corruption, and become ultimate evil.
From the creators of Divinity: Original Sin 2 comes a next-generation RPG, set in the world of Dungeons and Dragons

### Assistant:

Model output:
Unleash Your Inner Monster
Mind Flayer'S Tale Unravels
Forbidden Powers Await
Reject Darkness, Embrace Evil
Next-Gen RPG Adventure
World of Dungeons & Dragons
Fellowship and Betrayal Awaits
 sacrificE, Survival, Power
Resist or Corrupt (Your Brain)
Ultimate DnD Experience


# Merge the Model

Before Reinforcement Learning, it's [suggested](https://github.com/huggingface/trl/issues/1036) to merge the adapter first and then use another adapter for RL.


## References
- [Tutorial: merge Lora models](https://huggingface.co/docs/peft/developer_guides/lora#merge-adapters)

In [None]:
from transformers import AutoModelForCausalLM
from peft import PeftModel, AutoPeftModelForCausalLM
import torch

model_path = '/content/artifacts/model-gemma-2b-9:v0'
model = AutoPeftModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map='auto')

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Before merge, the model contains both the base params and LoRA params:

In [None]:
model_summary(model)

adapter: ['default']
device: cuda:0


Unnamed: 0,name,shape,dtype,trainable
0,base_model.model.model.embed_tokens.weight,"(256000, 2048)",torch.bfloat16,False
1,base_model.model.model.layers.0.self_attn.q_pr...,"(2048, 2048)",torch.bfloat16,False
2,base_model.model.model.layers.0.self_attn.q_pr...,"(8, 2048)",torch.bfloat16,False
3,base_model.model.model.layers.0.self_attn.q_pr...,"(2048, 8)",torch.bfloat16,False
4,base_model.model.model.layers.0.self_attn.k_pr...,"(256, 2048)",torch.bfloat16,False
...,...,...,...,...
411,base_model.model.model.layers.17.mlp.down_proj...,"(8, 16384)",torch.bfloat16,False
412,base_model.model.model.layers.17.mlp.down_proj...,"(2048, 8)",torch.bfloat16,False
413,base_model.model.model.layers.17.input_layerno...,"(2048,)",torch.bfloat16,False
414,base_model.model.model.layers.17.post_attentio...,"(2048,)",torch.bfloat16,False


After the merge, the model has exactly the same params as the base model.

In [None]:
model = model.merge_and_unload()
model_summary(model)

adapter: []
device: cuda:0


Unnamed: 0,name,shape,dtype,trainable
0,model.embed_tokens.weight,"(256000, 2048)",torch.bfloat16,False
1,model.layers.0.self_attn.q_proj.weight,"(2048, 2048)",torch.bfloat16,False
2,model.layers.0.self_attn.k_proj.weight,"(256, 2048)",torch.bfloat16,False
3,model.layers.0.self_attn.v_proj.weight,"(256, 2048)",torch.bfloat16,False
4,model.layers.0.self_attn.o_proj.weight,"(2048, 2048)",torch.bfloat16,False
...,...,...,...,...
159,model.layers.17.mlp.up_proj.weight,"(16384, 2048)",torch.bfloat16,False
160,model.layers.17.mlp.down_proj.weight,"(2048, 16384)",torch.bfloat16,False
161,model.layers.17.input_layernorm.weight,"(2048,)",torch.bfloat16,False
162,model.layers.17.post_attention_layernorm.weight,"(2048,)",torch.bfloat16,False


In [None]:
base_model = AutoModelForCausalLM.from_pretrained('google/gemma-2b', torch_dtype=torch.bfloat16, device_map='auto')
model_summary(base_model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter: []
device: cuda:0


Unnamed: 0,name,shape,dtype,trainable
0,model.embed_tokens.weight,"(256000, 2048)",torch.bfloat16,True
1,model.layers.0.self_attn.q_proj.weight,"(2048, 2048)",torch.bfloat16,True
2,model.layers.0.self_attn.k_proj.weight,"(256, 2048)",torch.bfloat16,True
3,model.layers.0.self_attn.v_proj.weight,"(256, 2048)",torch.bfloat16,True
4,model.layers.0.self_attn.o_proj.weight,"(2048, 2048)",torch.bfloat16,True
...,...,...,...,...
159,model.layers.17.mlp.up_proj.weight,"(16384, 2048)",torch.bfloat16,True
160,model.layers.17.mlp.down_proj.weight,"(2048, 16384)",torch.bfloat16,True
161,model.layers.17.input_layernorm.weight,"(2048,)",torch.bfloat16,True
162,model.layers.17.post_attention_layernorm.weight,"(2048,)",torch.bfloat16,True


After the merge, the model behaves just like the LoRA adapted model, but with only one set of weights.

First let's compare the weights of the base model and merged model. The LoRA adapted layers should be different, while other layers should be the same.

In [None]:
import pandas as pd
stats = []
for ((n1, p1), (n2, p2)) in zip(base_model.named_parameters(), model.named_parameters()):
  stats.append({'name': n1, 'same': torch.allclose(p1, p2)})

display(pd.DataFrame(stats))

Unnamed: 0,name,same
0,model.embed_tokens.weight,True
1,model.layers.0.self_attn.q_proj.weight,False
2,model.layers.0.self_attn.k_proj.weight,False
3,model.layers.0.self_attn.v_proj.weight,False
4,model.layers.0.self_attn.o_proj.weight,False
...,...,...
159,model.layers.17.mlp.up_proj.weight,False
160,model.layers.17.mlp.down_proj.weight,False
161,model.layers.17.input_layernorm.weight,True
162,model.layers.17.post_attention_layernorm.weight,True


Now let's compare the predictions:

In [None]:
prompt = """### Human: Generate 10 text ads for the product with the following information.
app_name: Bauldur's Gate 3
category: Game -> RPG Game
description: Gather your party, and return to the Forgotten Realms in a tale of fellowship and betrayal, sacrifice and survival, and the lure of absolute power.
Mysterious abilities are awakening inside you, drawn from a Mind Flayer parasite planted in your brain. Resist, and turn darkness against itself. Or embrace corruption, and become ultimate evil.
From the creators of Divinity: Original Sin 2 comes a next-generation RPG, set in the world of Dungeons and Dragons

### Assistant:
"""

tokenizer = AutoTokenizer.from_pretrained(model_path)
print('prompt:', prompt, sep='\n')
print('=== SFT Model output:', predict(model, tokenizer, prompt), sep='\n')
print('=== Base Model output:', predict(base_model, tokenizer, prompt), sep='\n')

prompt:
### Human: Generate 10 text ads for the product with the following information.
app_name: Bauldur's Gate 3
category: Game -> RPG Game
description: Gather your party, and return to the Forgotten Realms in a tale of fellowship and betrayal, sacrifice and survival, and the lure of absolute power.
Mysterious abilities are awakening inside you, drawn from a Mind Flayer parasite planted in your brain. Resist, and turn darkness against itself. Or embrace corruption, and become ultimate evil.
From the creators of Divinity: Original Sin 2 comes a next-generation RPG, set in the world of Dungeons and Dragons

### Assistant:

=== SFT Model output:
Unleash Your Inner Monster
Mindflayer's Bargain Exposed
Resist or Embrace Corruption
Next-Gen Dungeon Crawling
Party Up for Fellowship
Survive and Survive Again
Darken the World
Become Ultimate Evil
Divinity Meets DnD
RPG Redefined
=== Base Model output:
### The ad should be written as if it were an actual advertisement that would appear on Goog

Looks good. Now we can save the SFT model.

In [None]:
model.save_pretrained('/content/drive/MyDrive/checkpoints/text-ads-generation/sft-v1')
tokenizer.save_pretrained('/content/drive/MyDrive/checkpoints/text-ads-generation/sft-v1')

# Scoring

## Format Score

The format score penalizes model to have formatting errors, e.g.

- Leading spaces
- No captalization
- Too long

In [2]:
def format_score(response, max_len=30):
  score = 1.0
  for ans in response.split('\n'):
    if len(ans) > max_len:
      score -= 0.5
    if len(ans) < 10:
      score -= 0.5
    if len(ans) > 0:
      if not ans[0].isupper():
        score -= 0.5
      elif ans[0] == ' ':
        score -= 0.5
  return score

response = """Unleash Your Inner Monster
This line is tooooooooooooooooo longggggggggg
Dungeon Crawling Adventure
Party Up for Epic Tales
Forbidden Powers Await
Resist or Corrupt
  This has leading spaces
not captalized
Too short
RPG Evolution: Divinity"""

format_score(response)

-1.0

## Diversity Score

We use [Unisim](https://github.com/google/unisim) to calculate the diversity (the inverse of similarity) of a group of ad headlines.

In [3]:
import tensorflow as tf

# avoid tensorflow to allocate all GPU memory
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  try:
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
      tf.config.experimental.set_memory_growth(gpu, True)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
  except RuntimeError as e:
    # Memory growth must be set before GPUs have been initialized
    print(e)

import numpy as np
from unisim import TextSim

text_sim = TextSim(use_accelerator=True)


@tf.function
def self_diversity(emb, similarity_threshold):
  print('Compiling self diversity, this should only happen the first time or when the input shapes change:', emb, similarity_threshold)
  # Don't include self when computing pairwise similarities
  mask = 1.0 - tf.eye(emb.shape[0])
  scores = tf.matmul(emb, emb, transpose_b=True) * mask
  count_above_threshold = tf.reduce_sum(tf.where(scores > similarity_threshold, 1.0, 0.0))
  score = 1.0 - count_above_threshold * 0.25
  return score


def diversity_score(response, similarity_threshold=0.85):
  queries = response.split('\n')
  emb = text_sim.embed(queries)
  return self_diversity(emb, tf.constant(similarity_threshold)).numpy()

1 Physical GPUs, 1 Logical GPUs
INFO: Loaded backend
INFO: Using TF with GPU




INFO: UniSim is storing a copy of the indexed data
INFO: If you are using large data corpus, consider disabling this behavior using store_data=False


In [4]:
response = """Unleash Your Inner Monster
Mindflayer's Embrace
Dungeon Crawling Adventure
Party Up for Epic Tales
Forbidden Powers Await
Resist or Corrupt
Dungeons & Dragons Unleashed
ForgeYour Fellowship
Storytelling at its Finest
RPG Evolution: Divinity
Mindflayers Emb"""

from tqdm.notebook import tqdm
for _ in tqdm(range(1000)):
  s = diversity_score(response, similarity_threshold=np.random.rand())

  0%|          | 0/1000 [00:00<?, ?it/s]

## Combined Score

In [4]:
def text_score(response, max_len=30, similarity_threshold=0.85):
  return format_score(response, max_len) + diversity_score(response, similarity_threshold)

response = """Unleash Your Inner Monster
Mindflayer's Embrace
Dungeon Crawling Adventure
Party Up for Epic Tales
Forbidden Powers Await
Resist or Corrupt
Dungeons & Dragons Unleashed
ForgeYour Fellowship
Storytelling at its Finest
RPG Evolution: Divinity
Mindflayers Emb"""

text_score(response)

Compiling self diversity, this should only happen the first time or when the input shapes change: Tensor("emb:0", shape=(11, 256), dtype=float32) Tensor("similarity_threshold:0", shape=(), dtype=float32)


1.5

# Dataset for RL

Take the same dataset in SFT training, but only use the prompts, not the targets.

Note the column name should be 'query' for PPOTrainer to work.

In [5]:
from datasets import Dataset
import yaml
import json
import glob

template = """### Human: Generate {count} text ads for the product with the following information.
{information}
### Assistant:
"""

data_dir = '/content/drive/MyDrive/02 data/text_ads_generation'

def rl_transform(data_dir):
  for file in glob.glob(f'{data_dir}/*.jsonl'):
    with open(file) as f:
      for line in f:
        item = json.loads(line)
        del(item['ad_headlines'])
        information = yaml.dump(item)
        query = template.format(count=10, information=information)
        yield {'query': query}

# PPO Training

- [enable_adapters](https://huggingface.co/docs/peft/v0.9.0/en/package_reference/tuners#peft.tuners.tuners_utils.BaseTunerLayer.enable_adapters) will set adapters to require grad for training

- [training PPO with PEFT](https://huggingface.co/docs/trl/en/lora_tuning_peft)
- [Example of PPO](https://huggingface.co/docs/trl/en/sentiment_tuning)
- [PPOConfig](https://github.com/huggingface/trl/blob/v0.7.11/trl/trainer/ppo_config.py#L34)
- [PPOTrainer](https://github.com/huggingface/trl/blob/v0.7.11/trl/trainer/ppo_trainer.py)
- [AutoModelForCausalLMWithValueHead](https://github.com/huggingface/trl/blob/v0.8.0/trl/models/modeling_value_head.py#L61)

In [6]:
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
from trl.core import LengthSampler
from tqdm.notebook import tqdm
from peft import LoraConfig

# replace below with your merged SFT model.
model_path = '/content/drive/MyDrive/checkpoints/text-ads-generation/sft-v1'

lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

model = AutoModelForCausalLMWithValueHead.from_pretrained(model_path,
                                                          device_map='auto',
                                                          torch_dtype=torch.bfloat16,
                                                          peft_config=lora_config)

model_summary(model)
# ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(model_path, device_map='auto', torch_dtype=torch.bfloat16)
# ref_model.eval()
# model_summary(ref_model)

# when using peft, we can rely on PPOTrainer to create a reference model
# https://github.com/huggingface/trl/blob/main/trl/models/modeling_base.py#L605
ref_model = None

tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token

def collator(data):
  return dict((key, [d[key] for d in data]) for key in data[0])


rl_dataset = Dataset.from_generator(rl_transform, gen_kwargs={'data_dir': data_dir})
# tokenize
rl_dataset = rl_dataset.map(lambda x: tokenizer(x["query"]), batched=False)
rl_dataset.set_format(type="torch")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



device: cuda:0


Unnamed: 0,name,shape,dtype,trainable
0,pretrained_model.base_model.model.model.embed_...,"(256000, 2048)",torch.bfloat16,False
1,pretrained_model.base_model.model.model.layers...,"(2048, 2048)",torch.bfloat16,False
2,pretrained_model.base_model.model.model.layers...,"(8, 2048)",torch.bfloat16,True
3,pretrained_model.base_model.model.model.layers...,"(2048, 8)",torch.bfloat16,True
4,pretrained_model.base_model.model.model.layers...,"(256, 2048)",torch.bfloat16,False
...,...,...,...,...
413,pretrained_model.base_model.model.model.layers...,"(2048,)",torch.bfloat16,False
414,pretrained_model.base_model.model.model.layers...,"(2048,)",torch.bfloat16,False
415,pretrained_model.base_model.model.model.norm.w...,"(2048,)",torch.bfloat16,False
416,v_head.summary.weight,"(1, 2048)",torch.float32,True


In [7]:
from datasets import concatenate_datasets
repeat = 2
rl_dataset = concatenate_datasets([rl_dataset] * repeat)
rl_dataset

Dataset({
    features: ['query', 'input_ids', 'attention_mask'],
    num_rows: 7638
})

In [8]:
config = PPOConfig(
    model_name=model_path,
    learning_rate=1.41e-5,
    log_with="wandb",
    batch_size=128,
    mini_batch_size=8,
    gradient_accumulation_steps=4,
)

ppo_trainer = PPOTrainer(config, model, ref_model, tokenizer, dataset=rl_dataset, data_collator=collator)

generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
    'max_new_tokens': 100,
    'temperature': 0.7,
    'repetition_penalty': 1.0,
}


for epoch, batch in tqdm(list(enumerate(ppo_trainer.dataloader))):
  query_tensors = batch["input_ids"]

  #### Get response from LM
  response_tensors = []
  # for query in tqdm(query_tensors):
  #   response = ppo_trainer.generate(query, **generation_kwargs)
  #   response_tensors.append(response.squeeze()[query.shape[-1]:])

  response_tensors, ref_response_tensors = ppo_trainer.generate(
      query_tensors, return_prompt=False, generate_ref_response=True, batch_size=12, **generation_kwargs
  )
  # batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors]
  batch["response"] = tokenizer.batch_decode(response_tensors)
  batch["ref_response"] = tokenizer.batch_decode(ref_response_tensors)

  #### Compute text score
  texts = batch["response"]

  rewards = [torch.tensor(text_score(r)) for r in batch["response"]]
  # rewards = [torch.tensor(1.0) for _ in batch['response']]

  #### Run PPO step
  stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
  ppo_trainer.log_stats(stats, batch, rewards)

  if epoch % 10 == 0:
    !rm -rf "/content/drive/MyDrive/checkpoints/text-ads-generation/ppo-v1-"*
    save_path = f'/content/drive/MyDrive/checkpoints/text-ads-generation/ppo-v1-{epoch}'
    !mkdir "{save_path}"

    ppo_trainer._save_pretrained(save_path)
    print(f'Saved model to {save_path}')


[34m[1mwandb[0m: Currently logged in as: [33mcodescv[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/59 [00:00<?, ?it/s]

You're using a GemmaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Compiling self diversity, this should only happen the first time or when the input shapes change: Tensor("emb:0", shape=(10, 256), dtype=float32) Tensor("similarity_threshold:0", shape=(), dtype=float32)
Compiling self diversity, this should only happen the first time or when the input shapes change: Tensor("emb:0", shape=(9, 256), dtype=float32) Tensor("similarity_threshold:0", shape=(), dtype=float32)
Compiling self diversity, this should only happen the first time or when the input shapes change: Tensor("emb:0", shape=(12, 256), dtype=float32) Tensor("similarity_threshold:0", shape=(), dtype=float32)




Saved model to /content/drive/MyDrive/checkpoints/text-ads-generation/ppo-v1-0
Compiling self diversity, this should only happen the first time or when the input shapes change: Tensor("emb:0", shape=(13, 256), dtype=float32) Tensor("similarity_threshold:0", shape=(), dtype=float32)
Compiling self diversity, this should only happen the first time or when the input shapes change: Tensor("emb:0", shape=(15, 256), dtype=float32) Tensor("similarity_threshold:0", shape=(), dtype=float32)
Compiling self diversity, this should only happen the first time or when the input shapes change: Tensor("emb:0", shape=(7, 256), dtype=float32) Tensor("similarity_threshold:0", shape=(), dtype=float32)
Saved model to /content/drive/MyDrive/checkpoints/text-ads-generation/ppo-v1-10
Compiling self diversity, this should only happen the first time or when the input shapes change: Tensor("emb:0", shape=(14, 256), dtype=float32) Tensor("similarity_threshold:0", shape=(), dtype=float32)
Compiling self diversity, 

# Prediction using the PPO model

In [18]:
from transformers import AutoModelForCausalLM
from peft import PeftModel, AutoPeftModelForCausalLM
import torch
model_path = '/content/drive/MyDrive/checkpoints/text-ads-generation/ppo-v1'

model = AutoPeftModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(model_path)
model_summary(model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter: ['default']
device: cuda:0


Unnamed: 0,name,shape,dtype,trainable
0,base_model.model.model.embed_tokens.weight,"(256000, 2048)",torch.bfloat16,False
1,base_model.model.model.layers.0.self_attn.q_pr...,"(2048, 2048)",torch.bfloat16,False
2,base_model.model.model.layers.0.self_attn.q_pr...,"(8, 2048)",torch.bfloat16,False
3,base_model.model.model.layers.0.self_attn.q_pr...,"(2048, 8)",torch.bfloat16,False
4,base_model.model.model.layers.0.self_attn.k_pr...,"(256, 2048)",torch.bfloat16,False
...,...,...,...,...
411,base_model.model.model.layers.17.mlp.down_proj...,"(8, 16384)",torch.bfloat16,False
412,base_model.model.model.layers.17.mlp.down_proj...,"(2048, 8)",torch.bfloat16,False
413,base_model.model.model.layers.17.input_layerno...,"(2048,)",torch.bfloat16,False
414,base_model.model.model.layers.17.post_attentio...,"(2048,)",torch.bfloat16,False


In [21]:
prompt = """### Human: Generate 10 text ads for the product with the following information.
app_name: Bauldur's Gate 3
category: Game -> RPG Game
description: Gather your party, and return to the Forgotten Realms in a tale of fellowship and betrayal, sacrifice and survival, and the lure of absolute power.
Mysterious abilities are awakening inside you, drawn from a Mind Flayer parasite planted in your brain. Resist, and turn darkness against itself. Or embrace corruption, and become ultimate evil.
From the creators of Divinity: Original Sin 2 comes a next-generation RPG, set in the world of Dungeons and Dragons

### Assistant:
"""
print('prompt:', prompt, sep='\n')
print('Model output:', predict(model, tokenizer, prompt), sep='\n')

prompt:
### Human: Generate 10 text ads for the product with the following information.
app_name: Bauldur's Gate 3
category: Game -> RPG Game
description: Gather your party, and return to the Forgotten Realms in a tale of fellowship and betrayal, sacrifice and survival, and the lure of absolute power.
Mysterious abilities are awakening inside you, drawn from a Mind Flayer parasite planted in your brain. Resist, and turn darkness against itself. Or embrace corruption, and become ultimate evil.
From the creators of Divinity: Original Sin 2 comes a next-generation RPG, set in the world of Dungeons and Dragons

### Assistant:

Model output:
Unleash Your Inner Monster
Mind Flayer'S Tale Unfolds
Forbidden Powers Await
Embrace Darkness or Corruption
Final Fantasy Meets DnD
RPG Redefined: BGT III
Gather Your Party
Forgotten Realms Restored
Fellowship and Invisibilities
Survival Above All
