In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
!nvidia-smi

In [1]:
import copy
from dataclasses import dataclass, field
from typing import Optional

import peft
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, pipeline


class ScriptArguments:
    """Static configuration for this notebook: which checkpoints to load and score."""

    # Earlier single-checkpoint setting, kept for reference:
    # model_name = "edbeeching/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment"

    # Classifier checkpoints used to score generated text.
    sentiment_models = [
        "lvwerra/distilbert-imdb",
        "distilbert-base-uncased-finetuned-sst-2-english",
        "martin-ha/toxic-comment-model",
        "valurank/distilbert-quality",
    ]

    # PEFT adapter checkpoints whose weights are compared / merged.
    model_names = [
        "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1.41e-05",
        "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1e-05",
    ]


script_args = ScriptArguments()

def load_model(peft_model_id):
    """Load a PEFT adapter together with the base causal LM named in its config.

    Args:
        peft_model_id: identifier passed to ``PeftConfig.from_pretrained`` and
            ``PeftModel.from_pretrained``.

    Returns:
        The ``PeftModel`` wrapping the base model, switched to eval mode.
    """
    adapter_config = PeftConfig.from_pretrained(peft_model_id)

    # The base model is loaded in 8-bit with automatic device placement
    # (a float16 load via torch_dtype was tried and left disabled).
    base_kwargs = {
        "return_dict": True,
        "load_in_8bit": True,
        "device_map": "auto",
    }
    backbone = AutoModelForCausalLM.from_pretrained(
        adapter_config.base_model_name_or_path, **base_kwargs
    )

    # Attach the LoRA adapter on top of the base model.
    adapted = PeftModel.from_pretrained(backbone, peft_model_id)
    adapted.eval()
    return adapted

# Load every adapter listed in script_args.model_names once, keyed by its model name.
dict_models_to_merge = {model_name: load_model(model_name) for model_name in script_args.model_names}

  from .autonotebook import tqdm as notebook_tqdm



Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues




In [2]:
# Use the tokenizer of the base model named in the first adapter's PEFT config.
tokenizer = AutoTokenizer.from_pretrained(
    PeftConfig.from_pretrained(script_args.model_names[0]).base_model_name_or_path)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [3]:
batch = tokenizer("I really enjoyed the slight hint towards", return_tensors="pt")


In [5]:
# Generate up to 50 new tokens from the same prompt (`batch`) with every loaded model,
# under CUDA autocast, and keep the raw output token ids keyed by model name.
dict_outputs = {}
with torch.cuda.amp.autocast():
    for model_name, model in dict_models_to_merge.items():
        dict_outputs[model_name] = model.generate(**batch, max_new_tokens=50)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  attn_weights = torch.where(causal_mask, attn_weights, mask_value)
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [6]:
# Decode the first generated sequence of each model back to text, dropping special tokens.
dict_responses = {model_name: tokenizer.decode(output_tokens[0], skip_special_tokens=True) for model_name, output_tokens in dict_outputs.items()}
print(dict_responses)

{'alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1.41e-05': 'I really enjoyed the slight hint towards the wonderful and beautiful story. The characters are very well-written and the story is very well-written. The characters are very well-written and the story is very well-told. The characters are very well-told and the story is very', 'alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1e-05': 'I really enjoyed the slight hint towards the wonderful and beautiful story. The characters are very well-written and the story is very well-told. The characters are very well-told and the story is very well-told. The story is a great story and it is a great story'}


In [8]:
from collections import OrderedDict

In [9]:
dict_outputs = OrderedDict({})

In [12]:
# Pick GPU index 0 when CUDA is available, otherwise the CPU.
device = 0 if torch.cuda.is_available() else "cpu"  # to avoid a `pipeline` bug
print(f"Load sentiment model with {script_args.sentiment_models}")
# One "sentiment-analysis" pipeline per entry of script_args.sentiment_models,
# all placed on the same device, in the same order as the config list.
sentiment_pipes = [
    pipeline("sentiment-analysis", model=sentiment_model, device=device)
    for sentiment_model in script_args.sentiment_models]

Load sentiment model with ['lvwerra/distilbert-imdb', 'distilbert-base-uncased-finetuned-sst-2-english', 'martin-ha/toxic-comment-model', 'valurank/distilbert-quality']


In [33]:
def average_rewards(rewards):
    """Return the arithmetic mean of every inner sequence in ``rewards``.

    NOTE: a later cell in this notebook redefines ``average_rewards`` for nested
    pipeline outputs; whichever definition ran last is the one in effect.
    """
    means = []
    for scores in rewards:
        means.append(sum(scores) / len(scores))
    return means


In [44]:
def average_rewards(rewards):
    """Average the per-label scores of several identically shaped reward structures.

    Args:
        rewards: non-empty iterable of reward structures. Each structure is a
            list of entries, and every entry is indexed as ``entry[i][j]`` to
            reach a ``{"label": ..., "score": ...}`` dict. All structures must
            list the same labels in the same positions.

    Returns:
        A deep copy of the first structure in which every ``"score"`` is the
        mean of the corresponding scores across all structures. The inputs are
        not modified.

    Raises:
        ValueError: if ``rewards`` is empty (there is nothing to average).
        AssertionError: if two structures disagree on a label at the same position.
    """
    rewards = list(rewards)
    if not rewards:
        raise ValueError("average_rewards() needs at least one reward structure")

    # Accumulate into a deep copy so the caller's first structure stays untouched.
    avg_reward = copy.deepcopy(rewards[0])
    for reward in rewards[1:]:
        for acc_entry, entry in zip(avg_reward, reward):
            for i, group in enumerate(entry):
                for j, item in enumerate(group):
                    assert acc_entry[i][j]["label"] == item["label"], (
                        "label mismatch while averaging rewards"
                    )
                    acc_entry[i][j]["score"] = acc_entry[i][j]["score"] + item["score"]

    # Turn the accumulated sums into means.
    count = len(rewards)
    for acc_entry in avg_reward:
        for group in acc_entry:
            for item in group:
                item["score"] = item["score"] / count
    return avg_reward

In [45]:
reward = average_rewards(rewards)

In [46]:
reward

[[[{'label': 'NEGATIVE', 'score': -2.6297930081685386},
   {'label': 'POSITIVE', 'score': 2.885819753011068}]],
 [[{'label': 'NEGATIVE', 'score': -4.354640483856201},
   {'label': 'POSITIVE', 'score': 4.732121626536052}]],
 [[{'label': 'non-toxic', 'score': 3.590243419011434},
   {'label': 'toxic', 'score': -3.531656821568807}]],
 [[{'label': 'bad', 'score': 2.913426240285238},
   {'label': 'medium', 'score': -3.8566678365071616},
   {'label': 'good', 'score': 0.32789580275615055}]]]

In [14]:
# Collect the decoded responses in dict order so they line up with the model names below.
responses_text = list(dict_responses.values())

# Score every response with get_rewards (one result per response).
rewards = get_rewards(responses_text)

# Print model name, generated text and raw reward side by side.
for model_name, text, reward in zip(dict_models_to_merge.keys(), responses_text, rewards):
    print("model:", model_name)
    print("text:", text)
    print("reward:", reward)
    print("\n")

model: alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1.41e-05
text: I really enjoyed the slight hint towards the wonderful and beautiful story. The characters are very well-written and the story is very well-written. The characters are very well-written and the story is very well-told. The characters are very well-told and the story is very
reward: [[[{'label': 'NEGATIVE', 'score': -2.620770215988159}, {'label': 'POSITIVE', 'score': 2.8767237663269043}]], [[{'label': 'NEGATIVE', 'score': -4.3469014167785645}, {'label': 'POSITIVE', 'score': 4.727553367614746}]], [[{'label': 'non-toxic', 'score': 3.601452589035034}, {'label': 'toxic', 'score': -3.5355215072631836}]], [[{'label': 'bad', 'score': 2.964066505432129}, {'label': 'medium', 'score': -3.900128126144409}, {'label': 'good', 'score': 0.32938677072525024}]]]


model: alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1e-05
text: I really enjoyed the slight hint towards the wonderful and beautiful story. 

In [17]:
def average_weights(input_models, coefficients):
    """Compute a weighted sum of the state dicts of several models.

    Args:
        input_models: iterable of models exposing ``state_dict()``.
        coefficients: one scalar weight per model, in the same order. Passing
            ``1 / N`` for each of ``N`` models yields the plain average.

    Returns:
        An ``OrderedDict`` mapping every state-dict key of the first model to
        ``sum_i coefficients[i] * state_dict_i[key]``.

    Raises:
        ValueError: if the number of coefficients differs from the number of models.
        KeyError: if a later model has a state-dict key the first model lacks.
    """
    models = list(input_models)
    coefficients = list(coefficients)
    if len(models) != len(coefficients):
        raise ValueError(
            f"got {len(models)} models but {len(coefficients)} coefficients"
        )

    weights_averaged = OrderedDict()
    # Wrap the list itself (not the enumerate object) so tqdm can report a total.
    for index, (current_model, coefficient) in enumerate(
        zip(tqdm(models, leave=False), coefficients)
    ):
        current_weights = current_model.state_dict()
        for key, value in current_weights.items():
            if index == 0:
                # The first model initialises each accumulator with a fresh tensor.
                weights_averaged[key] = coefficient * value
            else:
                weights_averaged[key] += coefficient * value

    return weights_averaged

# Uniform average: every loaded model gets the same coefficient 1/N.
weights_averaged = average_weights(dict_models_to_merge.values(), [1/len(dict_models_to_merge)]*len(dict_models_to_merge))

                  

In [23]:
# Register the model held in `base_model` under the key "wa".
# NOTE(review): no visible cell loads `weights_averaged` into `base_model`; the only
# visible assignment makes it the first entry of dict_models_to_merge. Confirm that
# "wa" really carries the averaged weights and is not just the first model again.
dict_models_to_merge["wa"] = base_model

In [29]:
dict_models_to_merge.keys()

dict_keys(['alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1.41e-05', 'alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1e-05', 'wa'])

In [26]:
# Regenerate from the same prompt with every entry of dict_models_to_merge
# (up to 50 new tokens each, under CUDA autocast), keeping insertion order.
dict_outputs = OrderedDict({})
with torch.cuda.amp.autocast():
    for model_name, model in dict_models_to_merge.items():
        dict_outputs[model_name] = model.generate(**batch, max_new_tokens=50)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [31]:
dict_responses["wa"]

'I really enjoyed the slight hint towards the wonderful and beautiful story. The characters are very well-written and the story is very well-told. The characters are very well-told and the story is very well-told. The characters are very well-told and the story is very'

In [56]:
# Tokenize a fixed prompt as PyTorch tensors.
batch = tokenizer("I really enjoyed the slight hint towards", return_tensors="pt")

# For every model, generate and score via get_prediction_rewards, then print the first result.
# NOTE(review): get_prediction_rewards is not defined in any visible cell — confirm where
# it comes from before running on a fresh kernel.
for model_name, model in dict_models_to_merge.items():
    responses_text, rewards, reward = get_prediction_rewards(model, batch["input_ids"])
    print("model:", model_name)
    print("text:", responses_text[0])
    print("reward:", rewards[0])
    print("\n")

  query_tensor = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


model: alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1.41e-05
text: I really enjoyed the slight hint towards the wonderful and beautiful music. The music is very well-crafted and the story is very well-told. The music is a great story and the story is very well-told. The music is a great story and the story is very well-
reward: [[[{'label': 'NEGATIVE', 'score': -2.6299073696136475}, {'label': 'POSITIVE', 'score': 2.8892428874969482}]], [[{'label': 'NEGATIVE', 'score': -4.351489067077637}, {'label': 'POSITIVE', 'score': 4.72714376449585}]], [[{'label': 'non-toxic', 'score': 3.627135992050171}, {'label': 'toxic', 'score': -3.5429253578186035}]], [[{'label': 'bad', 'score': 3.1329898834228516}, {'label': 'medium', 'score': -3.6754684448242188}, {'label': 'good', 'score': 0.00465099373832345}]]]




The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


model: alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1e-05
text: I really enjoyed the slight hint towards the wonderful and beautiful story. The beautiful and beautiful story is a wonderful and beautiful story. The beautiful and beautiful story is a wonderful and beautiful story. The beautiful and beautiful story is a wonderful and beautiful story. The beautiful and beautiful story is a wonderful
reward: [[[{'label': 'NEGATIVE', 'score': -2.6820430755615234}, {'label': 'POSITIVE', 'score': 2.9311652183532715}]], [[{'label': 'NEGATIVE', 'score': -4.358129978179932}, {'label': 'POSITIVE', 'score': 4.720330238342285}]], [[{'label': 'non-toxic', 'score': 3.64192271232605}, {'label': 'toxic', 'score': -3.545462131500244}]], [[{'label': 'bad', 'score': 3.291238784790039}, {'label': 'medium', 'score': -3.6427667140960693}, {'label': 'good', 'score': -0.1873566061258316}]]]


model: wa
text: I really enjoyed the slight hint towards the wonderful and beautiful music. The music 

In [58]:
    base_model = list(dict_models_to_merge.values())[0]

In [60]:
base_model_copy = copy.deepcopy(base_model)

In [57]:

def get_query_tensors(bs=16, seed=None):
    """Sample ``bs`` short prompts (token ids) from the IMDB test split.

    Reviews with more than 200 characters are kept, tokenized with the
    notebook-level ``tokenizer`` and truncated to a length drawn from
    ``LengthSampler(2, 8)``; ``bs`` rows are then sampled at random.

    Args:
        bs: number of prompts to sample.
        seed: optional ``random_state`` for the row sampling. ``None`` (the
            default) keeps the previous unseeded behaviour.
            NOTE(review): this does not seed ``LengthSampler`` — confirm how
            its randomness is controlled if full reproducibility is needed.

    Returns:
        A list with one entry of token ids per sampled row.
    """
    ds = load_dataset("imdb", split="test")
    ds = ds.filter(lambda x: len(x["text"]) > 200, batched=False)

    input_min_text_length = 2
    input_max_text_length = 8
    input_size = LengthSampler(input_min_text_length, input_max_text_length)

    def tokenize(sample):
        # Keep only the first few tokens of the review as the query.
        sample["input_ids"] = tokenizer.encode(sample["text"])[: input_size()]
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample

    ds = ds.map(tokenize, batched=False)

    # Switch straight to the pandas format so that ds[:] can be sampled by row
    # (the former intermediate "torch" format was immediately overridden).
    ds.set_format("pandas")
    df_batch = ds[:].sample(bs, random_state=seed)
    query_tensors = df_batch['input_ids'].tolist()
    return query_tensors

# Draw 16 prompts once and evaluate every model on the same prompts.
query_tensors = get_query_tensors(bs=16)
# NOTE(review): get_prediction_rewards is not defined in any visible cell — confirm its origin.
for model_name, model in dict_models_to_merge.items():
    responses_text, rewards, reward = get_prediction_rewards(model, query_tensors)
    # Only the first response and its reward are printed for each model.
    print("model:", model_name)
    print("text:", responses_text[0])
    print("reward:", rewards[0])
    print("\n")

Found cached dataset imdb (/home/rame/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)
Loading cached processed dataset at /home/rame/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-7b9524f2f05076d7.arrow
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.                                                                          
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may

model: alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1.41e-05
text: I was very impressed with this film. It is a great story, and a great film. It is a great story, and a great film. It is a great story, and a great story. It is a great story, and a great story.
reward: [[[{'label': 'NEGATIVE', 'score': -2.665740489959717}, {'label': 'POSITIVE', 'score': 2.9380452632904053}]], [[{'label': 'NEGATIVE', 'score': -4.344231605529785}, {'label': 'POSITIVE', 'score': 4.735471248626709}]], [[{'label': 'non-toxic', 'score': 3.6214146614074707}, {'label': 'toxic', 'score': -3.541447877883911}]], [[{'label': 'bad', 'score': 2.7662408351898193}, {'label': 'medium', 'score': -3.089421033859253}, {'label': 'good', 'score': -0.29634371399879456}]]]




The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

model: alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1e-05
text: I was a great experience. I loved the film and I am so glad it was a great experience. I am so glad I have found this film. It is a great story and a great film. It is a great story and a great film. It
reward: [[[{'label': 'NEGATIVE', 'score': -2.615633726119995}, {'label': 'POSITIVE', 'score': 2.879835605621338}]], [[{'label': 'NEGATIVE', 'score': -4.297640323638916}, {'label': 'POSITIVE', 'score': 4.662707805633545}]], [[{'label': 'non-toxic', 'score': 3.56888484954834}, {'label': 'toxic', 'score': -3.5238912105560303}]], [[{'label': 'bad', 'score': 3.2869949340820312}, {'label': 'medium', 'score': -2.9423885345458984}, {'label': 'good', 'score': -0.8983559608459473}]]]




The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

model: wa
text: I was very impressed with this film. It is a great story, and a great film. It is a great story, and a great film. It is a great story, and a great story. It is a great story, and a great story.
reward: [[[{'label': 'NEGATIVE', 'score': -2.665740489959717}, {'label': 'POSITIVE', 'score': 2.9380452632904053}]], [[{'label': 'NEGATIVE', 'score': -4.344231605529785}, {'label': 'POSITIVE', 'score': 4.735471248626709}]], [[{'label': 'non-toxic', 'score': 3.6214146614074707}, {'label': 'toxic', 'score': -3.541447877883911}]], [[{'label': 'bad', 'score': 2.7662408351898193}, {'label': 'medium', 'score': -3.089421033859253}, {'label': 'good', 'score': -0.29634371399879456}]]]




In [32]:
# Decode the first generated sequence of each model (special tokens dropped).
dict_responses = {
    model_name: tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    for model_name, output_tokens in dict_outputs.items()
}
# Score the decoded texts and print them next to the model they came from.
responses_text = list(dict_responses.values())
rewards = get_rewards(responses_text)
for model_name, text, reward in zip(dict_models_to_merge.keys(), responses_text, rewards):
    print("model:", model_name)
    print("text:", text)
    print("reward:", reward)
    print("\n")

model: alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1.41e-05
text: I really enjoyed the slight hint towards the wonderful and beautiful story. The characters are very well-written and the story is very well-told. The characters are very well-told and the story is very well-told. The characters are very well-told and the story is very
reward: [[[{'label': 'NEGATIVE', 'score': -2.620891571044922}, {'label': 'POSITIVE', 'score': 2.8771331310272217}]], [[{'label': 'NEGATIVE', 'score': -4.352511405944824}, {'label': 'POSITIVE', 'score': 4.729320526123047}]], [[{'label': 'non-toxic', 'score': 3.5841763019561768}, {'label': 'toxic', 'score': -3.5295770168304443}]], [[{'label': 'bad', 'score': 2.7189548015594482}, {'label': 'medium', 'score': -3.8796629905700684}, {'label': 'good', 'score': 0.5206950902938843}]]]


model: alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1e-05
text: I really enjoyed the slight hint towards the wonderful and beautiful story. The c

In [25]:
dict_responses

{}

In [None]:
dic

In [16]:
from tqdm import tqdm

In [10]:
def get_rewards(responses_text, pipes=None):
    """Score every response with every sentiment pipeline.

    Args:
        responses_text: iterable of response strings.
        pipes: optional sequence of callables invoked as ``pipe(text, **kwargs)``.
            Defaults to the notebook-level ``sentiment_pipes``.

    Returns:
        A list with one entry per response; each entry is a list holding the
        raw output of every pipeline, in the order of ``pipes``.
    """
    if pipes is None:
        pipes = sentiment_pipes

    # Ask for the scores of all labels, without a post-processing function.
    # NOTE(review): newer transformers releases may deprecate `return_all_scores`
    # in favour of `top_k`; the output nesting can differ — confirm before switching.
    sent_kwargs = {"return_all_scores": True, "function_to_apply": "none", "batch_size": 1}

    return [
        [sentiment_pipe(response_text, **sent_kwargs) for sentiment_pipe in pipes]
        for response_text in responses_text
    ]

In [None]:
get_rewards(responses_text)

In [49]:
from datasets import load_dataset
from trl.core import LengthSampler

In [50]:
def get_query_tensors(bs=16, seed=None):
    """Sample ``bs`` short prompts (token ids) from the IMDB test split.

    Reviews with more than 200 characters are kept, tokenized with the
    notebook-level ``tokenizer`` and truncated to a length drawn from
    ``LengthSampler(2, 8)``; ``bs`` rows are then sampled at random.

    Args:
        bs: number of prompts to sample.
        seed: optional ``random_state`` for the row sampling. ``None`` (the
            default) keeps the previous unseeded behaviour.
            NOTE(review): this does not seed ``LengthSampler`` — confirm how
            its randomness is controlled if full reproducibility is needed.

    Returns:
        A list with one entry of token ids per sampled row.
    """
    ds = load_dataset("imdb", split="test")
    ds = ds.filter(lambda x: len(x["text"]) > 200, batched=False)

    input_min_text_length = 2
    input_max_text_length = 8
    input_size = LengthSampler(input_min_text_length, input_max_text_length)

    def tokenize(sample):
        # Keep only the first few tokens of the review as the query.
        sample["input_ids"] = tokenizer.encode(sample["text"])[: input_size()]
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample

    ds = ds.map(tokenize, batched=False)

    # Switch straight to the pandas format so that ds[:] can be sampled by row
    # (the former intermediate "torch" format was immediately overridden).
    ds.set_format("pandas")
    df_batch = ds[:].sample(bs, random_state=seed)
    query_tensors = df_batch['input_ids'].tolist()
    return query_tensors

In [51]:
query_tensors = get_query_tensors(bs=16)

Found cached dataset imdb (/home/rame/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)
Map:  52%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                             | 12927/24872 [00:09<00:09, 1263.34 examples/s]Token indices sequence length is longer than the specified maximum sequence length for this model (2228 > 2048). Running this sequence through the model will result in indexing errors
                                                                                                                                                                                                                                                            

In [None]:
# Manual version of the batch sampling + generation steps.
# NOTE(review): `ds` and `model` are not assigned at top level in any visible cell
# (`ds` only exists inside get_query_tensors) — this cell presumably relies on
# leftover kernel state; confirm before running on a fresh kernel.
#### get a batch from the dataset
bs = 16
ds.set_format("pandas")
df_batch = ds[:].sample(bs)
query_tensors = df_batch['input_ids'].tolist()

# Generate one continuation per sampled query and keep both token ids and decoded text.
response_tensors = []
responses_text = []
for i in range(bs):
    # Add a batch dimension and move the query to the pipeline device.
    query_tensor = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)
    output = model.generate(input_ids=query_tensor).squeeze()
    response_tensors.append(output)
    response = tokenizer.decode(output)
    responses_text.append(response)

In [None]:
# Score the generated responses and print each text next to its raw reward.
rewards = get_rewards(responses_text)

for text, reward in zip(responses_text, rewards):
    print("text:", text)
    print("reward:", reward)    
    print("\n")

In [None]:
current_weights = model.state_dict()