In [1]:
import argparse
import copy
from collections import OrderedDict
from dataclasses import dataclass, field
from typing import Optional

import peft
import torch
from datasets import load_dataset
from peft import PeftConfig, PeftModel
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, pipeline
from trl.core import LengthSampler


  from .autonotebook import tqdm as notebook_tqdm



Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues


In [None]:
class ScriptArguments:
    """Default settings for a notebook run, stored as plain class attributes."""

    # Classifiers used to score generated text. The notebook builds one
    # sentiment-analysis pipeline per entry, in this order.
    sentiment_models = [
        "lvwerra/distilbert-imdb",
        "distilbert-base-uncased-finetuned-sst-2-english",
        "martin-ha/toxic-comment-model",
        "valurank/distilbert-quality",
    ]

    # PEFT adapters to load. Disabled candidates are kept as comments; each
    # "cli:" line records the training command noted for the entry above it.
    model_names = [
        "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1.41e-05",
        # "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1e-05",
        # "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-lr1.41e-05",
        #   cli: accelerate launch gpt-neo-20b_sentiment_peft.py --sentiment_model distilbert-base-uncased-finetuned-sst-2-english
        # "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-neg-lr1.41e-05",
        #   cli: accelerate launch gpt-neo-20b_sentiment_peft.py --sentiment_model distilbert-base-uncased-finetuned-sst-2-english --score_goal negative
        # "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-toxic-neg-lr1.41e-05",
        #   cli: accelerate launch gpt-neo-20b_sentiment_peft.py --sentiment_model martin-ha/toxic-comment-model --score_goal 1
        # "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-toxic-0",
        #   cli: accelerate launch gpt-neo-20b_sentiment_peft.py --sentiment_model martin-ha/toxic-comment-model --score_goal 0,
        # "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-1",
        #   cli: accelerate launch gpt-neo-20b_sentiment_peft.py --sentiment_model valurank/distilbert-quality --score_goal 1
        "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-2",
        #   cli: accelerate launch gpt-neo-20b_sentiment_peft.py --sentiment_model valurank/distilbert-quality --score_goal 2
    ]

    # Default for the --num_samples option built by get_args().
    num_samples = 160

def get_args(argv=None):
    """Parse the run settings from command-line style arguments.

    Defaults for every option come from the class attributes of
    ``ScriptArguments``.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``. Inside a notebook
            kernel ``sys.argv`` holds the kernel's own arguments, so pass an
            explicit list (e.g. ``[]``) there.

    Returns:
        argparse.Namespace with ``sentiment_models`` (list of str),
        ``model_names`` (list of str) and ``num_samples`` (int).
    """
    parser = argparse.ArgumentParser(description='Inference')
    parser.add_argument('--sentiment_models', type=str, nargs='+', default=ScriptArguments.sentiment_models)
    parser.add_argument('--model_names', type=str, nargs='+', default=ScriptArguments.model_names)
    parser.add_argument('--num_samples', type=int, default=ScriptArguments.num_samples)
    return parser.parse_args(argv)

def notebook_get_args():
    """Return a fresh ``ScriptArguments`` instance carrying the class-level defaults.

    Notebook counterpart of ``get_args()``: no command line is parsed.
    """
    notebook_args = ScriptArguments()
    return notebook_args

# Use the in-notebook defaults; get_args() above is the argparse-based alternative.
script_args = notebook_get_args()

In [2]:

class ScriptArguments:
    """Run settings for the two-model merge experiment (plain class attributes).

    This redefinition replaces any earlier ``ScriptArguments`` in the kernel,
    so it keeps the same attribute set (including ``num_samples``) to stay
    compatible with code that reads the defaults from the class.
    """

    # Classifiers used to score generated text, one pipeline per entry.
    sentiment_models = [
        "lvwerra/distilbert-imdb", "distilbert-base-uncased-finetuned-sst-2-english",
        "martin-ha/toxic-comment-model", "valurank/distilbert-quality"
    ]
    # PEFT adapters to load; commented entries are disabled candidates and
    # each "cli:" line records the command noted for the entry above it.
    model_names = [
        # "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1.41e-05",
        # "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-lr1e-05",
        "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-lr1.41e-05",
        # cli: accelerate launch gpt-neo-20b_sentiment_peft.py --sentiment_model distilbert-base-uncased-finetuned-sst-2-english
        # "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-neg-lr1.41e-05",
        # cli: accelerate launch gpt-neo-20b_sentiment_peft.py --sentiment_model distilbert-base-uncased-finetuned-sst-2-english --score_goal negative
        # "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-toxic-neg-lr1.41e-05",
        # cli: accelerate launch gpt-neo-20b_sentiment_peft.py --sentiment_model martin-ha/toxic-comment-model --score_goal 1
        "alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-2"
    ]
    # Same default as the earlier definition; read by get_args() as the
    # --num_samples default.
    num_samples = 160

# Rebind script_args to an instance of the ScriptArguments class defined in this cell.
script_args = ScriptArguments()

In [3]:
def load_model(peft_model_id):
    """Load a PEFT adapter together with the base causal LM it was trained on.

    Args:
        peft_model_id: Identifier passed to ``PeftConfig.from_pretrained`` and
            ``PeftModel.from_pretrained``.

    Returns:
        The ``PeftModel`` wrapping the base model, switched to eval mode.
    """
    adapter_config = PeftConfig.from_pretrained(peft_model_id)

    # The base checkpoint is the one named in the adapter's config. It is
    # requested in 8-bit with automatic device placement; a float16 load
    # (torch_dtype=torch.float16) was tried and left disabled.
    base_model = AutoModelForCausalLM.from_pretrained(
        adapter_config.base_model_name_or_path,
        return_dict=True,
        load_in_8bit=True,
        device_map="auto",
    )

    # Attach the LoRA adapter weights on top of the base model.
    adapted_model = PeftModel.from_pretrained(base_model, peft_model_id)
    adapted_model.eval()
    return adapted_model


def average_weights(input_models, coefficients):
    """Return the coefficient-weighted sum of several models' state dicts.

    For every key of the first model's ``state_dict()`` the result holds
    ``sum(coefficients[i] * model_i.state_dict()[key])``. The coefficients are
    used as given; they are not normalised here.

    Args:
        input_models: Iterable of objects exposing ``state_dict()``. All models
            are expected to share the first model's keys; a key that the first
            model lacks raises ``KeyError``.
        coefficients: One weight per model, in the same order.

    Returns:
        OrderedDict mapping each key to the combined value. The inputs'
        values are not modified (the first term is always a new object).

    Raises:
        ValueError: If the number of coefficients differs from the number of
            models.
    """
    # Materialise once so generic iterables (e.g. dict views) can be counted.
    models = list(input_models)
    coefficients = list(coefficients)
    if len(models) != len(coefficients):
        raise ValueError(
            f"expected one coefficient per model, got {len(coefficients)} "
            f"coefficients for {len(models)} models"
        )

    weights_averaged = OrderedDict()
    # `total` lets the progress bar show a proportion instead of a bare counter.
    for i, current_model in tqdm(enumerate(models), total=len(models), leave=False):
        for key, value in current_model.state_dict().items():
            if i == 0:
                weights_averaged[key] = coefficients[i] * value
            else:
                weights_averaged[key] += coefficients[i] * value

    return weights_averaged

def enrich_wa(dict_models_to_merge, coefficients=None):
    """Merge several models by weighted-averaging their weights.

    Args:
        dict_models_to_merge: Mapping of name -> model; the models' order in
            the mapping is the order the coefficients apply to.
        coefficients: One weight per model. ``None`` (default) gives every
            model the same weight ``1 / number_of_models``.

    Returns:
        The FIRST model of ``dict_models_to_merge`` with the averaged weights
        loaded into it. No copy is made: that model object is modified in
        place and returned.

    Raises:
        ValueError: If ``dict_models_to_merge`` is empty.
    """
    models = list(dict_models_to_merge.values())
    if not models:
        raise ValueError("dict_models_to_merge must contain at least one model")

    if coefficients is None:
        # Uniform average. The original iterated over an int here
        # (`for _ in len(...)`), which raised TypeError.
        coefficients = [1 / len(models) for _ in range(len(models))]

    weights_averaged = average_weights(models, coefficients)
    base_model = models[0]
    base_model.load_state_dict(weights_averaged, strict=True)
    return base_model




def get_samples_query_tensors():
    """Tokenize the two fixed demo prompts and return their ``input_ids``.

    Uses the notebook-global ``tokenizer``, so it must be called after the
    tokenizer has been created. No padding is requested from the tokenizer.
    """
    prompts = [
        "I really enjoyed the slight hint towards",
        "I really hated the horrible hint towards"
    ]
    encoded = tokenizer(prompts, return_tensors="pt")
    return encoded["input_ids"]




def predict(dict_models_to_merge, query_tensors, verbose=False):
    """Score every model on the same queries and collect the average rewards.

    Args:
        dict_models_to_merge: Mapping of display name -> model.
        query_tensors: Queries forwarded unchanged to ``get_prediction_rewards``.
        verbose: When true, print each model's name, average reward and every
            generated text with its reward.

    Returns:
        List with one average-reward entry per model, in mapping order.
    """
    averages = []
    for name, language_model in dict_models_to_merge.items():
        texts, per_text_rewards, mean_reward = get_prediction_rewards(language_model, query_tensors)
        averages.append(mean_reward)
        if not verbose:
            continue
        print("model:", name)
        print("avg reward:", mean_reward)
        for generated, score in zip(texts, per_text_rewards):
            print("text:", generated)
            print("reward:", score)
        print("\n")
    return averages



def get_imdb_query_tensors(bs=16, seed=None):
    """Build a random batch of short prompts cut from IMDB test reviews.

    Reviews whose text is longer than 200 characters are tokenized with the
    notebook-global ``tokenizer`` and truncated to a length drawn from
    ``LengthSampler(2, 8)``; ``bs`` of them are then sampled at random.

    Args:
        bs: Number of prompts to return.
        seed: Optional ``random_state`` for the row sampling, to make the
            selected reviews repeatable. ``None`` (default) keeps the original
            unseeded behaviour. It does not control the truncation lengths
            drawn by ``LengthSampler``.

    Returns:
        List of ``bs`` token-id sequences (the ``input_ids`` column of the
        sampled pandas rows).
    """
    ds = load_dataset("imdb", split="test")
    ds = ds.filter(lambda x: len(x["text"]) > 200, batched=False)

    input_min_text_length = 2
    input_max_text_length = 8
    input_size = LengthSampler(input_min_text_length, input_max_text_length)

    def tokenize(sample):
        # Keep only the first few tokens of the review as the prompt, and
        # store the matching text alongside it.
        sample["input_ids"] = tokenizer.encode(sample["text"])[: input_size()]
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample

    ds = ds.map(tokenize, batched=False)

    # Only the pandas format is needed: rows are sampled from a DataFrame.
    # (An earlier set_format(type="torch") call was immediately overridden.)
    ds.set_format("pandas")
    df_batch = ds[:].sample(bs, random_state=seed)
    query_tensors = df_batch['input_ids'].tolist()
    return query_tensors


In [4]:


# GPU index 0 when CUDA is available, otherwise "cpu". Used for the sentiment
# pipelines below and for the query tensors in get_prediction_rewards.
device = 0 if torch.cuda.is_available() else "cpu"
print(f"Load LMs with {script_args.model_names}")
# One loaded PEFT model per configured name, keyed by that name, in list order.
dict_models_to_merge = OrderedDict({model_name: load_model(model_name) for model_name in script_args.model_names})
# Tokenizer of the base model named in the FIRST adapter's PEFT config.
# NOTE(review): presumably every adapter shares that base model - confirm.
tokenizer = AutoTokenizer.from_pretrained(
    PeftConfig.from_pretrained(script_args.model_names[0]).base_model_name_or_path)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id

print(f"Load sentiment model with {script_args.sentiment_models}")
# One reward classifier pipeline per configured name; reward lists produced
# later follow this same order.
sentiment_pipes = [
    pipeline("sentiment-analysis", model=sentiment_model, device=device)
    for sentiment_model in script_args.sentiment_models]

# Both helpers read the global `tokenizer`, so they must run after it exists.
samples_query_tensors = get_samples_query_tensors()
imdb_query_tensors = get_imdb_query_tensors(bs=16)

Load LMs with ['alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-lr1.41e-05', 'alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-2']




Load sentiment model with ['lvwerra/distilbert-imdb', 'distilbert-base-uncased-finetuned-sst-2-english', 'martin-ha/toxic-comment-model', 'valurank/distilbert-quality']


Found cached dataset imdb (/home/rame/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0)
Loading cached processed dataset at /home/rame/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-771f297e2f2a4a0e.arrow
Loading cached processed dataset at /home/rame/.cache/huggingface/datasets/imdb/plain_text/1.0.0/d613c88cf8fa3bab83b4ded3713f1f74830d1100e171db75bbddb80b3345c9c0/cache-52416bdbc085e52d.arrow


In [5]:
def get_prediction_rewards(model, query_tensors):
    """Generate a continuation for every query and score it with all classifiers.

    Relies on the notebook globals ``tokenizer``, ``sentiment_pipes`` and
    ``device``.

    Args:
        model: Object exposing ``generate(input_ids=..., max_new_tokens=...,
            pad_token_id=...)``.
        query_tensors: Sized, indexable collection of token-id sequences (a
            tensor of rows, or a list of arrays/lists). Each item is sent
            through ``generate`` on its own as a batch of one.

    Returns:
        Tuple ``(responses_text, rewards, avg_reward)``:
        ``responses_text`` is the decoded full output per query;
        ``rewards`` holds, per response, one ``{label: score}`` dict per
        sentiment pipeline; ``avg_reward`` has the same per-pipeline layout
        with scores averaged over all responses (``[]`` when there are no
        queries).
    """
    def get_rewards(responses_text):
        # function_to_apply="none" asks the pipeline for untransformed scores
        # (see the transformers text-classification pipeline docs), and
        # return_all_scores=True returns a score for every label.
        sent_kwargs = {"return_all_scores": True, "function_to_apply": "none", "batch_size": 1}
        rewards = [
            [sentiment_pipe(response_text, **sent_kwargs) for sentiment_pipe in sentiment_pipes]
            for response_text in responses_text]
        return [transform_reward(reward) for reward in rewards]

    def transform_reward(reward):
        """Turn each pipeline's [[{label, score}, ...]] output into {label: score}."""
        d_reward = []
        for rew in reward:
            # A single input text must yield exactly one list of label scores.
            assert len(rew) == 1
            d_reward.append({r["label"]: r["score"] for r in rew[0]})
        return d_reward

    def average_rewards(rewards):
        """Average the per-label scores over all responses, pipeline by pipeline."""
        if not rewards:
            # Nothing to average; the original crashed iterating over None.
            return []
        # Deep copy so the running sums never alias the first response's dicts.
        avg_reward = copy.deepcopy(rewards[0])
        for reward in rewards[1:]:
            for a_dict_reward, r_dict_reward in zip(avg_reward, reward):
                for label in a_dict_reward:
                    a_dict_reward[label] = a_dict_reward[label] + r_dict_reward[label]

        for a_dict_reward in avg_reward:
            for label in a_dict_reward:
                a_dict_reward[label] = a_dict_reward[label] / len(rewards)
        return avg_reward

    responses_text = []
    for i in range(len(query_tensors)):
        # as_tensor accepts tensors, arrays and lists alike and avoids the
        # UserWarning that torch.tensor() emits when handed an existing tensor.
        query_tensor = torch.as_tensor(query_tensors[i]).unsqueeze(dim=0).to(device)
        generated = model.generate(
            input_ids=query_tensor, max_new_tokens=50, pad_token_id=tokenizer.eos_token_id)
        # Drop only the batch dimension (batch size is always one here).
        output = generated[0]
        responses_text.append(tokenizer.decode(output, skip_special_tokens=True))

    rewards = get_rewards(responses_text)
    avg_reward = average_rewards(rewards)
    return responses_text, rewards, avg_reward

In [6]:
# Generate from every loaded model on the two fixed prompts, printing each
# response with its rewards, and keep the per-model average rewards.
list_rewards_samples = predict(dict_models_to_merge, samples_query_tensors, verbose=True)

  query_tensor = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)
  attn_weights = torch.where(causal_mask, attn_weights, mask_value)


model: alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-lr1.41e-05
avg reward: [{'NEGATIVE': -2.3680447340011597, 'POSITIVE': 2.6450458765029907}, {'NEGATIVE': -4.182225227355957, 'POSITIVE': 4.52437949180603}, {'non-toxic': 3.4182801246643066, 'toxic': -3.454858183860779}, {'bad': 3.0607573986053467, 'medium': -2.8268789052963257, 'good': -0.7550765573978424}]
text: I really enjoyed the slight hint towards the wonderful and beautiful. The beautiful and beautiful is a wonderful and beautiful. I really enjoyed the beautiful and beautiful. I really enjoyed the beautiful and beautiful. I really enjoyed the beautiful and beautiful. I really enjoyed the beautiful and beautiful. I really enjoyed
reward: [{'NEGATIVE': -2.618964195251465, 'POSITIVE': 2.881850004196167}, {'NEGATIVE': -4.359358310699463, 'POSITIVE': 4.702208042144775}, {'non-toxic': 3.474475383758545, 'toxic': -3.48274564743042}, {'bad': 3.0476889610290527, 'medium': -3.3974971771240234, 'good': -0.2379468

In [7]:
# One line per model (in dict_models_to_merge order): its averaged reward dicts.
for rewards_samples in list_rewards_samples:
    print(rewards_samples)

[{'NEGATIVE': -2.3680447340011597, 'POSITIVE': 2.6450458765029907}, {'NEGATIVE': -4.182225227355957, 'POSITIVE': 4.52437949180603}, {'non-toxic': 3.4182801246643066, 'toxic': -3.454858183860779}, {'bad': 3.0607573986053467, 'medium': -2.8268789052963257, 'good': -0.7550765573978424}]
[{'NEGATIVE': -0.0041397809982299805, 'POSITIVE': -0.06043267250061035}, {'NEGATIVE': 0.2744317054748535, 'POSITIVE': 0.2846871614456177}, {'non-toxic': 3.005249261856079, 'toxic': -3.1387776136398315}, {'bad': 2.2686551213264465, 'medium': -3.126933217048645, 'good': 0.2602297365665436}]


In [None]:
# Evaluate only the "...distilbert-2" model on the IMDB prompts, under the short label "qual".
list_rewards_imdb = predict({"qual": dict_models_to_merge["alexrame/gpt-neo-125M-imdb-lora-adapter-merged-ppo-sentiment-distilbert-2"]}, imdb_query_tensors, verbose=True)

In [None]:
# Print the averaged IMDB rewards, one entry per evaluated model.
for rewards_imdb in list_rewards_imdb:
    print(rewards_imdb)

In [8]:
# Snapshot every model's weights before any merging. enrich_wa_states() loads
# the merged weights into the first model of dict_models_to_merge in place, so
# these deep copies are what keeps the original weights available for
# repeated merges with different coefficients.
list_states_dict = []
for current_model in dict_models_to_merge.values():
    current_weights = copy.deepcopy(current_model.state_dict())
    list_states_dict.append(current_weights)
    

In [9]:
# Peek at one tensor of the first snapshot (`wte` is presumably the token-embedding
# matrix - confirm); the displayed repr also shows its device and dtype.
list_states_dict[0]['base_model.model.transformer.wte.weight']

tensor([[ 0.1709, -0.7383,  0.4277,  ...,  0.0840,  0.5820, -0.3457],
        [ 0.2070, -0.6055,  0.4590,  ...,  0.1562,  0.4883, -0.2363],
        [ 0.2324, -0.6367,  0.3262,  ...,  0.2236,  0.7500, -0.2354],
        ...,
        [ 0.7734, -1.1406,  0.6523,  ...,  0.2832,  0.9258, -0.5547],
        [ 0.3906, -0.8438,  0.5117,  ...,  0.0148,  0.6992, -0.2383],
        [ 0.2734, -0.7148,  0.2949,  ...,  0.1748,  0.4043, -0.3105]],
       device='cuda:0', dtype=torch.float16)

In [10]:

def average_states_dict(list_states_dict, coefficients):
    """Return the coefficient-weighted sum of several state dicts.

    For every key of the first state dict the result holds
    ``sum(coefficients[i] * list_states_dict[i][key])``. The coefficients are
    used as given; they are not normalised here.

    Args:
        list_states_dict: Sequence of mappings key -> value (e.g. tensors).
            All are expected to share the first mapping's keys; a key that
            the first mapping lacks raises ``KeyError``.
        coefficients: One weight per state dict, in the same order.

    Returns:
        OrderedDict mapping each key to the combined value. The input values
        are not modified (the first term is always a new object).

    Raises:
        ValueError: If the number of coefficients differs from the number of
            state dicts.
    """
    if len(list_states_dict) != len(coefficients):
        raise ValueError(
            f"expected one coefficient per state dict, got {len(coefficients)} "
            f"coefficients for {len(list_states_dict)} state dicts"
        )

    weights_averaged = OrderedDict()
    for i, current_weights in enumerate(list_states_dict):
        for key, value in current_weights.items():
            if i == 0:
                weights_averaged[key] = coefficients[i] * value
            else:
                weights_averaged[key] += coefficients[i] * value
    return weights_averaged

def enrich_wa_states(list_states_dict, coefficients=None):
    """Load a weighted average of saved state dicts into the first loaded model.

    Args:
        list_states_dict: Sequence of state dicts to combine.
        coefficients: One weight per state dict. ``None`` (default) gives
            every state dict the same weight ``1 / len(list_states_dict)``.

    Returns:
        The first model of the notebook-global ``dict_models_to_merge`` with
        the averaged weights loaded into it. No copy is made: that model
        object is overwritten in place on every call.

    Raises:
        ValueError: If ``coefficients`` is ``None`` and ``list_states_dict``
            is empty.
    """
    if coefficients is None:
        # The default used to be forwarded as None and crashed on indexing;
        # fall back to a uniform average instead.
        count = len(list_states_dict)
        if count == 0:
            raise ValueError("list_states_dict must contain at least one state dict")
        coefficients = [1 / count] * count

    weights_averaged = average_states_dict(list_states_dict, coefficients)
    base_model = list(dict_models_to_merge.values())[0]
    base_model.load_state_dict(weights_averaged, strict=True)
    return base_model


In [11]:
# Merge with weight 0.3 on the first snapshot and 0.7 on the second. `wa` is the
# first model object of dict_models_to_merge with the merged weights loaded
# into it, not a new model.
wa = enrich_wa_states(list_states_dict, coefficients=[0.3, 0.7])


In [12]:
# Score the merged model on the two fixed prompts.
list_rewards_wa_samples = predict({"wa": wa}, samples_query_tensors, verbose=True)

  query_tensor = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)


model: wa
avg reward: [{'NEGATIVE': -2.4099957942962646, 'POSITIVE': 2.6818289756774902}, {'NEGATIVE': -4.221608638763428, 'POSITIVE': 4.549301624298096}, {'non-toxic': 3.129125714302063, 'toxic': -3.239298462867737}, {'bad': 2.641398787498474, 'medium': -2.841304361820221, 'good': -0.3426131308078766}]
text: I really enjoyed the slight hint towards the end of the film. The film was a very interesting and interesting film. The film was very interesting and interesting. The film was very interesting and interesting. The film was very interesting and interesting. The film was very interesting and interesting. The film
reward: [{'NEGATIVE': -2.3839497566223145, 'POSITIVE': 2.6594905853271484}, {'NEGATIVE': -4.217746257781982, 'POSITIVE': 4.57012414932251}, {'non-toxic': 3.6582894325256348, 'toxic': -3.54632830619812}, {'bad': 2.9145758152008057, 'medium': -4.4980363845825195, 'good': 0.8217939734458923}]
text: I really hated the horrible hint towards the end of the film. The film was a bi

In [13]:
# Same experiment with a 0.25 / 0.75 mix, recomputed from the saved snapshots
# in list_states_dict, then scored on the two fixed prompts.
wa = enrich_wa_states(list_states_dict, coefficients=[0.25, 0.75])
list_rewards_wa_samples = predict({"wa": wa}, samples_query_tensors, verbose=True)

  query_tensor = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)


model: wa
avg reward: [{'NEGATIVE': -2.1424390077590942, 'POSITIVE': 2.373133957386017}, {'NEGATIVE': -4.016600131988525, 'POSITIVE': 4.373375415802002}, {'non-toxic': 3.3106372356414795, 'toxic': -3.381856083869934}, {'bad': 2.9735552072525024, 'medium': -3.3110193014144897, 'good': -0.21162253618240356}]
text: I really enjoyed the slight hint towards the end of the film. The film was very well done and the film was very well presented. The film was well presented and the film was well presented. The film was well presented and the film was well presented. The film was well presented.
reward: [{'NEGATIVE': -2.4986557960510254, 'POSITIVE': 2.762079954147339}, {'NEGATIVE': -4.267558574676514, 'POSITIVE': 4.662430763244629}, {'non-toxic': 3.5925912857055664, 'toxic': -3.5325300693511963}, {'bad': 2.8268492221832275, 'medium': -4.111583709716797, 'good': 0.6059072017669678}]
text: I really hated the horrible hint towards the end of the film. The film was a lot of fun, and the acting was g

In [None]:
# NOTE(review): within the visible notebook, list_rewards_wa_imdb is only assigned
# in a later cell, so a top-to-bottom run on a fresh kernel raises NameError here.
list_rewards_wa_imdb

In [None]:
# Draw a larger IMDB prompt batch (160 prompts), replacing the 16-prompt batch created earlier.
imdb_query_tensors = get_imdb_query_tensors(bs=160)

In [None]:
# Rebuild the 0.25 / 0.75 merge and score it on the IMDB prompts
# (verbose is left at its default, so nothing is printed per response).
wa = enrich_wa_states(list_states_dict, coefficients=[0.25, 0.75])
list_rewards_wa_imdb = predict({"wa": wa}, imdb_query_tensors)