In [1]:
import argparse
from tqdm import tqdm
import copy
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, HfArgumentParser, pipeline
from transformers import LlamaForCausalLM, LlamaTokenizer
from collections import OrderedDict
from datasets import load_dataset
from trl.core import LengthSampler
import numpy as np

# GPU index 0 when CUDA is available, otherwise the string "cpu". Passed to the scoring
# pipelines and used to place the generation inputs.
device = 0 if torch.cuda.is_available() else "cpu"


class ScriptArguments:
    """Experiment defaults, exposed either through argparse or as a plain object.

    ``get_args`` uses the class attributes as command-line defaults, while
    ``notebook_get_args`` hands back an instance whose attributes can be overridden in place.
    """

    # Checkpoints loaded as "sentiment-analysis" pipelines to score the generations.
    sentiment_models = [
        "lvwerra/distilbert-imdb",
        "distilbert-base-uncased-finetuned-sst-2-english",
        "martin-ha/toxic-comment-model",
        "valurank/distilbert-quality",
        "OpenAssistant/reward-model-deberta-v3-large-v2",
        "OpenAssistant/reward-model-deberta-v3-base",
        # "pedropei/sentence-level-certainty",
        # "ChaiML/gpt2_base_retry_and_continue_5m_reward_model",
        # "ChaiML/3plus_stars_gpt2_reward"
        # "CogComp/bart-faithful-summary-detector"
    ]
    # Base checkpoint on which every adapter listed in ``peft_names`` is loaded.
    base_model_name = "decapoda-research/llama-7b-hf"
    # Adapter checkpoints whose weights are interpolated; the trailing comments record the
    # command lines noted by the author for each adapter.
    peft_names = [
        "tloen/alpaca-lora-7b",
        "alexrame/llama-7b-hf-ppo-sentiment-1"  # python3 gpt-llama-7b-multi-gpu.py --score_goal 1
        # "llama-7b-hf-ppo-sentiment-distilbert-base-ufs2e-1" # python3 gpt-llama-7b-multi-gpu.py --score_goal 1 --sentiment_model distilbert-base-uncased-finetuned-sst-2-english
        # "llama-7b-hf-ppo-sentiment-distilbert-quality-2"  #  python3 gpt-llama-7b-multi-gpu.py --sentiment_model valurank/distilbert-quality --score_goal 2
        # "alexrame/llama-7b-hf-ppo-sentiment-reward-model-dvb-0" # python3 gpt-llama-7b-multi-gpu.py --sentiment_model OpenAssistant/reward-model-deberta-v3-base --score_goal 0
        # "llama-7b-hf-ppo-sentiment-reward-model-dvlv-0" # python3 gpt-llama-7b-multi-gpu.py --sentiment_model OpenAssistant/reward-model-deberta-v3-large-v2 --score_goal 0
    ]

    # IMDB batch size; the setup cell treats -1 as "use the two hand-written prompts".
    num_samples = 160
    # Not read anywhere in the visible code.
    every = 0

    @staticmethod
    def get_args():
        """Parse the process command line, falling back to the class-level defaults.

        Returns:
            argparse.Namespace with ``sentiment_models``, ``base_model_name``, ``peft_names``,
            ``num_samples`` and ``every``.
        """
        defaults = ScriptArguments
        # (flag, add_argument keyword arguments), in the order the options are registered.
        option_specs = (
            ('--sentiment_models', dict(type=str, nargs='+', default=defaults.sentiment_models)),
            ('--base_model_name', dict(type=str, default=defaults.base_model_name)),
            ('--peft_names', dict(type=str, nargs='+', default=defaults.peft_names)),
            ('--num_samples', dict(type=int, default=defaults.num_samples)),
            ('--every', dict(type=int, default=defaults.every)),
        )
        parser = argparse.ArgumentParser(description='Inference')
        for flag, options in option_specs:
            parser.add_argument(flag, **options)
        return parser.parse_args()

    @staticmethod
    def notebook_get_args():
        """Return a fresh instance carrying the class-level defaults (no command-line parsing)."""
        return ScriptArguments()


class Instructions:
    # Instruction inserted into the prompt; Predictor.get_rewards also prepends it to the text
    # scored by the OpenAssistant reward models.
    instruction_llama = "Generate a movie review."
    # Prompt prefix placed before every query text.
    # NOTE(review): the continuation lines of this triple-quoted string are indented, so the
    # prompt contains four spaces before each "###" marker — confirm this matches the format
    # the adapters were trained with.
    prompt_llama = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
    ### Instruction: {instruction_llama}
    ### Response: """


class Predictor:
    """Generate continuations with a causal LM and score them with the reward pipelines.

    The methods read the module-level globals ``sentiment_pipes``, ``tokenizer`` and ``device``,
    which must be defined before generation or scoring is invoked.
    """

    @staticmethod
    def get_rewards(responses_text):
        """Score every text with every pipeline of the global ``sentiment_pipes``.

        Args:
            responses_text: decoded generations. Only the part after the last
                "### Response: " marker is scored.

        Returns:
            list: one entry per text, each a list holding one ``{label: score}`` dict per
            pipeline, in ``sentiment_pipes`` order.
        """
        # "function_to_apply": "none" asks the pipeline for unnormalised scores.
        sent_kwargs = {"return_all_scores": True, "function_to_apply": "none", "batch_size": 1}

        def apply_sentiment_pipe(sentiment_pipe, response_text):
            # OpenAssistant reward checkpoints are fed CLS + instruction + SEP + response.
            # NOTE(review): the pipeline's tokenizer presumably adds its own special tokens on
            # top of the ones concatenated here — confirm this is the intended input format.
            if sentiment_pipe.model.name_or_path.startswith("OpenAssistant"):
                response_text = sentiment_pipe.tokenizer.cls_token + Instructions.instruction_llama + sentiment_pipe.tokenizer.sep_token + response_text
            return sentiment_pipe(response_text, **sent_kwargs)

        responses_text = [
            response_text.split("### Response: ")[-1] for response_text in responses_text
        ]
        rewards = [
            [
                apply_sentiment_pipe(sentiment_pipe, response_text)
                for sentiment_pipe in sentiment_pipes
            ]
            for response_text in responses_text
        ]

        return [Predictor.transform_reward(reward) for reward in rewards]

    @staticmethod
    def transform_reward(reward):
        """Flatten raw pipeline outputs into one ``{label: score}`` dict per pipeline.

        Args:
            reward: list with one raw pipeline output per pipeline; each output must be a
                one-element list wrapping a list of ``{"label": ..., "score": ...}`` dicts.

        Returns:
            list of ``{label: score}`` dicts, in the same order as ``reward``.
        """
        d_reward = []
        for rew in reward:
            # Each pipeline was called on a single text, so exactly one result is expected.
            assert len(rew) == 1, "expected exactly one result per pipeline call"
            d_reward.append({r["label"]: r["score"] for r in rew[0]})
        return d_reward

    @staticmethod
    def average_rewards(rewards):
        """Average the per-label scores over all texts.

        Args:
            rewards: non-empty sequence as returned by ``get_rewards``.

        Returns:
            list of ``{label: mean score}`` dicts, one per pipeline. The input is not mutated.

        Raises:
            ValueError: if ``rewards`` is empty (there is nothing to average).
        """
        rewards = list(rewards)
        if not rewards:
            raise ValueError("average_rewards() requires at least one reward entry")

        # Accumulate into a deep copy of the first entry so the caller's data stays untouched.
        avg_reward = copy.deepcopy(rewards[0])
        for reward in rewards[1:]:
            for a_dict_reward, r_dict_reward in zip(avg_reward, reward):
                for label in a_dict_reward:
                    a_dict_reward[label] = a_dict_reward[label] + r_dict_reward[label]

        for a_dict_reward in avg_reward:
            for label in a_dict_reward:
                a_dict_reward[label] = a_dict_reward[label] / len(rewards)
        return avg_reward

    @staticmethod
    def get_prediction_rewards(model, query_tensors):
        """Generate one continuation per query and score all of them.

        Args:
            model: object exposing ``generate(input_ids=..., max_new_tokens=..., pad_token_id=...)``.
            query_tensors: sequence of token-id sequences, one per prompt.

        Returns:
            tuple ``(responses_text, rewards, avg_reward)``: the decoded generations, their
            per-text rewards and the rewards averaged over all texts.
        """
        responses_text = []
        # with torch.cuda.amp.autocast():
        for i in range(len(query_tensors)):
            # Queries are generated one at a time (batch dimension of size 1).
            query_tensor = torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device)
            output = model.generate(
                input_ids=query_tensor, max_new_tokens=60, pad_token_id=tokenizer.eos_token_id
            ).squeeze()
            # Only the decoded text is kept; the output tensors are not retained.
            responses_text.append(tokenizer.decode(output, skip_special_tokens=True))

        rewards = Predictor.get_rewards(responses_text)
        avg_reward = Predictor.average_rewards(rewards)
        return responses_text, rewards, avg_reward

    @staticmethod
    def predict(dict_models_to_merge, query_tensors, verbose=False):
        """Run ``get_prediction_rewards`` for every model and print sample generations.

        Args:
            dict_models_to_merge: mapping from display name to model.
            query_tensors: prompts forwarded to ``get_prediction_rewards``.
            verbose: when False only the first generation of each model is printed;
                when True all of them are.

        Returns:
            list with the averaged rewards of each model, in mapping order.
        """
        list_rewards = []
        for model_name, model in dict_models_to_merge.items():
            responses_text, rewards, avg_reward = Predictor.get_prediction_rewards(
                model, query_tensors
            )
            print("=== For model:", model_name)
            for text, reward in zip(responses_text, rewards):
                print("=== text:", text, "reward:", reward)
                if not verbose:
                    break
            list_rewards.append(avg_reward)
        return list_rewards


class Samples:
    """Builders for the tokenised prompts fed to ``Predictor``.

    Both builders rely on the module-level ``tokenizer`` being defined.
    """

    @staticmethod
    def get_samples_query_tensors_llama():
        """Tokenise two hand-written review openings (one negative, one positive).

        Returns:
            list of int32 numpy arrays, one per prompt, each with its final token removed.
        """
        list_texts = [
            Instructions.prompt_llama + "I really hated the horrible hint towards",
            Instructions.prompt_llama + "I really enjoyed the slight hint towards"
        ]

        batch = [np.array(tokenizer.encode(text), dtype=np.int32) for text in list_texts]
        # Drop the last token of every encoding — presumably the EOS appended because the
        # tokenizer is loaded with add_eos_token=True; verify against the tokenizer in use.
        batch = [b[:-1] for b in batch]
        return batch

    @staticmethod
    def get_imdb_query_tensors_llama(bs=16):
        """Sample ``bs`` truncated IMDB test reviews, each prefixed with the instruction prompt.

        Args:
            bs: number of queries to draw at random (unseeded) from the tokenised dataset.

        Returns:
            list with one ``input_ids`` entry per sampled review.
        """
        ds = load_dataset("imdb", split="test")
        # Keep only reviews longer than 200 characters.
        ds = ds.filter(lambda x: len(x["text"]) > 200, batched=False)

        input_min_text_length = 2
        input_max_text_length = 8
        size_prompt_llama = len(tokenizer.encode(Instructions.prompt_llama))
        # Random truncation length: prompt length plus a few tokens of the review.
        # NOTE(review): size_prompt_llama counts whatever special tokens encode() adds to the
        # bare prompt, so the number of review tokens kept may be offset — confirm.
        input_size = LengthSampler(
            size_prompt_llama + input_min_text_length, size_prompt_llama + input_max_text_length
        )

        def tokenize(sample):
            sample["input_ids"] = tokenizer.encode(Instructions.prompt_llama +
                                                   sample["text"])[:input_size()]
            sample["query"] = tokenizer.decode(sample["input_ids"])
            return sample

        ds = ds.map(tokenize, batched=False)

        #### get a batch from the dataset
        # Only the pandas format is needed here; an earlier torch format setting was dropped
        # because this call replaced it before the dataset was ever read.
        ds.set_format("pandas")
        df_batch = ds[:].sample(bs)
        query_tensors = df_batch['input_ids'].tolist()
        return query_tensors


class Pipelines:
    """Factory for the scoring pipelines."""

    @staticmethod
    def load_pipes(sentiment_models):
        """Build one "sentiment-analysis" pipeline per checkpoint name on the global ``device``.

        Checkpoints whose name contains "ChaiML" are paired with the "gpt2" tokenizer; every
        other checkpoint uses its own tokenizer.

        Args:
            sentiment_models: iterable of checkpoint names.

        Returns:
            list of pipelines, in the same order as ``sentiment_models``.
        """
        print(f"Load sentiment model with {sentiment_models}")
        loaded_pipes = []
        for checkpoint in sentiment_models:
            tokenizer_name = "gpt2" if "ChaiML" in checkpoint else checkpoint
            loaded_pipes.append(
                pipeline(
                    "sentiment-analysis",
                    model=checkpoint,
                    device=device,
                    tokenizer=tokenizer_name,
                )
            )
        return loaded_pipes


class Tokenizer:
    """Factory for the tokenizer of the base language model."""

    @staticmethod
    def load_tokenizer(base_model_name):
        """Load the ``LlamaTokenizer`` of ``base_model_name``.

        The tokenizer is created with ``add_eos_token=True`` and left padding, and its
        ``pad_token_id`` is set to 0.
        """
        tokenizer_options = {"add_eos_token": True, "padding_side": "left"}
        llama_tokenizer = LlamaTokenizer.from_pretrained(base_model_name, **tokenizer_options)
        llama_tokenizer.pad_token_id = 0
        return llama_tokenizer


class Loader:
    """Helpers that load the base language model and the PEFT adapters on top of it."""

    @staticmethod
    def load_base_model(base_model_name):
        """Load ``base_model_name`` as a ``LlamaForCausalLM`` in 8-bit with ``device_map="auto"``."""
        return LlamaForCausalLM.from_pretrained(
            base_model_name,
            load_in_8bit=True,
            device_map="auto",
        )

    @staticmethod
    def load_peft_model(base_model, peft_name):
        """Wrap ``base_model`` with the adapter ``peft_name`` and switch it to eval mode."""
        adapted_model = PeftModel.from_pretrained(base_model, peft_name)
        adapted_model.eval()
        return adapted_model

# When True, only state-dict entries whose key contains "lora" are interpolated and the
# result is loaded non-strictly; when False every entry of the state dict is interpolated.
LOAD_ONLY_LORA = True


class WeightAverager:
    """Linear interpolation of the weights of several PEFT adapters sharing one base model."""

    @staticmethod
    def average_weights(base_model, peft_names, coefficients):
        """Compute ``sum_i coefficients[i] * state_dict(peft_names[i])``.

        Adapters are loaded one at a time through ``Loader.load_peft_model``; adapters whose
        coefficient is exactly zero are skipped without being loaded.

        Args:
            base_model: model passed to ``Loader.load_peft_model`` for every adapter.
            peft_names: adapter checkpoint names.
            coefficients: one scalar weight per adapter, aligned with ``peft_names``.

        Returns:
            OrderedDict mapping state-dict keys to the interpolated tensors (empty when every
            coefficient is zero).

        Raises:
            ValueError: if ``peft_names`` and ``coefficients`` differ in length.
            KeyError: if a later adapter has a key the first loaded adapter did not provide.
        """
        peft_names = list(peft_names)
        coefficients = list(coefficients)
        # zip() would silently drop the unmatched tail, hiding a configuration mistake.
        if len(peft_names) != len(coefficients):
            raise ValueError(
                f"Got {len(peft_names)} peft names but {len(coefficients)} coefficients"
            )

        weights_averaged = OrderedDict()
        is_first_loaded = True  # the first adapter actually loaded initialises the entries
        for peft_name, coefficient in zip(peft_names, coefficients):
            if coefficient == 0.:
                continue
            current_model = Loader.load_peft_model(base_model, peft_name)
            current_weights = current_model.state_dict()
            for key in list(current_weights.keys()):
                if LOAD_ONLY_LORA and "lora" not in key:
                    pass
                elif is_first_loaded:
                    weights_averaged[key] = coefficient * current_weights[key]
                else:
                    if key not in weights_averaged:
                        raise KeyError(
                            f"Key {key!r} of adapter {peft_name!r} is missing from the first adapter"
                        )
                    weights_averaged[key] += coefficient * current_weights[key]
                # Drop this dict's reference to the tensor as soon as it has been consumed.
                del current_weights[key]
            del current_model
            torch.cuda.empty_cache()
            is_first_loaded = False
        return weights_averaged

    @staticmethod
    def build_wa(base_model, peft_names, coefficients):
        """Return a PEFT model carrying the interpolated weights.

        The model is created from ``peft_names[0]`` and then receives the averaged state
        dict. Loading is strict only when ``LOAD_ONLY_LORA`` is False. While
        ``LOAD_ONLY_LORA`` is True, all-zero coefficients leave the ``peft_names[0]`` weights
        in place because there is nothing to load.
        """
        weights_averaged = WeightAverager.average_weights(
            base_model=base_model, peft_names=peft_names, coefficients=coefficients
        )
        torch.cuda.empty_cache()
        wa = Loader.load_peft_model(base_model, peft_names[0])
        wa.load_state_dict(weights_averaged, strict=not LOAD_ONLY_LORA)
        return wa

  from .autonotebook import tqdm as notebook_tqdm



Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues


In [2]:
# Start from the class defaults, then override for this notebook session.
script_args = ScriptArguments.notebook_get_args()
script_args.num_samples = -1  # -1 selects the two hand-written prompts in step 2

assert "llama" in script_args.base_model_name.lower()
print(f"Load LMs with {script_args.peft_names}")

# 1. load all key components (base LM, its tokenizer, then the scoring pipelines)
base_model = Loader.load_base_model(script_args.base_model_name)
tokenizer = Tokenizer.load_tokenizer(script_args.base_model_name)
sentiment_pipes = Pipelines.load_pipes(script_args.sentiment_models)

# 2. load dataset: a random IMDB batch, unless num_samples is the -1 sentinel
if script_args.num_samples != -1:
    query_tensors = Samples.get_imdb_query_tensors_llama(bs=script_args.num_samples)
else:
    query_tensors = Samples.get_samples_query_tensors_llama()



Load LMs with ['tloen/alpaca-lora-7b', 'alexrame/llama-7b-hf-ppo-sentiment-1']


Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:11<00:00,  2.95it/s]
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


Load sentiment model with ['lvwerra/distilbert-imdb', 'distilbert-base-uncased-finetuned-sst-2-english', 'martin-ha/toxic-comment-model', 'valurank/distilbert-quality', 'OpenAssistant/reward-model-deberta-v3-large-v2', 'OpenAssistant/reward-model-deberta-v3-base']


In [6]:
LOAD_ONLY_LORA = True

In [5]:
!nvidia-smi

Fri Mar 24 14:57:04 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.91.03    Driver Version: 460.91.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  RTX A6000           On   | 00000000:3B:00.0 Off |                  Off |
| 30%   31C    P8    28W / 300W |  11110MiB / 48685MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [4]:
# Inputs for the weight-averaging call in the next cell.
i = 0
coeff = 0.2
# 4.1 load wa
base_model=base_model
peft_names=script_args.peft_names
# Weight (1 - coeff) is paired with peft_names[0] and coeff with peft_names[1].
coefficients=[1 - coeff, coeff]

In [7]:
# Interpolate the adapters' weights (LoRA entries only while LOAD_ONLY_LORA is True); the
# surrounding nvidia-smi cells show GPU memory before and after this call.
weights_averaged = WeightAverager.average_weights(
    base_model=base_model, peft_names=peft_names, coefficients=coefficients
)

In [8]:
!nvidia-smi

Fri Mar 24 14:57:27 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.91.03    Driver Version: 460.91.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  RTX A6000           On   | 00000000:3B:00.0 Off |                  Off |
| 30%   33C    P2    76W / 300W |  11158MiB / 48685MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [None]:
del weights_averaged

In [None]:
torch.cuda.empty_cache()

In [None]:
!nvidia-smi

In [None]:
# 4.1 build the weight-averaged model; on a fresh kernel `wa` is not defined before this cell
wa = WeightAverager.build_wa(
    base_model=base_model, peft_names=peft_names, coefficients=coefficients
)

# 4.2 predict with wa
list_rewards_wa = Predictor.predict({"wa coeff " + str(coeff): wa}, query_tensors)
print("== coeff", coeff, list_rewards_wa[0], "\n")
# Create the results table on first use; keep earlier entries when the cell is re-run.
if "dict_coeff_to_reward" not in globals():
    dict_coeff_to_reward = {}
dict_coeff_to_reward[coeff] = list_rewards_wa[0]

# 4.3 del wa
del wa
torch.cuda.empty_cache()
wa = None

In [None]:
torch.cuda.empty_cache()

In [None]:
# NOTE(review): the "4.3 del wa" step earlier in the notebook leaves `wa = None`, so on a
# top-to-bottom run this repeats the prediction without a model — rebuild `wa` first or drop
# this cell.
list_rewards_wa = Predictor.predict({"wa coeff " + str(coeff): wa}, query_tensors)
print("== coeff", coeff, list_rewards_wa[0], "\n")
dict_coeff_to_reward[coeff] = list_rewards_wa[0]

In [None]:
current_model = dict_models_to_merge["tloen/alpaca-lora-7b"]

In [None]:
current_model = current_model.to("cpu")

In [None]:
current_weights = current_model.state_dict()

In [None]:
coeff = 0.2
# average_weights takes (base_model, peft_names, coefficients); it has no `list_models`
# parameter, so the adapters are passed by name together with their interpolation weights.
weights_averaged = WeightAverager.average_weights(
    base_model=base_model,
    peft_names=script_args.peft_names,
    coefficients=[1 - coeff, coeff],
)

In [None]:
weights_averaged = OrderedDict()
coeff = 0.2
coefficients=[1 - coeff, coeff]

In [None]:
i = 0

In [None]:
for key in list(current_weights.keys()):
    if i == 0:
        weights_averaged[key] = coefficients[i] * current_weights[key]
    else:
        weights_averaged[key] += coefficients[i] * current_weights[key]
    del current_weights[key]        

In [None]:
del wa
torch.cuda.empty_cache()
wa = None

In [None]:
    # Tokenizer.load_tokenizer applies the same settings: add_eos_token=True, left padding
    # and pad_token_id = 0.
    tokenizer = Tokenizer.load_tokenizer(script_args.base_model_name)

In [None]:
class Pipelines:
    """Factory for the scoring pipelines (redefinition of the class from the first cell)."""

    # Declared static so the helper works when called on the class and on an instance alike.
    @staticmethod
    def load_pipes(sentiment_models):
        """Build one "sentiment-analysis" pipeline per checkpoint name on the global ``device``.

        Checkpoints whose name contains "ChaiML" use the "gpt2" tokenizer; all others use
        their own tokenizer.

        Args:
            sentiment_models: iterable of checkpoint names.

        Returns:
            list of pipelines, in the same order as ``sentiment_models``.
        """
        print(f"Load sentiment model with {sentiment_models}")
        sentiment_pipes = [
            pipeline(
                "sentiment-analysis",
                model=sentiment_model,
                device=device,
                tokenizer=sentiment_model if "ChaiML" not in sentiment_model else "gpt2"
            ) for sentiment_model in sentiment_models
        ]
        return sentiment_pipes


In [None]:
sentiment_pipes = Pipelines.load_pipes(script_args.sentiment_models)

In [None]:
# Pipelines.load_pipes prints the same "Load sentiment model with ..." line and builds the
# same list of pipelines as the inline version it replaces.
sentiment_pipes = Pipelines.load_pipes(script_args.sentiment_models)

In [None]:
# NOTE(review): `get_dict_models` and `predict` are not defined anywhere in the visible
# notebook, and `get_samples_query_tensors_llama` only appears as a free function in a later
# cell. The class-based equivalents defined in the first cell are
# Samples.get_samples_query_tensors_llama and Predictor.predict — confirm where the free
# functions come from before relying on this cell.
dict_models_to_merge = get_dict_models()

# Same tokenizer settings as Tokenizer.load_tokenizer.
tokenizer = LlamaTokenizer.from_pretrained(
    script_args.base_model_name, add_eos_token=True, padding_side="left"
)
tokenizer.pad_token_id = 0

# Same pipeline construction as Pipelines.load_pipes.
print(f"Load sentiment model with {script_args.sentiment_models}")
sentiment_pipes = [
    pipeline(
        "sentiment-analysis",
        model=sentiment_model,
        device=device,
        tokenizer=sentiment_model if "ChaiML" not in sentiment_model else "gpt2"
    ) for sentiment_model in script_args.sentiment_models
]

# Score the hand-written prompts with every model and print the averaged rewards per model.
samples_query_tensors = get_samples_query_tensors_llama()
list_rewards_samples = predict(dict_models_to_merge, samples_query_tensors, verbose=True)
for rewards_samples in list_rewards_samples:
    print(rewards_samples)

In [None]:
!nvidia-smi

In [None]:
[print(a,b, "\n") for a, b in zip(script_args.sentiment_models, list_rewards_samples[1])]

In [None]:
script_args.num_samples=4

In [None]:
imdb_query_tensors = samples_query_tensors

In [None]:
list_rewards_imdb = predict(dict_models_to_merge, imdb_query_tensors, verbose=False)
# Baseline rewards keyed by interpolation coefficient: 0 for the first model, 1 for the second.
dict_coeff_to_reward = {}
dict_coeff_to_reward[0] = list_rewards_imdb[0]
# Test the number of evaluated models: the dict holds exactly one entry at this point, so
# checking its own length could never record the second model.
if len(list_rewards_imdb) > 1:
    dict_coeff_to_reward[1] = list_rewards_imdb[1]
for model_name, rewards_imdb in zip(dict_models_to_merge.keys(), list_rewards_imdb):
    print(model_name)
    print(rewards_imdb)

In [None]:
!nvidia-smi

In [None]:
coeff= 0.2

In [None]:
del wa
torch.cuda.empty_cache()
dict_models_to_merge = None

In [None]:
!nvidia-smi

In [None]:
dict_models_to_merge

In [None]:
# NOTE(review): `get_dict_models`, `enrich_wa_states` and `predict` are not defined in the
# visible notebook; the class-based code in the first cell offers WeightAverager.build_wa and
# Predictor.predict instead — confirm which API this cell is meant to use.
if dict_models_to_merge is None:
    dict_models_to_merge = get_dict_models()
# Weight (1 - coeff) for the first model and coeff for the second.
wa = enrich_wa_states(dict_models_to_merge, coefficients=[1 - coeff, coeff])
list_rewards_wa_imdb = predict({"wa": wa}, imdb_query_tensors)
print(coeff)
print(list_rewards_wa_imdb)
# Store the averaged rewards of the interpolated model under its coefficient.
dict_coeff_to_reward[coeff] = list_rewards_wa_imdb[0]
print("\n")

In [None]:
text = "Bullshit is what I know"
list_rewards = get_rewards([text])
[print(a,b, "\n") for a, b in zip(script_args.sentiment_models, list_rewards[0])]

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
reward_name = "OpenAssistant/reward-model-deberta-v3-large-v2"
# NOTE(review): this rebinds the global `tokenizer` (read by Predictor and Samples) to the
# reward model's tokenizer; reload the LLaMA tokenizer before generating again.
rank_model, tokenizer = AutoModelForSequenceClassification.from_pretrained(reward_name), AutoTokenizer.from_pretrained(reward_name)
# Example pair; the same instruction and response texts are used by the cells that follow.
question, answer = (
    "Explain nuclear fusion like I am five",
    "Nuclear fusion is the process by which two or more protons and neutrons combine to form a single nucleus. It is a very important process in the universe, as it is the source of energy for stars and galaxies. Nuclear fusion is also a key process in the production of energy for nuclear power plants",
)
inputs = tokenizer(question, answer, return_tensors='pt')


In [None]:
inputs["input_ids"]

In [None]:
instruction_llama = "Generate a movie review."
prompt_llama = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction: {instruction_llama}
### Response: """

In [None]:
prompt_llama

In [None]:
{key: value for key, value in sentiment_pipes[-2].__dict__.items() if type(value)}

In [None]:
instruction_llama = "Explain nuclear fusion like I am five"

In [None]:
sentiment_pipe = sentiment_pipes[-2]

In [None]:
response_text = "Nuclear fusion is the process by which two or more protons and neutrons combine to form a single nucleus. It is a very important process in the universe, as it is the source of energy for stars and galaxies. Nuclear fusion is also a key process in the production of energy for nuclear power plants"

In [None]:
response_text_clean = instruction_llama + response_text

In [None]:
sentiment_pipe.model.name_or_path.startswith("OpenAssistant")

In [None]:
response_text_clean = sentiment_pipe.tokenizer.cls_token + " " + instruction_llama + sentiment_pipe.tokenizer.sep_token + response_text

In [None]:
response_text_clean

In [None]:
sentiment_pipes[-2](response_text_clean)

In [None]:
inputs_to_rlhf = tokenizer.decode(inputs["input_ids"][0])

In [None]:
tokenizer

In [None]:
inputs_to_rlhf

In [None]:
import math

In [None]:
math.exp(2.2720)/(1+math.exp(2.2720))

In [None]:
score = rank_model(input_ids=inputs["input_ids"])
print(score)

In [None]:
list_rewards_imdb = predict(dict_models_to_merge, imdb_query_tensors[:3], verbose=True)

In [None]:
list_rewards_imdb[1]

In [None]:
import numpy as np

In [None]:
def get_samples_query_tensors():
    """Tokenise two bare review openings (no instruction prompt) with the global ``tokenizer``.

    Returns:
        list of int32 numpy arrays, one per opening; the last token id of each encoding is
        dropped before the array is built.
    """
    openings = (
        "I really enjoyed the slight hint towards",
        "I really hated the horrible hint towards",
    )
    encoded_batch = []
    for opening in openings:
        token_ids = tokenizer.encode(opening)
        encoded_batch.append(np.array(token_ids[:-1], dtype=np.int32))
    return encoded_batch

In [None]:
# ScriptArguments exposes `base_model_name`; it has no `model_names` attribute.
tokenizer = LlamaTokenizer.from_pretrained(script_args.base_model_name, add_eos_token=True, padding_side="left")

In [None]:
imdb_query_tensors

In [None]:
def get_prediction_rewards(model, query_tensors):
    """Generate one continuation per query with ``model`` and score all of them.

    Relies on the module-level ``tokenizer``, ``sentiment_pipes`` and ``device``.

    Args:
        model: object exposing ``generate``.
        query_tensors: sequence of token-id sequences, one per prompt.

    Returns:
        tuple ``(responses_text, rewards, avg_reward)``: decoded generations, per-text
        rewards (one ``{label: score}`` dict per pipeline) and the rewards averaged over texts.
    """

    def transform_reward(reward):
        # Collapse each raw pipeline output into a single {label: score} dict.
        per_pipeline = []
        for rew in reward:
            assert len(rew) == 1
            per_pipeline.append({r["label"]: r["score"] for r in rew[0]})
        return per_pipeline

    def get_rewards(responses_text):
        # Score the text following the last "### Response: " marker with every pipeline.
        sent_kwargs = {"return_all_scores": True, "function_to_apply": "none", "batch_size": 1}
        responses_text = [text.split("### Response: ")[-1] for text in responses_text]
        print("responses_text cleant", responses_text)
        raw_rewards = []
        for response_text in responses_text:
            raw_rewards.append(
                [sentiment_pipe(response_text, **sent_kwargs) for sentiment_pipe in sentiment_pipes]
            )
        return [transform_reward(raw) for raw in raw_rewards]

    def average_rewards(rewards):
        # Sum label-wise into a deep copy of the first entry, then divide by the entry count.
        totals = None
        for reward in rewards:
            if totals is None:
                totals = copy.deepcopy(reward)
                continue
            for total_scores, scores in zip(totals, reward):
                for label in total_scores:
                    total_scores[label] = total_scores[label] + scores[label]

        for total_scores in totals:
            for label in total_scores:
                total_scores[label] = total_scores[label] / len(rewards)
        return totals

    response_tensors = []
    responses_text = []
    # with torch.cuda.amp.autocast():
    for query in query_tensors:
        # One query at a time: add a batch dimension of size 1 and move it to the device.
        query_tensor = torch.tensor(query).unsqueeze(dim=0).to(device)
        generated = model.generate(
            input_ids=query_tensor, max_new_tokens=60, pad_token_id=tokenizer.eos_token_id
        )
        output = generated.squeeze()
        response_tensors.append(output)
        responses_text.append(tokenizer.decode(output, skip_special_tokens=True))

    rewards = get_rewards(responses_text)
    return responses_text, rewards, average_rewards(rewards)

In [None]:
samples_query_tensors

In [None]:
def get_samples_query_tensors_llama():
    """Tokenise two review openings behind a "Tell me your opinion" instruction prompt.

    Uses the global ``tokenizer``.

    Returns:
        list of int32 numpy arrays, one per text, each with its final token removed.
    """
    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
    ### Instruction: Tell me your opinion about this movie.
    ### Response: """
    openings = (
        "I really enjoyed the slight hint towards",
        "I really hated the horrible hint towards",
    )

    encoded_batch = []
    for opening in openings:
        token_ids = np.array(tokenizer.encode(prompt + opening), dtype=np.int32)
        # Drop the trailing token of the encoding.
        encoded_batch.append(token_ids[:-1])
    return encoded_batch

In [None]:
prompt_llama = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction: Tell me your opinion about this movie.
### Response: """

In [None]:
size_prompt_llama = len(tokenizer.encode(prompt_llama))

In [None]:
def get_imdb_query_tensors_llama(bs=16):
    """Sample ``bs`` truncated IMDB test reviews prefixed with the global ``prompt_llama``.

    Uses the global ``tokenizer``. Sampling is random and unseeded.

    Args:
        bs: number of queries to draw from the tokenised dataset.

    Returns:
        list with one ``input_ids`` entry per sampled review.
    """
    ds = load_dataset("imdb", split="test")
    # Keep only reviews longer than 200 characters.
    ds = ds.filter(lambda x: len(x["text"]) > 200, batched=False)

    input_min_text_length=2
    input_max_text_length=8
    size_prompt_llama = len(tokenizer.encode(prompt_llama))
    # Random truncation length: prompt length plus a few tokens of the review.
    input_size = LengthSampler(size_prompt_llama + input_min_text_length, size_prompt_llama + input_max_text_length)

    def tokenize(sample):

        sample["input_ids"] = tokenizer.encode(prompt_llama + sample["text"])[: input_size()]
        sample["query"] = tokenizer.decode(sample["input_ids"])
        return sample

    ds = ds.map(tokenize, batched=False)

    #### get a batch from the dataset
    # Only the pandas format is needed here; an earlier torch format setting was dropped
    # because this call replaced it before the dataset was ever read.
    ds.set_format("pandas")
    df_batch = ds[:].sample(bs)
    query_tensors = df_batch['input_ids'].tolist()
    return query_tensors

In [None]:
size_prompt_llama

In [None]:
samples_query_tensors = get_samples_query_tensors_llama()
list_rewards_samples = predict(dict_models_to_merge, samples_query_tensors, verbose=True)
for rewards_samples in list_rewards_samples:
    print(rewards_samples)

In [None]:
tokenizer.decode(imdb_query_tensors[0])

In [None]:
imdb_query_tensors = get_imdb_query_tensors_llama(bs=8)
list_rewards_imdb = predict(dict_models_to_merge, imdb_query_tensors, verbose=False)


In [None]:
dict_coeff_to_reward = {}
dict_coeff_to_reward[0] = list_rewards_imdb[0]
for model_name, rewards_imdb in zip(dict_models_to_merge.keys(), list_rewards_imdb):
    print(model_name)
    print(rewards_imdb)

In [None]:
list_rewards_imdb