In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to project files are picked up live.
%load_ext autoreload
%autoreload 2
import sys
# NOTE(review): absolute, machine-specific path. The project-local helpers used
# by later cells presumably live in this directory — TODO make it configurable.
sys.path.append("/home/rame/trl/examples/llama")

In [None]:
# Choose where the run configuration (`script_args`) comes from.
# The previous `if False / elif False / else` toggles left two branches
# unreachable; a named selector keeps the same default ("merged") while making
# the alternatives explicit and switchable without editing control flow.
ARGS_SOURCE = "merged"  # one of: "cli", "default", "merged"

if ARGS_SOURCE == "cli":
    # Parse the arguments from the command line into the ScriptArguments dataclass.
    parser = HfArgumentParser(ScriptArguments)
    script_args = parser.parse_args_into_dataclasses()[0]
elif ARGS_SOURCE == "default":
    script_args = DefaultArgs()
elif ARGS_SOURCE == "merged":
    script_args = DefaultArgsMerged()
else:
    raise ValueError(f"Unknown ARGS_SOURCE: {ARGS_SOURCE!r}")

In [None]:
# Load the base model named by the run configuration.
# NOTE(review): `Loader` is defined in a later cell of this notebook, so on a
# fresh kernel this cell only works if `Loader` is already available from
# elsewhere — TODO confirm, or move the class definition above this cell.
base_model = Loader.load_base_model(script_args.base_model_name)

In [None]:
# Shell command: report the current GPU state.
!nvidia-smi

In [None]:
class Loader:
    """Static helpers that build the models used in this notebook."""

    @staticmethod
    def load_base_model(base_model_name):
        """Load a LLaMA causal LM in 8-bit and prepare it for int8 training.

        Args:
            base_model_name: identifier forwarded to
                ``LlamaForCausalLM.from_pretrained``.

        Returns:
            The model returned by ``prepare_model_for_int8_training``.
        """
        base_model = LlamaForCausalLM.from_pretrained(
            base_model_name, load_in_8bit=True, device_map="auto"
        )
        return prepare_model_for_int8_training(base_model)

    @staticmethod
    def load_peft_model(base_model, peft_name=None, lora_config=None):
        """Attach a LoRA adapter to ``base_model`` and wrap it with a value head.

        Args:
            base_model: the model to adapt.
            peft_name: ``None``, ``"none"``, ``"lora"`` or ``"lora0"`` create a
                fresh LoRA adapter; any other value is forwarded to
                ``PeftModel.from_pretrained`` as the adapter to load.
            lora_config: optional configuration for a fresh adapter. When
                omitted, the notebook defaults (r=8, alpha=16, dropout=0.05)
                are used.

        Returns:
            The result of ``AutoModelForCausalLMWithValueHead.from_pretrained``
            applied to the adapted model.

        Raises:
            ValueError: if ``lora_config`` is given together with a
                ``peft_name`` that selects a saved adapter, since the two
                requests conflict.
        """
        if peft_name in (None, "none", "lora", "lora0"):
            # INITLORA is "0" only for "lora0". Nothing in this notebook reads
            # it back; presumably a patched peft consumes it — TODO confirm.
            os.environ["INITLORA"] = "0" if peft_name == "lora0" else "1"
            if lora_config is None:
                lora_config = LoraConfig(
                    r=8,
                    lora_alpha=16,
                    lora_dropout=0.05,
                    target_modules=None,
                    bias="none",
                    task_type="CAUSAL_LM",
                )
            model = get_peft_model(base_model, lora_config)
        else:
            if lora_config is not None:
                raise ValueError(
                    "lora_config cannot be combined with a saved adapter "
                    f"(peft_name={peft_name!r})."
                )
            model = PeftModel.from_pretrained(base_model, peft_name)
        return AutoModelForCausalLMWithValueHead.from_pretrained(model)

    @staticmethod
    def print_trainable_parameters(model):
        """Print the trainable and total parameter counts of ``model``.

        Args:
            model: any object exposing ``named_parameters()`` whose parameters
                provide ``numel()`` and ``requires_grad``.
        """
        trainable_params = 0
        all_param = 0
        for _, param in model.named_parameters():
            count = param.numel()
            all_param += count
            if param.requires_grad:
                trainable_params += count
        # A model without parameters would otherwise divide by zero.
        trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
        print(
            f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
        )

In [None]:
# Build the adapted model (with value head) via Loader.load_peft_model.
model = Loader.load_peft_model(base_model, peft_name=script_args.peft_name)

In [None]:
# Names of every sub-module under the wrapped model whose path contains "lora".
key_list = [key for key, _ in model.pretrained_model.base_model.named_modules() if "lora" in key]

In [None]:
# Display the collected module names.
key_list

In [None]:
import peft

In [None]:
# Display the model wrapped by the value-head model.
model.pretrained_model

In [None]:
# `scaling` attribute of layer 0's self-attention `q_proj` module.
model.pretrained_model.base_model.model.model.layers[0].self_attn.q_proj.scaling

In [None]:
# Parameters of the `lora_B` sub-module of that same projection.
list(model.pretrained_model.base_model.model.model.layers[0].self_attn.q_proj.lora_B.parameters())

In [None]:
# Walk the "lora" module names and stop at the first LoRA `Linear` layer.
# `parent`, `target` and `target_name` stay bound afterwards for inspection.
for key in key_list:
    parent, target, target_name = model.pretrained_model._get_submodules(key)
    # No module visited before the match may be a MergedLinear.
    assert not isinstance(target, peft.tuners.lora.MergedLinear)
    if not isinstance(target, peft.tuners.lora.Linear):
        continue
    print("found", target)
    break

In [None]:
# Parent module returned for the last key visited by the search loop.
parent

In [None]:
# True only if the search loop stopped on a LoRA `Linear` layer.
isinstance(target, peft.tuners.lora.Linear)

In [None]:
# IPython source introspection (`??`); leftover debugging, IPython-only syntax.
target??

In [None]:
# NOTE(review): this call relies on `Loader.load_peft_model` accepting a
# `lora_config` keyword and on `script_args.lora_config` existing, whereas the
# earlier cell calls it with `peft_name=` — confirm both are supported by the
# Loader in scope. It also wraps the same `base_model` object a second time.
model = Loader.load_peft_model(base_model, lora_config=script_args.lora_config)
# Snapshot the parameter names of the base model and of the wrapped model.
base_parameter_names_afterpeft = [n for n, _ in base_model.named_parameters()]
parameter_names_afterpeft = [n for n, _ in model.named_parameters()]

In [None]:
# Display the base model's parameter names captured after the PEFT call.
base_parameter_names_afterpeft

In [None]:
# Display the wrapped model's parameter names.
parameter_names_afterpeft

In [None]:
# Look up the LM head weight by name on the base model ...
base_model.get_parameter("lm_head.0.weight")

In [None]:
# ... and the correspondingly prefixed name on the wrapped model.
model.get_parameter("pretrained_model.base_model.model.lm_head.0.weight")

In [None]:
# Weight of the value head's `summary` layer.
model.get_parameter("v_head.summary.weight")

In [None]:
# NOTE(review): `base_parameter_names` is never assigned in the visible cells
# (only `base_parameter_names_afterpeft` is); this raises NameError on a fresh
# kernel — presumably left over from a deleted cell.
base_parameter_names

In [None]:
# Re-collect the wrapped model's parameter names.
parameter_names = [n for n, _ in model.named_parameters()]

In [None]:
parameter_names

In [None]:
# Bias of the value head's `summary` layer.
model.get_parameter("v_head.summary.bias")

In [None]:
# Duplicate of the previous cell.
model.get_parameter("v_head.summary.bias")

In [None]:
# Options read by the reference-model cell below.
# `num_shared_layers=None` selects the branch that freezes a full copy of the model.
num_shared_layers=None
# Optional layer-name template; when set it is formatted with `layer=num_shared_layers`.
pattern=None

In [None]:
from copy import deepcopy

In [None]:
# Build `ref_model`: a frozen deep copy of `model`, used as the PPO reference.
# Reads `model`, `num_shared_layers` and `pattern` from the notebook namespace.
parameter_names = [n for n, _ in model.named_parameters()]
ref_model = deepcopy(model)

if num_shared_layers is None:
    # No layers are shared: freeze every parameter of the copy.
    for param_name in parameter_names:
        param = ref_model.get_parameter(param_name)
        param.requires_grad = False
else:
    # Identify the name pattern that marks the first unshared layer.
    if pattern is not None:
        pattern = pattern.format(layer=num_shared_layers)
    else:
        # NOTE(review): LAYER_PATTERNS is not defined in the visible cells and
        # must already be in scope for auto-detection — TODO import it explicitly.
        for pattern_candidate in LAYER_PATTERNS:
            pattern_candidate = pattern_candidate.format(layer=num_shared_layers)
            if any(pattern_candidate in name for name in parameter_names):
                pattern = pattern_candidate
                break

    if pattern is None:
        raise ValueError("Layer pattern could not be matched.")

    # Every parameter before the first name containing `pattern` is "shared";
    # that parameter and all following ones are "unshared".
    shared_param_list = []
    unshared_param_list = []
    shared_parameter = True
    for name in parameter_names:
        if pattern in name:
            shared_parameter = False
        if shared_parameter:
            shared_param_list.append(name)
        else:
            unshared_param_list.append(name)

    # Shared parameters are frozen in the trained model itself. The previous
    # version also rebound a local name to each of them, which never changed
    # `ref_model` (the deep copy keeps its own tensors), so that no-op is gone.
    for param_name in shared_param_list:
        param = model.get_parameter(param_name)
        param.requires_grad = False

    # All remaining parameters are frozen on the reference copy.
    for param_name in unshared_param_list:
        param = ref_model.get_parameter(param_name)
        param.requires_grad = False

# Put the reference copy in eval mode on both paths (previously only when no
# layers were shared), so dropout is inactive regardless of `num_shared_layers`.
ref_model.eval()

In [None]:
# Inspect the last parameter visited by the freezing loop above; `param_name`
# is the loop variable left bound by that cell.
ref_model.get_parameter(param_name)

In [None]:
param_name

In [None]:
# Point Weights & Biases at the experiment folder and name the run after the
# argument set.
os.environ["WANDB_DIR"] = os.path.join(FOLDER_EXPE, "wandb")
script_args_name = Naming.get_name(script_args)
os.environ["WANDB_NAME"] = script_args_name

# Collect the PPO settings first, then build the config from them.
# An empty `log_with` string is translated to None.
ppo_config_kwargs = {
    "model_name": script_args.model_name,
    "init_kl_coef": script_args.init_kl_coef,
    "adap_kl_ctrl": script_args.adap_kl_ctrl,
    "learning_rate": script_args.learning_rate,
    "log_with": script_args.log_with if script_args.log_with != "" else None,
    "batch_size": script_args.batch_size,
    "mini_batch_size": script_args.mini_batch_size,
    "optimize_cuda_cache": True,
    "gradient_accumulation_steps": script_args.gradient_accumulation_steps,
}
config = PPOConfig(**ppo_config_kwargs)

In [None]:
# Load the tokenizer for `script_args.model_name` and report how many of the
# model's parameters are trainable.
tokenizer = Tokenizer.load_tokenizer(script_args.model_name)
Loader.print_trainable_parameters(model)

# Prompt/instruction helper built from the configured prompt and the tokenizer.
instructions = llama_utils.Instructions(prompt=script_args.prompt, tokenizer=tokenizer)


# Build the dataset by calling `build_dataset` with the PPO config and tokenizer.
dataset = build_dataset(config, tokenizer=tokenizer)
def collator(data):
    """Turn a list of per-sample dicts into one dict of per-key lists.

    The keys are taken from the first sample; every sample must provide them.
    """
    first_sample = data[0]
    return {key: [sample[key] for sample in data] for key in first_sample}
# Adam over only those model parameters that still require gradients.
optimizer = torch.optim.Adam(
    (p for p in model.parameters() if p.requires_grad),
    lr=config.learning_rate,
)

# Next, build the PPOTrainer from the model, the reference model, the tokenizer,
# the dataset, the collator and the optimizer.


In [None]:
# Shell command: report the GPU state again before building the trainer.
!nvidia-smi

In [None]:
# Assemble the PPO trainer from the objects created in the previous cells:
# the trained model, the frozen reference copy, the tokenizer, the dataset,
# the batch collator and the optimizer.
ppo_trainer = PPOTrainer(
    config,
    model,
    ref_model=ref_model,
    tokenizer=tokenizer,
    dataset=dataset,
    data_collator=collator,
    optimizer=optimizer
)

In [None]:
# Start from the device chosen by the trainer's accelerator.
device = ppo_trainer.accelerator.device
# With a single process, replace it by GPU index 0 (or "cpu" when CUDA is
# unavailable); the value is later passed to `pipeline`.
if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"  # to avoid a `pipeline` bug


In [None]:

class Runner():
    """Sample responses, score them with a sentiment pipeline and run PPO steps.

    NOTE(review): besides its constructor arguments, this class reads the
    notebook-level names `tokenizer`, `config`, `instructions`, `script_args`
    and `llama_utils`; they must be defined before the class is used.
    """

    def __init__(self, ppo_trainer, sentiment_model, device, output_max_length):
        """Set up generation settings, the length sampler and the reward pipeline.

        Args:
            ppo_trainer: trainer used for generation, PPO steps and logging.
            sentiment_model: model identifier given to the
                "sentiment-analysis" pipeline (and to its tokenizer loader).
            device: device argument forwarded to `pipeline`.
            output_max_length: upper bound given to the response-length
                sampler; the lower bound is half of it (integer division).
        """
        self.ppo_trainer = ppo_trainer
        # Sampling settings forwarded to `ppo_trainer.generate`;
        # "max_new_tokens" is added per query in `train_ppo`.
        self.generation_kwargs = {
            "min_length": -1,
            "top_k": 0.0,
            "top_p": 1.0,
            "do_sample": True,
            "pad_token_id": tokenizer.eos_token_id,
            "eos_token_id": -1,
        }

        output_min_length = output_max_length // 2
        self.output_length_sampler = LengthSampler(output_min_length, output_max_length)

        print(f"Load sentiment model with {sentiment_model}")
        self.sentiment_pipe = pipeline(
            "sentiment-analysis",
            model=sentiment_model,
            device=device,
            tokenizer=llama_utils.load_tokenizer_pipe(sentiment_model)
        )
        # Arguments passed to the sentiment pipeline on every call.
        # `return_all_scores=True` asks for the score of every label rather
        # than only the top one; "none" requests no function on the outputs.
        self.sent_kwargs = {
            "return_all_scores": True,
            "function_to_apply": "none",
            "batch_size": config.mini_batch_size
        }

    @staticmethod
    def _score_from_output(output, score_index):
        """Extract one scalar reward from a single pipeline output.

        Args:
            output: sequence of per-label dicts, each with a "score" entry.
            score_index: selects the reward:
                * an int indexes `output` directly (previously a TypeError);
                * "" yields 0.0;
                * "positive" / "negative" map to indices 1 / 0;
                * "A-B" yields score(A) - score(B);
                * any other string is converted with `int`.
        """
        if isinstance(score_index, int):
            return output[score_index]["score"]
        if score_index == "":
            return 0.
        if score_index in ("positive", "negative"):
            return output[{"positive": 1, "negative": 0}[score_index]]["score"]
        if "-" in score_index:
            # Only the first two "-"-separated parts are used.
            minuend, subtrahend = score_index.split("-")[:2]
            return (
                Runner._score_from_output(output, minuend)
                - Runner._score_from_output(output, subtrahend)
            )
        return output[int(score_index)]["score"]

    def apply_sentiment_pipe(self, texts):
        """Score `texts` with the sentiment pipeline.

        Each text is first passed through `llama_utils.transform_text`; the
        reward for each output is selected by `script_args.score_goal`.

        Returns:
            A list with one reward per input text.
        """
        texts = [
            llama_utils.transform_text(
                sentiment_pipe=self.sentiment_pipe,
                response_text=text,
                instruction=instructions.instruction_movie
            ) for text in texts
        ]
        pipe_outputs = self.sentiment_pipe(texts, **self.sent_kwargs)
        return [
            self._score_from_output(output, script_args.score_goal)
            for output in pipe_outputs
        ]

    def train_ppo(self, model, max_steps=1):
        """Run PPO over batches from the trainer's dataloader.

        Args:
            model: the trained model; gradient checkpointing and the KV cache
                are toggled on it around generation and optimisation.
            max_steps: number of batches to process. The default of 1 keeps
                the previous behaviour (which stopped after the first batch);
                pass None to consume the whole dataloader.
        """
        if max_steps is not None and max_steps <= 0:
            return

        # tqdm wraps the dataloader itself (not `enumerate`) so the progress
        # bar can show a total when the dataloader has a length.
        for step, batch in enumerate(tqdm(self.ppo_trainer.dataloader)):
            query_tensors = batch["input_ids"]

            # Generation: no gradient checkpointing, cache enabled.
            model.gradient_checkpointing_disable()
            model.pretrained_model.config.use_cache = True

            # Get response from Causal LM
            response_tensors = []
            for query in query_tensors:
                gen_len = self.output_length_sampler()
                self.generation_kwargs["max_new_tokens"] = gen_len
                response = self.ppo_trainer.generate(query, **self.generation_kwargs)
                # Keep only the last `gen_len` tokens of the generated sequence.
                response_tensors.append(response.squeeze()[-gen_len:])
            batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

            # Compute sentiment score on the query text after "Response: "
            # followed by the generated response.
            texts = [". ".join(q.split("Response: ")[1:]) + r for q, r in zip(batch["query"], batch["response"])]
            rewards = self.apply_sentiment_pipe(texts)
            rewards = [torch.tensor(reward) for reward in rewards]

            # Run PPO step: gradient checkpointing on, cache disabled.
            model.gradient_checkpointing_enable()
            model.pretrained_model.config.use_cache = False

            stats = self.ppo_trainer.step(query_tensors, response_tensors, rewards)
            self.ppo_trainer.log_stats(stats, batch, rewards)

            # Stop here rather than at the top of the next iteration so no
            # extra batch is fetched from the dataloader.
            if max_steps is not None and step + 1 >= max_steps:
                break

In [None]:
# Build the runner from the trainer, the configured sentiment model, the
# device selected above and the configured maximum response length.
runner = Runner(
    ppo_trainer,
    sentiment_model=script_args.sentiment_model,
    device=device,
    output_max_length=script_args.output_max_length
)

In [None]:
# Run the PPO loop on `model`.
runner.train_ppo(model)