https://huggingface.co/docs/transformers/main/peft#add-additional-trainable-layers-to-a-peft-adapter

In [None]:
from dotenv import dotenv_values
from datasets import load_dataset, Dataset
from utils.data_preprocessor import DataPreprocessor
from utils.evaluator import Evaluator
from config.finetuning import config
from config import postprocessing_params_mistral as postprocessing
from utils.test_data_processor import TestDataProcessor
import pandas as pd
from log import mistral_8bits as models_params
from utils.generate_ft_adapters_list import generate_ft_adapters_list
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import gc
from peft import PeftModel
from tqdm import tqdm

# Hugging Face access token, read from the local ".env.base" file.
HF_TOKEN = dotenv_values(".env.base")["HF_TOKEN"]

# Single hard-coded values; the commented-out alternative was to take these
# lists from `postprocessing.max_new_tokens_factor_list` and
# `postprocessing.n_shots_inference_list`.
max_new_tokens_factor_list = [8]
n_shots_inference_list = [2]

# Dataset layer to work on; the language code is the text before the first dot.
layer = "it.layer1"
language = layer.partition(".")[0]

# Checkpoint handed to the preprocessor (as both model and tokenizer id).
# NOTE(review): the model itself is later loaded from
# `models_params.BASE_MODEL_CHECKPOINT`, not from this constant -- confirm
# that the two are meant to differ.
BASE_MODEL_CHECKPOINT = "mii-community/zefiro-7b-sft-ITA"

# Download the dataset, then keep only the selected layer.
dataset = load_dataset("ferrazzipietro/e3c-sentences", token=HF_TOKEN)
dataset = dataset[layer]

preprocessor = DataPreprocessor(
    model_checkpoint=BASE_MODEL_CHECKPOINT,
    tokenizer=BASE_MODEL_CHECKPOINT,
)
dataset = preprocessor.preprocess_data_one_layer(
    dataset,
    instruction_on_response_format=models_params.instruction_on_response_format,
    simplest_prompt=models_params.simplest_prompt,
)

# Only the middle element of the returned triple is kept (used below as the
# validation data); the other two elements are discarded.
_, val_data, _ = preprocessor.split_layer_into_train_val_test_(dataset, layer)

# Quantization settings for loading the base model, taken from `models_params`.
#
# `models_params.load_in_4bit` is indexed with [0] below, so it is a sequence
# of candidate values rather than a plain bool. The 8-bit flag must therefore
# be derived from that same first element: negating the sequence itself is
# always False for a non-empty sequence, which silently disabled 8-bit
# loading regardless of the configured value.
load_in_8bit = not models_params.load_in_4bit[0]
bnb_config = BitsAndBytesConfig(
    load_in_4bit=models_params.load_in_4bit[0],
    load_in_8bit=load_in_8bit,
    # NOTE(review): the next three un-indexed parameters are passed through
    # as-is; confirm they are scalars and not sequences like the [0]-indexed
    # ones.
    bnb_4bit_use_double_quant=models_params.bnb_4bit_use_double_quant,
    bnb_4bit_quant_type=models_params.bnb_4bit_quant_type[0],
    bnb_4bit_compute_dtype=models_params.bnb_4bit_compute_dtype[0],
    llm_int8_threshold=models_params.llm_int8_threshold[0],
    llm_int8_has_fp16_weight=models_params.llm_int8_has_fp16_weight,
    llm_int8_skip_modules=models_params.llm_int8_skip_modules,
)


# Adapter identifiers produced for the "mistral_8bits" configuration; the
# first entry is the one attached to the base model below.
adapters_list = generate_ft_adapters_list(
    "mistral_8bits",
    simplest_prompt=models_params.simplest_prompt,
)
adapters = adapters_list[0]

# Load the base model with the quantization config built above.
base_model = AutoModelForCausalLM.from_pretrained(
    models_params.BASE_MODEL_CHECKPOINT,
    quantization_config=bnb_config,
    device_map="auto",
    low_cpu_mem_usage=True,
    return_dict=True,
    # torch_dtype=torch.float16,
)

# Wrap the base model with the first adapter.
# NOTE(review): despite the variable name, no explicit merge call is made
# here -- this is the PeftModel returned by `from_pretrained`.
merged_model = PeftModel.from_pretrained(
    base_model,
    adapters,
    token=HF_TOKEN,
    device_map="auto",
)

In [None]:
# Attach the second adapter by wrapping `merged_model`, which is itself
# already the result of an earlier `PeftModel.from_pretrained` call.
merged_model2 = PeftModel.from_pretrained(
    merged_model,
    adapters_list[1],
    token=HF_TOKEN,
    device_map="auto",
)

In [None]:
# Load a further adapter into `merged_model` under the name "adapt2".
# NOTE(review): `load_adapter` is called on the existing model object; verify
# in the PEFT docs whether its return value is a model before using
# `merged_model3` as one -- the adapter may only be reachable through
# `merged_model` itself.
merged_model3 = merged_model.load_adapter(
    adapters_list[3],
    adapter_name="adapt2",
    token=HF_TOKEN,
    device_map="auto",
    bias="none",
)

In [None]:
# Tokenizer for generation: the EOS token doubles as the padding token and
# padding is applied on the left.
tokenizer = AutoTokenizer.from_pretrained(
    models_params.BASE_MODEL_CHECKPOINT,
    add_eos_token=True,
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"


def _run_validation_inference(model):
    """Build a TestDataProcessor on the first 12 validation rows and run `model` on it.

    The processor gets its inference-prompt, ground-truth and model-response
    columns added, in that order, and is returned so the caller can read
    `test_data` from it.
    """
    processor = TestDataProcessor(
        test_data=val_data.select(range(12)),
        preprocessor=preprocessor,
        n_shots_inference=1,
        language=language,
        tokenizer=tokenizer,
    )
    processor.add_inference_prompt_column(
        simplest_prompt=models_params.simplest_prompt,
    )
    processor.add_ground_truth_column()
    processor.add_responses_column(
        model=model,
        tokenizer=tokenizer,
        batch_size=6,
        max_new_tokens_factor=4,
    )
    return processor


# First run.
postprocessor = _run_validation_inference(merged_model)
res1 = postprocessor.test_data

# Second run.
# NOTE(review): this uses the same `merged_model` object as the first run;
# confirm whether a different model/adapter was intended for the comparison.
postprocessor = _run_validation_inference(merged_model)
res2 = postprocessor.test_data

In [None]:
# Show the generated responses from the first run as the cell output.
res1["model_responses"]