## Environment setup

In [1]:
# Install the notebook's dependencies into the current environment (quiet mode).
!pip install -q h5py typing-extensions wheel
# bitsandbytes backs the BitsAndBytesConfig quantization used when the model is loaded.
!pip install -q -U bitsandbytes
# fsspec is the only package pinned to an exact version.
!pip install -q -U fsspec==2025.3.0
# transformers, peft and accelerate are installed from their GitHub repositories,
# so the resolved versions are unpinned and can differ from one run to the next.
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git

# datasets provides load_dataset, used to fetch the training corpus.
!pip install -q datasets

In [4]:
import math
import torch 
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from time import time
from jinja2 import Template

## Data preparation

In [5]:
# Hugging Face Hub identifier of the base model; it is reused further down to load
# the model weights and to build the output directory names.
model_id = "Qwen/Qwen2.5-7B-Instruct"
# Load the tokenizer that belongs to the base model.
tokenizer = AutoTokenizer.from_pretrained(model_id)

OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [None]:
# Encode the prompt, send it to the model, decode the output.
# The tokenizer's chat template string is compiled with Jinja2 so prompts can be rendered by hand.
template = Template(tokenizer.chat_template)
print(template)
@torch.no_grad()
def generate(prompt):
    """Run one single-turn chat completion and return only the newly generated text.

    The prompt is wrapped as a single user message, rendered through the
    module-level ``template`` with the generation prompt appended, tokenized
    without extra special tokens, and decoded greedily (``do_sample=False``)
    by the module-level ``model`` on ``cuda:0``. The rendered prompt and the
    full decoded sequence are printed for inspection.

    Args:
        prompt: User message text.

    Returns:
        The decoded continuation (tokens after the prompt), special tokens kept.
    """
    separator = "-" * 80
    chat = [{"role": "user", "content": prompt}]
    rendered_prompt = template.render(
        messages=chat,
        bos_token=tokenizer.bos_token,
        add_generation_prompt=True,
    )
    print(separator)
    print(f"model_input_string:\n{rendered_prompt}")

    prompt_ids = tokenizer.encode(rendered_prompt, add_special_tokens=False, return_tensors='pt')
    prompt_ids = prompt_ids.to("cuda:0")
    sequences = model.generate(prompt_ids, do_sample=False)

    # `sequences` holds exactly one row here, so unpacking it passes that row to decode().
    full_text = tokenizer.decode(*sequences, skip_special_tokens=False)
    print(separator)
    print(f"model_return_string:\n{full_text}")

    # Everything past the prompt length is what the model produced.
    prompt_len = prompt_ids.shape[1]
    completion_ids = sequences[:, prompt_len:]
    return tokenizer.decode(completion_ids[0], skip_special_tokens=False)

# Smoke test: print the query, run it through generate(), then print the decoded reply.
# NOTE(review): generate() reads the global `model`, and the first assignment to `model`
# in this notebook appears in a later cell — confirm the intended execution order.
query = "Please introduce yourself"
print("-"*80)
print(f"query:\n{query}")
response = generate(query)
print("-"*80)
print(f"response:\n{response}")

In [6]:
from datasets import load_dataset
from torch.utils.data import random_split


def _to_conversation(sample):
    """Wrap one question/answer row as a two-turn human/gpt conversation."""
    return {
        "conversations": [
            {"from": "human", "value": sample['question']},
            {"from": "gpt", "value": sample['answer']},
        ]
    }


# Train / validation split
# data = load_dataset("Abirate/english_quotes")
dataset = load_dataset("FreedomIntelligence/Huatuo26M-Lite")
dataset = dataset['train'].map(_to_conversation, batched=False)

train_dataset_size, val_dataset_size = 0.8, 0.2
# Only the two real fractions are passed: the former third entry, 1 - 0.8 - 0.2,
# evaluates to a tiny negative float (about -5.55e-17), which random_split rejects
# as "not between 0 and 1".
# A seeded generator makes the split reproducible across kernel restarts.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = random_split(
    dataset,
    [train_dataset_size, val_dataset_size],
    generator=split_generator,
)
print(len(dataset), len(train_dataset), len(val_dataset))
print(train_dataset[0]['conversations'])

ConnectionError: Couldn't reach 'FreedomIntelligence/Huatuo26M-Lite' on the Hub (LocalEntryNotFoundError)

In [None]:
import transformers
from typing import Dict, Sequence, List
from torch.utils.data import Dataset
from dataclasses import dataclass

def preprocess(
    sources,
    tokenizer: transformers.PreTrainedTokenizer,
) -> Dict:
    """Tokenize conversations into padded ``input_ids`` / ``labels`` tensors.

    Each conversation is a list of ``{"from": ..., "value": ...}`` turns. A
    leading non-human turn is dropped, then turns are consumed in
    (question, answer) pairs; a trailing unpaired turn is ignored. Every pair
    is rendered on its own through the tokenizer's chat template (earlier
    turns of the same conversation are not included) and becomes one example.

    Labels equal the input ids, except that the prompt part (the rendering of
    the question with the generation prompt appended) is masked with -100.
    Sequences longer than ``tokenizer.model_max_length`` keep their LAST
    ``model_max_length`` tokens. Within the returned batch, inputs are padded
    on the right with ``tokenizer.eos_token_id`` and labels with -100.

    Args:
        sources: Iterable of conversations (each a list of turn dicts).
        tokenizer: Tokenizer providing ``chat_template``, ``bos_token``,
            ``eos_token_id``, ``model_max_length`` and ``encode``.

    Returns:
        Dict with ``"input_ids"`` and ``"labels"`` as ``torch.LongTensor`` of
        shape (number of pairs, longest example length).

    Raises:
        ValueError: If ``sources`` yields no complete question/answer pair.
        AssertionError: If a turn value is ``None`` or the prompt rendering is
            longer than the full rendering.
    """
    template = Template(tokenizer.chat_template)
    max_seq_len = tokenizer.model_max_length
    ignore_index = -100
    examples = []
    for source in sources:
        # Guarding on `source` keeps an empty conversation from raising IndexError.
        if source and source[0]["from"] != "human":
            # Skip the first one if it is not from human
            source = source[1:]

        # Stop one short of the end so that j + 1 is always a valid index.
        for j in range(0, len(source) - 1, 2):
            q = source[j]["value"]
            a = source[j+1]["value"]
            assert q is not None and a is not None, f'q:{q} a:{a}'
            full_text = template.render(messages=[{"role": "user", "content": q},{"role": "assistant", "content": a}],bos_token=tokenizer.bos_token,add_generation_prompt=False)
            input_ids = tokenizer.encode(full_text, add_special_tokens=False)

            query = template.render(messages=[{"role": "user", "content": q}],bos_token=tokenizer.bos_token,add_generation_prompt=True)
            query_ids = tokenizer.encode(query, add_special_tokens=False)

            # Only the answer tokens contribute to the loss.
            labels = [ignore_index]*len(query_ids) + input_ids[len(query_ids):]
            assert len(labels) == len(input_ids)
            if len(input_ids) == 0:
                continue
            examples.append({"input_ids": input_ids[-max_seq_len:], "labels": labels[-max_seq_len:]})

    if not examples:
        raise ValueError("preprocess() found no complete human/gpt turn pair in `sources`")

    # Every example is already capped at max_seq_len, so the longest one sets the width.
    max_len = max(len(example["input_ids"]) for example in examples)
    padded_ids = [
        example["input_ids"] + [tokenizer.eos_token_id]*(max_len-len(example["input_ids"]))
        for example in examples
    ]
    padded_labels = [
        example["labels"] + [ignore_index]*(max_len-len(example["labels"]))
        for example in examples
    ]

    return {
        "input_ids": torch.LongTensor(padded_ids),
        "labels": torch.LongTensor(padded_labels)
    }


class InstructDataset(Dataset):
    """Dataset view that tokenizes conversation records lazily on access.

    ``data`` is expected to yield records carrying a ``'conversations'`` entry;
    tokenization is delegated to ``preprocess`` every time an item is read.
    """

    def __init__(self, data: Sequence, tokenizer: transformers.PreTrainedTokenizer) -> None:
        super().__init__()
        self.tokenizer = tokenizer
        self.data = data

    def __len__(self):
        """Number of underlying records."""
        return len(self.data)

    def __getitem__(self, index) -> Dict[str, torch.Tensor]:
        """Return tokenized tensors for ``index``.

        For an ``int`` index the result holds the first example produced from
        that record (1-D ``input_ids`` / ``labels``). For any other index type
        ``self.data[index]`` is iterated as a collection of records and the
        batched output of ``preprocess`` is returned unchanged.
        """
        selected = self.data[index]
        single = isinstance(index, int)
        records = [selected] if single else selected
        batch = preprocess([record['conversations'] for record in records], self.tokenizer)
        if not single:
            return batch
        return dict(input_ids=batch["input_ids"][0], labels=batch["labels"][0])


# Label id used for padded label positions; same value as the -100 prompt masking
# applied when labels are built in preprocess().
IGNORE_INDEX = -100


@dataclass
class DataCollatorForSupervisedDataset(object):
    """Pad a list of tokenized examples into one batch.

    ``input_ids`` are right-padded with ``tokenizer.pad_token_id`` and
    ``labels`` with ``IGNORE_INDEX``. The attention mask is derived from each
    example's real length, so genuine tokens that happen to share the pad id
    are not masked out.
    """
    tokenizer: transformers.PreTrainedTokenizer

    def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]:
        """Collate ``instances`` (dicts with 1-D ``input_ids`` / ``labels`` tensors).

        Returns:
            Dict with ``input_ids``, ``labels`` and a boolean ``attention_mask``,
            each of shape (batch, longest sequence in the batch).
        """
        input_ids = [instance["input_ids"] for instance in instances]
        labels = [instance["labels"] for instance in instances]
        # Record the true lengths before padding hides them.
        lengths = [len(ids) for ids in input_ids]

        input_ids = torch.nn.utils.rnn.pad_sequence(
            input_ids,
            batch_first=True,
            padding_value=self.tokenizer.pad_token_id)
        labels = torch.nn.utils.rnn.pad_sequence(labels, batch_first=True, padding_value=IGNORE_INDEX)

        # Position p of row i is a real token exactly when p < lengths[i].
        positions = torch.arange(input_ids.shape[1], device=input_ids.device)
        lengths_column = torch.tensor(lengths, device=input_ids.device).unsqueeze(1)
        attention_mask = positions.unsqueeze(0) < lengths_column
        return dict(
            input_ids=input_ids,
            labels=labels,
            attention_mask=attention_mask,
        )

In [None]:
# Wrap both splits so records are tokenized on access, and build the batch collator.
# NOTE: train_dataset / val_dataset are rebound here, so running this cell a second
# time would wrap the already-wrapped datasets again.
train_dataset = InstructDataset(train_dataset, tokenizer)
val_dataset = InstructDataset(val_dataset, tokenizer)
data_collator = DataCollatorForSupervisedDataset(tokenizer=tokenizer)

## Model definition

In [None]:




# 4-bit quantization settings handed to from_pretrained below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True, # Activate nested quantization for 4-bit base models (double quantization)
    bnb_4bit_quant_type="nf4", # Quantization type (fp4 or nf4); nf4 is the one selected here
    bnb_4bit_compute_dtype=torch.bfloat16
)
# Load the causal LM with that quantization config; the device_map places the whole model on device index 0.
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnb_config, device_map={"":0})



OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing on the loaded model.
model.gradient_checkpointing_enable()
# Let PEFT prepare the quantized model for training; the returned model replaces `model`.
model = prepare_model_for_kbit_training(model)

In [None]:
from peft import LoraConfig, get_peft_model


from transformers import AutoModelForSeq2SeqLM 
from peft import LoraConfig, AdaLoraModel, AdaLoraConfig

# Projection layers that receive AdaLoRA adapters.
TARGET_MODULES=["q_proj","k_proj", "v_proj", "gate_proj", "down_proj","up_proj"] #['q','k']
LORA_R = 8
LORA_ALPHA = 8
LORA_DROPOUT = 0.05
init_lora_weights = 'pissa'
rs = True
# AdaLoraConfig raises a ValueError when use_dora is True, so DoRA stays off.
dw = False

# AdaLoRA schedules its rank budget over the whole run and therefore needs the
# total number of optimizer steps. These three values must mirror the
# TrainingArguments used for training.
ADALORA_BATCH_SIZE = 4
ADALORA_GRAD_ACCUM = 2
ADALORA_EPOCHS = 10
total_step = math.ceil(len(train_dataset) / (ADALORA_BATCH_SIZE * ADALORA_GRAD_ACCUM)) * ADALORA_EPOCHS

config = AdaLoraConfig(
    peft_type="ADALORA", task_type="CAUSAL_LM", bias="none",
    # NOTE(review): init_r appears to equal AdaLoraConfig's default target_r (8),
    # in which case no rank would actually be pruned — confirm the intended target_r.
    init_r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=TARGET_MODULES,
    tinit=100,                            # warm-up steps before any rank budget is removed
    tfinal=1000,                          # number of final fine-tuning steps run at the end of the schedule
    deltaT=10,                            # steps between two budget allocations
    total_step=total_step,                # required by AdaLoRA; must exceed tinit + tfinal
    # NOTE(review): use_rslora / init_lora_weights are LoRA options; verify that the
    # AdaLoRA layers honor them.
    use_rslora = rs,
    init_lora_weights = init_lora_weights,
    use_dora=dw,
)

# Wrap the already loaded quantized causal LM exactly once. get_peft_model builds
# the AdaLoRA tuner from `config`; wrapping in AdaLoraModel first would inject the
# adapters twice, and swapping in a t5-base seq2seq model would discard the Qwen
# model the target modules above refer to.
model = get_peft_model(model, config)

model.print_trainable_parameters()



In [None]:
from peft import prepare_model_for_kbit_training
from peft import LoraConfig, get_peft_model



from peft import LoraConfig, AdaLoraModel, AdaLoraConfig

In [None]:
# 4-bit quantization settings used when the base model is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True, # Activate nested quantization for 4-bit base models (double quantization)
    bnb_4bit_quant_type="nf4", # Quantization type (fp4 or nf4); nf4 is the one selected here
    bnb_4bit_compute_dtype=torch.bfloat16
)

def get_basemodel(model_id,bnb_config):
    """Load a quantized causal LM and make it ready for k-bit training.

    Args:
        model_id: Model identifier passed to ``AutoModelForCausalLM.from_pretrained``.
        bnb_config: Quantization config forwarded as ``quantization_config``.

    Returns:
        The model returned by ``prepare_model_for_kbit_training`` after gradient
        checkpointing has been enabled on it.
    """
    quantized = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bnb_config,
        device_map={"":0},
    )
    quantized.gradient_checkpointing_enable()
    return prepare_model_for_kbit_training(quantized)



# Build the quantized base model that the adapters are attached to.
basemodel = get_basemodel(model_id,bnb_config)

    

In [None]:
# Projection layers that receive AdaLoRA adapters.
TARGET_MODULES=["q_proj","k_proj", "v_proj", "gate_proj", "down_proj","up_proj"] #['q','k']
LORA_R = 8
LORA_ALPHA = 8
LORA_DROPOUT = 0.05
init_lora_weights = 'pissa'
rs = True
# AdaLoraConfig raises a ValueError when use_dora is True, so DoRA stays off.
dw = False

# AdaLoRA schedules its rank budget over the whole run and therefore needs the
# total number of optimizer steps. These three values must mirror the
# TrainingArguments used for training.
ADALORA_BATCH_SIZE = 4
ADALORA_GRAD_ACCUM = 2
ADALORA_EPOCHS = 10
total_step = math.ceil(len(train_dataset) / (ADALORA_BATCH_SIZE * ADALORA_GRAD_ACCUM)) * ADALORA_EPOCHS

train_config = AdaLoraConfig(
    peft_type="ADALORA", task_type="CAUSAL_LM", bias="none",
    # NOTE(review): init_r appears to equal AdaLoraConfig's default target_r (8),
    # in which case no rank would actually be pruned — confirm the intended target_r.
    init_r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=TARGET_MODULES,
    tinit=100,                            # warm-up steps before any rank budget is removed
    tfinal=1000,                          # number of final fine-tuning steps run at the end of the schedule
    deltaT=10,                            # steps between two budget allocations
    total_step=total_step,                # required by AdaLoRA; must exceed tinit + tfinal
    # NOTE(review): use_rslora / init_lora_weights are LoRA options; verify that the
    # AdaLoRA layers honor them.
    use_rslora = rs,
    init_lora_weights = init_lora_weights,
    use_dora=dw,
)
def get_train_model(model, train_config):
    """Attach the adapters described by ``train_config`` to ``model``.

    Args:
        model: Base model to adapt (already prepared for k-bit training).
        train_config: PEFT config; its type selects the tuner that is built.

    Returns:
        The PEFT-wrapped model. Its trainable-parameter summary is printed as
        a side effect.
    """
    # get_peft_model constructs the tuner named by the config on its own.
    # Wrapping the model in AdaLoraModel beforehand would inject adapters twice.
    peft_model = get_peft_model(model, train_config)
    peft_model.print_trainable_parameters()
    return peft_model

# Use the config defined in this cell (`train_config`), not `config` from the earlier draft cell.
trainmodel = get_train_model(basemodel, train_config)

## Training parameters

In [None]:
# Checkpoint directory, named after the model id and the adapter hyper-parameters.
OUTPUT_DIR = f"./{model_id}_rs{rs}_dw{dw}_r{LORA_R}_{init_lora_weights}/checkpoints"

training_arguments = transformers.TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=10,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,

    optim='paged_adamw_32bit',
    lr_scheduler_type="cosine",
    learning_rate=2e-7,   # NOTE(review): unusually small for adapter training — confirm this is intended
    weight_decay=0.001,
    warmup_ratio=0.03,

    max_steps=-1,
    logging_steps=100,

    # `eval_strategy` is the current name of the former `evaluation_strategy` argument.
    eval_strategy='steps',
    eval_steps=100,
    # 'epochs' is not a valid strategy value, and load_best_model_at_end requires the
    # save strategy to match the eval strategy, so checkpoints follow the eval cadence.
    save_strategy='steps',
    save_steps=100,
    # Saving every 100 steps would otherwise accumulate checkpoints without bound.
    save_total_limit=3,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,

    group_by_length=True,

    gradient_checkpointing=True,
    report_to="none"
)

from transformers import EarlyStoppingCallback,TrainerCallback


class AdaLoraUpdateCallback(TrainerCallback):
    """Drive AdaLoRA's rank allocation from inside the ``Trainer`` loop.

    ``update_and_allocate`` has to run on every optimizer step, after
    ``optimizer.step()`` and before the gradients are zeroed, because it reads
    the gradients to update its importance scores. ``on_optimizer_step`` is the
    Trainer event fired at that point; ``on_step_end`` fires after
    ``zero_grad`` and is too late. AdaLoRA applies its own
    ``tinit`` / ``tfinal`` / ``deltaT`` schedule internally, so no step
    filtering is done here.
    """

    def on_optimizer_step(self, args, state, control, **kwargs):
        """Forward the current global step to the AdaLoRA tuner.

        The model is taken from the callback kwargs supplied by the Trainer;
        a ``TrainerCallback`` has no ``self.model`` attribute of its own.
        """
        model = kwargs["model"]
        # `base_model` is the AdaLoRA tuner wrapped by the PEFT model.
        model.base_model.update_and_allocate(state.global_step)
        return control

# Initialize the Trainer and register the callbacks.
trainer = transformers.Trainer(
    model=trainmodel,
    # `processing_class` is the current name of the former `tokenizer` argument.
    processing_class=tokenizer,
    args=training_arguments,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    # The AdaLoRA callback is essential: without it the rank allocation is never updated.
    callbacks=[AdaLoraUpdateCallback(), EarlyStoppingCallback(early_stopping_patience=3)],
)


In [None]:
model.print_trainable_parameters()
start = time()
trainer.train()
print('train time:', time() - start, 's')
eval_results = trainer.evaluate()
print(f"Perplexity: {math.exp(eval_results['eval_loss']):.2f}")

!pwd
output_path = f"./{model_id}_rs{rs}_dw{dw}_{init_lora_weights}/final"
trainer.save_model(output_path)
