In [None]:
# Install the required packages.
# NOTE(review): the "-U" installs pull whatever release is newest at run time, so
# only `datasets` is pinned here. The later cells rely on specific keyword arguments
# of these libraries; consider pinning the remaining packages to known-good
# versions once those are confirmed.
%pip install -q -U bitsandbytes
%pip install -q -U trl 
%pip install -q -U accelerate
%pip install -q -U transformers
%pip install -q -U peft
%pip install -q datasets==2.16.0

In [None]:
import os
import torch, wandb
import numpy as np
import pandas as pd
from datasets import Dataset

from peft import PeftModel, LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import BitsAndBytesConfig, AutoTokenizer, TrainingArguments, AutoModelForCausalLM, HfArgumentParser, TrainingArguments, pipeline, logging
from trl import SFTTrainer

# Raise the transformers logging threshold to CRITICAL so that lower-severity
# messages from the library (including its many warnings) are not printed.
logging.set_verbosity(logging.CRITICAL)


## 加载模型

In [None]:
# Directory holding the base model files (a Kaggle dataset mount).
# Model download page: https://www.kaggle.com/datasets/ahmadsaladin/mistral-7b-it-v02
base_model = "/kaggle/input/mistral-7b-it-v02"

# Quantization settings: 4-bit weights, NF4 quantization type, double quantization on.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Load the causal-LM with the quantization settings above; device_map="auto"
# leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    quantization_config=bnb_config,
)

# Training-oriented switches: the generation cache is turned off and gradient
# checkpointing is turned on before the model is handed to the trainer.
model.config.pretraining_tp = 1
model.config.use_cache = False
model.gradient_checkpointing_enable()

## 加载分词器

In [None]:
# Load the tokenizer that ships with the base model.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
# Pad on the right-hand side of each sequence.
tokenizer.padding_side = 'right'
# Pad with a token that is NOT the end-of-sequence token. The default
# language-modeling collator replaces every label equal to pad_token_id with
# -100 (ignored by the loss); if padding reused EOS, the EOS appended to each
# training example would be masked as well and the model would never be trained
# to stop generating. Fall back to EOS only when no unknown token is defined.
tokenizer.pad_token = tokenizer.unk_token if tokenizer.unk_token is not None else tokenizer.eos_token
# Append EOS to every encoded sequence so that training examples end with it.
# NOTE(review): the example row appended later in this notebook already spells
# out "<s>" / "</s>" in its text, so BOS/EOS may end up duplicated in the
# training data — confirm against the CSV contents.
tokenizer.add_eos_token = True

## LoRA 训练框架
###### 固定框架

In [None]:
# LoRA adapter definition.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,  # adapter rank; 32 is a possible alternative
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
)

# Prepare the quantized model for k-bit training, then wrap it with the adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), peft_config)

## 简单测试

In [None]:
def test_interaction(prompt):
    """Ask the current model a single question and return prompt plus reply.

    The prompt is wrapped in the ``<s>[INST] ... [/INST]`` instruction template
    and generation is capped at a total length of 150 tokens (prompt included).

    Args:
        prompt: The user question to place inside the instruction template.

    Returns:
        The templated prompt text followed by the generated continuation.
    """
    prompt_text = f"<s>[INST] {prompt} [/INST]"

    # The template already contains the BOS marker, and the tokenizer in this
    # notebook is configured with add_eos_token=True. Letting the tokenizer add
    # special tokens again would yield a doubled BOS and, worse, an EOS directly
    # after "[/INST]", i.e. the model would be asked to continue a sequence that
    # has already ended. Encode the text exactly as written instead.
    encoded = tokenizer(prompt_text, return_tensors="pt", add_special_tokens=False).to(model.device)

    output_ids = model.generate(
        **encoded,
        max_length=150,
        pad_token_id=tokenizer.pad_token_id,
    )[0]

    # Decode the whole sequence and the prompt part separately, then keep only
    # what was generated after the prompt so the original prompt text can be
    # returned verbatim in front of it.
    full_decoded = tokenizer.decode(output_ids, skip_special_tokens=True)
    prompt_decoded = tokenizer.decode(encoded["input_ids"][0], skip_special_tokens=True)
    return prompt_text + full_decoded[len(prompt_decoded):]


test_interaction("Does pineapple belong on pizza?")

## 训练集定义

In [None]:
# Upper bound on the number of training rows (on a P100 roughly 2.5 seconds / item / epoch).
train_records_to_use = 1400

# Read the templated instruction data and keep only the first
# `train_records_to_use` rows as a quick batch.
train_df = pd.read_csv(
    "/kaggle/input/inst-make-some-templatized-training-data-inst/instruct_train.csv"
).head(train_records_to_use)

# Print one row as a sanity check of the loaded data.
print(train_df.iloc[2])

In [None]:
# One hand-written training example in the same instruction template.
data_to_append = {
    'text': '<s>[INST] Does pineapple belong on pizza? [/INST] No - pineapple should never go on pizza. </s>'
}

# Repeat it ten times so that it registers even within a single epoch.
df_to_append = pd.DataFrame([data_to_append for _ in range(10)])

# Add the repeated rows after the existing training rows, renumbering the index.
train_df = pd.concat((train_df, df_to_append), ignore_index=True)

In [None]:
# Convert the pandas DataFrame into a `datasets.Dataset` for the trainer.
train_dataset = Dataset.from_pandas(train_df)
# Bare expression: shows the dataset summary as the cell output.
train_dataset

## 模型训练参数

In [None]:
# Training hyperparameters (TrainingArguments comes from the transformers package).
training_arguments = TrainingArguments(
    # Output and bookkeeping
    output_dir="./results",
    save_steps=25,
    logging_steps=25,
    report_to="none",
    # Run length: one or two epochs are enough, more tends to skew the model.
    num_train_epochs=1,
    max_steps=-1,
    # Batching
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # Optimizer
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # NOTE(review): with the "constant" scheduler the warmup_ratio below appears
    # to have no effect ("constant_with_warmup" is the variant that warms up) —
    # confirm which behaviour is intended.
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    # Mixed-precision training is switched off in both variants.
    fp16=False,
    bf16=False,
)

# Supervised fine-tuning trainer; the training dataset is supplied here and the
# column named "text" is used as the training text.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    dataset_text_field="text",
    max_seq_length=None,
    packing=False,
    peft_config=peft_config,
)

## 模型开始训练

In [None]:
# Run the fine-tuning loop with the trainer configured in the previous cell.
trainer.train()

## 模型保存

In [None]:
# Directory name (relative to the current working directory) for the saved weights.
new_model_name = "mistral_prompt_recovery_hold_the_pineapple"

# Write the model held by the trainer to that directory.
# NOTE(review): the model was wrapped with get_peft_model earlier, so this
# presumably stores only the LoRA adapter rather than the full base weights — confirm.
trainer.model.save_pretrained(new_model_name)