In [1]:
import os
os.environ["BITSANDBYTES_NOWELCOME"] = "1"

import torch
device = "cuda" if torch.cuda.is_available() else "cpu"

from datasets import load_dataset


device

  from .autonotebook import tqdm as notebook_tqdm


'cuda'

In [2]:
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
model_path = "D:\HuggingFace\hub\models--Qwen--Qwen2.5-1.5B-Instruct\snapshots\989aa7980e4cf806f80c7fef2b1adb7bc71aa306"

In [3]:
dataset = load_dataset("nbertagnolli/counsel-chat")

Repo card metadata block was not found. Setting CardData to empty.


In [None]:
dataset['train'][2774]

In [4]:
# Convert dataset to OAI messages
 
def create_conversation(sample):
  return {
    "messages": [
      {"role": "system", "content": "You are a mental therapist."},
      {"role": "user", "content": sample["questionText"]},
      {"role": "assistant", "content": sample["answerText"]}
    ]
  }

In [5]:
dataset = dataset.map(create_conversation, remove_columns=dataset["train"].column_names)

In [6]:
print(dataset["train"][2774])

{'messages': [{'content': 'You are a mental therapist.', 'role': 'system'}, {'content': 'What are some difficulties that a counselor can encounter when dealing with a client?', 'role': 'user'}, {'content': 'Each counselor will have their own list of "difficulties" in doing therapy work with a client. \xa0Even if clinically trained similarly, since counselors are human then their response to your question will reflect their unique differences as humans.On my list is when the emotional pain I feel for someone describing some type of injustice or unfair treatment by another, feels very deep.Sometimes I feel like avoiding the pain I feel by asking questions which will steer the conversation away from the painful areas the client talks about.What in fact is necessary to clear out their pain, is to step further into so as to realize their emotional pain isn\'t greater than who they are.', 'role': 'assistant'}]}


In [7]:
print(dataset["train"][345]["messages"])

[{'content': 'You are a mental therapist.', 'role': 'system'}, {'content': "I'm a teenager. My dad has been jail for the last five years. It's tough, but my mom really tries to give a normal life to my two sisters, my brother, and I. I feel like I took upon a parent role when I'm the second youngest, and I'm not stable. My mother and sisters say I'm overdramatic. I’m just so hurt, and I keep breaking down.", 'role': 'user'}, {'content': "It sounds like you have a lot of weight on your shoulders.I'm not sure what you mean when you say you're not stable and you are breaking down. If you are crying because you're sad, that is okay. If you are crying a lot or having trouble eating or sleeping, that's different than just crying sometimes because you are sad. It may be helpful to talk with a local mental health professional in your area. They can help you to figure out what you could do differently to have the role in your family that you would like while also supporting yourself and what yo

In [8]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import setup_chat_format
# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
    torch_dtype=torch.bfloat16
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.padding_side = 'right' # to prevent warnings
 
# # set chat template to OAI chatML, remove if you start from a fine-tuned model
model, tokenizer = setup_chat_format(model, tokenizer)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [9]:
from peft import LoraConfig
 
# LoRA config based on QLoRA paper & Sebastian Raschka experiment
peft_config = LoraConfig(
        lora_alpha=128,
        lora_dropout=0.05,
        r=256,
        bias="none",
        target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],  # 适配 Qwen 结构
        task_type="CAUSAL_LM",
)

In [10]:
from transformers import TrainingArguments
 
args = TrainingArguments(
    output_dir="code-llama-7b-text-to-sql", # directory to save and repository id
    num_train_epochs=3,                     # number of training epochs
    per_device_train_batch_size=1,          # batch size per device during training
    gradient_accumulation_steps=2,          # number of steps before performing a backward/update pass
    gradient_checkpointing=True,            # use gradient checkpointing to save memory
    optim="adamw_torch_fused",              # use fused adamw optimizer
    logging_steps=1,                       # log every 10 steps
    save_strategy="no",                  # save checkpoint every epoch
    
    save_steps=1,
    save_total_limit=2,

    learning_rate=3e-4,                     # learning rate, based on QLoRA paper
    bf16=True,                              # use bfloat16 precision
    tf32=True,                              # use tf32 precision
    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper
    lr_scheduler_type="constant",           # use constant learning rate scheduler
    push_to_hub=True,                       # push model to hub
    report_to="tensorboard",                # report metrics to tensorboard
)

In [None]:
print(f"Dataset size: {len(dataset['train'])}")


In [13]:
def format_to_llama(example):
    messages = example["messages"]
    formatted_text = "<s>"  # 开始标记
    for message in messages:
        role = message["role"]
        content = message["content"]
        if role == "system":
            formatted_text += f"[INST]<<SYS>>{content}<</SYS>>\n"
        elif role == "user":
            formatted_text += f"{content} [/INST]"
        elif role == "assistant":
            formatted_text += f"{content} </s>"
    return {"text": formatted_text}

In [17]:
dataset = dataset.map(format_to_llama)

Map: 100%|██████████| 2775/2775 [00:00<00:00, 15910.25 examples/s]


In [18]:
print(dataset["train"][2774]["text"])

<s>[INST]<<SYS>>You are a mental therapist.<</SYS>>
What are some difficulties that a counselor can encounter when dealing with a client? [/INST]Each counselor will have their own list of "difficulties" in doing therapy work with a client.  Even if clinically trained similarly, since counselors are human then their response to your question will reflect their unique differences as humans.On my list is when the emotional pain I feel for someone describing some type of injustice or unfair treatment by another, feels very deep.Sometimes I feel like avoiding the pain I feel by asking questions which will steer the conversation away from the painful areas the client talks about.What in fact is necessary to clear out their pain, is to step further into so as to realize their emotional pain isn't greater than who they are. </s>


In [26]:
from trl import SFTTrainer
 
max_seq_length = 3072 # max sequence length for model and packing of the dataset
 
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset["train"],
    peft_config=peft_config,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    packing=True,
    dataset_text_field="text",  # 指定训练字段
    dataset_kwargs={
        "add_special_tokens": False,  # template with special tokens
        "append_concat_token": False, # No need to add additional separator token
    }
)

In [None]:
import torch

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
non_trainable_params = total_params - trainable_params

print(f"Total parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")
print(f"Non-trainable parameters: {non_trainable_params}")
print(f"Trainable ratio: {trainable_params / total_params:.2%}")

In [27]:
trainer.train()

 89%|████████▉ | 338/378 [3:11:53<23:31, 35.30s/it]

{'loss': 1.2343, 'learning_rate': 0.0003, 'epoch': 2.68}


 90%|████████▉ | 339/378 [3:12:28<22:52, 35.18s/it]

{'loss': 1.3407, 'learning_rate': 0.0003, 'epoch': 2.69}


 90%|████████▉ | 340/378 [3:13:02<22:07, 34.93s/it]

{'loss': 1.0989, 'learning_rate': 0.0003, 'epoch': 2.7}


 90%|█████████ | 341/378 [3:13:36<21:25, 34.76s/it]

{'loss': 1.0259, 'learning_rate': 0.0003, 'epoch': 2.71}


 90%|█████████ | 342/378 [3:14:11<20:46, 34.64s/it]

{'loss': 1.2048, 'learning_rate': 0.0003, 'epoch': 2.71}


 91%|█████████ | 343/378 [3:14:45<20:09, 34.56s/it]

{'loss': 1.1544, 'learning_rate': 0.0003, 'epoch': 2.72}


 91%|█████████ | 344/378 [3:15:19<19:32, 34.49s/it]

{'loss': 1.2, 'learning_rate': 0.0003, 'epoch': 2.73}


 91%|█████████▏| 345/378 [3:15:54<18:56, 34.45s/it]

{'loss': 1.1658, 'learning_rate': 0.0003, 'epoch': 2.74}


 92%|█████████▏| 346/378 [3:16:28<18:21, 34.43s/it]

{'loss': 0.9663, 'learning_rate': 0.0003, 'epoch': 2.75}


 92%|█████████▏| 347/378 [3:17:03<17:46, 34.41s/it]

{'loss': 1.3009, 'learning_rate': 0.0003, 'epoch': 2.75}


 92%|█████████▏| 348/378 [3:17:37<17:11, 34.39s/it]

{'loss': 1.0949, 'learning_rate': 0.0003, 'epoch': 2.76}


 92%|█████████▏| 349/378 [3:18:11<16:34, 34.30s/it]

{'loss': 1.0864, 'learning_rate': 0.0003, 'epoch': 2.77}


 93%|█████████▎| 350/378 [3:18:45<15:58, 34.22s/it]

{'loss': 1.2548, 'learning_rate': 0.0003, 'epoch': 2.78}


 93%|█████████▎| 351/378 [3:19:19<15:22, 34.17s/it]

{'loss': 1.2219, 'learning_rate': 0.0003, 'epoch': 2.79}


 93%|█████████▎| 352/378 [3:19:53<14:47, 34.13s/it]

{'loss': 1.3564, 'learning_rate': 0.0003, 'epoch': 2.79}


 93%|█████████▎| 353/378 [3:20:27<14:12, 34.10s/it]

{'loss': 1.2383, 'learning_rate': 0.0003, 'epoch': 2.8}


 94%|█████████▎| 354/378 [3:21:01<13:38, 34.09s/it]

{'loss': 1.2226, 'learning_rate': 0.0003, 'epoch': 2.81}


 94%|█████████▍| 355/378 [3:21:35<13:03, 34.08s/it]

{'loss': 1.5502, 'learning_rate': 0.0003, 'epoch': 2.82}


 94%|█████████▍| 356/378 [3:22:09<12:29, 34.07s/it]

{'loss': 1.1369, 'learning_rate': 0.0003, 'epoch': 2.83}


 94%|█████████▍| 357/378 [3:22:43<11:55, 34.06s/it]

{'loss': 1.1887, 'learning_rate': 0.0003, 'epoch': 2.83}


 95%|█████████▍| 358/378 [3:23:17<11:21, 34.05s/it]

{'loss': 1.3483, 'learning_rate': 0.0003, 'epoch': 2.84}


 95%|█████████▍| 359/378 [3:23:51<10:46, 34.05s/it]

{'loss': 1.2353, 'learning_rate': 0.0003, 'epoch': 2.85}


 95%|█████████▌| 360/378 [3:24:25<10:12, 34.05s/it]

{'loss': 1.1983, 'learning_rate': 0.0003, 'epoch': 2.86}


 96%|█████████▌| 361/378 [3:24:59<09:38, 34.04s/it]

{'loss': 1.1749, 'learning_rate': 0.0003, 'epoch': 2.87}


 96%|█████████▌| 362/378 [3:25:34<09:04, 34.04s/it]

{'loss': 1.1676, 'learning_rate': 0.0003, 'epoch': 2.87}


 96%|█████████▌| 363/378 [3:26:08<08:30, 34.04s/it]

{'loss': 1.0476, 'learning_rate': 0.0003, 'epoch': 2.88}


 96%|█████████▋| 364/378 [3:26:42<07:56, 34.04s/it]

{'loss': 1.171, 'learning_rate': 0.0003, 'epoch': 2.89}


 97%|█████████▋| 365/378 [3:27:16<07:22, 34.04s/it]

{'loss': 1.1229, 'learning_rate': 0.0003, 'epoch': 2.9}


 97%|█████████▋| 366/378 [3:27:50<06:48, 34.04s/it]

{'loss': 1.2351, 'learning_rate': 0.0003, 'epoch': 2.9}


 97%|█████████▋| 367/378 [3:28:24<06:14, 34.04s/it]

{'loss': 1.2292, 'learning_rate': 0.0003, 'epoch': 2.91}


 97%|█████████▋| 368/378 [3:28:58<05:40, 34.05s/it]

{'loss': 1.1028, 'learning_rate': 0.0003, 'epoch': 2.92}


 98%|█████████▊| 369/378 [3:29:32<05:06, 34.04s/it]

{'loss': 1.4216, 'learning_rate': 0.0003, 'epoch': 2.93}


 98%|█████████▊| 370/378 [3:30:06<04:32, 34.04s/it]

{'loss': 1.2382, 'learning_rate': 0.0003, 'epoch': 2.94}


 98%|█████████▊| 371/378 [3:30:40<03:58, 34.04s/it]

{'loss': 1.3067, 'learning_rate': 0.0003, 'epoch': 2.94}


 98%|█████████▊| 372/378 [3:31:14<03:24, 34.04s/it]

{'loss': 1.3141, 'learning_rate': 0.0003, 'epoch': 2.95}


 99%|█████████▊| 373/378 [3:31:48<02:50, 34.03s/it]

{'loss': 1.4065, 'learning_rate': 0.0003, 'epoch': 2.96}


 99%|█████████▉| 374/378 [3:32:22<02:16, 34.04s/it]

{'loss': 1.0921, 'learning_rate': 0.0003, 'epoch': 2.97}


 99%|█████████▉| 375/378 [3:32:56<01:42, 34.04s/it]

{'loss': 1.1479, 'learning_rate': 0.0003, 'epoch': 2.98}


 99%|█████████▉| 376/378 [3:33:30<01:08, 34.05s/it]

{'loss': 1.2905, 'learning_rate': 0.0003, 'epoch': 2.98}


100%|█████████▉| 377/378 [3:34:04<00:34, 34.05s/it]

{'loss': 1.2817, 'learning_rate': 0.0003, 'epoch': 2.99}


100%|██████████| 378/378 [3:34:38<00:00, 34.07s/it]

{'loss': 1.3414, 'learning_rate': 0.0003, 'epoch': 3.0}
{'train_runtime': 12878.7358, 'train_samples_per_second': 0.059, 'train_steps_per_second': 0.029, 'train_loss': 1.6323963034720648, 'epoch': 3.0}





TrainOutput(global_step=378, training_loss=1.6323963034720648, metrics={'train_runtime': 12878.7358, 'train_samples_per_second': 0.059, 'train_steps_per_second': 0.029, 'train_loss': 1.6323963034720648, 'epoch': 3.0})

In [None]:

trainer.save_model("saved_model")  # 保存当前模型到 "saved_model" 文件夹
tokenizer.save_pretrained("saved_model")  # 保存 tokenizer

In [None]:
for name, module in model.named_modules():
    print(name)


In [None]:
'''def format_data(sample):
    # 获取 messages 并解包
    messages = sample.get("messages", [])

    # 确保 messages 是 list 且非空
    if not isinstance(messages, list) or not messages:
        print(f"Skipping due to invalid messages format: {sample}")
        return None

    # 如果 messages 是双重 list，解包它
    if isinstance(messages[0], list):
        messages = messages[0]  # 取出内部 list 作为最终的 messages

    # 遍历 messages，确保所有字段不是 None
    for msg in messages:
        if msg["content"] is None:
            if msg["role"] == "assistant":
                msg["content"] = "I'm here to help. Could you tell me more about what you're experiencing?"
            else:
                msg["content"] = "No input provided."

    try:
        # 应用 tokenizer
        formatted_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
        tokenized = tokenizer(formatted_text, truncation=True, padding="max_length", max_length=512)["input_ids"]
        return {"input_ids": tokenized}
    except Exception as e:
        print(f"Error processing sample: {messages}")
        print(f"Exception: {e}")
        return None

# 重新预处理数据
train_dataset = dataset["train"].map(format_data)'''
