In [49]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, TaskType
from peft import get_peft_model
from peft import AutoPeftModelForCausalLM


from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

from transformers import pipeline

## Raw model

In [2]:
model_name = "Qwen/Qwen3-0.6B"

# load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

In [3]:
# prepare the model input
# prompt = """Give me the summary of the text: A large language model (LLM) is a type of machine learning model designed for natural language processing tasks such as language generation. LLMs are language models with many parameters, and are trained with self-supervised learning on a vast amount of text.
# The largest and most capable LLMs are generative pretrained transformers (GPTs). Modern models can be fine-tuned for specific tasks or guided by prompt engineering.[1] These models acquire predictive power regarding syntax, semantics, and ontologies[2] inherent in human language corpora, but they also inherit inaccuracies and biases present in the data they are trained in.[3]
# """
prompt = """我是一个程序小白，一直想学好学精一门编程。看网上说 python 好学，就打算决定学 python ，然后就在网上找视频学习，先学习了一个尚硅谷哪个 python ，我感觉还可以，就认认真真的学了。一些基础东西算是会了吧。想提高。大致的方向是先 web 方向，因为想自己搭建一个网站是一个刚需。但在网上找不到好的学习资源了，也不知道从哪里到哪里先学了。 前段时间看要先学前端，学全栈，在 bili 上搜了一圈也找不到一个合适的教程。 现在我想请各位大哥，大姐，弟弟，妹妹们，大神们给俺推荐一个好的学习教程或者学习方法及路线，越详细越好，教程给推荐好的免费的最好！
\n\nTL;DR:
"""
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # Switches between thinking and non-thinking modes. Default is True.
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# conduct text completion
generated_ids = model.generate(**model_inputs, max_new_tokens=32768)
output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :].tolist()

# parsing thinking content
try:
    # rindex finding 151668 (</think>)
    index = len(output_ids) - output_ids[::-1].index(151668)
except ValueError:
    index = 0

thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

print("thinking content:", thinking_content)
print("content:", content)

thinking content: 
content: 当然可以！你作为编程小白，学Python是一个很好的选择，尤其如果你想**先学Web开发**，这会是一个非常有前景的方向。以下是一些**推荐的学习资源、教程和学习路线**，希望能帮助你顺利入门并提升：

---

### 🧠 **推荐学习资源：**

#### 1. **免费学习资源**
- **Codecademy**（适合初学者）  
  - [Python入门教程](https://www.codecademy.com/learn/python)  
  - [Web开发入门](https://www.codecademy.com/learn/web)

- **Khan Academy**（适合系统学习）  
  - [Python](https://www.khanacademy.org)  
  - [Web开发](https://www.khanacademy.org)

- **Udemy**（适合系统学习）  
  - [Python编程入门](https://www.udemy.com/course/learn-python/)  
  - [Web开发入门](https://www.udemy.com/course/learn-web-development/)

- **CodePen**（在线项目实践）  
  - [Python项目实战](https://codepen.io/)  
  - [Web开发项目实战](https://codepen.io/)

- **LeetCode**（编程挑战）  
  - [Python入门](https://leetcode.com/learn/learn-python/)  
  - [Web开发入门](https://leetcode.com/learn/web)

---

#### 2. **推荐教程（全栈Web方向）**
- **W3Schools**  
  - [Python](https://www.w3schools.com/Python/)  
  - [JavaScript](https://www.w3schools.com/)

- **慕课网（MOOC）**  
  - [Python](https://www.i

## Peft model train

In [4]:
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
)

In [5]:
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

trainable params: 5,046,272 || all params: 601,096,192 || trainable%: 0.8395


In [6]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Qwen3ForCausalLM(
      (model): Qwen3Model(
        (embed_tokens): Embedding(151936, 1024)
        (layers): ModuleList(
          (0-27): 28 x Qwen3DecoderLayer(
            (self_attn): Qwen3Attention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=1024, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1024, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear(
 

In [7]:
dataset = load_dataset("trl-lib/tldr", split="train[:20000]")

# dataset = load_dataset("trl-lib/tldr", split="train")
dataset

Dataset({
    features: ['prompt', 'completion'],
    num_rows: 20000
})

In [8]:
# Adapted from trl.extras.dataset_formatting.instructions_formatting_function
# Converts dataset from prompt/completion format (not supported anymore)
# to the conversational format
def format_dataset(examples):
    if isinstance(examples["prompt"], list):
        output_texts = []
        for i in range(len(examples["prompt"])):
            converted_sample = [
                {"role": "user", "content": examples["prompt"][i]},
                {"role": "assistant", "content": examples["completion"][i]},
            ]
            output_texts.append(converted_sample)
        return {"messages": output_texts}
    else:
        converted_sample = [
            {"role": "user", "content": examples["prompt"]},
            {"role": "assistant", "content": examples["completion"]},
        ]
        return {"messages": converted_sample}

In [9]:
dataset = dataset.map(format_dataset).remove_columns(["prompt", "completion"])

In [10]:
dataset[0]

{'messages': [{'content': "SUBREDDIT: r/relationships\n\nTITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting\n\nPOST: Not sure if this belongs here but it's worth a try. \n\nBackstory:\nWhen I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. \n\nNow: Its been almost 3 years now and I've gotten better after counselling and mild anti de

In [11]:
import torch

torch.cuda.empty_cache()

sft_config = SFTConfig(
    ## GROUP 1: Memory usage
    # These arguments will squeeze the most out of your GPU's RAM
    # Checkpointing
    gradient_checkpointing=True,
    # this saves a LOT of memory
    # Set this to avoid exceptions in newer versions of PyTorch
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # Gradient Accumulation / Batch size
    # Actual batch (for updating) is same (1x) as micro-batch size
    gradient_accumulation_steps=1,
    # The initial (micro) batch size to start off with
    per_device_train_batch_size=1024 + 512,
    # If batch size would cause OOM, halves its size until it works
    auto_find_batch_size=True,
    ## GROUP 2: Dataset-related
    max_seq_length=64,
    # Dataset
    # packing a dataset means no padding is needed
    packing=True,
    ## GROUP 3: These are typical training parameters
    num_train_epochs=1,
    learning_rate=3e-4,
    # Optimizer
    # optim='adamw',
    ## GROUP 4: Logging parameters
    logging_steps=10,
    logging_dir="./logs",
    output_dir="./qwen3-0.6B-tldr-adapter",
    report_to="wandb",
)

In [12]:
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    args=sft_config,
    train_dataset=dataset,
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [13]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mmathewshen[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,3.5354
20,3.1689
30,3.1348
40,3.0673
50,3.0525
60,3.0278
70,3.0219
80,3.0467
90,3.0245
100,3.0275


TrainOutput(global_step=1243, training_loss=2.94467625575783, metrics={'train_runtime': 3310.4652, 'train_samples_per_second': 36.029, 'train_steps_per_second': 0.375, 'total_flos': 2.0404899409821696e+16, 'train_loss': 2.94467625575783})

In [None]:
trainer.push_to_hub()

In [44]:
# prepare the model input
# prompt = """A large language model (LLM) is a type of machine learning model designed for natural language processing tasks such as language generation. LLMs are language models with many parameters, and are trained with self-supervised learning on a vast amount of text.
# The largest and most capable LLMs are generative pretrained transformers (GPTs). Modern models can be fine-tuned for specific tasks or guided by prompt engineering.[1] These models acquire predictive power regarding syntax, semantics, and ontologies[2] inherent in human language corpora, but they also inherit inaccuracies and biases present in the data they are trained in.[3]
# \n\nTL;DR:
# """
model = AutoPeftModelForCausalLM.from_pretrained("./qwen3-0.6B-tldr-adapter/checkpoint-1243/").to(model.device)


prompt = """我是一个程序小白，一直想学好学精一门编程。看网上说 python 好学，就打算决定学 python ，然后就在网上找视频学习，先学习了一个尚硅谷哪个 python ，我感觉还可以，就认认真真的学了。一些基础东西算是会了吧。想提高。大致的方向是先 web 方向，因为想自己搭建一个网站是一个刚需。但在网上找不到好的学习资源了，也不知道从哪里到哪里先学了。 前段时间看要先学前端，学全栈，在 bili 上搜了一圈也找不到一个合适的教程。 现在我想请各位大哥，大姐，弟弟，妹妹们，大神们给俺推荐一个好的学习教程或者学习方法及路线，越详细越好，教程给推荐好的免费的最好！
\n\nTL;DR:
"""


messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # Switches between thinking and non-thinking modes. Default is True.
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# conduct text completion
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=128,
    temperature=1e-6,
)
output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :].tolist()

# parsing thinking content
try:
    # rindex finding 151668 (</think>)
    index = len(output_ids) - output_ids[::-1].index(151668)
except ValueError:
    index = 0

content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

print("content:", content)

content:  一个程序小白，想学好学精一门编程，想先学 python ，然后学 web ，然后学前端，最后学全栈。想提高。想请各位大哥，大姐，弟弟，妹妹们，大神们给俺推荐好的学习教程或学习方法及路线，越详细越好，教程给推荐好的免费的最好！


In [45]:
trainer.push_to_hub()



Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/20.2M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/6.03k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/MathewShen/qwen3-0.6B-tldr-adapter/commit/74d9c95bc90999ed61d58ea9603295b98c7d364a', commit_message='End of training', commit_description='', oid='74d9c95bc90999ed61d58ea9603295b98c7d364a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/MathewShen/qwen3-0.6B-tldr-adapter', endpoint='https://huggingface.co', repo_type='model', repo_id='MathewShen/qwen3-0.6B-tldr-adapter'), pr_revision=None, pr_num=None)

## Use peft model

In [50]:
generator = pipeline("text-generation", model="MathewShen/qwen3-0.6B-tldr-adapter", device="cuda")

Device set to use cuda


In [57]:
# question = """我是一个程序小白，一直想学好学精一门编程。看网上说 python 好学，就打算决定学 python ，然后就在网上找视频学习，先学习了一个尚硅谷哪个 python ，我感觉还可以，就认认真真的学了。一些基础东西算是会了吧。想提高。大致的方向是先 web 方向，因为想自己搭建一个网站是一个刚需。但在网上找不到好的学习资源了，也不知道从哪里到哪里先学了。 前段时间看要先学前端，学全栈，在 bili 上搜了一圈也找不到一个合适的教程。 现在我想请各位大哥，大姐，弟弟，妹妹们，大神们给俺推荐一个好的学习教程或者学习方法及路线，越详细越好，教程给推荐好的免费的最好！
# \n\nTL;DR:
# """

question = """最近在学 linux 和 docker,好多学习资料都是默认在 linux 环境，以后也想往偏运维方向转，想弄台便宜轻点的笔记本学 linux ，目前有个笔记本在家当台式机用了，虚拟机总感觉不够投入，大家有啥推荐的本子不，我想越便宜越好（不要太卡），目前感觉 chromebook 不错，装轻量级 linux 学习用够了，大家有啥推荐或者思路不.
\n\nTL;DR:
"""

In [58]:
output = generator(
    [{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False, temperature=0.001
)[0]
print(output["generated_text"])

<think>

</think>

 本子预算有限，想学 linux 和 docker，想弄台便宜轻点的笔记本学 linux，目前感觉 chromebook 不错，想越便宜越好。
