In [13]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, TaskType
from peft import get_peft_model

from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

In [14]:
# Hugging Face Hub id of the base checkpoint used throughout this notebook.
model_name = "Qwen/Qwen3-0.6B"

# The tokenizer and the causal-LM weights are both fetched from the same checkpoint.
# "auto" leaves the choice of dtype and device placement to the library.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")

In [15]:
# Baseline run: ask the base model for a TL;DR of a forum post before any fine-tuning.
# Alternative English prompt, kept for reference:
# prompt = """Give me the summary of the text: A large language model (LLM) is a type of machine learning model designed for natural language processing tasks such as language generation. LLMs are language models with many parameters, and are trained with self-supervised learning on a vast amount of text.
# The largest and most capable LLMs are generative pretrained transformers (GPTs). Modern models can be fine-tuned for specific tasks or guided by prompt engineering.[1] These models acquire predictive power regarding syntax, semantics, and ontologies[2] inherent in human language corpora, but they also inherit inaccuracies and biases present in the data they are trained in.[3]
# """
prompt = """我是一个程序小白，一直想学好学精一门编程。看网上说 python 好学，就打算决定学 python ，然后就在网上找视频学习，先学习了一个尚硅谷哪个 python ，我感觉还可以，就认认真真的学了。一些基础东西算是会了吧。想提高。大致的方向是先 web 方向，因为想自己搭建一个网站是一个刚需。但在网上找不到好的学习资源了，也不知道从哪里到哪里先学了。 前段时间看要先学前端，学全栈，在 bili 上搜了一圈也找不到一个合适的教程。 现在我想请各位大哥，大姐，弟弟，妹妹们，大神们给俺推荐一个好的学习教程或者学习方法及路线，越详细越好，教程给推荐好的免费的最好！
\n\nTL;DR:
"""

# Wrap the prompt as a single user turn and render it with the model's chat template.
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,  # Switches between thinking and non-thinking modes. Default is True.
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Generate, then slice off the prompt tokens so only the newly produced ids remain.
generated_ids = model.generate(**model_inputs, max_new_tokens=32768)
output_ids = generated_ids[0][model_inputs.input_ids.shape[1]:].tolist()

# Split the output right after the LAST occurrence of token id 151668 (</think>).
# When the token is absent, everything is treated as the answer (index 0).
if 151668 in output_ids:
    index = len(output_ids) - output_ids[::-1].index(151668)
else:
    index = 0

thinking_ids, answer_ids = output_ids[:index], output_ids[index:]
thinking_content = tokenizer.decode(thinking_ids, skip_special_tokens=True).strip("\n")
content = tokenizer.decode(answer_ids, skip_special_tokens=True).strip("\n")

print("thinking content:", thinking_content)
print("content:", content)

thinking content: 
content: 当然可以！以下是一些**推荐的学习资源和路线**，适合你从**Python基础到Web开发**的全栈学习，同时注重**免费且高效**的教程：

---

### ✅ 一、推荐学习资源

#### 1. **免费 Python 教程**
- **Codecademy**（https://www.codecademy.com）  
  - 简单易学，适合初学者。
- **Udemy**（https://www.udemy.com）  
  - 有系统课程，适合全栈开发。
- **Coursera**（https://www.coursera.org）  
  - 提供专业课程，适合深入学习。
- **LeetCode**（https://leetcode.com）  
  - 适合练习编程，但需要一定时间。

#### 2. **免费 Web 开发教程**
- **W3Schools**（https://www.w3schools.com/）  
  - 简单易懂，适合入门。
- **MDN Web Docs**（https://developer.mozilla.org）  
  - 详细且适合学习。
- **Stack Overflow**（https://stackoverflow.com）  
  - 问答社区，适合找资源和讨论。

#### 3. **免费学习平台**
- **CodePen**（https://codepen.io）  
  - 可以在浏览器中直接运行代码。
- **JSFiddle**（https://jsfiddle.net）  
  - 可以用来练习基础语法和功能。
- **GitHub**（https://github.com）  
  - 可以学习项目代码，但需要一定时间。

---

### ✅ 二、推荐学习路线

#### 1. **基础 Python 学习**
- **从基础开始**：学变量、循环、条件语句、函数等。
- **推荐书籍**：
  - 《Python编程：从入门到实践》（作者：John K. Rees）  
    - 适合初学者，系统性强。
  - 《Python Crash Course》（作者：Ray Kurzweil）  
    - 简单易学，适合

## PEFT

In [16]:
# LoRA settings for causal-LM fine-tuning: rank r=8, lora_alpha=32 and a
# dropout of 0.1, applied to the linear projections named below.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=[
        # projections inside the self-attention block
        "q_proj", "k_proj", "v_proj", "o_proj",
        # presumably the feed-forward (MLP) projections -- confirm against the model printout
        "gate_proj", "up_proj", "down_proj",
    ],
)

In [17]:
# Wrap the base model with the adapters described by `peft_config`, then print
# how many parameters are trainable compared with the total.
model_lora = get_peft_model(model=model, peft_config=peft_config)
model_lora.print_trainable_parameters()

trainable params: 5,046,272 || all params: 601,096,192 || trainable%: 0.8395


In [18]:
# Show the module tree of `model` (the originally loaded object, not `model_lora`).
# The printout below contains `lora.Linear` wrappers, i.e. the LoRA layers are
# visible inside `model` itself after the get_peft_model call.
model

Qwen3ForCausalLM(
  (model): Qwen3Model(
    (embed_tokens): Embedding(151936, 1024)
    (layers): ModuleList(
      (0-27): 28 x Qwen3DecoderLayer(
        (self_attn): Qwen3Attention(
          (q_proj): lora.Linear(
            (base_layer): Linear(in_features=1024, out_features=2048, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.1, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=1024, out_features=8, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=8, out_features=2048, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear(
            (base_layer): Linear(in_features=1024, out_features=1024, bias=False)
            (lora_dropout): ModuleDict(
              (default): 

In [19]:
# Load only the first 100 rows of the train split of the TL;DR dataset,
# then display the Dataset summary (columns and row count).
dataset = load_dataset(path="trl-lib/tldr", split="train[:100]")
dataset

Dataset({
    features: ['prompt', 'completion'],
    num_rows: 100
})

In [20]:
# Adapted from trl.extras.dataset_formatting.instructions_formatting_function
# Converts dataset from prompt/completion format (not supported anymore)
# to the conversational format
def format_dataset(examples):
    """Convert prompt/completion data to the conversational ``messages`` format.

    Args:
        examples: Mapping with ``"prompt"`` and ``"completion"`` entries. Each
            entry is either a single value (one example) or a list (a batch).

    Returns:
        ``{'messages': ...}`` where the value is one two-turn conversation
        (user prompt, assistant completion) for a single example, or a list
        of such conversations, one per prompt, for a batch.
    """
    def as_conversation(user_text, assistant_text):
        # One user turn followed by one assistant turn.
        return [
            {"role": "user", "content": user_text},
            {"role": "assistant", "content": assistant_text},
        ]

    # Single (non-batched) example: build exactly one conversation.
    if not isinstance(examples["prompt"], list):
        return {'messages': as_conversation(examples["prompt"], examples["completion"])}

    # Batched input: pair each prompt with the completion at the same position.
    return {
        'messages': [
            as_conversation(user_text, examples["completion"][position])
            for position, user_text in enumerate(examples["prompt"])
        ]
    }

In [21]:
# Convert every row to the conversational format and drop the source columns.
# `dataset` is overwritten in place, so the guard keeps this cell safe to re-run:
# once the 'prompt'/'completion' columns are gone, mapping again would fail.
if "messages" not in dataset.column_names:
    dataset = dataset.map(format_dataset).remove_columns(['prompt', 'completion'])

In [25]:
# Peek at the first converted row to check the 'messages' structure.
dataset[0]

{'messages': [{'content': "SUBREDDIT: r/relationships\n\nTITLE: I (f/22) have to figure out if I want to still know these girls or not and would hate to sound insulting\n\nPOST: Not sure if this belongs here but it's worth a try. \n\nBackstory:\nWhen I (f/22) went through my first real breakup 2 years ago because he needed space after a year of dating roand  it effected me more than I thought. It was a horrible time in my life due to living with my mother and finally having the chance to cut her out of my life. I can admit because of it was an emotional wreck and this guy was stable and didn't know how to deal with me. We ended by him avoiding for a month or so after going to a festival with my friends. When I think back I wish he just ended. So after he ended it added my depression I suffered but my friends helped me through it and I got rid of everything from him along with cutting contact. \n\nNow: Its been almost 3 years now and I've gotten better after counselling and mild anti de

In [22]:
sft_config = SFTConfig(
    # --- Memory usage -------------------------------------------------------
    # Gradient checkpointing saves a lot of memory; use_reentrant=False is set
    # to avoid exceptions in newer versions of PyTorch.
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # No accumulation: the update batch equals the micro-batch (1x).
    gradient_accumulation_steps=1,
    # Initial micro-batch size; with auto_find_batch_size it is halved on OOM
    # until training fits.
    per_device_train_batch_size=64,
    auto_find_batch_size=True,

    # --- Dataset ------------------------------------------------------------
    max_seq_length=64,
    # Packing the dataset means no padding is needed.
    packing=True,

    # --- Typical training parameters ------------------------------------------
    num_train_epochs=10,
    learning_rate=3e-4,
    # Optimizer: not overridden here (the `optim` argument is left unset).

    # --- Logging / output -----------------------------------------------------
    logging_steps=10,
    logging_dir="./logs",
    output_dir="./qwen3-0.6B-tldr-adapter",
    report_to="none",
)

In [23]:
# Train the PEFT wrapper (`model_lora`) rather than the bare `model` object.
# `model` already carries the injected LoRA layers, but it is still a plain
# transformers model, so the trainer would not treat the run as adapter
# training. Passing the PeftModel is intended to make the checkpoints under
# `output_dir` adapter checkpoints.
# NOTE(review): confirm the checkpoint contents with the installed TRL/PEFT versions.
trainer = SFTTrainer(
    model=model_lora,
    processing_class=tokenizer,
    args=sft_config,
    train_dataset=dataset,
)

In [None]:
# Run the fine-tuning loop; the table printed below reports the training loss
# at the logged steps (10, 20, 30, ...).
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,3.5386
20,3.2426
30,2.9079
40,2.9027


In [None]:
# Fine-tuned run: reload the trained checkpoint and summarise the same post as
# the baseline cell so the two outputs can be compared.
# Alternative English prompt, kept for reference:
# prompt = """A large language model (LLM) is a type of machine learning model designed for natural language processing tasks such as language generation. LLMs are language models with many parameters, and are trained with self-supervised learning on a vast amount of text.
# The largest and most capable LLMs are generative pretrained transformers (GPTs). Modern models can be fine-tuned for specific tasks or guided by prompt engineering.[1] These models acquire predictive power regarding syntax, semantics, and ontologies[2] inherent in human language corpora, but they also inherit inaccuracies and biases present in the data they are trained in.[3]
# \n\nTL;DR:
# """

# Load through the Auto class instead of calling the classmethod via the
# in-memory `model` instance: `model` is only used here for its device.
# torch_dtype="auto" mirrors how the base model was loaded, so the baseline and
# fine-tuned runs are loaded the same way.
# NOTE(review): the checkpoint-200 directory must exist; the step number depends on the training run.
model_lora = AutoModelForCausalLM.from_pretrained(
    "./qwen3-0.6B-tldr-adapter/checkpoint-200/",
    torch_dtype="auto",
).to(model.device)

prompt = """我是一个程序小白，一直想学好学精一门编程。看网上说 python 好学，就打算决定学 python ，然后就在网上找视频学习，先学习了一个尚硅谷哪个 python ，我感觉还可以，就认认真真的学了。一些基础东西算是会了吧。想提高。大致的方向是先 web 方向，因为想自己搭建一个网站是一个刚需。但在网上找不到好的学习资源了，也不知道从哪里到哪里先学了。 前段时间看要先学前端，学全栈，在 bili 上搜了一圈也找不到一个合适的教程。 现在我想请各位大哥，大姐，弟弟，妹妹们，大神们给俺推荐一个好的学习教程或者学习方法及路线，越详细越好，教程给推荐好的免费的最好！
\n\nTL;DR:
"""


# Wrap the prompt as a single user turn and render it with the chat template.
messages = [
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False # Switches between thinking and non-thinking modes. Default is True.
)
# Put the inputs on the same device as the model that will consume them.
model_inputs = tokenizer([text], return_tensors="pt").to(model_lora.device)

# conduct text completion
# Greedy decoding (do_sample=False) gives deterministic output directly; it
# replaces the near-zero temperature that was used as a stand-in for it.
generated_ids = model_lora.generate(
    **model_inputs,
    max_new_tokens=32768,
    do_sample=False,
)
# Keep only the newly generated ids (drop the prompt tokens).
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()

# parsing thinking content
try:
    # rindex finding 151668 (</think>)
    index = len(output_ids) - output_ids[::-1].index(151668)
except ValueError:
    # No </think> token in the output: treat everything as the answer.
    index = 0

thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

print("thinking content:", thinking_content)
print("content:", content)