In [1]:
!nvidia-smi

Tue Dec  9 01:32:46 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.124.04             Driver Version: 570.124.04     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4090        On  |   00000000:31:00.0 Off |                  Off |
| 30%   33C    P5             62W /  450W |       1MiB /  24564MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import subprocess
import os

result = subprocess.run('bash -c "source /etc/network_turbo && env | grep proxy"', shell=True, capture_output=True, text=True)
output = result.stdout
for line in output.splitlines():
    if '=' in line:
        var, value = line.split('=', 1)
        os.environ[var] = value

In [3]:
import os
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

In [4]:
from modelscope import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
model_id = "qwen/Qwen3-4B-Instruct-2507"

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    cache_dir="/root/autodl-tmp",
    trust_remote_code=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    cache_dir="/root/autodl-tmp",
    trust_remote_code=True,
    device_map="auto",
)

print("OK")

Downloading Model from https://www.modelscope.cn to directory: /root/.cache/modelscope/hub/models/qwen/Qwen3-4B-Instruct-2507
Downloading Model from https://www.modelscope.cn to directory: /root/.cache/modelscope/hub/models/qwen/Qwen3-4B-Instruct-2507


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

OK


In [5]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=[
        "q_proj","k_proj","v_proj","o_proj",
        "gate_proj","up_proj","down_proj"
    ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)
model.print_trainable_parameters()

trainable params: 16,515,072 || all params: 4,038,983,168 || trainable%: 0.4089


In [6]:
def format_example(example):
    task = example["task"]
    ctx = example["context"]

    # 根据任务类型构建上下文
    if task == "IELTS_TASK1_POLISH":
        # Task1 有 topic、subject（可能为空）、image_description（可能为空）
        parts = []
        if ctx.get("topic"):
            parts.append(f"Topic: {ctx['topic']}")
        if ctx.get("subject"):
            parts.append(f"Subject: {ctx['subject']}")
        if ctx.get("image_description"):
            parts.append(f"Image Description: {ctx['image_description']}")
        context_text = "\n".join(parts)

    elif task == "IELTS_TASK2_POLISH":
        # Task2 只有 topic
        topic = ctx.get("topic", "")
        context_text = f"Topic: {topic}" if topic else ""

    else:
        # fallback（理论不会走到这里）
        context_text = ""

    # 构造 prompt
    prompt = f"""[TASK: {task}]

Rewrite the following student essay to improve clarity, coherence, grammar, and lexical sophistication.
Then provide specific suggestions explaining the main improvements.

[CONTEXT]
{context_text}

[ORIGINAL]
{example['original']}

[OUTPUT]
"""

    # 建议部分
    suggestions = "\n".join(f"- {s}" for s in example["suggestions"])

    completion = f"""[POLISHED]
{example['polished']}

[SUGGESTIONS]
{suggestions}
"""

    return {"prompt": prompt, "completion": completion}



In [7]:
from datasets import load_dataset, concatenate_datasets

task1 = load_dataset("json", data_files="task1_converted.jsonl")["train"]
task2 = load_dataset("json", data_files="task2_converted.jsonl")["train"]

dataset = concatenate_datasets([task1, task2]).shuffle(seed=42)


In [8]:
dataset = dataset.train_test_split(test_size=0.05, seed=42)

train_dataset = dataset["train"]
eval_dataset = dataset["test"]


In [9]:
train_dataset = train_dataset.map(format_example)
eval_dataset = eval_dataset.map(format_example)


In [10]:
keep_cols = ["prompt", "completion"]

train_dataset = train_dataset.remove_columns(
    [col for col in train_dataset.column_names if col not in keep_cols]
)

eval_dataset = eval_dataset.remove_columns(
    [col for col in eval_dataset.column_names if col not in keep_cols]
)


In [11]:
from transformers import TrainerCallback
import csv
import os

class LossLoggerCallback(TrainerCallback):
    def __init__(self, save_path="loss_log.csv"):
        self.save_path = save_path
        # 初始化文件并写入表头
        with open(self.save_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["step", "train_loss", "eval_loss"])

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs is None:
            return

        # 记录训练 loss
        train_loss = logs.get("loss")
        eval_loss = logs.get("eval_loss")
        step = state.global_step

        with open(self.save_path, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow([step, train_loss, eval_loss])
loss_logger = LossLoggerCallback(save_path="/root/autodl-tmp/loss_log_epoch_2.csv")


In [12]:
from transformers import (
    DataCollatorForSeq2Seq,
    TrainingArguments,
    Trainer,
)

# ------------------------------------------------------
# Step 1: tokenize function
# ------------------------------------------------------
def tokenize_example(example):
    prompt = example["prompt"]
    completion = example["completion"]

    # 1. tokenize prompt
    prompt_ids = tokenizer(prompt, add_special_tokens=True)
    # 2. tokenize completion
    completion_ids = tokenizer(completion, add_special_tokens=False)

    input_ids = prompt_ids["input_ids"] + completion_ids["input_ids"]
    attention_mask = [1] * len(input_ids)

    # 3. 构建 labels：prompt 部分全部 mask 掉（-100）
    labels = [-100] * len(prompt_ids["input_ids"]) + completion_ids["input_ids"]

    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }

train_tokenized = train_dataset.map(tokenize_example, remove_columns=train_dataset.column_names)
eval_tokenized = eval_dataset.map(tokenize_example, remove_columns=eval_dataset.column_names)

# ------------------------------------------------------
# Step 2: Data Collator（自动 pad）
# ------------------------------------------------------
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True,
)

# ------------------------------------------------------
# Step 3: TrainingArguments
# ------------------------------------------------------
training_args = TrainingArguments(
    output_dir="/root/autodl-tmp/sft-output",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,   # 等效 batch=16
    learning_rate=2e-5,
    num_train_epochs=2,
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,

    logging_steps=10,
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=200,
    save_total_limit=3,

    bf16=True,
    gradient_checkpointing=True,
    report_to="none",   # 避免自动连 wandb
)

# ------------------------------------------------------
# Step 4: Trainer 实例
# ------------------------------------------------------
trainer = Trainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train_tokenized,
    eval_dataset=eval_tokenized,
    data_collator=data_collator,
    callbacks=[loss_logger]
)




  trainer = Trainer(
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
The model is already on multiple devices. Skipping the move to device specified in `args`.


In [None]:
# ------------------------------------------------------
# Step 5: 开始训练
# ------------------------------------------------------
trainer.train()

# ------------------------------------------------------
# Step 6: 保存 LoRA Adapter
# ------------------------------------------------------
trainer.save_model("/root/autodl-tmp/sft-output/lora_epoch_2")
print("Training Finished. LoRA saved.")

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
50,1.5246,1.43547
100,1.1594,1.136895
150,1.1073,1.072289
200,1.0318,1.03656
250,1.0123,1.010856
300,0.9979,0.992202
350,0.9863,0.977367
400,0.988,0.965536
450,0.9187,0.956269
500,0.9343,0.948195


In [None]:
import os
os.system("/usr/bin/shutdown -h now")

In [None]:
def build_inference_prompt(sample):
    task = sample["task"]
    ctx = sample["context"]

    # ===== 构造上下文 =====
    if task == "IELTS_TASK1_POLISH":
        parts = []
        if ctx.get("topic"):
            parts.append(f"Topic: {ctx['topic']}")
        if ctx.get("subject"):
            parts.append(f"Subject: {ctx['subject']}")
        if ctx.get("image_description"):
            parts.append(f"Image Description: {ctx['image_description']}")
        context_text = "\n".join(parts)

    elif task == "IELTS_TASK2_POLISH":
        parts = []
        if ctx.get("topic"):
            parts.append(f"Topic: {ctx['topic']}")
        context_text = "\n".join(parts)

    else:
        context_text = ""

    # ===== 构造 prompt =====
    prompt = f"""[TASK: {task}]

Rewrite the following student essay to improve clarity, coherence, grammar, and lexical sophistication.
Then provide specific suggestions explaining the main improvements.

[CONTEXT]
{context_text}

[ORIGINAL]
{sample['original']}

[OUTPUT]
"""

    return prompt


In [None]:
device = "cuda"
@torch.no_grad()
def generate(messages):
    model_input = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(device)

    output = model.generate(
        model_input,
        max_new_tokens=2048,
        do_sample=False,
        return_dict_in_generate=True
    )

    # Only decode newly generated tokens
    new_tokens = output.sequences[:, model_input.shape[1]:]
    text = tokenizer.decode(new_tokens[0], skip_special_tokens=True)
    return text

In [None]:
model.eval()
sample = dataset["test"][1]   # 或 eval_dataset[i]
prompt = build_inference_prompt(sample)
messages = [
    {"role": "user", "content": prompt}
]
print(prompt)
output = generate(messages)
print(output)

In [None]:
messages = [
    {"role": "user", "content": "what is love?"}
]
print(messages)
output = generate(messages)
print(output)