In [None]:
# =======================================================================
# 单元格 1: 安装所有必需的库
# =======================================================================
print("Installing libraries for QLoRA finetuning...")
!pip install -q -U transformers peft bitsandbytes datasets accelerate ipdb wandb
print("Installation complete.")
# import ipdb  #for step run
# ipdb.set_trace()

In [6]:
# 放在 notebook 比较靠前的位置
import os
from kaggle_secrets import UserSecretsClient
import wandb
try:
    user_secrets = UserSecretsClient()
    wandb_api_key = user_secrets.get_secret("WANDB_API_KEY")
    wandb.login(key=wandb_api_key)

    # (可选) 将 W&B 设置为静默模式，减少不必要的输出
    os.environ["WANDB_SILENT"] = "true"

    print("Successfully logged into W&B.")
except Exception as e:
    print(f"Could not log in to W&B. Please ensure the secret 'WANDB_API_KEY' is set correctly. Error: {e}")

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mhotococoalj[0m ([33mhtcca[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Successfully logged into W&B.


In [7]:
# =======================================================================
# 单元格 2: 登录 Hugging Face Hub
# =======================================================================
import os
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient

try:
    user_secrets = UserSecretsClient()
    hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
    print("Hugging Face token found. Logging in...")
    login(token=hf_token)
    print("Login successful.")
except Exception as e:
    print(
        "Could not log in to Hugging Face. Please ensure HUGGINGFACE_TOKEN is set correctly."
    )
    print(f"Error: {e}")

Hugging Face token found. Logging in...
Login successful.


In [8]:
# =======================================================================
# 单元格 3: 配置所有参数
# =======================================================================
class TrainingConfig:
    # MODEL_ID = "google/gemma-3-270m-it"
    MODEL_ID = "google/gemma-3-1b-it"
    # MODEL_ID = "google/gemma-3-4b-it"
    # MODEL_ID = "llama3.1:8b"
    # MODEL_ID = "phi4-mini:3.8b"
    DATA_FILE_PATH = (
        "/kaggle/input/test01/training_data_for_agent.jsonl"  # <-- 请务必修改为您的路径
    )
    OUTPUT_DIR = f"/kaggle/working/{MODEL_ID.replace('/','_')}_qlora_finetuned"


print("Training Configuration:")
print(f"  - Model: {TrainingConfig.MODEL_ID}")
print(f"  - Data file: {TrainingConfig.DATA_FILE_PATH}")
print(f"  - Output directory: {TrainingConfig.OUTPUT_DIR}")

Training Configuration:
  - Model: google/gemma-3-1b-it
  - Data file: /kaggle/input/test01/training_data_for_agent.jsonl
  - Output directory: /kaggle/working/google_gemma-3-1b-it_qlora_finetuned


In [9]:
# =======================================================================
# 单元格 4: 主要的训练逻辑
# =======================================================================
import torch
import json
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# --- 数据加载和处理部分 ---
print(f"Loading dataset from {TrainingConfig.DATA_FILE_PATH}...")
dataset = load_dataset("json", data_files=TrainingConfig.DATA_FILE_PATH, split="train")
print(f"Dataset loaded with {len(dataset)} records.")

tokenizer = AutoTokenizer.from_pretrained(TrainingConfig.MODEL_ID)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def tokenize_function(examples):
    texts = []
    for prompt, tool_calls in zip(examples["prompt"], examples["tool_calls"]):
        completion_obj = {"tool_calls": tool_calls}
        completion_str = json.dumps(completion_obj, ensure_ascii=False)
        text = f"<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n{completion_str}<end_of_turn>"
        texts.append(text)
    tokenized_output = tokenizer(
        texts, padding="longest", truncation=True, max_length=512
    )
    tokenized_output["labels"] = [x[:] for x in tokenized_output["input_ids"]]
    return tokenized_output


print("Tokenizing dataset...")
tokenized_dataset = dataset.map(
    tokenize_function, batched=True, remove_columns=dataset.column_names
)
print("Tokenization complete.")

# --- QLoRA 配置 ---
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
lora_config = LoraConfig(
    r=16,  # LoRA rank, 可以设为 8, 16, 32等
    lora_alpha=32,  # LoRA alpha
    lora_dropout=0.05,
    target_modules=[
        "q_proj",
        "o_proj",
        "k_proj",
        "v_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    task_type="CAUSAL_LM",
)

# --- 加载并准备模型 (采纳建议进行修改) ---
print("Loading model with QLoRA configuration and best practices...")
model = AutoModelForCausalLM.from_pretrained(
    TrainingConfig.MODEL_ID,
    quantization_config=quantization_config,
    device_map=0,
    torch_dtype=torch.float16,
    attn_implementation="eager",  # <-- 修改1: 使用 eager attention
    # use_cache=False,  # <-- 修改2: 明确禁用 use_cache,没有这个参数
)
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
print("Model prepared for QLoRA training.")
model.print_trainable_parameters()

2025-08-24 05:42:15.813165: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1756014135.840458      90 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1756014135.847253      90 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading dataset from /kaggle/input/test01/training_data_for_agent.jsonl...


Generating train split: 0 examples [00:00, ? examples/s]

Dataset loaded with 88 records.


tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Tokenizing dataset...


Map:   0%|          | 0/88 [00:00<?, ? examples/s]

Tokenization complete.
Loading model with QLoRA configuration and best practices...


config.json:   0%|          | 0.00/899 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

Model prepared for QLoRA training.
trainable params: 13,045,760 || all params: 1,012,931,712 || trainable%: 1.2879


In [14]:
# --- 训练参数 (采纳建议进行修改) ---
training_args = TrainingArguments(
    output_dir=TrainingConfig.OUTPUT_DIR,
    num_train_epochs=8,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    logging_strategy="steps",
    logging_steps=1,
    save_strategy="epoch",
    dataloader_num_workers=0,
    fp16=True,
    # --- 修改3: 解决 use_reentrant 警告 ---
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # 禁用 wandb，防止卡死
    # report_to="none",
    report_to=["wandb"],
    # report_to=["tensorboard","wandb"], # <--- 修改这里
    logging_dir=f"{TrainingConfig.OUTPUT_DIR}/logs", # 指定日志目录
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

In [15]:
# --- 开始训练 ---
print("Starting final training run...")
trainer.train()
print("Training finished.")

# --- 保存最终的 LoRA 适配器 ---
print(f"Saving final model adapters to {TrainingConfig.OUTPUT_DIR}...")
trainer.save_model(TrainingConfig.OUTPUT_DIR)
tokenizer.save_pretrained(TrainingConfig.OUTPUT_DIR)
print("Script finished successfully.")

Starting final training run...


Step,Training Loss
1,0.5254
2,2.5781
3,0.4786
4,0.3913
5,0.3488
6,0.3446
7,0.2567
8,0.2528
9,0.2452
10,0.1975


Training finished.
Saving final model adapters to /kaggle/working/google_gemma-3-1b-it_qlora_finetuned...
Script finished successfully.


In [16]:
for i in trainer.state.log_history:
    print(i)
    
# 从 log_history 中提取所有记录的 loss 值
all_losses = [log['loss'] for log in trainer.state.log_history if 'loss' in log]

# 计算真实的平均 loss
if all_losses:
    true_average_loss = sum(all_losses) / len(all_losses)
    print(f"\\nManually Calculated True Average Training Loss: {true_average_loss}")
else:
    print("\\nNo loss values were found in the log history.")

{'loss': 0.5254, 'grad_norm': 111501.5078125, 'learning_rate': 0.0002, 'epoch': 0.36363636363636365, 'step': 1}
{'loss': 2.5781, 'grad_norm': 9964107.0, 'learning_rate': 0.00019333333333333333, 'epoch': 0.7272727272727273, 'step': 2}
{'loss': 0.4786, 'grad_norm': 223855.265625, 'learning_rate': 0.0001866666666666667, 'epoch': 1.0, 'step': 3}
{'loss': 0.3913, 'grad_norm': 181938.9375, 'learning_rate': 0.00018, 'epoch': 1.3636363636363638, 'step': 4}
{'loss': 0.3488, 'grad_norm': 196881.3125, 'learning_rate': 0.00017333333333333334, 'epoch': 1.7272727272727273, 'step': 5}
{'loss': 0.3446, 'grad_norm': 132871.953125, 'learning_rate': 0.0001666666666666667, 'epoch': 2.0, 'step': 6}
{'loss': 0.2567, 'grad_norm': 147286.703125, 'learning_rate': 0.00016, 'epoch': 2.3636363636363638, 'step': 7}
{'loss': 0.2528, 'grad_norm': 104181.0703125, 'learning_rate': 0.00015333333333333334, 'epoch': 2.7272727272727275, 'step': 8}
{'loss': 0.2452, 'grad_norm': 436097.65625, 'learning_rate': 0.000146666666

In [None]:
#it does not work
#%load_ext tensorboard
#%tensorboard --logdir "{TrainingConfig.OUTPUT_DIR}/logs"

In [17]:
# =======================================================================
# 合并基础模型与 LoRA 适配器
# =======================================================================
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
 
# --- 配置路径 ---
# 基础模型ID (必须和你训练时用的一致)
base_model_id = TrainingConfig.MODEL_ID

# 你训练好的 LoRA 适配器路径 (即训练的输出目录)
adapter_path = TrainingConfig.OUTPUT_DIR

# 定义一个新目录，用于存放合并后的完整模型
merged_model_path = f"/kaggle/working/{base_model_id.replace('/', '_')}_full_merged"

print(f"Base model: {base_model_id}")
print(f"Adapter path: {adapter_path}")
print(f"Merged model output path: {merged_model_path}")


# --- 加载并合并 ---
print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

print("Loading LoRA adapter...")
# 加载适配器并将其应用到基础模型上
peft_model = PeftModel.from_pretrained(base_model, adapter_path)

print("Merging adapter into the base model...")
# 执行合并，然后卸载适配器层，得到一个标准的 Transformer 模型
merged_model = peft_model.merge_and_unload()
print("Merge complete.")

# --- 保存完整的、可部署的模型 ---
print(f"Saving merged model to {merged_model_path}...")
merged_model.save_pretrained(merged_model_path)

# 也要把分词器(tokenizer)保存到新目录中，它是模型的一部分
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.save_pretrained(merged_model_path)

print(f"Successfully saved the full merged model and tokenizer to {merged_model_path}")
print("This is the directory you should package and use for Ollama.")


Base model: google/gemma-3-1b-it
Adapter path: /kaggle/working/google_gemma-3-1b-it_qlora_finetuned
Merged model output path: /kaggle/working/google_gemma-3-1b-it_full_merged
Loading base model...
Loading LoRA adapter...
Merging adapter into the base model...
Merge complete.
Saving merged model to /kaggle/working/google_gemma-3-1b-it_full_merged...
Successfully saved the full merged model and tokenizer to /kaggle/working/google_gemma-3-1b-it_full_merged
This is the directory you should package and use for Ollama.


In [18]:
# =======================================================================
#打包并准备下载
# =======================================================================

# 你的模型输出目录（请确保这里的 MODEL_ID 和你训练时用的相匹配）
# 这个变量在之前的单元格已经定义好了，这里只是为了清晰展示
# model_id = "google/gemma-3-4b-it" 
# output_dir = f"/kaggle/working/{model_id.replace('/', '_')}_qlora_finetuned"

# 要生成的压缩包文件名
archive_name = f"finetuned_{TrainingConfig.MODEL_ID.replace('/', '_')}.tar.gz"
archive_path = f"/kaggle/working/{archive_name}"
source_directory_to_archive = merged_model_path

print(f"Output archive path: {archive_path}")
print(f"Archiving directory: {source_directory_to_archive}")

!rm -rf {TrainingConfig.OUTPUT_DIR}
# 使用 tar 命令进行打包和压缩
# -c: create an archive
# -z: compress with gzip
# -v: verbosely list files processed
# -f: use archive file
 # 语法: tar -czvf [最终生成的压缩文件名] [要打包的源目录]
# !tar -czvf {archive_path} {source_directory_to_archive}
# 优化：使用 -C 参数，这可以防止在压缩包里创建多余的父文件夹层级，让解压后的目录更整洁。
!tar -czvf {archive_path} -C {source_directory_to_archive} .
print(f"Archive created successfully! You can now download '{archive_path}' .")

Output archive path: /kaggle/working/finetuned_google_gemma-3-1b-it.tar.gz
Archiving directory: /kaggle/working/google_gemma-3-1b-it_full_merged


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


./
./tokenizer.model


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


./config.json
./chat_template.jinja
./tokenizer_config.json
./special_tokens_map.json
./added_tokens.json
./generation_config.json
./model.safetensors
./tokenizer.json
Archive created successfully! You can now download '/kaggle/working/finetuned_google_gemma-3-1b-it.tar.gz' .


In [21]:
# --- 清理不再需要的文件夹以释放磁盘空间 ---
# 1. 删除原始的 LoRA 适配器文件夹
lora_adapter_dir = TrainingConfig.OUTPUT_DIR
print(f"Deleting LoRA adapter directory: {lora_adapter_dir}...")
!rm -rf {lora_adapter_dir}
print("Done.")


# 2. 删除未打包的、合并后的模型文件夹
merged_dir = merged_model_path
print(f"Deleting merged model directory: {merged_dir}...")
!rm -rf /kaggle/working/wandb
!rm -rf {merged_dir}
print("Done.")
print("\nCleanup complete. Your /kaggle/working/ directory now only contains the final .tar.gz archive.")


Deleting LoRA adapter directory: /kaggle/working/google_gemma-3-1b-it_qlora_finetuned...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Done.
Deleting merged model directory: /kaggle/working/google_gemma-3-1b-it_full_merged...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Done.

Cleanup complete. Your /kaggle/working/ directory now only contains the final .tar.gz archive.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
