## Local Inference on GPU
Model page: https://huggingface.co/unsloth/Qwen3-0.6B-Base

⚠️ If the generated code snippets do not work, please open an issue on either the [model repo](https://huggingface.co/unsloth/Qwen3-0.6B-Base)
and/or on [huggingface.js](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries-snippets.ts) 🙏

In [None]:
# High-level route: let `pipeline` wire up the tokenizer and model for
# text generation in a single call.
from transformers import pipeline

pipe = pipeline(task="text-generation", model="unsloth/Qwen3-0.6B-Base")

In [None]:
# Low-level route: instantiate the tokenizer and the causal-LM model
# explicitly instead of going through a pipeline.
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "unsloth/Qwen3-0.6B-Base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

In [None]:
# Load the tokenizer and model again, this time letting `device_map="auto"`
# decide where the weights are placed.
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "unsloth/Qwen3-0.6B-Base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

# Report the model size: add up the element count of every parameter tensor.
total_params = 0
for weight in model.parameters():
    total_params += weight.numel()
print(f"Total parameters: {total_params:,}")

In [None]:
from datasets import load_dataset

# Pull the dataset and keep only its "train" split.
ds = load_dataset("vicgalle/alpaca-gpt4")["train"]
print(ds)

# Shuffle with a fixed seed so the train/test rows are reproducible.
dataset = ds.shuffle(seed=42)
print(dataset)

# The first 2000 shuffled rows are used for training and the following
# 100 rows (indices 2000-2099) for evaluation, so the two never overlap.
train_rows = range(2000)
test_rows = range(2000, 2100)
train_dataset = dataset.select(train_rows)
test_dataset = dataset.select(test_rows)

print(f"Train size: {len(train_dataset)}, Test size: {len(test_dataset)}")

In [None]:
max_seq_length = 512

def preprocess(example):
    """Convert one instruction record into a fixed-length causal-LM example.

    The record's ``instruction``, optional ``input`` and ``output`` fields are
    rendered into a single tagged prompt, which is then tokenized with the
    notebook-level ``tokenizer`` and truncated/padded to ``max_seq_length``.

    Args:
        example: Mapping with ``instruction`` and ``output`` keys and an
            optional ``input`` key (an empty string is used when missing).

    Returns:
        The tokenizer output with an extra ``labels`` entry. ``labels``
        mirrors ``input_ids`` except that padding positions (where
        ``attention_mask`` is 0) are set to -100.
    """
    # Combine instruction + optional input + output
    prompt = f"<system> You are a helpful assistant. </system>\n" \
             f"<user>{example['instruction']}\n{example.get('input','')}</user>\n" \
             f"<assistant>{example['output']}</assistant>"

    tokenized = tokenizer(
        prompt,
        truncation=True,
        max_length=max_seq_length,
        padding="max_length"
    )

    # Labels for causal LM. Every sequence is padded to max_seq_length, so a
    # plain copy of input_ids would make the loss score the padding as well.
    # -100 is the ignore index of the cross-entropy loss; the attention mask
    # is used (rather than comparing against the pad token id) so real tokens
    # are never masked even if the pad token doubles as another special token.
    tokenized["labels"] = [
        token_id if is_real else -100
        for token_id, is_real in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized

# Tokenize both splits one record at a time (batched=False), so
# `preprocess` receives a single example per call.
train_dataset = train_dataset.map(preprocess, batched=False)
test_dataset = test_dataset.map(preprocess, batched=False)

# Show the first processed record of each split as a sanity check.
for first_row in (train_dataset[0], test_dataset[0]):
    print(first_row)

In [None]:
from peft import PrefixTuningConfig, get_peft_model, TaskType

# Prefix tuning for a causal LM with 30 virtual tokens as the prefix length.
prefix_config = PrefixTuningConfig(
    num_virtual_tokens=30,
    task_type=TaskType.CAUSAL_LM,
)

# Wrap the loaded model with the prefix-tuning adapter, then report how many
# of its parameters are trainable.
model = get_peft_model(model, prefix_config)
model.print_trainable_parameters()

In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForSeq2Seq

# Step budget on a single device: 2000 training rows / (2 * 8) = 125 optimizer
# steps per epoch, i.e. 375 steps over 3 epochs. The evaluation and checkpoint
# intervals are therefore kept below that total so they actually trigger, and
# the evaluation strategy is set explicitly: `eval_steps` alone does not turn
# evaluation on.
training_args = TrainingArguments(
    output_dir="./qwen3_prefix_tuning",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # effective batch = 2 * 8 = 16 per device
    learning_rate=2e-4,             # higher LR works well for prefix tuning
    fp16=True,
    num_train_epochs=3,
    logging_steps=50,
    eval_strategy="steps",
    eval_steps=100,
    save_strategy="steps",
    save_steps=100,
    save_total_limit=2,
    report_to="wandb",
)

# The examples are already padded to a fixed length during preprocessing; the
# collator's job here is to batch them into tensors.
data_collator = DataCollatorForSeq2Seq(tokenizer, pad_to_multiple_of=8)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    # `processing_class` replaces the deprecated `tokenizer` argument.
    processing_class=tokenizer,
    data_collator=data_collator,
)

trainer.train()

In [None]:
# Save the trained prefix-tuning adapter next to the training checkpoints
# (same directory name as the Trainer's output_dir; the previous path had a
# "prifix" typo and silently created a second, differently named folder).
model.save_pretrained("./qwen3_prefix_tuning")