# Fine-tuning Ministral-3B on Pokémon Showdown
Upload `dataset.jsonl` to the Colab runtime before running.

In [None]:
# Check GPU: shell out to nvidia-smi to confirm a GPU is attached to this
# runtime before loading the model.
!nvidia-smi

In [None]:
# Install/upgrade the training stack: transformers (5.0 dev series or newer),
# trl, peft, accelerate, bitsandbytes and mistral-common (>= 1.8.6).
!pip install -q --upgrade "transformers>=5.0.0.dev0" trl peft accelerate bitsandbytes "mistral-common>=1.8.6"
# Then install transformers directly from its GitHub repository.
!pip install -q git+https://github.com/huggingface/transformers.git

In [None]:
from huggingface_hub import login
from google.colab import userdata

# Read the Hugging Face token from the Colab secrets store and authenticate
# this session with it.
hf_token = userdata.get("HF_TOKEN")
login(token=hf_token)

In [None]:
import torch
from transformers import Mistral3ForConditionalGeneration, AutoTokenizer, BitsAndBytesConfig

MODEL = "mistralai/Ministral-3-3B-Instruct-2512-BF16"

# Tokenizer: pad on the right and reuse the EOS token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(MODEL)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Quantization: load weights in 4-bit NF4 and run compute in bfloat16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Load the quantized model; device_map="auto" lets the library decide where
# the weights are placed.
model = Mistral3ForConditionalGeneration.from_pretrained(
    MODEL,
    device_map="auto",
    quantization_config=bnb_config,
)
model.tie_weights()

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Prepare the quantized model for k-bit training before attaching adapters.
model = prepare_model_for_kbit_training(model)

# Names of the projection layers that receive LoRA adapters.
lora_targets = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Rank-16 adapters with alpha 16, no dropout and no bias training.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_targets,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
)

# Wrap the model with the adapters and report how many parameters will train.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
import json
import random
from datasets import Dataset

# Load prompt/completion pairs from dataset.jsonl (one JSON object per line)
# and convert each into a two-turn chat: the prompt as the user message and
# the completion as the assistant message.
samples = []
# Explicit UTF-8 so non-ASCII text is decoded the same way on every platform
# instead of depending on the locale's default encoding.
with open("dataset.jsonl", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            continue
        s = json.loads(line)
        samples.append({
            "messages": [
                {"role": "user", "content": s["prompt"]},
                {"role": "assistant", "content": s["completion"]},
            ]
        })

# A 95/5 split needs at least one sample on each side; fail early with a clear
# message instead of building an empty train or validation set.
if len(samples) < 2:
    raise ValueError(
        f"dataset.jsonl must contain at least 2 samples, found {len(samples)}"
    )

# Shuffle with a dedicated, fixed-seed RNG so the train/val split is the same
# on every run of this cell.
random.Random(42).shuffle(samples)
split = int(len(samples) * 0.95)
train_data = Dataset.from_list(samples[:split])
val_data = Dataset.from_list(samples[split:])

print(f"Train: {len(train_data)} | Val: {len(val_data)}")

In [None]:
def format_sample(sample):
    """Render one chat sample into a single training string.

    Applies the tokenizer's chat template to ``sample["messages"]`` without
    tokenizing and without appending a generation prompt.

    Returns:
        A dict with the rendered conversation under the ``"text"`` key.
    """
    rendered = tokenizer.apply_chat_template(
        sample["messages"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# Add the rendered "text" column to both splits (train first, then val).
train_data, val_data = [
    subset.map(format_sample) for subset in (train_data, val_data)
]

In [None]:
from trl import SFTTrainer, SFTConfig

# Training hyperparameters, grouped by concern.
training_args = SFTConfig(
    # Data handling: train on the pre-rendered "text" column, no packing.
    dataset_text_field="text",
    max_length=512,
    packing=False,
    # Optimization: per-device batch of 4 with 4 gradient-accumulation steps.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=1e-4,
    warmup_steps=10,
    max_steps=100,
    optim="paged_adamw_8bit",
    bf16=True,
    seed=42,
    # Logging every 10 steps; evaluation and checkpoints every 50 steps.
    logging_steps=10,
    eval_strategy="steps",
    eval_steps=50,
    save_strategy="steps",
    save_steps=50,
    output_dir="output",
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
    processing_class=tokenizer,
)

trainer.train()

In [None]:
# Quick inference test: render one battle state as a user turn, sample a short
# reply from the fine-tuned model and print only the newly generated text.
model.eval()

prompt = "Turn 1. Weather: none. Your pokemon: Garchomp (100/100 HP, healthy) | Type: dragon/ground | Atk: 130 SpA: 80 Spe: 102. Opponent: Kingambit (100/100 HP, healthy) | Type: dark/steel | Def: 100 SpD: 60 Spe: 50. What move do you use?"

# return_dict=True makes the return type explicit (a mapping holding
# input_ids and attention_mask) instead of relying on the library default,
# and lets generate() receive the attention mask. That matters here because
# the pad token is the same as the EOS token.
# Move the inputs to wherever the model lives rather than hard-coding "cuda".
inputs = tokenizer.apply_chat_template(
    [{"role": "user", "content": prompt}],
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to(model.device)

# Number of prompt tokens, used below to strip the prompt from the output.
prompt_len = inputs["input_ids"].shape[1]

with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=32, temperature=0.1, do_sample=True)
print(tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True))

In [None]:
# Push to HuggingFace Hub
from google.colab import userdata

REPO_NAME = "ministral-3b-pokemon-showdown"

# Fetch the token once and upload the model first, then the tokenizer, to the
# same repository.
hub_token = userdata.get("HF_TOKEN")
for artifact in (model, tokenizer):
    artifact.push_to_hub(REPO_NAME, token=hub_token)