In [6]:
%%capture
import os

!pip install unsloth
!pip install transformers==4.56.2
!pip install --no-deps trl==0.22.2

In [7]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
huggingface_token= user_secrets.get_secret("huggingface")

In [10]:
import os
from transformers import TrainerCallback
from huggingface_hub import upload_folder

class AutoUploadCallback(TrainerCallback):
    def __init__(self, repo_id):
        self.repo_id = repo_id
        self.token = huggingface_token   # Must be set in Kaggle Secrets

    def on_save(self, args, state, control, **kwargs):
        checkpoint_path = os.path.join(
            args.output_dir,
            f"checkpoint-{state.global_step}"
        )

        if os.path.isdir(checkpoint_path):
            print(f"\nðŸ¤– Uploading {checkpoint_path} to HuggingFace repo root...")

            upload_folder(
                repo_id=self.repo_id,
                folder_path=checkpoint_path,
                path_in_repo=".",               # <-- upload directly to repo root
                repo_type="model",
                token=self.token,               # <-- required for auth
                commit_message=f"Overwrite LoRA files ({state.global_step})",
                allow_patterns=[
                    "adapter_model.safetensors",
                    "adapter_config.json",
                ],
            )

            print("âœ… Successfully uploaded to repo root!\n")

        return control

In [11]:
from huggingface_hub import snapshot_download
base_path = snapshot_download("astegaras/flytech_lora")

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

In [12]:
import os
from huggingface_hub import snapshot_download
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    base_path,
    max_seq_length=2048,
    load_in_4bit=True,
)

model.print_trainable_parameters()

==((====))==  Unsloth 2025.11.6: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

trainable params: 24,313,856 || all params: 3,237,063,680 || trainable%: 0.7511


In [14]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(tokenizer, "llama-3.2")

def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs = examples["input"]
    outputs = examples["output"]

    conversations = []
    for instruction, input_text, output in zip(instructions, inputs, outputs):

        # Build the user prompt as instruction + optional input
        if input_text and input_text.strip() != "":
            user_msg = instruction + "\n\n" + input_text
        else:
            user_msg = instruction

        convo = [
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": output},
        ]

        text = tokenizer.apply_chat_template(
            convo,
            tokenize=False,
            add_generation_prompt=False,
        )
        conversations.append(text)

    return {"text": conversations}

In [15]:
from datasets import load_dataset
dataset = load_dataset("flytech/python-codes-25k", split="train")

dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
    remove_columns=dataset.column_names,
)

print(dataset[0]["text"])

README.md: 0.00B [00:00, ?B/s]

python-codes-25k.json:   0%|          | 0.00/26.4M [00:00<?, ?B/s]

python-codes-25k.jsonl:   0%|          | 0.00/25.4M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/49626 [00:00<?, ? examples/s]

Map:   0%|          | 0/49626 [00:00<?, ? examples/s]

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>

Help me set up my daily to-do list!

Setting up your daily to-do list...<|eot_id|><|start_header_id|>assistant<|end_header_id|>

```python
tasks = []
while True:
    task = input('Enter a task or type 'done' to finish: ')
    if task == 'done': break
    tasks.append(task)
print(f'Your to-do list for today: {tasks}')
```<|eot_id|>


In [16]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported
from unsloth.chat_templates import train_on_responses_only
import json
import os

max_seq_length = 2048

callback = AutoUploadCallback(
    repo_id="astegaras/flytech_lora"
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 2,
        warmup_steps = 5,
        num_train_epochs=1,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 9999999, # this is just to avoid wandbAI connection
        seed = 3407,
        save_strategy = "steps",
        save_steps = 150,
    ),
    report_to = "none",
    output_dir = "lora_output",
    callbacks=[callback],
)

trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)

Unsloth: Tokenizing ["text"] (num_proc=8):   0%|          | 0/49626 [00:00<?, ? examples/s]

Map (num_proc=8):   0%|          | 0/49626 [00:00<?, ? examples/s]

In [18]:
# we did not want to lof to wandb so we needed this set up to not run into erros 

os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "disabled"
os.environ["WANDB_SILENT"] = "true"
os.environ["WANDB_PROJECT"] = "disabled"
os.environ["WANDB_INIT_TIMEOUT"] = "0"

In [19]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 49,626 | Num Epochs = 1 | Total steps = 6,204
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 2 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856 of 3,237,063,680 (0.75% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-150 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-300 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-450 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-600 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-750 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-900 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-1050 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-1200 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-1350 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-1500 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-1650 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-1800 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-1950 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-2100 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-2250 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-2400 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-2550 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).



ðŸ¤– Uploading trainer_output/checkpoint-2700 to HuggingFace repo root...


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

âœ… Successfully uploaded to repo root!



KeyboardInterrupt: 

In [20]:
rm -rf ~/.cache/huggingface/hub

In [21]:
import os
from huggingface_hub import snapshot_download
from unsloth import FastLanguageModel

base_path = snapshot_download("astegaras/flytech_lora")

model, tokenizer = FastLanguageModel.from_pretrained(
    base_path,
    max_seq_length=2048,
    load_in_4bit = False,
)

model.push_to_hub_gguf(
    "astegaras/lora_python_converter",
    tokenizer,
    quantization_method="q2_k",
    token=huggingface_token,
)

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/97.3M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

adapter_config.json: 0.00B [00:00, ?B/s]

.gitattributes: 0.00B [00:00, ?B/s]

==((====))==  Unsloth 2025.11.6: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

Unsloth: Converting model to GGUF format...
Unsloth: Merging model weights to 16-bit format...
Found HuggingFace hub cache directory: /root/.cache/huggingface/hub


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Checking cache directory for required files...


Unsloth: Copying 2 files from cache to `/tmp/unsloth_gguf_idizc0k0`: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:12<00:00,  6.12s/it]


Successfully copied all 2 files from cache to `/tmp/unsloth_gguf_idizc0k0`
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:00<00:00, 18724.57it/s]
Unsloth: Merging weights into 16bit: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [01:06<00:00, 33.02s/it]


Unsloth: Merge process complete. Saved to `/tmp/unsloth_gguf_idizc0k0`
Unsloth: Converting to GGUF format...
==((====))==  Unsloth: Conversion from HF to GGUF information
   \\   /|    [0] Installing llama.cpp might take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF f16 might take 3 minutes.
\        /    [2] Converting GGUF f16 to ['q2_k'] might take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: llama.cpp found in the system. Skipping installation.
Unsloth: Preparing converter script...
Unsloth: [1] Converting model into f16 GGUF format.
This might take 3 minutes...
Unsloth: Initial conversion completed! Files: ['llama-3.2-3b-instruct.F16.gguf']
Unsloth: [2] Converting GGUF f16 into q2_k. This might take 10 minutes...
Unsloth: Model files cleanup...
Unsloth: All GGUF conversions completed successfully!
Generated files: ['llama-3.2-3b-instruct.Q2_K.gguf']
Unsloth: example usage for text only LLMs: llama-cli --model llama-3.2-3b-instr

Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

No files have been modified since last commit. Skipping to prevent empty commit.


Uploading config.json...


No files have been modified since last commit. Skipping to prevent empty commit.


Uploading Ollama Modelfile...


No files have been modified since last commit. Skipping to prevent empty commit.


Unsloth: Successfully uploaded GGUF to https://huggingface.co/astegaras/lora_python_converter
Unsloth: Cleaning up temporary files...


'astegaras/lora_python_converter'