In [1]:
# Kill all processes on the GPU
# !fuser -v /dev/nvidia* -k

# Libraries

In [2]:
%%capture
# Install required libraries (optimized for Colab/Kaggle notebooks)
import os
if 'COLAB_' not in ''.join(os.environ.keys()):
    %pip install unsloth
else:
    # Do this only in Colab notebooks and Kaggle notebooks!
    %pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
    %pip install --no-deps cut_cross_entropy unsloth_zoo
    %pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    %pip install --no-deps unsloth

In [3]:
from unsloth import FastLanguageModel
import torch
from datetime import datetime

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# Config

In [4]:
# --- Project settings ---
seed = 69
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# --- Model loading settings ---
max_seq_length = 1024
# None lets the dtype be auto-detected (Float16 for Tesla T4/V100, Bfloat16 for Ampere+).
dtype = None
# 4-bit quantization reduces memory usage; may be set to False.
load_in_4bit = True

# --- LoRA adapter to merge (keep exactly one assignment active) ---
# hf_lora_id = 'alxxtexxr/L3.1-8B-wikipedia-en-5K-LoRA-v20250630122650' # wikipedia-en
hf_lora_id = 'alxxtexxr/L3.1-8B-wikipedia-es-5K-LoRA-v20250722140838' # wikipedia-es
# hf_lora_id = 'alxxtexxr/L3.1-8B-gsm8k-en-5K-LoRA-v20250701060457' # gsm8k-en

# --- Target repo ID for the merged model ---
# Built from the part of the adapter ID that precedes the first 'LoRA',
# followed by 'v' and the current local time formatted as YYYYMMDDHHMMSS.
hf_merged_model_id = f"{hf_lora_id.partition('LoRA')[0]}v{datetime.now():%Y%m%d%H%M%S}"
print("Hugging Face merged model ID:", hf_merged_model_id)

Hugging Face merged model ID: alxxtexxr/L3.1-8B-wikipedia-es-5K-v20250723074825


# Model

In [5]:
# Load the model and its tokenizer from the repo named by hf_lora_id,
# applying the sequence length, dtype and 4-bit flag from the config cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=hf_lora_id, max_seq_length=max_seq_length,
    load_in_4bit=load_in_4bit, dtype=dtype,
)

==((====))==  Unsloth 2025.7.7: Fast Llama patching. Transformers: 4.53.3.
   \\   /|    NVIDIA RTX A4500. Num GPUs = 1. Max memory: 19.603 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.6. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/235 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/83.9M [00:00<?, ?B/s]

Unsloth 2025.7.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [13]:
# Display the merged-model repo ID that was computed in the config cell.
hf_merged_model_id

'alxxtexxr/L3.1-8B-wikipedia-es-5K-v20250723074825'

In [15]:
# Read the Hugging Face access token from disk. Surrounding whitespace
# (for example a trailing newline added by an editor or `echo`) is stripped
# so that it is not sent as part of the token.
with open('/workspace/.hf_home/token', 'r', encoding='utf-8') as file:
    hf_token = file.read().strip()

# Push the model and tokenizer to the Hub repo `hf_merged_model_id`,
# saved with the 'merged_16bit' method and authenticated with the token above.
model.push_to_hub_merged(
    hf_merged_model_id,
    tokenizer,
    save_method='merged_16bit',
    token=hf_token,
    # private=True,
)

  0%|          | 0/1 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub
Checking cache directory for required files...
Cache check failed: model-00001-of-00004.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Downloading safetensors index for unsloth/meta-llama-3.1-8b...


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Unsloth: Merging weights into 16bit:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

Unsloth: Merging weights into 16bit:  25%|██▌       | 1/4 [01:38<04:56, 98.81s/it]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Unsloth: Merging weights into 16bit:  50%|█████     | 2/4 [03:16<03:16, 98.03s/it]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

Unsloth: Merging weights into 16bit:  75%|███████▌  | 3/4 [04:48<01:35, 95.45s/it]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Unsloth: Merging weights into 16bit: 100%|██████████| 4/4 [05:15<00:00, 78.91s/it]
