In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import os
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth

Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Collecting xformers==0.0.29.post3
  Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting trl
  Downloading trl-0.21.0-py3-none-any.whl.metadata (11 kB)
Collecting cut_cross_entropy
  Downloading cut_cross_entropy-25.1.1-py3-none-any.whl.metadata (9.3 kB)
Collecting unsloth_zoo
  Downloading unsloth_zoo-2025.8.4-py3-none-any.whl.metadata (9.4 kB)
Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl (43.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.4/43.4 MB[0m [31m43.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading trl-0.21.0-py3-none-any.whl (511 kB

In [2]:
import torch
from unsloth import FastLanguageModel
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import load_dataset

# 1. Load the base checkpoint and its tokenizer through Unsloth
max_seq_length = 2048

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/gemma-3-270m-it",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,
)

# 2. Wrap the model with LoRA adapters on the listed projection modules
model = FastLanguageModel.get_peft_model(
    model,
    r=16,           # adapter rank
    lora_alpha=16,  # scaling factor
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing=True,
    random_state=42,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
)

print("Unsloth model configured for 4-bit LoRA fine-tuning!")

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


2025-08-15 09:31:54.371476: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755250314.736072      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755250314.809402      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.8.5: Fast Gemma3 patching. Transformers: 4.52.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


model.safetensors:   0%|          | 0.00/536M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/233 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/670 [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

Unsloth: Making `model.base_model.model.model` require gradients
Unsloth model configured for 4-bit LoRA fine-tuning!


In [3]:
from unsloth.chat_templates import get_chat_template
# Rebind `tokenizer` to the one returned by Unsloth for the "gemma3" chat template.
tokenizer = get_chat_template(tokenizer, chat_template="gemma3")

In [4]:
# Load Datasets and Merge them, then take a 60k random sample
from datasets import load_dataset, concatenate_datasets

def load_merge_and_sample_finance_datasets(num_samples=60000):
    """
    Load two finance datasets, merge them, and return a random sample.

    Both sources are mapped onto a shared two-column schema
    (``instruction``, ``output``); every original column is dropped. For the
    wealth-alpaca rows a non-empty ``input`` field is appended to the
    instruction on a new line. The merged data is shuffled with a fixed seed
    before the sample is taken.

    Args:
        num_samples: Number of rows to draw. A request larger than the merged
            dataset is capped at the merged size instead of failing.

    Returns:
        The sampled dataset with ``instruction`` and ``output`` columns.
    """
    print("Loading gbharti/wealth-alpaca_lora dataset...")
    wealth_ds = load_dataset("gbharti/wealth-alpaca_lora", split="train")

    print("Loading Josephgflowers/Finance-Instruct-500k dataset...")
    finance_ds = load_dataset("Josephgflowers/Finance-Instruct-500k", split="train")

    # --- Preprocessing functions ---
    def preprocess_wealth_alpaca(example):
        # Build the instruction locally instead of writing back into `example`.
        instruction = example["instruction"]
        if example.get('input'):
            instruction = f"{instruction}\n{example['input']}"
        return {"instruction": instruction, "output": example["output"]}

    def preprocess_finance_instruct(example):
        return {"instruction": example["user"], "output": example["assistant"]}

    print("Preprocessing datasets...")
    wealth_ds = wealth_ds.map(preprocess_wealth_alpaca, remove_columns=wealth_ds.column_names)
    finance_ds = finance_ds.map(preprocess_finance_instruct, remove_columns=finance_ds.column_names)

    # --- Merging ---
    print("Merging the datasets...")
    merged_dataset = concatenate_datasets([wealth_ds, finance_ds])
    print(f"Total size of merged dataset: {len(merged_dataset)} rows")

    # --- Shuffling and Sampling ---
    print("Shuffling the merged dataset to ensure a random sample...")
    # Using a seed ensures that you get the same "random" sample every time you run the code.
    shuffled_dataset = merged_dataset.shuffle(seed=42)

    # Cap the request: selecting indices past the end of the dataset would raise.
    available = len(shuffled_dataset)
    sample_size = min(num_samples, available)
    if sample_size < num_samples:
        print(f"Requested {num_samples} rows but only {available} are available; using all of them.")

    print(f"Selecting a random sample of {sample_size} rows...")
    sampled_dataset = shuffled_dataset.select(range(sample_size))

    return sampled_dataset

# --- Main Execution ---
# Build the 60k-row sample that the remaining cells operate on.
dataset = load_merge_and_sample_finance_datasets(num_samples=60_000)

print(
    "Successfully created the 'dataset' variable!",
    f"Final dataset size: {len(dataset)} random rows.",
    sep="\n",
)

Loading gbharti/wealth-alpaca_lora dataset...


README.md:   0%|          | 0.00/372 [00:00<?, ?B/s]

final_dataset_clean.json:   0%|          | 0.00/31.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/44341 [00:00<?, ? examples/s]

Loading Josephgflowers/Finance-Instruct-500k dataset...


README.md: 0.00B [00:00, ?B/s]

train.json:   0%|          | 0.00/580M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/518185 [00:00<?, ? examples/s]

Preprocessing datasets...


Map:   0%|          | 0/44341 [00:00<?, ? examples/s]

Map:   0%|          | 0/518185 [00:00<?, ? examples/s]

Merging the datasets...
Total size of merged dataset: 562526 rows
Shuffling the merged dataset to ensure a random sample...
Selecting a random sample of 60000 rows...
Successfully created the 'dataset' variable!
Final dataset size: 60000 random rows.


In [5]:
def convert_to_chatml(example):
    """Wrap one instruction/output record as a two-turn conversation.

    Args:
        example: Mapping with ``instruction`` and ``output`` keys.

    Returns:
        A dict with a single ``conversations`` key holding a user turn
        (the instruction) followed by an assistant turn (the output).
    """
    user_turn = {"role": "user", "content": example["instruction"]}
    assistant_turn = {"role": "assistant", "content": example["output"]}
    return {"conversations": [user_turn, assistant_turn]}

# Add a "conversations" column to every row (existing columns are kept).
dataset = dataset.map(convert_to_chatml)

Map:   0%|          | 0/60000 [00:00<?, ? examples/s]

In [6]:
# Spot-check one row after the conversion step.
dataset[100]

{'instruction': 'Where was the May incident recorded?',
 'output': 'Okfuskee County',
 'conversations': [{'content': 'Where was the May incident recorded?',
   'role': 'user'},
  {'content': 'Okfuskee County', 'role': 'assistant'}]}

In [7]:
def formatting_prompts_func(examples):
    """Render each conversation of a batch into a chat-template string.

    Args:
        examples: Batch mapping whose ``conversations`` entry is a sequence of
            conversations (each one a list of role/content messages).

    Returns:
        A dict with a ``text`` list, one rendered string per conversation.
    """
    rendered = []
    for convo in examples["conversations"]:
        prompt = tokenizer.apply_chat_template(
            convo, tokenize=False, add_generation_prompt=False
        )
        # Drop a leading '<bos>' from the rendered text — presumably so a later
        # tokenization step does not end up with a duplicated BOS; confirm.
        rendered.append(prompt.removeprefix('<bos>'))
    return {"text": rendered}

# Add the rendered "text" column, handing rows to the formatter in batches.
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
)

Map:   0%|          | 0/60000 [00:00<?, ? examples/s]

In [8]:
# Inspect the rendered chat-template string for the same row.
dataset[100]['text']

'<start_of_turn>user\nWhere was the May incident recorded?<end_of_turn>\n<start_of_turn>model\nOkfuskee County<end_of_turn>\n'

In [9]:
# Configure LoRA and Start Training
from trl import SFTTrainer
from transformers import TrainingArguments

training_args = TrainingArguments(
    # Batching: 64 sequences per device, accumulated over 8 steps.
    per_device_train_batch_size=64,
    gradient_accumulation_steps=8,
    # Schedule: one full epoch (set max_steps = 100 instead for a short trial run).
    num_train_epochs=1,
    warmup_steps=10,
    learning_rate=1e-4,
    lr_scheduler_type="linear",
    # Optimizer
    optim="adamw_8bit",
    weight_decay=0.01,
    # Precision flags: bf16 when the GPU reports support, otherwise fp16.
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    # Logging and bookkeeping
    logging_steps=25,
    output_dir="outputs",
    report_to="none",
    seed=42,
)

In [10]:
# --- Initialize Trainer ---
# Supervised fine-tuning over the pre-rendered "text" column of `dataset`.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=training_args,
)

Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"]:   0%|          | 0/60000 [00:00<?, ? examples/s]

In [11]:
# --- Start Fine-tuning ---
print("Starting the fine-tuning process...")
# The value returned by train() is kept in `trainer_stats`; no cell shown in
# this notebook reads it afterwards.
trainer_stats = trainer.train()
print("Fine-tuning complete!")

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 60,000 | Num Epochs = 1 | Total steps = 118
O^O/ \_/ \    Batch size per device = 64 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (64 x 8 x 1) = 512
 "-____-"     Trainable parameters = 3,796,992 of 271,895,168 (1.40% trained)


Starting the fine-tuning process...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
25,3.6922
50,3.6811
75,3.6753
100,3.6339


Fine-tuning complete!


In [12]:
# Inference smoke test on the fine-tuned model (saving happens in a later cell)
print("\n--- Running Inference ---")
from transformers import pipeline

# Single-turn chat prompt used as the test input
messages = [
    {
        "role": "user",
        "content": "What are the main risks associated with investing in emerging markets?",
    },
]

# transformers text-generation pipeline wrapped around the in-memory model
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Sample a reply and print the returned conversation
outputs = pipe(
    messages,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
)
print(outputs[0]['generated_text'])

Device set to use cuda:0
The model 'PeftModel' is not supported for text-generation. Supported models are ['PeftModelForCausalLM', 'AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BitNetForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'Gemma3ForConditionalGeneration', 'Gemma3ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'Glm4ForCausalLM', 'GotOcr2ForConditionalGeneration', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 


--- Running Inference ---
[{'role': 'user', 'content': 'What are the main risks associated with investing in emerging markets?'}, {'role': 'assistant', 'content': "Investing in emerging markets, while potentially offering potential for growth and diversification, comes with a set of significant risks. Here's a breakdown of these risks:\n\n*   **Political Instability and Corruption:**\n    *   **Outdated Monetary Policy:** Emerging market currencies often rely on fiscal policies, which can lead to volatility in central bank policy. This can result in currency depreciation and, in extreme cases, a severe economic downturn, potentially impacting investment returns.\n    *   **Corruption:** Corruption in emerging markets, particularly in certain sectors (e.g., media, banking, and even certain government services), can lead to significant revenue loss, reduced investment flows, and increased illicit activities.\n    *   **Uncautious Government:** Ineffective or poorly managed government po

In [16]:
from huggingface_hub import notebook_login
# Show the Hugging Face login widget. The push cell that follows passes
# `token = True`, which presumably picks up the credential stored here — confirm.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [17]:
# Save the fine-tuned LoRA adapters
print("\n--- Saving LoRA Adapters ---")
# The model and the tokenizer are both written into the same local directory.
model.save_pretrained("gemma-3-270m-finance-lora")
tokenizer.save_pretrained("gemma-3-270m-finance-lora")
print("Model adapters saved to 'gemma-3-270m-finance-lora'")

# Push the model adapters and tokenizer to the Hub
# NOTE(review): the repo id is hard-coded to one account; change it before reusing this cell.
repo_name = "huseyincavus/gemma-3-270m-finance-lora"

print(f"\n--- Pushing LoRA Adapters to Hugging Face Hub ({repo_name}) ---")
# Both uploads target the same repository, model first and tokenizer second.
model.push_to_hub(repo_name, token = True)
tokenizer.push_to_hub(repo_name, token = True)
print("Model adapters and tokenizer pushed to Hugging Face Hub!")



--- Saving LoRA Adapters ---
Model adapters saved to 'gemma-3-270m-finance-lora'

--- Pushing LoRA Adapters to Hugging Face Hub (huseyincavus/gemma-3-270m-finance-lora) ---


README.md:   0%|          | 0.00/583 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/15.2M [00:00<?, ?B/s]

Saved model to https://huggingface.co/huseyincavus/gemma-3-270m-finance-lora


  0%|          | 0/2 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

Model adapters and tokenizer pushed to Hugging Face Hub!


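AFTER RESTART — the cell below is run in a fresh kernel; it reloads the base model and the pushed LoRA adapters from the Hub before merging them.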

In [19]:
# Merge base model + adapters and push to Hub
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Repositories involved in the merge
base_model_id = "unsloth/gemma-3-270m-it"
lora_adapter_id = "huseyincavus/gemma-3-270m-finance-lora"
merged_model_id = "huseyincavus/gemma-3-270m-finance-merged"

# Tokenizer and base weights are both loaded from the base repository
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model = AutoModelForCausalLM.from_pretrained(
    base_model_id, torch_dtype="auto", device_map="auto"
)

# Attach the LoRA adapter, then fold it into the base weights
model = PeftModel.from_pretrained(model, lora_adapter_id).merge_and_unload()

# Upload the merged model, then the tokenizer. The tokenizer call stays last
# so that its return value remains the cell's displayed output.
model.push_to_hub(merged_model_id)
tokenizer.push_to_hub(merged_model_id)

adapter_config.json:   0%|          | 0.00/959 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/15.2M [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/536M [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

  0%|          | 0/2 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/huseyincavus/gemma-3-270m-finance-merged/commit/c4920974123c4e1f9bc0b2b9fccde75bb717bfe0', commit_message='Upload tokenizer', commit_description='', oid='c4920974123c4e1f9bc0b2b9fccde75bb717bfe0', pr_url=None, repo_url=RepoUrl('https://huggingface.co/huseyincavus/gemma-3-270m-finance-merged', endpoint='https://huggingface.co', repo_type='model', repo_id='huseyincavus/gemma-3-270m-finance-merged'), pr_revision=None, pr_num=None)