In [1]:
%%capture
%pip install -U transformers 
%pip install -U datasets 
%pip install -U accelerate 
%pip install -U peft 
%pip install -U trl 
%pip install -U bitsandbytes 
%pip install -U wandb

In [2]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

2025-05-12 20:23:40.074475: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747081420.308909      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747081420.375732      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token = hf_token)

In [4]:
wb_token = user_secrets.get_secret("wandb")

wandb.login(key=wb_token)
run = wandb.init(
    project='Finetune-Llama 3.2 wealth management chatbot finetune', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33myoungrha430[0m ([33myoungrha430-sekure-payment-experts[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.19.11
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250512_202358-3yvdmoxc[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mlively-bird-6[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/youngrha430-sekure-payment-experts/Finetune-Llama%203.2%20wealth%20management%20chatbot%20finetune?apiKey=3ff0b518def644e46cfac36e679067b7eb574d13[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/youngrha430-sekure-payment-experts/Finetune-Llama%203.2%20wealth%20management%20chatbot%20

In [5]:
base_model = "/kaggle/input/llama-3.2/transformers/3b-instruct/1"
new_model = "llama-3.2-3b-it-wealth-finetune"
dataset_name = "bitext/Bitext-wealth-management-llm-chatbot-training-dataset"

In [6]:
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

In [7]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
from datasets import get_dataset_config_names

print(get_dataset_config_names(dataset_name))

README.md:   0%|          | 0.00/9.81k [00:00<?, ?B/s]

['default']


In [9]:
config_name = "default"

In [10]:
dataset_dict = load_dataset(dataset_name, name=config_name)
print(dataset_dict)

(…)agement-llm-chatbot-training-dataset.csv:   0%|          | 0.00/25.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/23806 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['system_prompt', 'instruction', 'intent', 'category', 'tags', 'response'],
        num_rows: 23806
    })
})


In [11]:
dataset = dataset_dict["train"]
dataset = dataset.shuffle(seed=65).select(range(1000))
instruction = """You are a wealth management assistant. 
     Be professional be matter of fact about your points.
    """
def format_chat_template(row):
    row_json = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": row["instruction"]},
        {"role": "assistant", "content": row["response"]},
    ]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row


dataset = dataset.map(
    format_chat_template,
    num_proc= 4,
)


Map (num_proc=4):   0%|          | 0/1000 [00:00<?, ? examples/s]

In [12]:
train_val = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = train_val['train']
val_dataset = train_val['test']

In [13]:
dataset

Dataset({
    features: ['system_prompt', 'instruction', 'intent', 'category', 'tags', 'response', 'text'],
    num_rows: 1000
})

In [14]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)

In [15]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)
tokenizer.chat_template = None
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


In [16]:
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    eval_steps=20,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

In [17]:
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    peft_config=peft_config,
    
    args=training_arguments,
    
)

Converting train dataset to ChatML:   0%|          | 0/900 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/900 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/900 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/900 [00:00<?, ? examples/s]

Converting eval dataset to ChatML:   0%|          | 0/100 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [18]:
trainer.train()



Step,Training Loss,Validation Loss
20,1.0632,0.9357
40,1.1759,0.830396
60,0.8913,0.752546
80,0.6672,0.689576
100,0.8688,0.706342
120,0.6705,0.668869
140,0.5742,0.65063
160,0.6232,0.658654
180,0.5929,0.62293
200,0.7732,0.626264




TrainOutput(global_step=450, training_loss=0.670919167134497, metrics={'train_runtime': 1016.29, 'train_samples_per_second': 0.886, 'train_steps_per_second': 0.443, 'total_flos': 3682730956984320.0, 'train_loss': 0.670919167134497})

In [19]:
wandb.finish()

[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:                 eval/loss █▆▅▃▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
[34m[1mwandb[0m:  eval/mean_token_accuracy ▁▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇██████
[34m[1mwandb[0m:           eval/num_tokens ▁▁▂▂▂▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇██
[34m[1mwandb[0m:              eval/runtime ▁▃▆█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
[34m[1mwandb[0m:   eval/samples_per_second █▆▃▁▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▁▂
[34m[1mwandb[0m:     eval/steps_per_second █▆▃▁▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▁▂
[34m[1mwandb[0m:               train/epoch ▁▁▁▂▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
[34m[1mwandb[0m:         train/global_step ▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▆▇▇█████
[34m[1mwandb[0m:           train/grad_norm ▅▄▅▅▅▃▅▃▁▁▂▃▁▁▂▂▃▅▂▂▂▂▁▂▂▂▂▂▂▅▂▁▂▁▄▁▂▂█▅
[34m[1mwandb[0m:       train/learning_rate ▃████▇▇▇▇▇▇▇▆▆▆▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▂▂▂▂▁▁
[34m[1mwandb[0m:                train/loss ██▆▄▆▆▅▆▂▄▄▂▅▂▃▂▃▃▅▂▅▂▁▃▂▂▂▃▁▃▅▂▅▃▃▃▃▂

In [20]:
messages = [{"role": "system", "content": instruction},
    {"role": "user", "content": "I want to start invest in stock. how should I proceed?"}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors='pt').to("cuda")

outputs = model.generate(**inputs, max_new_tokens=150)

generated_tokens = outputs[0][inputs['input_ids'].shape[-1]:]

response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

print(response.strip())

Sure, I can help you with that! Investing in stocks can be a great way to grow your wealth. Here's a step-by-step guide to get you started:

1. Research different stocks: Take some time to explore various companies and their financial performance. Look for stocks that align with your investment goals and risk tolerance.

2. Choose a brokerage account: Find a reputable online brokerage that offers trading services. Compare features like fees, commissions, trading platforms, and customer support to find the best fit for you.

3. Open an account: Once you've selected a brokerage, create an account by providing necessary information such as your name, contact details, and financial information. Make sure to read and understand the terms and conditions before proceeding.

4.


In [21]:
trainer.model.save_pretrained(new_model)
trainer.model.push_to_hub(new_model, use_temp_dir=False)



README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/1.67G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/young43/llama-3.2-3b-it-wealth-finetune/commit/7165f32463137d7523cd0f6c2c8c58cb52acca4b', commit_message='Upload model', commit_description='', oid='7165f32463137d7523cd0f6c2c8c58cb52acca4b', pr_url=None, repo_url=RepoUrl('https://huggingface.co/young43/llama-3.2-3b-it-wealth-finetune', endpoint='https://huggingface.co', repo_type='model', repo_id='young43/llama-3.2-3b-it-wealth-finetune'), pr_revision=None, pr_num=None)