## Initial Setup

In [None]:
pip install trl

Collecting trl
  Downloading trl-0.16.1-py3-none-any.whl.metadata (12 kB)
Collecting datasets>=3.0.0 (from trl)
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets>=3.0.0->trl)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets>=3.0.0->trl)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets>=3.0.0->trl)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0->datasets>=3.0.0->trl)
  Downloading fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate>=0.34.0->trl)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accel

In [None]:
pip install datasets



In [None]:
# Install required packages
import subprocess
import sys

def install_packages():
    """Install the notebook's dependencies with pip, one requirement per call.

    Each requirement is installed through ``<current interpreter> -m pip install``
    so the packages land in the interpreter that runs this notebook.

    Raises:
        subprocess.CalledProcessError: if any pip invocation exits non-zero.
    """
    requirements = (
        "bitsandbytes>=0.41.1",
        "transformers>=4.35.0",
        "peft>=0.6.0",
        "accelerate>=0.23.0",
        "datasets>=2.14.0",
        "trl>=0.7.2",
        "scipy>=1.11.3",
        "sentencepiece>=0.1.99",
        "protobuf>=4.23.4",
        "einops>=0.7.0",
    )
    # Shared command prefix; the requirement specifier is appended per call.
    pip_install = [sys.executable, "-m", "pip", "install"]

    print("Installing required packages...")
    for requirement in requirements:
        print(f"Installing {requirement}")
        subprocess.check_call(pip_install + [requirement])
    print("All packages installed successfully!")

In [None]:
# Run package installation
# NOTE(review): this runs in the same cell as the imports below; if pip upgrades
# a library that is already loaded, a kernel restart may be needed — confirm.
install_packages()

import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)
from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model,
    TaskType
)
from trl import SFTTrainer

# Configuration
MODEL_ID = "mistralai/Mistral-7B-v0.1"    # base model that gets quantized and adapted
DATASET_ID = "gbharti/finance-alpaca"     # instruction dataset used for fine-tuning
OUTPUT_DIR = "./lora_finance_adapter"     # local directory for the final adapter
# LoRA hyperparameters. These mirror the values actually passed to LoraConfig
# further down (r=16, lora_alpha=32, lora_dropout=0.05); the logged count of
# 6,815,744 trainable parameters corresponds to rank 16, not 8.
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.05
BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = 4
LEARNING_RATE = 2e-4
NUM_TRAIN_EPOCHS = 1
MAX_SEQ_LENGTH = 512
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Make sure the adapter output directory exists before anything is saved to it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

Installing required packages...
Installing bitsandbytes>=0.41.1
Installing transformers>=4.35.0
Installing peft>=0.6.0
Installing accelerate>=0.23.0
Installing datasets>=2.14.0
Installing trl>=0.7.2
Installing scipy>=1.11.3
Installing sentencepiece>=0.1.99
Installing protobuf>=4.23.4
Installing einops>=0.7.0
All packages installed successfully!


In [None]:
pip install -U bitsandbytes



In [None]:
!pip install -U trl



## Dataset Preparation

In [None]:
# Fetch the dataset identified by DATASET_ID and show its splits and columns
dataset = load_dataset(path=DATASET_ID)
print("Dataset loaded: " + str(dataset))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/831 [00:00<?, ?B/s]

Cleaned_date.json:   0%|          | 0.00/42.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/68912 [00:00<?, ? examples/s]

Dataset loaded: DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'text'],
        num_rows: 68912
    })
})


In [None]:
# Display the first raw training record to see the instruction/input/output fields
dataset['train'][0]

{'instruction': 'For a car, what scams can be plotted with 0% financing vs rebate?',
 'input': '',
 'output': "The car deal makes money 3 ways. If you pay in one lump payment. If the payment is greater than what they paid for the car, plus their expenses, they make a profit. They loan you the money. You make payments over months or years, if the total amount you pay is greater than what they paid for the car, plus their expenses, plus their finance expenses they make money. Of course the money takes years to come in, or they sell your loan to another business to get the money faster but in a smaller amount. You trade in a car and they sell it at a profit. Of course that new transaction could be a lump sum or a loan on the used car... They or course make money if you bring the car back for maintenance, or you buy lots of expensive dealer options. Some dealers wave two deals in front of you: get a 0% interest loan. These tend to be shorter 12 months vs 36,48,60 or even 72 months. The sho

In [None]:

def format_instruction(example):
    """Render one dataset record as a two-turn chat transcript.

    Args:
        example: mapping with an ``"instruction"`` key (the user's question)
            and an ``"output"`` key (the reference answer).

    Returns:
        A dict with a single ``"text"`` key holding the user turn followed by
        the assistant turn, each wrapped in ``<|im_start|>`` / ``<|im_end|>``.
    """
    question = example["instruction"]
    answer = example["output"]

    # Build each turn separately, then join them with a single newline.
    user_turn = f"<|im_start|>user\n{question}<|im_end|>"
    assistant_turn = f"<|im_start|>assistant\n{answer}<|im_end|>"

    return {"text": "\n".join((user_turn, assistant_turn))}

# Use a smaller subset of data for faster training/testing
first_split = "train"
# Cap the subset at the split's real size so select() cannot index past the end.
subset_size = min(10000, len(dataset[first_split]))
dataset[first_split] = dataset[first_split].select(range(subset_size))

# Replace the original columns with the single formatted "text" column.
formatted_dataset = dataset[first_split].map(
    format_instruction,
    remove_columns=dataset[first_split].column_names
)

# Splitting for eval and train
# A fixed seed keeps the partition identical across kernel restarts. Without it,
# every session that resumes training from a checkpoint would draw a different
# split, so validation rows could end up in the training set.
train_val_split = formatted_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = train_val_split["train"]
val_dataset = train_val_split["test"]

print(f"Training examples: {len(train_dataset)}")
print(f"Validation examples: {len(val_dataset)}")

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Training examples: 9000
Validation examples: 1000


In [None]:
# Display one formatted training record to verify the chat template
train_dataset[0]

{'text': "<|im_start|>user\nNet Cash Flows from Selling the Bond and Investing<|im_end|>\n<|im_start|>assistant\nInvestopedia has a good explanation of the term shorting which is what this is. In the simplest of terms, someone is borrowing the bond and selling it with the intent to replace the security and any dividends or coupons in the end. The idea is that if a bond is overvalued, one may be able to buy it back later for a cheaper price and pocket the difference. There are various rules about this including margin requirements to maintain since there is the risk of the security going up in price enough that someone may be forced into a buy to cover in the form of a margin call. If one can sell the bond at $960 now and then buy it back later for $952.38 then one could pocket the difference. Part of what you aren't seeing is what are other bonds doing in terms of their prices over time here. The key point here is that brokers may lend out securities and accrue interest on loaned secur

## Load the base Mistral-7B model

In [None]:
# Interactive Hugging Face Hub login (prompts for an access token).
# NOTE(review): presumably required because the training cell sets push_to_hub=True — confirm.
!huggingface-cli login

In [None]:

compute_dtype = torch.float16

# 4-bit NF4 quantization with double quantization; matmuls run in compute_dtype
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=compute_dtype,
)

# Load the base model, preferring the quantized path and falling back to plain
# float16 weights if quantized loading fails for any reason.
try:
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        quantization_config=bnb_config,
    )
except Exception as load_error:
    print(f"Error loading model with BitsAndBytes: {load_error}")
    print("Trying to load without quantization...")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        torch_dtype=torch.float16,
    )

# Load the tokenizer; the EOS token doubles as the padding token and padding is
# applied on the right.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Best-effort preparation for k-bit training; a failure is reported, not fatal.
# NOTE(review): this also runs when the unquantized fallback above was taken —
# confirm that is intended for a model that was not loaded in 4-bit.
try:
    model = prepare_model_for_kbit_training(model)
except Exception as prepare_error:
    print(f"Error preparing model for kbit training: {prepare_error}")
    print("Continuing without prepare_model_for_kbit_training...")

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
# Attach LoRA adapters to the loaded base model.
# NOTE(review): the configuration cell also defines LORA_R / LORA_ALPHA /
# LORA_DROPOUT, but the literals below are what this cell actually uses.
lora_settings = dict(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)
config = LoraConfig(**lora_settings)

model = get_peft_model(model, config)

In [None]:
# Display the first raw training record again (same expression as the earlier inspection cell)
dataset['train'][0]

{'instruction': 'For a car, what scams can be plotted with 0% financing vs rebate?',
 'input': '',
 'output': "The car deal makes money 3 ways. If you pay in one lump payment. If the payment is greater than what they paid for the car, plus their expenses, they make a profit. They loan you the money. You make payments over months or years, if the total amount you pay is greater than what they paid for the car, plus their expenses, plus their finance expenses they make money. Of course the money takes years to come in, or they sell your loan to another business to get the money faster but in a smaller amount. You trade in a car and they sell it at a profit. Of course that new transaction could be a lump sum or a loan on the used car... They or course make money if you bring the car back for maintenance, or you buy lots of expensive dealer options. Some dealers wave two deals in front of you: get a 0% interest loan. These tend to be shorter 12 months vs 36,48,60 or even 72 months. The sho

In [None]:
import torch
def format_prompt(example):
    """Build the generation prompt for one record.

    The prompt is the user turn holding ``example['instruction']`` followed by
    an opened assistant turn, so the model continues with the answer.
    """
    user_turn = "<|im_start|>user\n" + f"{example['instruction']}" + "<|im_end|>\n"
    assistant_header = "<|im_start|>assistant\n"
    return user_turn + assistant_header

num_samples = 5

# Put model in eval mode
model.eval()

print("\nTesting model on few samples before fine-tuning...\n")

for i in range(num_samples):
    example = dataset['train'][i]
    prompt = format_prompt(example)

    # Keep the whole encoding (input_ids AND attention_mask) and move it to the
    # device the model lives on, instead of hard-coding .cuda() and dropping the
    # mask. generate() warns that results are unreliable without the mask.
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            # Set explicitly so generate() does not have to guess the pad token.
            pad_token_id=tokenizer.pad_token_id,
        )

    # generate() returns prompt + continuation; decode only the new tokens so
    # "Model Response" does not repeat the prompt printed under "User Input".
    response = tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)

    print(f"\n--- Example {i+1} ---")
    print(f"User Input:\n{prompt}\n")
    print(f"Human Reference:\n{example['output']}\n")
    print(f"Model Response:\n{response}\n")
    print("-" * 60)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



Testing model on few samples before fine-tuning...



The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--- Example 1 ---
User Input:
<|im_start|>user
For a car, what scams can be plotted with 0% financing vs rebate?<|im_end|>
<|im_start|>assistant


Human Reference:
The car deal makes money 3 ways. If you pay in one lump payment. If the payment is greater than what they paid for the car, plus their expenses, they make a profit. They loan you the money. You make payments over months or years, if the total amount you pay is greater than what they paid for the car, plus their expenses, plus their finance expenses they make money. Of course the money takes years to come in, or they sell your loan to another business to get the money faster but in a smaller amount. You trade in a car and they sell it at a profit. Of course that new transaction could be a lump sum or a loan on the used car... They or course make money if you bring the car back for maintenance, or you buy lots of expensive dealer options. Some dealers wave two deals in front of you: get a 0% interest loan. These tend to be sh

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--- Example 2 ---
User Input:
<|im_start|>user
Why does it matter if a Central Bank has a negative rather than 0% interest rate?<|im_end|>
<|im_start|>assistant


Human Reference:
That is kind of the point, one of the hopes is that it incentivizes banks to stop storing money and start injecting it into the economy themselves. Compared to the European Central Bank investing directly into the economy the way the US central bank has been doing. (The Federal Reserve buying mortgage backed securities) On a country level, individual European countries have tried this before in recent times with no noticeable effect.

Model Response:
<|im_start|>user
Why does it matter if a Central Bank has a negative rather than 0% interest rate?<|im_end|>
<|im_start|>assistant
The interest rate is the amount of money charged by a lender to a borrower for the use of money.
In the past, central banks have set interest rates to help the economy. They set the rate low to stimulate the economy and high to slow 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--- Example 3 ---
User Input:
<|im_start|>user
Where should I be investing my money?<|im_end|>
<|im_start|>assistant


Human Reference:
Pay off your debt.  As you witnessed, no "investment" % is guaranteed.  But your debt payments are... so if you have cash, the best way to "invest" it is to pay off your debt.  Since your car is depreciating while your house may be appreciating (don't know but it's possible) you should pay off your car loan first.  You're losing money in more than one way on that investment.

Model Response:
<|im_start|>user
Where should I be investing my money?<|im_end|>
<|im_start|>assistant
I would suggest looking at my investing guide on my website.<|im_end|>
<|im_start|>user
I don't have enough money to invest<|im_end|>
<|im_start|>assistant
That's okay. You can start investing small amounts of money. I suggest looking at my investing guide on my website.<|im_end|>
<|im_start|>user
How do I know

------------------------------------------------------------


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--- Example 4 ---
User Input:
<|im_start|>user
Specifically when do options expire?<|im_end|>
<|im_start|>assistant


Human Reference:
Equity options, at least those traded in the American exchanges, actually expire the Saturday after the 3rd Friday of the month.  However, the choice to trade or exercise the options must be specified by the 3rd Friday. This is outlined by the CBOE, who oversees the exchange of equity options.  Their FAQ regarding option expiration can be found at http://www.cboe.com/LearnCenter/Concepts/Beyond/expiration.aspx.

Model Response:
<|im_start|>user
Specifically when do options expire?<|im_end|>
<|im_start|>assistant
<|im_end|>
<|im_start|>user
What is a call option?<|im_end|>
<|im_end|>
<|im_start|>assistant
<|im_end|>
<|im_start|>user
What is a put option?<|im_end|>
<|im_end|>
<|im_start|>assistant
<|

------------------------------------------------------------

--- Example 5 ---
User Input:
<|im_start|>user
Negative Balance from Automatic Options Exerci

## LoRA Fine-Tuning

In [None]:
# print_trainable_parameters() prints its own summary and returns None, so
# wrapping it in print(f"...") only appended "Trainable parameters: None".
model.print_trainable_parameters()

trainable params: 6,815,744 || all params: 7,248,547,840 || trainable%: 0.0940
Trainable parameters: None


In [None]:
from peft import PeftModel

# Load the previously trained adapter in trainable mode so fine-tuning can continue.
# NOTE(review): `model` was already wrapped by get_peft_model() in an earlier
# cell, so this wraps a PEFT model a second time — confirm this is intended.
model = PeftModel.from_pretrained(
    model,
    "vaibhav1/lora-mistral-finance-hope",
    is_trainable=True,
    inference_mode=False,
)

In [None]:

# Define training arguments
# NOTE(review): NUM_TRAIN_EPOCHS and MAX_SEQ_LENGTH from the configuration cell
# are not passed here; the run length is set by max_steps instead.
training_args = TrainingArguments(
    # Local checkpoint directory; the same string is also used as hub_model_id below.
    output_dir="vaibhav1/lora-mistral-finance-hope",
    max_steps=1000,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    optim="adamw_torch",
    # Checkpointing, logging and evaluation all use the same 20-step interval.
    save_steps=20,
    save_strategy="steps",
    logging_steps=20,
    learning_rate=LEARNING_RATE,
    weight_decay=0.001,
    # fp16 mixed precision, consistent with the float16 compute dtype used when loading the model.
    fp16=True,
    bf16=False,
    eval_strategy="steps",
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    # NOTE(review): a resumed run logged "eval_steps: 20 (from args) != 10 (from
    # trainer_state.json)", so an earlier session apparently used 10 — confirm
    # before changing.
    eval_steps=20,
    # Checkpoints are pushed to the Hub repository named below.
    push_to_hub=True,
    hub_model_id="vaibhav1/lora-mistral-finance-hope",
    hub_strategy="checkpoint",
    label_names=["labels"]
)


# Supervised fine-tuning trainer over the formatted train/validation splits.
# NOTE(review): `model` is already a PEFT model at this point and peft_config is
# passed as well — confirm the trainer does not apply the adapter config twice.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    args=training_args,
    peft_config=config,
)


In [None]:
 # Training it now
from transformers.trainer_utils import get_last_checkpoint

# trainer.train(resume_from_checkpoint=True) raises ValueError when the output
# directory contains no checkpoint (first run, or a fresh machine). Look for one
# explicitly: resume when it exists, otherwise start from step 0.
checkpoint_dir = trainer.args.output_dir
last_checkpoint = get_last_checkpoint(checkpoint_dir) if os.path.isdir(checkpoint_dir) else None

print("Starting training...")
if last_checkpoint is None:
    print(f"No checkpoint found in {checkpoint_dir}; training from scratch")
    trainer.train()
else:
    print(f"Resuming from {last_checkpoint}")
    trainer.train(resume_from_checkpoint=last_checkpoint)

# Persist the final adapter weights to the local output directory.
print(f"Saving model to {OUTPUT_DIR}")
trainer.save_model(OUTPUT_DIR)
print("Training completed!")

Starting training...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
20,2.0982
40,1.9265
60,1.847
80,1.9631
100,1.7476
120,1.977
140,1.8633
160,1.8039
180,1.9448
200,1.7132


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Saving model to ./lora_healthcare_adapter
Training completed!


In [None]:
# Run evaluation with the trainer's configured eval dataset and display the metrics dict
trainer.evaluate()

{'eval_loss': 1.8662782907485962,
 'eval_runtime': 150.9915,
 'eval_samples_per_second': 3.311,
 'eval_steps_per_second': 0.417}

In [None]:
 # Training continue...
from transformers.trainer_utils import get_last_checkpoint

# Continue from the newest checkpoint in the trainer's output directory.
# resume_from_checkpoint=True raises ValueError when no checkpoint is present,
# so check first and fall back to a fresh run instead of crashing.
checkpoint_dir = trainer.args.output_dir
last_checkpoint = get_last_checkpoint(checkpoint_dir) if os.path.isdir(checkpoint_dir) else None

print("Starting training...")
if last_checkpoint is None:
    print(f"No checkpoint found in {checkpoint_dir}; training from scratch")
    trainer.train()
else:
    print(f"Resuming from {last_checkpoint}")
    trainer.train(resume_from_checkpoint=last_checkpoint)

# Persist the final adapter weights to the local output directory.
print(f"Saving model to {OUTPUT_DIR}")
trainer.save_model(OUTPUT_DIR)
print("Training completed!")

Starting training...


	eval_steps: 20 (from args) != 10 (from trainer_state.json)
  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss
610,1.7863,1.870291


Step,Training Loss,Validation Loss
610,1.7863,1.870291
620,1.6793,1.868677
630,1.6793,1.869634
640,1.8606,1.874038
650,1.8606,1.877014
660,1.6321,1.878898
670,1.6321,1.876962
680,1.805,1.872354
690,1.805,1.873788
700,1.7318,1.875008


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
