In [7]:
# Install required packages
!pip install transformers==4.41.0
!pip install peft==0.11.0
!pip install datasets==2.19.0
!pip install trl==0.8.6
!pip install bitsandbytes==0.43.1
!pip install accelerate==0.30.1
!pip install torch==2.3.0
# !pip install neptune-client  # For experiment tracking

Collecting transformers==4.41.0
  Using cached transformers-4.41.0-py3-none-any.whl.metadata (43 kB)
Collecting tokenizers<0.20,>=0.19 (from transformers==4.41.0)
  Using cached tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Using cached transformers-4.41.0-py3-none-any.whl (9.1 MB)
Using cached tokenizers-0.19.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.21.1
    Uninstalling tokenizers-0.21.1:
      Successfully uninstalled tokenizers-0.21.1
  Attempting uninstall: transformers
    Found existing installation: transformers 4.52.4
    Uninstalling transformers-4.52.4:
      Successfully uninstalled transformers-4.52.4
[0mSuccessfully installed tokenizers-0.19.1 transformers


Collecting transformers (from peft==0.11.0)
  Using cached transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers->peft==0.11.0)
  Using cached tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Using cached transformers-4.52.4-py3-none-any.whl (10.5 MB)
Using cached tokenizers-0.21.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[0mInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
[0m    Found existing installation: tokenizers 0.19.1
    Uninstalling tokenizers-0.19.1:
      Successfully uninstalled tokenizers-0.19.1
Successfully installed tokenizers-0.21.1 transformers-4.52.4




In [1]:
# Authenticate with the Hugging Face Hub using the token stored in Colab's
# secret store under the name HF_TOKEN (never hardcode the token here).
from google.colab import userdata
from huggingface_hub import login

hf_token = userdata.get('HF_TOKEN')
login(token=hf_token)

In [12]:
import os
import torch
# import neptune
from datasets import load_dataset, Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    BitsAndBytesConfig,
)
from transformers.pipelines import pipeline
from peft import LoraConfig, get_peft_model, TaskType
from trl import SFTTrainer
import pandas as pd
import json

# Set GPU configuration
# Restrict this process to GPU index 0, then report how many GPUs PyTorch sees.
# NOTE(review): torch is already imported when this variable is set; presumably
# CUDA has not been initialised yet, so the mask still takes effect — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Adjust based on available GPUs
print(f"PyTorch detected {torch.cuda.device_count()} GPU(s)")

AttributeError: partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circular import)

## Dataset Preparation

In [8]:
def load_financial_datasets():
    """Load the Finance Alpaca training split, falling back to a tiny sample set.

    Tries to load the "train" split of "gbharti/finance-alpaca". If loading
    fails, the reason is printed and a two-record in-memory dataset with
    "instruction", "input" and "output" fields is built instead, so the rest
    of the notebook can still be exercised.

    Returns:
        The loaded dataset, or the two-record fallback dataset.
    """

    # Load Finance Alpaca dataset
    try:
        finance_alpaca = load_dataset("gbharti/finance-alpaca", split="train")
        print(f"Loaded Finance Alpaca: {len(finance_alpaca)} samples")
    except Exception as exc:
        # `Exception` (not a bare `except:`) so Ctrl-C / kernel interrupts still
        # propagate; the cause is reported instead of being silently dropped.
        print("Finance Alpaca not available, creating sample dataset")
        print(f"Reason: {type(exc).__name__}: {exc}")
        # Create sample financial data if dataset not accessible
        sample_data = [
            {
                "instruction": "What is the P/E ratio and how is it calculated?",
                "input": "",
                "output": "The P/E ratio (Price-to-Earnings ratio) is calculated by dividing the market price per share by the earnings per share (EPS). It indicates how much investors are willing to pay for each dollar of earnings."
            },
            {
                "instruction": "Explain the importance of SEC 10-K filings for investors.",
                "input": "",
                "output": "SEC 10-K filings are annual reports that provide a comprehensive overview of a company's business, financial condition, and results of operations. They are crucial for investors as they contain audited financial statements, risk factors, and management discussion."
            }
        ]
        finance_alpaca = Dataset.from_list(sample_data)

    return finance_alpaca

def format_financial_dataset(dataset):
    """Render every record into a single instruction-tuning prompt string.

    Args:
        dataset: Object exposing ``map(fn)`` whose records are mappings with
            "instruction" and "output" keys and an optional "input" key.

    Returns:
        Whatever ``dataset.map`` returns when each record is mapped to
        ``{"text": prompt}``. The "### Input:" section is included only when
        the record's "input" contains non-whitespace text.
    """

    def format_prompt(example):
        # "input" may be absent, None, or blank; all three mean "no extra
        # context". `or ""` guards the None case, which would otherwise raise
        # AttributeError on `.strip()`.
        has_context = bool((example.get("input") or "").strip())
        if has_context:
            prompt = f"### Instruction:\n{example['instruction']}\n\n### Input:\n{example['input']}\n\n### Response:\n{example['output']}"
        else:
            prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"
        return {"text": prompt}

    formatted_dataset = dataset.map(format_prompt)
    return formatted_dataset

# Load and format dataset
# `financial_dataset` holds the records as loaded; `formatted_dataset` is the
# result of mapping them to prompt text and is what the trainer cell consumes.
financial_dataset = load_financial_datasets()
formatted_dataset = format_financial_dataset(financial_dataset)
print(f"Formatted dataset size: {len(formatted_dataset)}")


Downloading readme:   0%|          | 0.00/831 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/42.9M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/68912 [00:00<?, ? examples/s]

Loaded Finance Alpaca: 68912 samples


Map:   0%|          | 0/68912 [00:00<?, ? examples/s]

Formatted dataset size: 68912


## Model and Tokenizer Configuration

In [10]:
# Model configuration
# BASE_MODEL_NAME is the checkpoint id passed to `from_pretrained`;
# NEW_MODEL_NAME names the output and save directories used by later cells.
BASE_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"  # NOTE(review): the original comment listed this same id as the alternative — presumably the non-instruct base model was meant; confirm
NEW_MODEL_NAME = "Llama-3-8B-Financial-LoRA"

def setup_model_and_tokenizer():
    """Load the 4-bit quantized base model and its tokenizer for fine-tuning.

    Reads the checkpoint id from the module-level ``BASE_MODEL_NAME``.

    Returns:
        tuple: ``(model, tokenizer)`` where the model is loaded with NF4 4-bit
        quantization (bfloat16 compute, double quantization) and has
        ``use_cache`` disabled, and the tokenizer pads on the right using the
        EOS token as the pad token.
    """

    # Quantization configuration for efficient training
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )

    # Load tokenizer.
    # `trust_remote_code` is intentionally NOT enabled: Llama is implemented
    # inside transformers itself, and the flag would allow code shipped in the
    # model repository to run in this process.
    tokenizer = AutoTokenizer.from_pretrained(
        BASE_MODEL_NAME,
        use_fast=True,
    )
    # Reuse EOS as the padding token and pad on the right for training batches.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"

    # Load model with quantization (same reasoning for omitting trust_remote_code)
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_NAME,
        quantization_config=quantization_config,
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )

    # Disable caching for training
    model.config.use_cache = False
    model.config.pretraining_tp = 1

    return model, tokenizer

# Bind the loaded model and tokenizer to module-level names used by later cells.
model, tokenizer = setup_model_and_tokenizer()
print("Model and tokenizer loaded successfully")


ValueError: Could not find LlamaForCausalLM neither in <module 'transformers.models.llama' from '/usr/local/lib/python3.11/dist-packages/transformers/models/llama/__init__.py'> nor in <module 'transformers' from '/usr/local/lib/python3.11/dist-packages/transformers/__init__.py'>!

## LoRA Configuration

In [11]:
def create_lora_config():
    """Build the LoRA adapter configuration used for causal-LM fine-tuning.

    Returns:
        LoraConfig: rank 16 with alpha 16 (alpha/r = 1.0), dropout 0.1, no bias
        training, rsLoRA disabled, targeting the attention and MLP projection
        modules listed below.
    """
    attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
    mlp_projections = ["gate_proj", "up_proj", "down_proj"]

    return LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=16,            # low-rank dimension
        lora_alpha=16,   # scaling factor
        target_modules=attention_projections + mlp_projections,
        lora_dropout=0.1,
        bias="none",
        use_rslora=False,
    )

# Apply LoRA to model
# `model` is rebound to the wrapped model, so this cell is not idempotent:
# NOTE(review): re-running it presumably wraps the already-wrapped model again —
# re-run the model-loading cell first.
lora_config = create_lora_config()
model = get_peft_model(model, lora_config)

# Print trainable parameters
def print_trainable_parameters(model):
    """Print the trainable and total parameter counts and the trainable share.

    Args:
        model: Object whose ``named_parameters()`` yields ``(name, param)``
            pairs; each param provides ``numel()`` and ``requires_grad``.
    """
    params = [param for _, param in model.named_parameters()]
    all_param = sum(param.numel() for param in params)
    trainable_params = sum(param.numel() for param in params if param.requires_grad)
    trainable_pct = 100 * trainable_params / all_param

    print(f"Trainable params: {trainable_params:,}")
    print(f"All params: {all_param:,}")
    print(f"Trainable%: {trainable_pct:.2f}%")

# Report how many parameters of the LoRA-wrapped model will be updated.
print_trainable_parameters(model)


NameError: name 'LoraConfig' is not defined

## Training Configuration

In [None]:
def create_training_arguments():
    """Build the TrainingArguments used for the fine-tuning run.

    The output directory is derived from the module-level ``NEW_MODEL_NAME``.

    Returns:
        TrainingArguments: bf16 training for 3 epochs, batch size 4 with 2
        gradient-accumulation steps, cosine schedule with 100 warmup steps,
        8-bit paged AdamW, and evaluation plus checkpointing every 500 steps.
    """

    training_args = TrainingArguments(
        output_dir=f"./results/{NEW_MODEL_NAME}",
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=2,
        num_train_epochs=3,
        learning_rate=2e-4,
        fp16=False,
        bf16=True,
        logging_steps=10,
        save_steps=500,
        evaluation_strategy="steps",
        eval_steps=500,
        warmup_steps=100,
        lr_scheduler_type="cosine",
        optim="paged_adamw_8bit",
        weight_decay=0.01,
        max_grad_norm=1.0,
        group_by_length=True,
        dataloader_pin_memory=False,
        remove_unused_columns=False,
        # The string "none" disables experiment trackers. Python `None` does
        # not: transformers treats it as "all installed integrations".
        report_to="none",  # Can be set to "neptune" for experiment tracking
    )

    return training_args

# Module-level training configuration consumed by the trainer cell.
training_args = create_training_arguments()

## Neptune AI Experiment Tracking

## Training Setup and Execution

In [None]:
def create_trainer(model, tokenizer, dataset, training_args):
    """Create the SFT trainer with a held-out evaluation split.

    Args:
        model: Model to fine-tune.
        tokenizer: Tokenizer matching ``model``.
        dataset: Dataset with a "text" column holding the formatted prompts.
        training_args: TrainingArguments for the run.

    Returns:
        SFTTrainer: configured for packed sequences of up to 2048 tokens.

    Up to 100 examples are held out for evaluation so eval loss is not
    measured on rows the model is trained on. When the dataset has 100 rows or
    fewer, nothing can be held out, so evaluation falls back to the dataset
    itself.
    """
    eval_size = min(100, len(dataset))
    if len(dataset) > eval_size:
        # Fixed seed keeps the train/eval partition reproducible across runs.
        split = dataset.train_test_split(test_size=eval_size, seed=42)
        train_split, eval_split = split["train"], split["test"]
    else:
        train_split = dataset
        eval_split = dataset.select(range(eval_size))

    trainer = SFTTrainer(
        model=model,
        train_dataset=train_split,
        eval_dataset=eval_split,  # Small eval set
        # `lora_config` is read from the notebook's global scope.
        peft_config=lora_config,
        dataset_text_field="text",
        tokenizer=tokenizer,
        args=training_args,
        max_seq_length=2048,
        packing=True,  # Pack multiple samples into single sequence
        dataset_kwargs={
            "add_special_tokens": False,
            "append_concat_token": False,
        }
    )

    return trainer

# Create trainer
# Uses the module-level model, tokenizer, formatted dataset and training args
# produced by the earlier cells.
trainer = create_trainer(model, tokenizer, formatted_dataset, training_args)

# Start training
print("Starting financial fine-tuning...")
trainer.train()

# Save the fine-tuned model
# No path is passed; the message below names the `output_dir` configured in
# the training arguments.
trainer.save_model()
print(f"Model saved to ./results/{NEW_MODEL_NAME}")

## Model Testing and Inference

In [None]:
def test_financial_model(model, tokenizer):
    """Generate and print answers to a few fixed financial prompts.

    Args:
        model: Model used for text generation.
        tokenizer: Tokenizer matching ``model``; its EOS id is used for padding.

    Prints each question and the first 200 characters of the model's answer
    (followed by "..." when truncated). Returns nothing.
    """
    # Prompts follow the same "### Instruction / ### Response" layout as the
    # training text, ending right where the answer should begin.
    test_prompts = [
        "### Instruction:\nExplain what investors should look for in a company's 10-K filing.\n\n### Response:\n",
        "### Instruction:\nWhat are the key financial ratios for evaluating a stock?\n\n### Response:\n",
        "### Instruction:\nHow do interest rate changes affect stock market valuations?\n\n### Response:\n"
    ]

    generator = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=512,
        temperature=0.1,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    print("Testing fine-tuned financial model:")
    print("=" * 50)

    for test_number, prompt in enumerate(test_prompts, start=1):
        # Recover the bare question from the prompt template for display.
        instruction_part = prompt.split("### Response:")[0]
        question = instruction_part.replace("### Instruction:\n", "").strip()

        print(f"\nTest {test_number}:")
        print("Input:", question)

        generated_text = generator(prompt)[0]['generated_text']
        # Keep only what follows the last response marker in the generated text.
        response = generated_text.split("### Response:\n")[-1].strip()

        if len(response) > 200:
            shown = response[:200] + "..."
        else:
            shown = response
        print("Output:", shown)
        print("-" * 30)

# Test the model
# Runs the fixed prompts against the module-level model and tokenizer.
test_financial_model(model, tokenizer)


## Model Saving and Loading

In [None]:
def save_and_load_model():
    """Save the fine-tuned model and tokenizer, then reload them as a check.

    Reads the module-level ``model``, ``tokenizer``, ``BASE_MODEL_NAME`` and
    ``NEW_MODEL_NAME``. Both artifacts are written to
    ``./saved_models/<NEW_MODEL_NAME>``; the base model is then loaded afresh
    and the saved LoRA weights are attached to it.

    Returns:
        PeftModel: the freshly loaded base model with the saved LoRA weights.
    """
    save_dir = f"./saved_models/{NEW_MODEL_NAME}"

    # Save the model and tokenizer
    model.save_pretrained(save_dir)
    tokenizer.save_pretrained(save_dir)

    print(f"Model and tokenizer saved to ./saved_models/{NEW_MODEL_NAME}")

    # Demonstrate loading
    from peft import PeftModel

    # Load base model with the SAME quantization settings used for training
    # (including double quantization), so the adapter is attached to a base
    # model quantized the same way it was trained against.
    base_model_reload = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_NAME,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        ),
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )

    # Load LoRA weights
    financial_model = PeftModel.from_pretrained(
        base_model_reload,
        save_dir
    )

    print("Model successfully reloaded with LoRA weights")
    return financial_model

# Save the model
# `final_model` is the reloaded base model with the saved LoRA weights attached.
final_model = save_and_load_model()
