In [None]:
!pip install -U transformers
!pip install -U datasets
!pip install -U accelerate
!pip install -U peft
!pip install -U trl
!pip install -U bitsandbytes
!pip install huggingface_hub[hf_xet]

In [None]:
from huggingface_hub import login
from google.colab import userdata

# Read the Hugging Face access token from the Colab secret named "HF_TOKEN".
# It is only consumed later, by the `login(token=hf_token)` cell that precedes
# the upload to the Hub.
hf_token = userdata.get("HF_TOKEN")

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load tokenizer & model
# model_id = "meta-llama/Llama-2-7b-hf"
# NOTE(review): `peft_model` is not referenced by any other cell visible in
# this notebook; kept only so existing references (if any) keep working.
peft_model = "FinGPT/fingpt-mt_llama2-7b_lora"

# Base checkpoint that is fine-tuned below.
model_dir = "facebook/opt-350m"

tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=True)

# `trust_remote_code` is deliberately not passed: the checkpoint uses an
# architecture that ships with transformers, and the flag would allow a model
# repository to execute arbitrary Python on load.
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Disable the generation cache for training; the inference cells opt back in
# per call with `use_cache=True`.
model.config.use_cache = False

# `pretraining_tp` is a Llama-style config field. Only touch it when the loaded
# config actually defines it, so other architectures (e.g. OPT) do not get a
# stray attribute written into their config.
if hasattr(model.config, "pretraining_tp"):
    model.config.pretraining_tp = 1

In [None]:
# Shell escape: show the GPU(s), driver and memory usage visible to this runtime.
!nvidia-smi

In [None]:
# Training prompt template. The three positional `{}` slots are filled, in
# order, with the question, the chain-of-thought and the final response (see
# the `.format(input, cot, output)` call in `formatting_prompts_func`).
# NOTE(review): the <|im_start|>/<|im_sep|>/<|im_end|> markers are inserted as
# literal text; whether the loaded tokenizer treats them as special tokens is
# not visible in this notebook — confirm.
train_prompt_style="""
<|im_start|>system<|im_sep|>
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
<|im_end|>
<|im_start|>user<|im_sep|>
{}<|im_end|>
<|im_start|>assistant<|im_sep|>
<think>
{}
</think>
{}
"""

In [None]:
# End-of-sequence marker appended to every rendered training example.
EOS_TOKEN = tokenizer.eos_token


def formatting_prompts_func(examples):
    """Render a batch of dataset rows into training strings.

    Args:
        examples: Batched mapping of column name -> list of values. The
            "Open-ended Verifiable Question", "Complex_CoT" and "Response"
            columns are read.

    Returns:
        A dict with a single "text" key holding one string per row: the row
        substituted into `train_prompt_style`, followed by `EOS_TOKEN`.
    """
    rows = zip(
        examples["Open-ended Verifiable Question"],
        examples["Complex_CoT"],
        examples["Response"],
    )
    return {
        "text": [
            train_prompt_style.format(question, reasoning, answer) + EOS_TOKEN
            for question, reasoning, answer in rows
        ],
    }

In [None]:
from datasets import load_dataset

# First 1000 training rows only, to keep the fine-tuning run short.
# `trust_remote_code` is intentionally not passed: it lets a dataset repository
# run arbitrary Python on load, and recent `datasets` releases no longer
# support script-based datasets.
dataset = load_dataset(
    "TheFinAI/Fino1_Reasoning_Path_FinQA", split="train[0:1000]"
)

# Add the rendered "text" column used for training.
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
)

# Preview one rendered example. Indexing the row first avoids materialising the
# whole "text" column just to read a single entry.
dataset[20]["text"]

In [None]:
from transformers import DataCollatorForLanguageModeling

# Batch collator handed to the SFTTrainer further down. `mlm=False` selects the
# causal (next-token) language-modeling objective instead of masked-LM.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

In [None]:
# Inference prompt template: same system/user framing as `train_prompt_style`,
# but it stops right after the opening <think> tag so the model writes the
# reasoning and the answer itself. Two positional `{}` slots: the question and
# the initial content of the think section (the inference cells pass "").
inference_prompt_style = """
<|im_start|>system<|im_sep|>
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
<|im_end|>
<|im_start|>user<|im_sep|>
{}<|im_end|>
<|im_start|>assistant<|im_sep|>
<think>
{}
"""

In [None]:
import torch

# Sanity-check the runtime before any GPU work. The device name is only queried
# when CUDA is present, because `get_device_name` raises on a CPU-only runtime.
cuda_available = torch.cuda.is_available()
print(cuda_available)  # Should return True
if cuda_available:
    print(torch.cuda.get_device_name(0))  # Should show the GPU name
else:
    print("No CUDA device visible - switch the runtime to a GPU before continuing.")


In [None]:
# Baseline generation with the not-yet-fine-tuned model on one training question.
question = dataset[20]['Open-ended Verifiable Question']

# The prompt must end at the open <think> tag. No EOS is appended here: an
# end-of-sequence token at the end of the prompt tells the model the text is
# already finished, so whatever it generates afterwards is unrelated to it.
prompt = inference_prompt_style.format(question, "")

# Follow the model's own device instead of hard-coding "cuda"; the model was
# placed with `device_map="auto"`.
inputs = tokenizer(
    [prompt],
    return_tensors="pt"
).to(model.device)

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=250,
    eos_token_id=tokenizer.eos_token_id,
    use_cache=True,  # per-call override of the config-level `use_cache = False`
)
response = tokenizer.batch_decode(outputs)

# The decoded text contains the prompt as well; print only what follows the
# assistant marker.
print(response[0].split("<|im_start|>assistant<|im_sep|>")[1])

In [None]:
from peft import LoraConfig, get_peft_model

# Linear layers we would like to adapt. Layer names differ per architecture:
# Llama-style models use o_proj / gate_proj / up_proj / down_proj, while OPT
# names its attention output `out_proj` and its MLP layers `fc1` / `fc2`.
# The original Llama-only list therefore left OPT's attention output and MLP
# layers without adapters (only q/k/v matched).
LORA_CANDIDATE_MODULES = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "out_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
    "fc1",
    "fc2",
]

# Keep only the candidates that actually exist in the loaded model, so the
# config is correct for whichever base checkpoint `model` was loaded from.
present_leaf_names = {name.rsplit(".", 1)[-1] for name, _ in model.named_modules()}
lora_target_modules = [
    module_name
    for module_name in LORA_CANDIDATE_MODULES
    if module_name in present_leaf_names
]
if not lora_target_modules:
    raise ValueError(
        "None of the candidate LoRA target modules exist in the loaded model; "
        "extend LORA_CANDIDATE_MODULES for this architecture."
    )

# LoRA config
peft_config = LoraConfig(
    lora_alpha=16,                           # Scaling factor for LoRA
    lora_dropout=0.05,                       # Add slight dropout for regularization
    r=64,                                    # Rank of the LoRA update matrices
    bias="none",                             # No bias reparameterization
    task_type="CAUSAL_LM",                   # Task type: Causal Language Modeling
    target_modules=lora_target_modules,      # Target modules for LoRA
)

# Wrap the base model with the LoRA adapters described by `peft_config`.
model = get_peft_model(model, peft_config)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments


# Training Arguments
training_arguments = TrainingArguments(
    output_dir="output",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,   # gradients from 2 batches are accumulated per optimizer step
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    logging_steps=0.2,               # a float below 1 is read as a fraction of the total training steps
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="none"
)

# Initialize the Trainer
# `model` was already wrapped by `get_peft_model(model, peft_config)` in the
# previous cell, so `peft_config` is not passed again here: handing the trainer
# a PEFT model together with a second adapter config is redundant at best and
# rejected by some TRL versions.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset,       # uses the rendered "text" column
    data_collator=data_collator,
)

In [None]:
import gc
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE: the base model is intentionally NOT reloaded here. Rebinding `model` to
# a fresh `from_pretrained` copy does not affect the trainer (it keeps its own
# reference to the LoRA-wrapped model), but it wastes memory and leaves the
# notebook-level `model` pointing at untrained weights.

# Release cached allocations before the training run.
gc.collect()
torch.cuda.empty_cache()

# Make sure the generation cache is off on the model that is actually trained.
trainer.model.config.use_cache = False

# Set the padding token only when the tokenizer has none. Overwriting an
# existing pad token with EOS makes the language-modeling collator mask every
# EOS label as padding, so the model would never learn to stop.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

trainer.train()

In [None]:
# Persist the training result to ./fine_tuned_model.

# Save the model; the testing cell below loads the LoRA adapter back from this
# same directory.
output_dir = "./fine_tuned_model"
trainer.save_model(output_dir)

## Testing the fine-tuned model

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load the base model and tokenizer
model_dir = "facebook/opt-350m"
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# `trust_remote_code` is deliberately not passed: the checkpoint uses an
# architecture that ships with transformers, and the flag would allow a model
# repository to execute arbitrary Python on load.
base_model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Load the fine-tuned LoRA adapter
peft_model_dir = "./fine_tuned_model" # Or wherever you saved your model
model = PeftModel.from_pretrained(base_model, peft_model_dir)

# Merge the LoRA weights with the base model for easier inference
model = model.merge_and_unload()

# Set the padding token if it was set during training
tokenizer.pad_token = tokenizer.eos_token

# Move model to GPU if available
if torch.cuda.is_available():
    model = model.cuda()

In [None]:
# Define your test prompts
test_prompts = [
    "Please answer the given financial question based on the context. Context: In the fiscal year ending December 31, 2023, Company X reported a net income of $500 million. The total revenue for the same period was $2.5 billion. Question: What was the net profit margin for Company X in 2023?",
    # Add more prompts as needed
]

# Prepare prompts with the inference style and tokenize
inference_prompt_style = """
<|im_start|>system<|im_sep|>
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.
<|im_end|>
<|im_start|>user<|im_sep|>
{}<|im_end|>
<|im_start|>assistant<|im_sep|>
<think>
{}
"""

# Decoder-only models continue from the last token of each row, so batches
# must be padded on the left; right padding would put pad tokens between a
# shorter prompt and its continuation.
tokenizer.padding_side = "left"

# No EOS is appended to the prompts: an end-of-sequence token at the end of the
# prompt tells the model the text is already finished.
prompt_texts = [inference_prompt_style.format(prompt, "") for prompt in test_prompts]

# Follow the model's own device; the loading cell only moves the model to the
# GPU when one is available, so a hard-coded "cuda" could mismatch.
inputs = tokenizer(prompt_texts, return_tensors="pt", padding=True, truncation=True).to(model.device)


model.eval()
with torch.no_grad(): # Disable gradient calculation for inference
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=250,  # Adjust as needed
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id, # Use the correct padding token
        use_cache=True,
    )

responses = tokenizer.batch_decode(outputs, skip_special_tokens=True)

for i, response in enumerate(responses):
    print(f"Prompt {i+1}:")
    # Split the response to show only the assistant's part
    assistant_response = response.split("<|im_start|>assistant<|im_sep|>")
    if len(assistant_response) > 1:
        print(assistant_response[1])
    else:
        print(response) # Print the whole response if the split didn't work as expected
    print("-" * 50)

In [None]:
# Authenticate against the Hugging Face Hub with the token read from the Colab
# secrets at the top of the notebook; needed before the upload below.
login(token=hf_token)

In [None]:
# Hub repository that receives the artifacts. Swap the "your-username"
# placeholder for your own account or organisation before running.
repo_id = "your-username/my-fine-tuned-financial-model"

# Upload the model first, then the tokenizer, to the same repository.
for hub_artifact in (model, tokenizer):
    hub_artifact.push_to_hub(repo_id)

print(f"Model and tokenizer pushed to https://huggingface.co/{repo_id}")