In [None]:
! pip install datasets transformers peft flash-attn

In [None]:
from datasets import load_dataset

# Load the "ibm/finqa" dataset through `datasets.load_dataset`.
finqa_dataset = load_dataset("ibm/finqa")

# Print the dataset object, then the first row of the "train" split, to inspect
# which fields are available before building prompts.
print(finqa_dataset)
sample = finqa_dataset['train'][0]
print(sample)


In [None]:
import os
from getpass import getpass

from huggingface_hub import login

# Authenticate against the Hugging Face Hub.
# The token is never written into the notebook: it is taken from the HF_TOKEN
# environment variable, and only when that is unset or empty do we fall back to
# an interactive prompt that does not echo what is typed.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    hf_token = getpass("Hugging Face access token: ")
    # Keep the value in the environment so later code in this kernel that reads
    # HF_TOKEN sees the same token.
    os.environ["HF_TOKEN"] = hf_token
login(token=hf_token)

In [4]:
from transformers import AutoTokenizer

model_name = "meta-llama/Llama-3.2-1B"  # Hub id of the checkpoint whose tokenizer is loaded here
# Load the tokenizer that belongs to `model_name`; the prompt-formatting and
# tokenization cells below read this `tokenizer` object.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [5]:
# Prompt template with six positional `{}` slots. `formatting_prompts_func` fills
# them, in order, with: pre_text, post_text, table, question, gold_inds, and the
# target output. (Fixes the "caculate" typo in step 3 of the instructions.)
prompt = """You are an expert in the financial domain tasked with solving a given financial problem. Follow these steps:

1. Read the question carefully.
2. Provide the formula for the given question.
3. Use the formula to calculate the final answer.


### Pretext:
{}

### Posttext:
{}

### Table:
{}

### Question:
{}

### GoldInds:
{}

### Output:
{}"""

EOS_TOKEN = tokenizer.eos_token  # end-of-sequence string appended to every formatted training text
def formatting_prompts_func(examples):
    """Render a column-oriented batch of rows into prompt strings.

    Args:
        examples: Mapping from column name to a sequence with one entry per row
            (the shape passed by ``Dataset.map(..., batched=True)``). The columns
            "pre_text", "post_text", "table", "question", "gold_inds" and
            "answer" are required.

    Returns:
        ``{"text": [...]}`` with one string per row: the module-level ``prompt``
        filled with (pre_text, post_text, table, question, gold_inds, answer),
        followed by ``EOS_TOKEN``.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Order matters: it must match the order of the `{}` slots in `prompt`.
    # NOTE(review): the instruction text asks the model to provide a formula, yet
    # only "answer" is used as the target. An earlier revision read "program_re"
    # without using it - confirm whether it should be part of the output.
    slot_columns = ("pre_text", "post_text", "table", "question", "gold_inds", "answer")
    rows = zip(*(examples[column] for column in slot_columns))
    return {"text": [prompt.format(*row) + EOS_TOKEN for row in rows]}

# Add a "text" column (formatted prompt + EOS) to the training split ...
train_dataset = finqa_dataset["train"].map(
    formatting_prompts_func,
    batched=True,
)
# ... and to the validation split, using the same formatter.
valid_dataset = finqa_dataset["validation"].map(
    formatting_prompts_func,
    batched=True,
)

In [None]:
# Use the end-of-sequence token as the padding token so that padding="max_length"
# in `tokenize_function` has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token
def tokenize_function(examples):
    """Tokenize a batch of prompt strings and attach causal-LM labels.

    Args:
        examples: Batch mapping whose "text" entry is a list of strings.

    Returns:
        The tokenizer output (padded/truncated to 1024 tokens) with an extra
        "labels" entry. Labels equal ``input_ids`` wherever ``attention_mask`` is
        1 and are -100 on padding positions, so padded positions are excluded
        from the loss instead of being trained on.
    """
    tokenized = tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=1024,
    )
    # Mask by attention_mask rather than by token id: the pad token is the EOS
    # token, and the genuine EOS that ends each text must stay in the labels.
    tokenized["labels"] = [
        [token_id if is_real else -100 for token_id, is_real in zip(ids, mask)]
        for ids, mask in zip(tokenized["input_ids"], tokenized["attention_mask"])
    ]
    return tokenized

# Tokenize both splits in batches; the intermediate "text" column is dropped once
# it has been converted to token ids.
tokenized_train_dataset = train_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"],
)
tokenized_valid_dataset = valid_dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"],
)



In [7]:

# Return only the three model inputs, as torch tensors, when rows are accessed.
tokenized_train_dataset.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)
tokenized_valid_dataset.set_format(
    type="torch",
    columns=["input_ids", "attention_mask", "labels"],
)

In [None]:
! pip install flash-attn

In [None]:

!pip install flash-attn --upgrade


In [10]:
import torch
from torch import nn
from flash_attn import flash_attn_func

class FlashAttentionModule(nn.Module):
    """Causal multi-head self-attention whose core is computed by ``flash_attn_func``.

    The module owns its own query/key/value/output ``nn.Linear`` projections
    (each ``hidden_size -> hidden_size``); they are freshly initialised on
    construction.

    Args:
        hidden_size: Size of the last dimension of the input and output.
        num_heads: Number of attention heads; must divide ``hidden_size``.

    Raises:
        ValueError: If ``hidden_size`` is not divisible by ``num_heads``.
    """

    def __init__(self, hidden_size, num_heads):
        super().__init__()
        if hidden_size % num_heads != 0:
            raise ValueError(
                f"hidden_size ({hidden_size}) must be divisible by num_heads ({num_heads})"
            )
        self.num_heads = num_heads
        self.head_dim = hidden_size // num_heads

        self.query = nn.Linear(hidden_size, hidden_size)
        self.key = nn.Linear(hidden_size, hidden_size)
        self.value = nn.Linear(hidden_size, hidden_size)
        self.out_proj = nn.Linear(hidden_size, hidden_size)

    def forward(self, hidden_states, attention_mask=None):
        """Apply causal self-attention.

        Args:
            hidden_states: Tensor of shape ``(batch, seq_len, hidden_size)``.
            attention_mask: Accepted for interface compatibility but not used;
                only the causal mask is applied, so padding is not masked out.

        Returns:
            Tensor of shape ``(batch, seq_len, hidden_size)``.
        """
        batch_size, seq_len, _ = hidden_states.size()

        # flash_attn_func takes and returns 4-D tensors laid out as
        # (batch, seq_len, num_heads, head_dim), so the projections are only
        # split into heads - no transpose and no folding of heads into batch.
        # NOTE(review): per the flash-attn docs the kernel expects fp16/bf16 CUDA
        # tensors - confirm the caller runs this under a suitable dtype/device.
        head_shape = (batch_size, seq_len, self.num_heads, self.head_dim)
        q = self.query(hidden_states).view(head_shape)
        k = self.key(hidden_states).view(head_shape)
        v = self.value(hidden_states).view(head_shape)

        context = flash_attn_func(q, k, v, causal=True)

        # Merge the heads back: (batch, seq_len, num_heads * head_dim).
        context = context.reshape(batch_size, seq_len, self.num_heads * self.head_dim)

        return self.out_proj(context)


In [None]:
from transformers import AutoModelForCausalLM

model_name = "meta-llama/Llama-3.2-1B"
model = AutoModelForCausalLM.from_pretrained(model_name)

# Swap every torch.nn.MultiheadAttention submodule for a FlashAttentionModule.
#
# NOTE(review): Hugging Face Llama models appear to implement attention with
# their own class rather than nn.MultiheadAttention, in which case nothing below
# matches and the model is left untouched - confirm. If FlashAttention is the
# goal, check whether `from_pretrained(..., attn_implementation="flash_attention_2")`
# is the supported route for this transformers version.
# NOTE(review): FlashAttentionModule creates new nn.Linear projections, so any
# module swapped in here starts from freshly initialised (not pretrained) weights.
#
# Names are collected first so the module tree is not modified while
# `named_modules()` is still iterating over it. The root module (name "") is
# skipped because it has no parent to re-attach to.
attention_targets = [
    name
    for name, module in model.named_modules()
    if name and isinstance(module, nn.MultiheadAttention)
]

for name in attention_targets:
    # "a.b.attn" -> parent "a.b", child "attn"; a top-level child gives parent "",
    # for which get_submodule returns the model itself.
    parent_name, _, child_name = name.rpartition(".")
    parent_module = model.get_submodule(parent_name)
    old_attention = getattr(parent_module, child_name)
    flash_attention = FlashAttentionModule(old_attention.embed_dim, old_attention.num_heads)
    # Replace under the attribute name the module actually had, instead of
    # assuming it is always called "self_attn".
    setattr(parent_module, child_name, flash_attention)

# Report what really happened rather than unconditionally claiming success.
if attention_targets:
    print(f"Replaced {len(attention_targets)} attention module(s) with FlashAttention.")
else:
    print("No nn.MultiheadAttention modules found; the model's attention was left unchanged.")


In [None]:
from peft import get_peft_model, LoraConfig, TaskType

# LoRA hyper-parameters for causal-LM fine-tuning (adapters are trained, hence
# inference_mode=False).
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
)

# Wrap the loaded model with the adapters described by `lora_config`; the name
# `model` now refers to the PEFT-wrapped model.
model = get_peft_model(model, lora_config)
print("Integrated LoRA adapters into the model.")


In [None]:
from transformers import TrainingArguments

# Training configuration, grouped by concern.
# NOTE(review): newer transformers releases name the evaluation option
# `eval_strategy` - confirm which spelling the installed version expects.
training_args = TrainingArguments(
    # Where checkpoints and logs go.
    output_dir="./finetuned_llama_finqa",
    logging_dir="./logs",
    logging_steps=100,
    # Evaluate and checkpoint once per epoch, keeping at most two checkpoints.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    # Optimisation schedule.
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Mixed-precision (fp16) training.
    fp16=True,
)

In [None]:
from transformers import Trainer

# Bundle the LoRA-wrapped model, the training configuration, the tokenizer and
# the tokenized train/validation splits into a Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    eval_dataset=tokenized_valid_dataset,
    train_dataset=tokenized_train_dataset,
)


In [None]:
# Run fine-tuning with the settings in `training_args`; as the last expression of
# the cell, the value returned by `train()` is displayed as the cell output.
trainer.train()

In [22]:

# Held-out split used by the inference cells below.
test_dataset = finqa_dataset['test']

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Move the model to the selected device (as the last expression of the cell, the
# returned module is also displayed).
model.to(device)

In [24]:
# Tokenize one test example as a quick check of the inference input path.
# `input_text` is built here so the cell runs on a fresh kernel (it was previously
# only assigned by a later cell). The example is rendered with the same `prompt`
# template used for training, with the output slot left empty for the model to
# complete.
first_example = test_dataset[0]
input_text = prompt.format(
    first_example['pre_text'],
    first_example['post_text'],
    first_example['table'],
    first_example['question'],
    first_example['gold_inds'],
    "",
)
inputs = tokenizer(input_text, return_tensors='pt', truncation=True, max_length=1024)
# Place every input tensor on the same device as the model.
inputs = {key: value.to(device) for key, value in inputs.items()}

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.trainer_utils import get_last_checkpoint

# Locate the most recent checkpoint instead of hard-coding a step number: the
# step count depends on dataset size and batch size, and with save_total_limit=2
# older checkpoint directories are deleted as training progresses.
output_dir = './finetuned_llama_finqa'
checkpoint_dir = get_last_checkpoint(output_dir)
if checkpoint_dir is None:
    raise FileNotFoundError(
        f"No checkpoint directory found in {output_dir}; run training first."
    )

model = AutoModelForCausalLM.from_pretrained(checkpoint_dir)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)

# The freshly loaded model is a new object, so it must be moved to the device
# again; inputs are moved to the same device inside the loop.
model.to(device)
model.eval()

predictions = []

for example in test_dataset:
    # Prompt the model exactly as it was trained: the full template with the
    # output slot left empty, rather than the bare question.
    # NOTE(review): inputs longer than 1024 tokens are truncated, which can cut
    # off the end of the prompt (question / "### Output:") - confirm acceptable.
    input_text = prompt.format(
        example['pre_text'],
        example['post_text'],
        example['table'],
        example['question'],
        example['gold_inds'],
        "",
    )
    inputs = tokenizer(input_text, return_tensors='pt', truncation=True, max_length=1024)
    inputs = {key: value.to(device) for key, value in inputs.items()}
    prompt_length = inputs['input_ids'].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            # Budget for *generated* tokens only; `max_length` would also count
            # the prompt and leave little or no room for an answer.
            max_new_tokens=256,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the continuation; the first `prompt_length` ids are the prompt.
    generated_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    predictions.append(generated_text)
    print(generated_text)
