In [None]:
import torch
# Report the installed PyTorch build and whether a CUDA device is usable.
for label, value in (
    ("PyTorch Version:", torch.__version__),
    ("CUDA Available:", torch.cuda.is_available()),
):
    print(label, value)


PyTorch Version: 2.6.0+cu124
CUDA Available: True


# **Step 1: Install all required libraries**

In [None]:
!pip install -q transformers peft accelerate datasets trl

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/491.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m16.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/335.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m335.7/335.7 kB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m15.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# **Step 2: Load and Explore the Dataset**

In [None]:
from datasets import load_dataset
import pandas as pd

# Fetch the financial Q&A dataset from the Hugging Face Hub.
print("📥 Loading dataset...")
dataset = load_dataset("virattt/financial-qa-10K")

print("\n📊 Dataset splits:")
print(dataset)

# Peek at the first three raw training records.
train_split = dataset["train"]
print("\n🔍 Displaying 3 sample records:")
for position in range(3):
    print(f"\nExample {position + 1}:")
    print(train_split[position])

# A 100-row pandas sample is enough for quick length statistics.
print("\n📄 Converting first 100 records to DataFrame...")
df = pd.DataFrame(train_split[:100])

print("\n🧪 Columns present in dataset:")
print(list(df.columns))


def _word_count(text):
    """Return the number of whitespace-separated words in ``text``."""
    return len(text.split())


# For each text column: announce, store the per-row word count in
# "<column>_length", then print the mean of that new column.
for column, heading in (
    ("question", "\n🧠 Average question length (in words):"),
    ("answer", "\n🧠 Average answer length (in words):"),
):
    print(heading)
    length_column = f"{column}_length"
    df[length_column] = df[column].map(_word_count)
    print(df[length_column].mean())

print("\n✅ EDA complete. Ready to move on to formatting.")



📥 Loading dataset...


README.md:   0%|          | 0.00/419 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/1.59M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7000 [00:00<?, ? examples/s]


📊 Dataset splits:
DatasetDict({
    train: Dataset({
        features: ['question', 'answer', 'context', 'ticker', 'filing'],
        num_rows: 7000
    })
})

🔍 Displaying 3 sample records:

Example 1:
{'question': 'What area did NVIDIA initially focus on before expanding to other computationally intensive fields?', 'answer': 'NVIDIA initially focused on PC graphics.', 'context': 'Since our original focus on PC graphics, we have expanded to several other large and important computationally intensive fields.', 'ticker': 'NVDA', 'filing': '2023_10K'}

Example 2:
{'question': 'What are some of the recent applications of GPU-powered deep learning as mentioned by NVIDIA?', 'answer': 'Recent applications of GPU-powered deep learning include recommendation systems, large language models, and generative AI.', 'context': 'Some of the most recent applications of GPU-powered deep learning include recommendation systems, which are AI algorithms trained to understand the preferences, previous dec

# **Step 3: Format dataset into instruction format for Pythia fine-tuning.**
### What is the "Instruction Tuning" format?
In this fine-tuning task, we're training a language model to behave like a helpful assistant. To do that, we follow a standard instruction-response format, which helps the model learn how to read a question placed under `### Instruction:` and produce the matching answer under `### Response:`.



In [None]:
from datasets import Dataset

print("🔄 Formatting dataset into instruction tuning format...")

def format_example(example):
    """Render one Q&A record in the instruction/response prompt layout.

    Args:
        example: Mapping that provides the ``question`` and ``answer`` keys.

    Returns:
        A dict with the single key ``"text"`` holding the question under a
        ``### Instruction:`` header and the answer under a ``### Response:``
        header, separated by a blank line.
    """
    question = example["question"]
    answer = example["answer"]
    prompt_text = f"### Instruction:\n{question}\n\n### Response:\n{answer}"
    return {"text": prompt_text}

# Add the rendered "text" field to every training record.
formatted_dataset = dataset["train"].map(format_example)

# Sanity check: show the first three rendered prompts.
print("\n🧾 Sample formatted records:")
for position in range(3):
    rendered = formatted_dataset[position]["text"]
    print(f"\nFormatted Example {position + 1}:\n{rendered}")


🔄 Formatting dataset into instruction tuning format...


Map:   0%|          | 0/7000 [00:00<?, ? examples/s]


🧾 Sample formatted records:

Formatted Example 1:
### Instruction:
What area did NVIDIA initially focus on before expanding to other computationally intensive fields?

### Response:
NVIDIA initially focused on PC graphics.

Formatted Example 2:
### Instruction:
What are some of the recent applications of GPU-powered deep learning as mentioned by NVIDIA?

### Response:
Recent applications of GPU-powered deep learning include recommendation systems, large language models, and generative AI.

Formatted Example 3:
### Instruction:
What significant invention did NVIDIA create in 1999?

### Response:
NVIDIA invented the GPU in 1999.


In [None]:
from huggingface_hub import notebook_login

# Authenticate first: notebook_login() asks for a Hugging Face access token.
print("🔐 Logging into Hugging Face Hub...")
notebook_login()

# Upload the instruction-formatted dataset to this Hub repository.
dataset_repo_id = "gandhiraketla277/financial-qa-10k-instruction"
print("☁️ Pushing formatted dataset to Hugging Face...")
formatted_dataset.push_to_hub(dataset_repo_id)

print("✅ Dataset pushed successfully!")


🔐 Logging into Hugging Face Hub...


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

☁️ Pushing formatted dataset to Hugging Face...


Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/7 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/452 [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


✅ Dataset pushed successfully!


# **Step 4: Tokenization**
### What is Tokenization?
Before a language model can learn from text, the text must be converted into numbers. This process is called tokenization.

A token is a chunk of text — usually a word or subword — that the model can understand. For example:

"Financial report" → [1234, 5678]

In [None]:
from transformers import AutoTokenizer

# Load the (fast) tokenizer that ships with the Pythia-410m checkpoint.
tokenizer_checkpoint = "EleutherAI/pythia-410m"
print("🔢 Loading tokenizer for Pythia-410m...")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint, use_fast=True)

# Batches need a padding token; fall back to EOS when none is defined.
pad_token_missing = tokenizer.pad_token is None
if pad_token_missing:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize(example):
    """Tokenize the ``text`` field with the notebook-level ``tokenizer``.

    Every sequence is padded or truncated to exactly 512 tokens and the
    encoding is requested as PyTorch tensors.

    Args:
        example: Record (or batch of records) exposing a ``"text"`` entry.

    Returns:
        Whatever the tokenizer call returns for ``example["text"]``.
    """
    encode_options = {
        "padding": "max_length",   # pad short sequences up to max_length
        "truncation": True,        # cut long sequences down to max_length
        "max_length": 512,
        "return_tensors": "pt",
    }
    return tokenizer(example["text"], **encode_options)

print("🧼 Tokenizing dataset...")
# Drop every pre-existing column, not only "text": the raw string fields
# (question, answer, context, ticker, filing) would otherwise be carried into
# the training set, where only the tokenizer outputs are needed.
tokenized_dataset = formatted_dataset.map(
    tokenize,
    batched=True,
    remove_columns=formatted_dataset.column_names,
)

print("\n✅ Tokenization complete.")
print("🧾 Sample tokenized output (first record):")
print(tokenized_dataset[0])


🔢 Loading tokenizer for Pythia-410m...
🧼 Tokenizing dataset...


Map:   0%|          | 0/7000 [00:00<?, ? examples/s]


✅ Tokenization complete.
🧾 Sample tokenized output (first record):
{'question': 'What area did NVIDIA initially focus on before expanding to other computationally intensive fields?', 'answer': 'NVIDIA initially focused on PC graphics.', 'context': 'Since our original focus on PC graphics, we have expanded to several other large and important computationally intensive fields.', 'ticker': 'NVDA', 'filing': '2023_10K', 'input_ids': [4118, 41959, 27, 187, 1276, 2170, 858, 24671, 1838, 5236, 8523, 2770, 327, 1078, 16122, 281, 643, 43245, 17193, 4910, 32, 187, 187, 4118, 19371, 27, 187, 23459, 1838, 5236, 8523, 7106, 327, 5578, 15896, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

# **Step 5: LoRA configuration + training arguments setup**

In [None]:
from peft import LoraConfig, TaskType
from transformers import TrainingArguments

# LoRA trains a small set of adapter weights instead of the whole model,
# which keeps fine-tuning fast and light on memory.
print("⚙️ Setting up LoRA config for Pythia-410M...")

# Names of the linear layers that receive adapters.
lora_target_modules = [
    "query_key_value",   # attention: QKV projection
    "dense",             # attention: output projection
    "dense_h_to_4h",     # MLP: input projection
    "dense_4h_to_h",     # MLP: output projection
]

peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,                  # low-rank dimension
    lora_alpha=16,        # scaling factor
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

print("✅ LoRA config is ready with target modules for Pythia.")

# TrainingArguments describe the run itself: batch size, epochs, learning
# rate, logging and checkpointing.
print("\n📝 Setting up training arguments...")

training_options = {
    "output_dir": "./pythia-finetuned-financial",  # checkpoints are written here
    "per_device_train_batch_size": 2,              # lower this on out-of-memory errors
    "gradient_accumulation_steps": 4,              # 2 x 4 = 8 samples per optimizer step per device
    "num_train_epochs": 2,
    "logging_steps": 10,                           # report the loss every 10 steps
    "learning_rate": 2e-4,
    "save_strategy": "epoch",                      # checkpoint at the end of each epoch
    "fp16": True,                                  # mixed-precision training
    "push_to_hub": False,                          # uploading is done explicitly later
}
training_args = TrainingArguments(**training_options)

print("✅ TrainingArguments set. We're ready to move to fine-tuning 🚀")



⚙️ Setting up LoRA config for Pythia-410M...
✅ LoRA config is ready with target modules for Pythia.

📝 Setting up training arguments...
✅ TrainingArguments set. We're ready to move to fine-tuning 🚀


# **Step 6: Start fine-tuning with SFTTrainer**

In [None]:
# List the layers whose names match the LoRA target modules, to confirm the
# names used in the LoRA config exist in this architecture.
#
# `base_model` is only created by a later cell, so on a fresh kernel
# (Restart & Run All) this cell used to fail with NameError. Load the model
# here when it is not available yet, with the same arguments the training
# cell uses.
from transformers import AutoModelForCausalLM

try:
    base_model
except NameError:
    base_model = AutoModelForCausalLM.from_pretrained(
        "EleutherAI/pythia-410m",
        torch_dtype="auto",
        device_map="auto"
    )

for name, module in base_model.named_modules():
    if "dense" in name or "query_key_value" in name:
        print(name, "→", type(module).__name__)



gpt_neox.layers.0.attention.query_key_value → Linear
gpt_neox.layers.0.attention.dense → Linear
gpt_neox.layers.0.mlp.dense_h_to_4h → Linear
gpt_neox.layers.0.mlp.dense_4h_to_h → Linear
gpt_neox.layers.1.attention.query_key_value → Linear
gpt_neox.layers.1.attention.dense → Linear
gpt_neox.layers.1.mlp.dense_h_to_4h → Linear
gpt_neox.layers.1.mlp.dense_4h_to_h → Linear
gpt_neox.layers.2.attention.query_key_value → Linear
gpt_neox.layers.2.attention.dense → Linear
gpt_neox.layers.2.mlp.dense_h_to_4h → Linear
gpt_neox.layers.2.mlp.dense_4h_to_h → Linear
gpt_neox.layers.3.attention.query_key_value → Linear
gpt_neox.layers.3.attention.dense → Linear
gpt_neox.layers.3.mlp.dense_h_to_4h → Linear
gpt_neox.layers.3.mlp.dense_4h_to_h → Linear
gpt_neox.layers.4.attention.query_key_value → Linear
gpt_neox.layers.4.attention.dense → Linear
gpt_neox.layers.4.mlp.dense_h_to_4h → Linear
gpt_neox.layers.4.mlp.dense_4h_to_h → Linear
gpt_neox.layers.5.attention.query_key_value → Linear
gpt_neox.layers.5

In [None]:
from transformers import AutoModelForCausalLM
from peft import get_peft_model
from trl import SFTTrainer

# 1) Load the pretrained base model.
base_checkpoint = "EleutherAI/pythia-410m"
print("📦 Loading base model: EleutherAI/pythia-410m")
base_model = AutoModelForCausalLM.from_pretrained(
    base_checkpoint,
    torch_dtype="auto",
    device_map="auto",
)

# 2) Wrap it with the LoRA adapters described by peft_config.
print("🔗 Applying LoRA configuration to the model...")
model = get_peft_model(base_model, peft_config)

# 3) Build the trainer from the adapter model, the training arguments and
#    the tokenized dataset.
print("📚 Preparing SFTTrainer...")
sft_settings = dict(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)
trainer = SFTTrainer(**sft_settings)

# 4) Run fine-tuning.
print("🚀 Starting training now...\n")
trainer.train()

# 5) Persist the trained model to the output directory.
adapter_dir = "./pythia-finetuned-financial"
print("\n💾 Saving fine-tuned LoRA model...")
model.save_pretrained(adapter_dir)

print("✅ Training complete and model saved to './pythia-finetuned-financial'")



📦 Loading base model: EleutherAI/pythia-410m
🔗 Applying LoRA configuration to the model...
📚 Preparing SFTTrainer...


Truncating train dataset:   0%|          | 0/7000 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🚀 Starting training now...



[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mgandhiaiwork[0m ([33mgandhiaiwork-self[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,2.5602
20,2.1267
30,2.0265
40,2.0815
50,1.9436
60,2.0021
70,1.9843
80,1.946
90,1.9713
100,1.91



💾 Saving fine-tuned LoRA model...
✅ Training complete and model saved to './pythia-finetuned-financial'


In [None]:
import os

# Show what the training run wrote to the output directory.
adapter_dir = "./pythia-finetuned-financial"
print("📁 Files saved in LoRA output directory:")
saved_entries = os.listdir(adapter_dir)
print(saved_entries)


📁 Files saved in LoRA output directory:
['checkpoint-875', 'adapter_model.safetensors', 'README.md', 'checkpoint-1750', 'adapter_config.json', 'runs']


In [None]:
from huggingface_hub import notebook_login
from transformers import AutoTokenizer
from peft import PeftModel

# 1) Authenticate with the Hugging Face Hub.
print("🔐 Logging in to Hugging Face Hub...")
notebook_login()

# 2) Target repository on the Hub (change if needed).
repo_id = "gandhiraketla277/pythia-financial-lora"

# 3) Reload the base model, attach the adapter saved on disk, and upload it.
print("📤 Pushing fine-tuned LoRA adapter to Hub...")
fresh_base = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-410m")
adapter_model = PeftModel.from_pretrained(fresh_base, "./pythia-finetuned-financial")
adapter_model.push_to_hub(repo_id)
# These were only needed for the upload; release the references.
del adapter_model, fresh_base

# 4) Upload the tokenizer to the same repository (optional but recommended).
print("📤 Pushing tokenizer to Hub...")
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-410m")
tokenizer.push_to_hub(repo_id)

print(f"✅ Done! Your model is live at: https://huggingface.co/{repo_id}")

🔐 Logging in to Hugging Face Hub...


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

📤 Pushing fine-tuned LoRA adapter to Hub...


README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/12.6M [00:00<?, ?B/s]

📤 Pushing tokenizer to Hub...


No files have been modified since last commit. Skipping to prevent empty commit.


✅ Done! Your model is live at: https://huggingface.co/gandhiraketla277/pythia-financial-lora


In [None]:
def format_response(response_text, sentences_per_paragraph=2):
    """Break generated text into short paragraphs for easier reading.

    The text is split into sentences on ``'. '``; consecutive sentences are
    grouped into paragraphs, and the paragraphs are joined with a blank line.

    Args:
        response_text: The text to reformat.
        sentences_per_paragraph: Maximum number of sentences per paragraph
            (default 2).

    Returns:
        The reformatted text. Empty or whitespace-only input yields ``""``.

    Raises:
        ValueError: If ``sentences_per_paragraph`` is smaller than 1.
    """
    if sentences_per_paragraph < 1:
        raise ValueError("sentences_per_paragraph must be at least 1")

    # Ignore blank pieces (empty input, trailing ". ") so they do not turn
    # into paragraphs that consist of a lone ".".
    sentences = [
        piece for piece in response_text.strip().split('. ') if piece.strip()
    ]

    paragraphs = []
    for start in range(0, len(sentences), sentences_per_paragraph):
        paragraph = '. '.join(sentences[start:start + sentences_per_paragraph])
        # split('. ') removed the period after every sentence except possibly
        # the last one of the text; restore it only when the paragraph does not
        # already end with terminal punctuation, to avoid producing "..".
        if not paragraph.endswith(('.', '!', '?')):
            paragraph += '.'
        paragraphs.append(paragraph)

    return '\n\n'.join(paragraphs)



In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Load the tokenizer; reuse EOS as the padding token so generate() can be
# given an explicit pad_token_id.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-410m")
tokenizer.pad_token = tokenizer.eos_token

# Load the base model in eval mode on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-410m").eval().to(device)

# The prompt follows the training template exactly
# ("### Instruction:\n<question>\n\n### Response:\n"), with no extra space
# before the question.
prompt = "### Instruction:\nWhat are Tesla's main risk factors?\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)

# One set of sampling options shared by both generations.
generation_kwargs = dict(
    max_new_tokens=1000,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)
# Both generations start from the same RNG state so the comparison is
# repeatable and not skewed by sampling noise.
GENERATION_SEED = 42

# Generate the baseline BEFORE attaching the adapter: PeftModel.from_pretrained
# injects the LoRA layers into `base_model` in place, so generating from
# `base_model` afterwards would no longer show the untuned model.
torch.manual_seed(GENERATION_SEED)
with torch.no_grad():
    base_output = base_model.generate(**inputs, **generation_kwargs)

# Attach the fine-tuned LoRA adapter from the local output directory.
# From here on `base_model` carries the adapter layers as well.
lora_model = PeftModel.from_pretrained(
    base_model,
    "./pythia-finetuned-financial"
).eval().to(base_model.device)

# Generate from the fine-tuned model.
torch.manual_seed(GENERATION_SEED)
with torch.no_grad():
    lora_output = lora_model.generate(**inputs, **generation_kwargs)

# Decode responses
base_text = tokenizer.decode(base_output[0], skip_special_tokens=True)
lora_text = tokenizer.decode(lora_output[0], skip_special_tokens=True)

# Remove the prompt: keep everything after the FIRST response marker, so a
# marker produced by the model itself does not cut the answer short.
base_response = base_text.split("### Response:", 1)[-1].strip()
lora_response = lora_text.split("### Response:", 1)[-1].strip()

# Display both outputs
print("\n" + "="*80)
print("📉 BEFORE Fine-Tuning (Base Pythia Model)")
print("="*80)
print(format_response(base_response))

print("\n" + "="*80)
print("📈 AFTER Fine-Tuning (LoRA Adapter from Hugging Face)")
print("="*80)
print(format_response(lora_response))




Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.



📉 BEFORE Fine-Tuning (Base Pythia Model)
Other risks include global political and economic instability, impacts on global trade relations, and the availability of financing options. Additionally, there are risks related to terrorist attacks, global conflicts, natural disasters, or civil unrest.

These risks, though less serious, could adversely affect the business and results of operations. Additionally, there are concerns about foreign national or.

📈 AFTER Fine-Tuning (LoRA Adapter from Hugging Face)
These risks include changes in ownership and management, increasing staffing levels, promotions, and pay raises, and new roles and responsibilities. These changes may affect business operations, competitive position, and customer satisfaction, contributing to adverse impacts on their business.

Additionally, these risks may affect their revenue generation and business decisions. Furthermore, certain manufacturing or equipment failures could have an adverse effect on selling or paying cu

In [None]:
from peft import PeftModel
# Print the adapter configuration registered on the trained PEFT model.
print("🔍 Checking active LoRA adapters:")
print(model.peft_config)

# Report every submodule whose name contains "lora" (case-insensitive).
lora_module_names = (
    module_name
    for module_name, _ in model.named_modules()
    if "lora" in module_name.lower()
)
for module_name in lora_module_names:
    print("✅ LoRA module attached:", module_name)


🔍 Checking active LoRA adapters:
{'default': LoraConfig(task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='EleutherAI/pythia-410m', revision=None, inference_mode=False, r=8, target_modules={'dense_h_to_4h', 'dense', 'dense_4h_to_h', 'query_key_value'}, exclude_modules=None, lora_alpha=16, lora_dropout=0.05, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, eva_config=None, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False)}
✅ LoRA module attached: base_model.model.gpt_neox.layers.0.attention.query_key_value.lora_dropout
✅ LoRA module attached: base_model.model.gpt_neox.layers.0.attention.query_key_value.lora_dropout.default
✅ LoRA module attached: b