## Setting up

In [1]:
%%capture
!pip install transformers datasets accelerate evaluate bitsandbytes peft trl wandb optimum

In [2]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
# Read the Hugging Face access token from Kaggle's secret store (never from source)
# and authenticate the Hub client with it.
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")

login(token=hf_token)

In [3]:
# Weights & Biases settings, exported as environment variables before any run starts.
# WANDB_PROJECT names the project the runs are logged to.
# NOTE(review): WANDB_LOG_MODEL=false / WANDB_WATCH=false are presumably meant to disable
# checkpoint upload and gradient watching in the Trainer's W&B integration — confirm
# against the installed transformers version.
%env WANDB_PROJECT=HPML_DSR1
%env WANDB_LOG_MODEL=false
%env WANDB_WATCH=false

env: WANDB_PROJECT=HPML_DSR1
env: WANDB_LOG_MODEL=false
env: WANDB_WATCH=false


In [4]:
import wandb
from kaggle_secrets import UserSecretsClient

# SECURITY: the W&B API key must not live in the notebook source. A key that was ever
# committed or shared in a notebook should be revoked and regenerated in the W&B settings.
# The key is read from a Kaggle secret instead; this expects a secret named
# "WANDB_API_KEY" to be attached to the notebook (Add-ons -> Secrets).
wandb.login(key=UserSecretsClient().get_secret("WANDB_API_KEY"))

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33maditya-nyu[0m ([33maditya-nyu-hpml[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

## Loading the model and tokenizer

In [19]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
import kagglehub
import torch
from transformers.models.gemma3 import Gemma3ForCausalLM
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B'

# 4-bit NF4 weight quantisation with float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the quantised base model onto the GPU and switch it to eval mode for the
# baseline inference that follows.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",
    quantization_config=bnb_config,
).eval()

# `padding`, `truncation` and `max_length` are arguments of the tokenizer *call*, not of
# `from_pretrained`; passing them here had no effect on later tokenisation, so they are
# dropped to avoid suggesting that inputs are truncated to 512 tokens.
tokenizer = AutoTokenizer.from_pretrained(model_name)

print(model)

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(151936, 1536)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear4bit(in_features=1536, out_features=1536, bias=True)
          (k_proj): Linear4bit(in_features=1536, out_features=256, bias=True)
          (v_proj): Linear4bit(in_features=1536, out_features=256, bias=True)
          (o_proj): Linear4bit(in_features=1536, out_features=1536, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear4bit(in_features=1536, out_features=8960, bias=False)
          (up_proj): Linear4bit(in_features=1536, out_features=8960, bias=False)
          (down_proj): Linear4bit(in_features=8960, out_features=1536, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((1536,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((1536,), eps

## Loading and processing the dataset

In [6]:
# Training template with three positional slots, filled in this order by
# formatting_prompts_func: the question, the chain of thought (placed between
# <think> and </think>), and the final response.
train_prompt_style="""
Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Question:
{}

### Response:
<think>
{}
</think>
{}
"""

In [7]:
def formatting_prompts_func(examples):
    """Render a batch of dataset rows into full training prompts.

    Args:
        examples: Batched mapping holding the parallel columns
            "Open-ended Verifiable Question", "Complex_CoT" and "Response".

    Returns:
        A dict with a single "text" key: one string per row, produced by filling
        ``train_prompt_style`` with the question, the chain of thought and the
        response (terminated with the tokenizer's EOS token).
    """

    def _render(question, cot, response):
        # Terminate the response with EOS unless the row already ends with it.
        closed = response if response.endswith(tokenizer.eos_token) else response + tokenizer.eos_token
        return train_prompt_style.format(question, cot, closed)

    rows = zip(
        examples["Open-ended Verifiable Question"],
        examples["Complex_CoT"],
        examples["Response"],
    )
    return {"text": [_render(*row) for row in rows]}

In [8]:
from datasets import load_dataset
# Load the first 500 training rows. The dataset is downloaded as a Parquet file, so no
# repository-provided loading script has to run and `trust_remote_code` is not needed;
# leaving it off avoids executing arbitrary code from the Hub.
dataset = load_dataset("TheFinAI/Fino1_Reasoning_Path_FinQA", split="train[0:500]")

# Add a "text" column holding the fully rendered training prompt for every row.
dataset = dataset.map(formatting_prompts_func, batched=True)

# Show one rendered example as a sanity check.
dataset["text"][0]

README.md:   0%|          | 0.00/1.32k [00:00<?, ?B/s]

train.parquet:   0%|          | 0.00/14.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5499 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

"\nBelow is an instruction that describes a task, paired with an input that provides further context. \nWrite a response that appropriately completes the request. \nBefore answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\n\n### Question:\nPlease answer the given financial question based on the context.\nContext: amortization expense , which is included in selling , general and administrative expenses , was $ 13.0 million , $ 13.9 million and $ 8.5 million for the years ended december 31 , 2016 , 2015 and 2014 , respectively . the following is the estimated amortization expense for the company 2019s intangible assets as of december 31 , 2016 : ( in thousands ) .\n|2017|$ 10509|\n|2018|9346|\n|2019|9240|\n|2020|7201|\n|2021|5318|\n|2022 and thereafter|16756|\n|amortization expense of intangible assets|$ 58370|\nat december 31 , 2016 , 2015 and 2014 , the company determined that its goodwill and indefinite

In [9]:
from transformers import DataCollatorForLanguageModeling

# Batch collator for causal language modelling: mlm=False turns off masked-LM
# token masking.
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

## Model inference before fine-tuning

In [10]:
# Inference template with two positional slots: the question and the text that follows
# the opening <think> tag (the inference cells pass "" so the model writes the reasoning).
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Question:
{}

### Response:
<think>
{}
"""

In [12]:
from torch.profiler import profile, record_function, ProfilerActivity


In [20]:
%%time
question = dataset[0]['Open-ended Verifiable Question']

print("Question: ", question)
inputs = tokenizer(
    [prompt_style.format(question, "") + tokenizer.eos_token],
    return_tensors="pt"
).to("cuda")
start_event = torch.cuda.Event(enable_timing=True)
end_event = torch.cuda.Event(enable_timing=True)
start_event.record()

outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=1200,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
        )
end_event.record()
torch.cuda.synchronize()  # Wait for the events to be recorded!
elapsed_time_ms = start_event.elapsed_time(end_event)

response = tokenizer.batch_decode(outputs, skip_special_tokens=True)

print(response[0].split("### Response:")[1])

print("GPU Time:",elapsed_time_ms)


Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


Question:  Please answer the given financial question based on the context.
Context: amortization expense , which is included in selling , general and administrative expenses , was $ 13.0 million , $ 13.9 million and $ 8.5 million for the years ended december 31 , 2016 , 2015 and 2014 , respectively . the following is the estimated amortization expense for the company 2019s intangible assets as of december 31 , 2016 : ( in thousands ) .
|2017|$ 10509|
|2018|9346|
|2019|9240|
|2020|7201|
|2021|5318|
|2022 and thereafter|16756|
|amortization expense of intangible assets|$ 58370|
at december 31 , 2016 , 2015 and 2014 , the company determined that its goodwill and indefinite- lived intangible assets were not impaired . 5 . credit facility and other long term debt credit facility the company is party to a credit agreement that provides revolving commitments for up to $ 1.25 billion of borrowings , as well as term loan commitments , in each case maturing in january 2021 . as of december 31 ,

In [21]:
# elapsed_time_ms comes from CUDA event timing in milliseconds; report it in seconds.
print("GPU Time:",elapsed_time_ms/1000, " s")

GPU Time: 6.343044921875  s


## Setting up LoRA and the trainer

In [15]:
%%time
from trl import SFTTrainer
from transformers import TrainingArguments


# LoRA Configuration
peft_config = LoraConfig(
    lora_alpha=16,                           # Scaling factor for LoRA
    lora_dropout=0.1,                       # Add slight dropout for regularization
    r=4,                                    # Rank of the LoRA update matrices
    bias="none",                             # No bias reparameterization
    task_type="CAUSAL_LM",                   # Task type: Causal Language Modeling
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj"
    ],  # Target modules for LoRA
)


# Training Arguments
training_arguments = TrainingArguments(
    output_dir="output",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    logging_steps=5,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb",
    run_name="standard_run_1",
    max_steps=100
)

# Initialize the Trainer
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=dataset,
    peft_config=peft_config,
    data_collator=data_collator,
)

Converting train dataset to ChatML:   0%|          | 0/500 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


CPU times: user 3.91 s, sys: 276 ms, total: 4.19 s
Wall time: 4.6 s


## Model training

In [16]:
# Release cached, unused GPU memory held by PyTorch's allocator before training starts.
torch.cuda.empty_cache()
# Run the fine-tuning loop; the returned training summary is kept in trainer_stats.
trainer_stats = trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Step,Training Loss
5,2.9433
10,2.7772
15,2.5951
20,2.472
25,2.3334
30,2.2686
35,2.2317
40,2.2064
45,2.0867
50,1.6719


## Model inference after fine-tuning

In [19]:
%%time
question = dataset[0]['Open-ended Verifiable Question']

inputs = tokenizer(
    [prompt_style.format(question, "") + tokenizer.eos_token],
    return_tensors="pt"
).to("cuda")



CPU times: user 6.48 ms, sys: 145 µs, total: 6.63 ms
Wall time: 5.94 ms


In [18]:
%%time
start_event = torch.cuda.Event(enable_timing=True)
end_event = torch.cuda.Event(enable_timing=True)
start_event.record()
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    eos_token_id=tokenizer.eos_token_id,
    use_cache=True,
)

end_event.record()
torch.cuda.synchronize()  # Wait for the events to be recorded!
elapsed_time_ms = start_event.elapsed_time(end_event)

response = tokenizer.batch_decode(outputs, skip_special_tokens=True)

print(response[0].split("### Response:")[1])

print("GPU Time:",elapsed_time_ms)

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



<think>


the company 2019s amortization expense is based on the fair value of its intangible assets . intangible assets 2019s amortization expense is based on the fair value of its intangible assets . amortization expense 2019s amortization expense is based on the fair value of its intangible assets . amortization expense 2019s amortization expense is based on the fair value of its intangible assets . amortization expense 2019s amortization expense is based on the fair value of its intangible assets . amortization expense 2019s amortization expense is based on the fair value of its intangible assets . amortization expense 2019s amortization expense is based on the fair value of its intangible assets . amortization expense 2019s amortization expense is based on the fair value of its intangible assets . amortization expense 2019s amortization expense is based on the fair value of its intangible assets . amortization expense 2019s amortization expense is based on the fair value of its i

In [20]:
%%time
question = dataset[10]['Open-ended Verifiable Question']

inputs = tokenizer(
    [prompt_style.format(question, "") + tokenizer.eos_token],
    return_tensors="pt"
).to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    eos_token_id=tokenizer.eos_token_id,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
print(response[0].split("### Response:")[1])

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.



<think>



the columbia fund is being liquidated with distributions to us occurring and expected to be fully liquidated during calendar 2008 . since december 2007 , we have received disbursements of approximately $ 20.7 million from the columbia fund . our operating activities during the year ended march 31 , 2008 used cash of $ 28.9 million as compared to $ 19.8 million during the same period in the prior year . our fiscal 2008 net loss of $ 40.9 million was the primary cause of our cash use from operations , attributed to increased investments in our global distribution as we continue to drive initiatives to increase recovery awareness as well as our investments in research and development to broaden our circulatory care product portfolio . in addition , our inventories used cash of $ 11.1 million during fiscal 2008 , reflecting our inventory build-up to support anticipated increases in global demand for our products and our accounts receivable also increased as a result of higher s

## Saving the model locally

In [21]:
# Write the model and the tokenizer side by side into one local directory.
new_model_local = "DeepSeek-R1-Fin-QA-Reasoning"
# NOTE(review): `model` is the object that was handed to SFTTrainer, not `trainer.model`.
# Confirm this call writes the LoRA adapter as intended; `trainer.model.save_pretrained(...)`
# is the usual way to save only the adapter weights.
model.save_pretrained(new_model_local) # Local saving
tokenizer.save_pretrained(new_model_local)

('DeepSeek-R1-Fin-QA-Reasoning/tokenizer_config.json',
 'DeepSeek-R1-Fin-QA-Reasoning/special_tokens_map.json',
 'DeepSeek-R1-Fin-QA-Reasoning/tokenizer.json')

## Hyperparameter sweep with Weights & Biases

In [17]:

from trl import SFTTrainer
from transformers import TrainingArguments

def configure_trainer(config=None):
    """Run one sweep trial: build a LoRA SFT trainer from the sampled config and train it.

    Args:
        config: Optional configuration forwarded to ``wandb.init``. Inside the run the
            values are read back from ``wandb.config``; the attributes used are
            ``rank``, ``epochs``, ``lr`` and ``maxsteps``.

    The function relies on the notebook-level ``model``, ``dataset`` and
    ``data_collator`` and returns nothing; metrics are reported to W&B by the trainer.
    """
    with wandb.init(config=config):
        # From here on, `config` is the run's resolved configuration.
        config = wandb.config

        # LoRA adapter on the four attention projections; only the rank is swept.
        attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
        peft_config = LoraConfig(
            r=config.rank,
            lora_alpha=16,
            lora_dropout=0.1,
            bias="none",
            task_type="CAUSAL_LM",
            target_modules=attention_projections,
        )

        # Training schedule. A positive max_steps takes precedence over
        # num_train_epochs, so `epochs` only matters if `maxsteps` is not positive.
        training_arguments = TrainingArguments(
            output_dir="output",
            report_to="wandb",
            # Batching: one sequence per device, gradients accumulated over two batches.
            per_device_train_batch_size=1,
            per_device_eval_batch_size=1,
            gradient_accumulation_steps=2,
            group_by_length=True,
            # Optimiser and swept schedule parameters.
            optim="paged_adamw_32bit",
            num_train_epochs=config.epochs,
            learning_rate=config.lr,
            max_steps=config.maxsteps,
            warmup_steps=10,
            # Mixed-precision training is switched off.
            fp16=False,
            bf16=False,
            # Log the training loss every 5 steps.
            logging_strategy="steps",
            logging_steps=5,
        )

        trainer = SFTTrainer(
            model=model,
            peft_config=peft_config,
            train_dataset=dataset,
            data_collator=data_collator,
            args=training_arguments,
        )

        # Release cached GPU memory left over from the previous trial before training.
        torch.cuda.empty_cache()
        trainer.train()

In [18]:
# Random search over LoRA rank, learning rate and training length.
sweep_configuration = {
    "method": "random",
    # The metric name must match a key the runs actually log. The trainer reports the
    # step loss as "train/loss" (see the run summaries), not "loss".
    "metric": {"goal": "minimize", "name": "train/loss"},
    "parameters": {
        # NOTE(review): configure_trainer also passes a positive max_steps, which takes
        # precedence over num_train_epochs — so "epochs" does not change training and
        # only duplicates trials. Consider removing it from the search space.
        "epochs": {"values": [1, 2]},
        "maxsteps": {"values": [100, 200]},
        "lr": {"values": [1e-3, 1e-4, 2e-4, 5e-4]},
        "rank": {"values": [4, 16, 64]},
    },
}

# Register the sweep and run 10 trials in this process.
sweep_id = wandb.sweep(sweep=sweep_configuration, project="HPML_DSR1")
wandb.agent(sweep_id, configure_trainer, count=10)

Create sweep with ID: swtgog00
Sweep URL: https://wandb.ai/aditya-nyu-hpml/HPML_DSR1/sweeps/swtgog00


[34m[1mwandb[0m: Agent Starting Run: s1tb5k65 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	maxsteps: 100
[34m[1mwandb[0m: 	rank: 16
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Converting train dataset to ChatML:   0%|          | 0/500 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,2.9457
10,2.8083
15,2.7117
20,2.6438
25,2.5078
30,2.4133
35,2.3711
40,2.3536
45,2.2482
50,1.884


0,1
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇███
train/grad_norm,▃▄▆█▃▃▂▁▂▅▃▂▂▃▃▁▂▅▂▄
train/learning_rate,▄███▇▇▆▆▆▅▅▄▄▃▃▃▂▂▁▁
train/loss,█▇▆▆▅▄▄▄▃▁▄▃▃▃▃▂▂▄▂▁
train/mean_token_accuracy,▁▃▃▃▄▄▅▄▆█▅▆▆▆▅▆▇▅▆█
train/num_tokens,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██

0,1
total_flos,1880127795806208.0
train/epoch,0.4
train/global_step,100.0
train/grad_norm,0.34379
train/learning_rate,0.0
train/loss,1.9139
train/mean_token_accuracy,0.58433
train/num_tokens,202416.0
train_loss,2.31573
train_runtime,250.183


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0reu6cgs with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	lr: 0.0005
[34m[1mwandb[0m: 	maxsteps: 100
[34m[1mwandb[0m: 	rank: 4


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,2.9352
10,2.6711
15,2.366
20,2.2805
25,2.1423
30,2.0742
35,2.0331
40,2.0164
45,1.9281
50,1.495


0,1
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇███
train/grad_norm,█▆▇█▃▅▃▃▄▄▅▄▃▄▃▁▂▆▂▆
train/learning_rate,▄███▇▇▆▆▆▅▅▄▄▃▃▃▂▂▁▁
train/loss,█▇▅▅▄▄▄▄▃▁▃▃▃▃▃▃▃▄▃▂
train/mean_token_accuracy,▁▂▃▃▅▅▅▅▆█▅▆▆▆▆▆▇▅▆▇
train/num_tokens,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██

0,1
total_flos,1876158084464640.0
train/epoch,0.4
train/global_step,100.0
train/grad_norm,0.62263
train/learning_rate,1e-05
train/loss,1.655
train/mean_token_accuracy,0.62829
train/num_tokens,202416.0
train_loss,2.03351
train_runtime,251.3096


[34m[1mwandb[0m: Agent Starting Run: iwbmxgu7 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	maxsteps: 100
[34m[1mwandb[0m: 	rank: 16


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,2.9457
10,2.8089
15,2.7146
20,2.6482
25,2.5128
30,2.4176
35,2.3744
40,2.3559
45,2.2492
50,1.8841


0,1
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇███
train/grad_norm,▃▄▆█▃▃▂▁▂▅▃▂▂▃▃▁▁▅▂▄
train/learning_rate,▄███▇▇▆▆▆▅▅▄▄▃▃▃▂▂▁▁
train/loss,█▇▆▆▅▅▄▄▃▁▄▃▃▃▃▂▂▄▂▁
train/mean_token_accuracy,▁▃▃▃▄▄▅▄▆█▅▆▆▆▆▆▇▆▆█
train/num_tokens,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██

0,1
total_flos,1880127795806208.0
train/epoch,0.4
train/global_step,100.0
train/grad_norm,0.33605
train/learning_rate,0.0
train/loss,1.9101
train/mean_token_accuracy,0.58406
train/num_tokens,202416.0
train_loss,2.31568
train_runtime,250.6088


[34m[1mwandb[0m: Agent Starting Run: iwddf8iv with config:
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	maxsteps: 200
[34m[1mwandb[0m: 	rank: 4


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,2.9211
10,2.5352
15,2.2628
20,2.1312
25,2.03
30,2.0012
35,1.9746
40,1.9548
45,1.8682
50,1.4142


0,1
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,█▄▇▅▃▅▃▂▃▄▄▃▅▄▂▁▂▄▂▄▂▃▂▂▂▂▄▂▂▁▃▁▂▂▂▃▁▁▂▃
train/learning_rate,▄▇███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▆▅▄▄▄▄▄▃▁▃▃▃▃▃▃▃▄▃▂▃▂▃▄▃▃▃▃▃▁▃▃▃▃▂▃▂▂▂▁
train/mean_token_accuracy,▁▃▄▄▅▅▅▅▆█▅▆▆▆▆▆▇▆▆▇▆▆▆▅▆▆▆▆▆█▆▆▆▆▆▆▆▇▇█
train/num_tokens,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
total_flos,3759490237962240.0
train/epoch,0.8
train/global_step,200.0
train/grad_norm,0.50873
train/learning_rate,1e-05
train/loss,1.4322
train/mean_token_accuracy,0.66877
train/num_tokens,405606.0
train_loss,1.85265
train_runtime,503.0003


[34m[1mwandb[0m: Agent Starting Run: pui1yw9p with config:
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	lr: 0.0005
[34m[1mwandb[0m: 	maxsteps: 200
[34m[1mwandb[0m: 	rank: 16


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,2.9358
10,2.6747
15,2.3693
20,2.2774
25,2.1347
30,2.0646
35,2.0237
40,2.0072
45,1.9147
50,1.4734


0,1
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,█▅▆█▃▅▃▃▃▃▆▃▃▅▃▁▂▆▃▆▃▅▃▃▄▅▇▄▃▃▅▂▃▃▃▅▁▃▃▄
train/learning_rate,▄▇███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▇▅▅▄▄▄▄▃▁▃▃▃▃▃▃▃▄▃▂▃▂▃▄▃▃▃▃▃▁▃▂▃▃▂▃▂▂▂▁
train/mean_token_accuracy,▁▂▃▃▅▅▅▅▆█▅▆▆▆▆▆▇▆▆▇▆▆▆▅▆▆▆▆▆█▆▆▆▆▆▆▆█▇█
train/num_tokens,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
total_flos,3767444840060928.0
train/epoch,0.8
train/global_step,200.0
train/grad_norm,0.28438
train/learning_rate,0.0
train/loss,1.4674
train/mean_token_accuracy,0.66272
train/num_tokens,405606.0
train_loss,1.90259
train_runtime,501.6461


[34m[1mwandb[0m: Agent Starting Run: jykofcdc with config:
[34m[1mwandb[0m: 	epochs: 2
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	maxsteps: 200
[34m[1mwandb[0m: 	rank: 4


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,2.9456
10,2.808
15,2.7101
20,2.6407
25,2.5003
30,2.4024
35,2.3567
40,2.3364
45,2.2315
50,1.8576


0,1
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▄▅▆█▃▃▂▂▃▆▃▃▂▄▃▂▂▅▂▃▂▄▁▂▂▃▃▂▂▃▃▁▁▂▂▂▁▂▂▃
train/learning_rate,▄▇███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▇▇▆▆▅▅▅▄▂▄▄▄▄▃▃▃▄▃▂▃▂▃▄▃▃▃▃▃▁▃▃▃▃▃▃▂▂▂▁
train/mean_token_accuracy,▁▂▂▂▃▃▄▄▄▆▄▅▅▅▅▅▆▅▆▇▆▆▆▅▆▆▆▆▆▇▆▆▆▆▆▅▆▇▇█
train/num_tokens,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
total_flos,3759490237962240.0
train/epoch,0.8
train/global_step,200.0
train/grad_norm,0.65467
train/learning_rate,0.0
train/loss,1.6187
train/mean_token_accuracy,0.64478
train/num_tokens,405606.0
train_loss,2.10047
train_runtime,503.2993


[34m[1mwandb[0m: Agent Starting Run: hkh5mzr7 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	maxsteps: 200
[34m[1mwandb[0m: 	rank: 64


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,2.9217
10,2.5364
15,2.2608
20,2.1293
25,2.0271
30,1.9977
35,1.9743
40,1.9551
45,1.8648
50,1.4124


0,1
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,█▄▇▅▃▅▃▂▃▄▅▃▄▃▃▁▂▄▂▅▂▄▃▃▃▃▅▃▂▄▃▂▂▂▂▄▁▂▂▃
train/learning_rate,▄▇███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▆▅▄▄▄▄▄▃▁▃▃▃▃▃▃▃▄▃▂▃▂▃▄▃▃▃▃▃▁▃▃▃▃▂▃▂▂▂▁
train/mean_token_accuracy,▁▃▄▄▅▅▅▅▆█▅▆▆▆▆▆▆▆▆▇▆▆▆▅▆▆▆▆▆█▆▆▆▆▆▆▆▇▇█
train/num_tokens,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
total_flos,3799263248455680.0
train/epoch,0.8
train/global_step,200.0
train/grad_norm,0.12464
train/learning_rate,1e-05
train/loss,1.4256
train/mean_token_accuracy,0.67116
train/num_tokens,405606.0
train_loss,1.84907
train_runtime,503.9814


[34m[1mwandb[0m: Agent Starting Run: x53vcwaw with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	maxsteps: 200
[34m[1mwandb[0m: 	rank: 4


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,2.9211
10,2.5352
15,2.2628
20,2.1312
25,2.03
30,2.0012
35,1.9746
40,1.9548
45,1.8682
50,1.4142


0,1
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,█▄▇▅▃▅▃▂▃▄▄▃▅▄▂▁▂▄▂▄▂▃▂▂▂▂▄▂▂▁▃▁▂▂▂▃▁▁▂▃
train/learning_rate,▄▇███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▆▅▄▄▄▄▄▃▁▃▃▃▃▃▃▃▄▃▂▃▂▃▄▃▃▃▃▃▁▃▃▃▃▂▃▂▂▂▁
train/mean_token_accuracy,▁▃▄▄▅▅▅▅▆█▅▆▆▆▆▆▇▆▆▇▆▆▆▅▆▆▆▆▆█▆▆▆▆▆▆▆▇▇█
train/num_tokens,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
total_flos,3759490237962240.0
train/epoch,0.8
train/global_step,200.0
train/grad_norm,0.50873
train/learning_rate,1e-05
train/loss,1.4322
train/mean_token_accuracy,0.66877
train/num_tokens,405606.0
train_loss,1.85265
train_runtime,503.0596


[34m[1mwandb[0m: Agent Starting Run: o1ronlui with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	lr: 0.0005
[34m[1mwandb[0m: 	maxsteps: 200
[34m[1mwandb[0m: 	rank: 64


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,2.9355
10,2.6726
15,2.3677
20,2.2766
25,2.1344
30,2.0647
35,2.0251
40,2.0081
45,1.9157
50,1.4752


0,1
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,█▅▆█▃▅▃▂▃▂▆▃▃▅▃▁▂▆▃▆▃▅▃▃▄▅▆▅▂▃▄▂▂▃▃▅▁▃▃▄
train/learning_rate,▄▇███▇▇▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
train/loss,█▇▅▅▄▄▄▄▃▁▃▃▃▃▃▃▃▄▃▂▃▂▃▄▃▃▃▃▃▁▃▃▃▃▂▃▂▂▂▁
train/mean_token_accuracy,▁▂▃▃▅▅▅▅▆█▅▆▆▆▆▆▇▆▆▇▆▇▆▅▆▆▆▆▆█▆▆▆▆▆▆▆█▇█
train/num_tokens,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
total_flos,3799263248455680.0
train/epoch,0.8
train/global_step,200.0
train/grad_norm,0.14392
train/learning_rate,0.0
train/loss,1.4675
train/mean_token_accuracy,0.66159
train/num_tokens,405606.0
train_loss,1.90291
train_runtime,504.3501


[34m[1mwandb[0m: Agent Starting Run: 1ws2tk62 with config:
[34m[1mwandb[0m: 	epochs: 1
[34m[1mwandb[0m: 	lr: 0.0005
[34m[1mwandb[0m: 	maxsteps: 100
[34m[1mwandb[0m: 	rank: 64


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
5,2.9355
10,2.6726
15,2.3678
20,2.2786
25,2.14
30,2.0709
35,2.0309
40,2.0152
45,1.924
50,1.4909


0,1
train/epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇███
train/global_step,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇███
train/grad_norm,█▅▆█▄▅▃▃▄▄▅▄▃▅▃▁▂▆▃▅
train/learning_rate,▄███▇▇▆▆▆▅▅▄▄▃▃▃▂▂▁▁
train/loss,█▇▅▅▄▄▄▄▃▁▃▃▃▃▃▃▃▄▃▂
train/mean_token_accuracy,▁▂▃▃▅▅▆▅▆█▅▆▆▆▆▆▇▅▆▇
train/num_tokens,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██

0,1
total_flos,1896006641172480.0
train/epoch,0.4
train/global_step,100.0
train/grad_norm,0.15097
train/learning_rate,1e-05
train/loss,1.6525
train/mean_token_accuracy,0.62824
train/num_tokens,202416.0
train_loss,2.03183
train_runtime,251.9309
