In [1]:
import torch
import transformers
import argparse
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from peft import (
        get_peft_model, 
        prepare_model_for_kbit_training, 
        LoraConfig
    )
from trl import SFTTrainer

In [2]:
model_name = "meta-llama/Llama-2-7b-hf"
model = AutoModelForCausalLM.from_pretrained(model_name,
                                            load_in_8bit=True,
                                            device_map="auto"
                                            )
tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Load your custom JSON dataset
custom_data = load_dataset('json', data_files='Gilbert.json')

# Access train, test, and validation splits if available
data_train = custom_data['train']
data_val = custom_data['train']

# Print the dataset details
print(data_train)
print(data_val)

# Access an example
#example = data_train[0]
#print(example)

def generate_prompt(materials, gilbert=None, eos_token="</s>"):
  instruction = "Answer the value:\n"
  input = f"Gilbert damping constant of {materials}\n"
  gilbert = f"Value: {gilbert + ' ' + eos_token if gilbert else ''} "
  prompt = (" ").join([instruction, input, gilbert])
  return prompt

print(generate_prompt(data_train[0]["materials"], data_train[0]["gilbert"]))

Dataset({
    features: ['gilbert', 'materials'],
    num_rows: 63
})
Dataset({
    features: ['gilbert', 'materials'],
    num_rows: 63
})
Answer the value:
 Gilbert damping constant of Y_3Fe_5O_12
 Value: Very low, around 0.0001 - 0.0004 </s> 


In [4]:
print(generate_prompt('Fe'))

Answer the value:
 Gilbert damping constant of Fe
 Value:  


In [30]:
from peft import PeftModel

peft_model_id = "cp/checkpoint-4200"
#peft_model_id = "cp2/final"
peft_model = PeftModel.from_pretrained(model, peft_model_id, torch_dtype=torch.float16, offload_folder="lora_results/lora_7/temp")

In [41]:
#input_prompt = generate_prompt(data_train[0]["materials"])
input_prompt = generate_prompt('FeCoMn')
input_tokens = tokenizer(input_prompt, return_tensors="pt")["input_ids"].to("cuda")
with torch.cuda.amp.autocast():
    generation_output = peft_model.generate(
        input_ids=input_tokens,
        max_new_tokens=100,
        do_sample=True,
        top_k=10,
        top_p=0.9,
        temperature=0.3,
        repetition_penalty=1.15,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
      )
op = tokenizer.decode(generation_output[0], skip_special_tokens=True)
print(op) 

Answer the value:
 Gilbert damping constant of FeCoMn
 Value:  0.126


In [39]:
model == peft_model

False

In [45]:
# this should be set for finutning and batched inference
tokenizer.add_special_tokens({"pad_token": "<PAD>"})
peft_model.resize_token_embeddings(len(tokenizer))

# Loading in 8 bit ..."
peft_model = prepare_model_for_kbit_training(peft_model)
#model = get_peft_model(model, lora_config)

In [50]:
output_dir = "cp2"
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
per_device_eval_batch_size = 4
eval_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 50
logging_steps = 50
learning_rate = 1e-4
max_grad_norm = 0.2
#max_steps = 50
warmup_ratio = 0.03
evaluation_strategy="epoch"
lr_scheduler_type = "constant"

training_args = transformers.TrainingArguments(
            output_dir=output_dir,
            per_device_train_batch_size=per_device_train_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            optim=optim,
            evaluation_strategy=evaluation_strategy,
            save_steps=save_steps,
            learning_rate=learning_rate,
            logging_steps=logging_steps,
            max_grad_norm=max_grad_norm,
            #max_steps=max_steps,
            warmup_ratio=warmup_ratio,
            group_by_length=True,
            lr_scheduler_type=lr_scheduler_type,
            ddp_find_unused_parameters=False,
            eval_accumulation_steps=eval_accumulation_steps,
            per_device_eval_batch_size=per_device_eval_batch_size,
        )

def formatting_func(prompt):
  output = []

  for d, s in zip(prompt["materials"], prompt["gilbert"]):
    op = generate_prompt(d, s)
    output.append(op)

  return output


trainer = SFTTrainer(
    model=peft_model,
    train_dataset=data_train,
    eval_dataset=data_val,
    #peft_config=lora_config,
    formatting_func=formatting_func,
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_args
)

# We will also pre-process the model by upcasting the layer norms in float 32 for more stable training
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

trainer.train()
trainer.save_model(f"{output_dir}/final")

Map:   0%|          | 0/63 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,No log,3.19933
2,No log,3.19933
3,No log,3.19933


In [11]:
print(peft_model==model)

False
