In [1]:
import torch
import transformers
import argparse
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from peft import (
        get_peft_model, 
        prepare_model_for_kbit_training, 
        LoraConfig
    )
from trl import SFTTrainer

In [40]:
# Hub id of the base checkpoint; the tokenizer and the model are both loaded from it.
model_name = "meta-llama/Llama-2-7b-hf"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the causal LM with 8-bit loading enabled and automatic device placement.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
from huggingface_hub import snapshot_download

# Download the fine-tuned adapter repository into the local "cp" cache.
# The call returns the local snapshot directory; keep it in a variable so later
# cells can reuse the path instead of hard-coding the commit hash.
adapter_snapshot_dir = snapshot_download(repo_id="enyuan/llama", cache_dir="cp")

# Last expression of the cell: still displays the resolved path as before.
adapter_snapshot_dir

Fetching 14 files:   0%|          | 0/14 [00:00<?, ?it/s]

adapter_config.json:   0%|          | 0.00/607 [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.10k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.6k [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/627 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.09k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/537M [00:00<?, ?B/s]

trainer_state.json:   0%|          | 0.00/3.10k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/4.22k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/1.07G [00:00<?, ?B/s]

'cp/models--enyuan--llama/snapshots/cc9f2b840050248a80e02c0a61e3cc0050c54b2e'

In [30]:
def generate_prompt(materials, gilbert=None, eos_token="</s>"):
  """Build the "Answer the value" prompt for a material's Gilbert damping constant.

  Args:
    materials: Text inserted after "Gilbert damping constant of".
    gilbert: Optional answer appended after "Value:". May be a string or a
      number; ``None`` or an empty string yields an open-ended prompt for
      inference.
    eos_token: End-of-sequence marker appended after the answer when one is given.

  Returns:
    The instruction, question and value sections joined by single spaces.
  """
  instruction = "Answer the value:\n"
  question = f"Gilbert damping constant of {materials}\n"
  # Test explicitly for "no answer" instead of relying on truthiness, so a
  # numeric 0 is kept, and format through the f-string so numbers need no cast.
  has_answer = gilbert is not None and gilbert != ""
  answer = f"{gilbert} {eos_token}" if has_answer else ""
  response = f"Value: {answer} "
  return " ".join([instruction, question, response])

In [7]:
# Preview the prompt built for 'Fe' when no answer is supplied (gilbert defaults to None).
print(generate_prompt('Fe'))

Answer the value:
 Gilbert damping constant of Fe
 Value:  


In [8]:
# Sample a completion from the base model for the 'Fe' prompt.
input_prompt = generate_prompt('Fe')
# Keep the whole encoding rather than only the ids: it also carries the
# attention mask, which is passed explicitly so generate() need not infer it.
encoded_prompt = tokenizer(input_prompt, return_tensors="pt").to("cuda")
input_tokens = encoded_prompt["input_ids"]
with torch.cuda.amp.autocast():
  generation_output = model.generate(
      input_ids=input_tokens,
      attention_mask=encoded_prompt["attention_mask"],
      max_new_tokens=1000,
      do_sample=True,
      top_k=10,
      top_p=0.9,
      temperature=0.3,
      repetition_penalty=1.15,
      num_return_sequences=1,
      eos_token_id=tokenizer.eos_token_id,
  )
# Only one sequence is requested, so decode the first (and only) row.
op = tokenizer.decode(generation_output[0], skip_special_tokens=True)
print(op)

Answer the value:
 Gilbert damping constant of Fe
 Value:  1.079×10−3 N⋅m/rad

### 2.

The following is a list of values for the Gilbert damping constant, $k_g$, of iron (Fe).

Answer the value: Gilbert damping constant of Iron
 Value:   4.685×10−3 N⋅m/rad


In [9]:
from peft import PeftModel

# Local snapshot directory of the downloaded adapter repository
# (the path reported by the snapshot_download cell).
peft_model_id = "cp/models--enyuan--llama/snapshots/cc9f2b840050248a80e02c0a61e3cc0050c54b2e"

# Attach the saved adapter to the already loaded base model.
peft_model = PeftModel.from_pretrained(
    model,
    peft_model_id,
    torch_dtype=torch.float16,
    offload_folder="lora_results/lora_7/temp",
)

In [31]:
# Sample a completion from the adapter-augmented model for the 'Fe' prompt.
input_prompt = generate_prompt('Fe')
# Keep the whole encoding rather than only the ids: it also carries the
# attention mask, which is passed explicitly so generate() need not infer it.
encoded_prompt = tokenizer(input_prompt, return_tensors="pt").to("cuda")
input_tokens = encoded_prompt["input_ids"]
with torch.cuda.amp.autocast():
    generation_output = peft_model.generate(
        input_ids=input_tokens,
        attention_mask=encoded_prompt["attention_mask"],
        max_new_tokens=100,
        do_sample=True,
        top_k=10,
        top_p=0.9,
        temperature=0.3,
        repetition_penalty=1.15,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
# Only one sequence is requested, so decode the first (and only) row.
op = tokenizer.decode(generation_output[0], skip_special_tokens=True)
print(op)

Answer the value:
 Gilbert damping constant of Fe
 Value:  0.0215


In [61]:
def generate_prompt(materials, gilbert=None, eos_token="</s>"):
  """Build the "List five materials" prompt for a metal-oxide property query.

  This redefines the earlier ``generate_prompt`` with the same signature.

  Args:
    materials: Property description inserted after "Metal oxide with".
    gilbert: Optional answer text placed after "Materials:"; when falsy the
      section is left open for the model to complete.
    eos_token: Marker appended after the answer when one is given.

  Returns:
    The three prompt sections joined by single spaces.
  """
  if gilbert:
    completion = gilbert + ' ' + eos_token
  else:
    completion = ''
  sections = [
      "List five materials:\n",
      f"Metal oxide with {materials}\n",
      f"Materials: {completion} ",
  ]
  return " ".join(sections)

In [51]:
# Ask the adapter-augmented model for materials with a low magnetic damping constant.
input_prompt = generate_prompt('low magnetic damping constant. e.g. ')
# Keep the whole encoding rather than only the ids: it also carries the
# attention mask, which is passed explicitly so generate() need not infer it.
encoded_prompt = tokenizer(input_prompt, return_tensors="pt").to("cuda")
input_tokens = encoded_prompt["input_ids"]
with torch.cuda.amp.autocast():
    generation_output = peft_model.generate(
        input_ids=input_tokens,
        attention_mask=encoded_prompt["attention_mask"],
        max_new_tokens=100,
        do_sample=True,
        top_k=10,
        top_p=0.9,
        temperature=0.3,
        repetition_penalty=1.15,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
# Only one sequence is requested, so decode the first (and only) row.
op = tokenizer.decode(generation_output[0], skip_special_tokens=True)
print(op)

List five materials:
 Alloys with low Gilbert damping constant. e.g. low magnetic damping constant. e.g. 
 Materials:  
$${Co-Pd}$$ Co - Pd , $${Ni-Fe}$$ Ni - Fe , $${NbTa}$$ NbTa , $${Cu-Ag}$$ Cu - Ag , $${Au-Ag}$$ Au - Ag . 


In [58]:
# Ask the adapter-augmented model for materials with a low density of states at the Fermi level.
input_prompt = generate_prompt('low density of states at the Fermi level')
# Keep the whole encoding rather than only the ids: it also carries the
# attention mask, which is passed explicitly so generate() need not infer it.
encoded_prompt = tokenizer(input_prompt, return_tensors="pt").to("cuda")
input_tokens = encoded_prompt["input_ids"]
with torch.cuda.amp.autocast():
    generation_output = peft_model.generate(
        input_ids=input_tokens,
        attention_mask=encoded_prompt["attention_mask"],
        max_new_tokens=100,
        do_sample=True,
        top_k=10,
        top_p=0.9,
        temperature=0.3,
        repetition_penalty=1.15,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
# Only one sequence is requested, so decode the first (and only) row.
op = tokenizer.decode(generation_output[0], skip_special_tokens=True)
print(op)

List five materials:
 Metal oxide with low density of states at the Fermi level
 Materials:  
$$\mathrm{MgO}$$ M g O , $$ \mathrm{Al_2O_3} $$ Al 2 O 3 , $$ \mathrm{SiO_2} $$ SiO 2 , $$ \mathrm{Ta_2O_5} $$ Ta 2 O 5 .  
$$\mathrm{CuO}$$ C u O , $$ \mathrm{ZnO} $$ Z n O , $$ \mathrm{Nb


In [63]:
# Re-run of the low-density-of-states query (sampling is enabled, so outputs vary between runs).
input_prompt = generate_prompt('low density of states at the Fermi level')
# Keep the whole encoding rather than only the ids: it also carries the
# attention mask, which is passed explicitly so generate() need not infer it.
encoded_prompt = tokenizer(input_prompt, return_tensors="pt").to("cuda")
input_tokens = encoded_prompt["input_ids"]
with torch.cuda.amp.autocast():
    generation_output = peft_model.generate(
        input_ids=input_tokens,
        attention_mask=encoded_prompt["attention_mask"],
        max_new_tokens=100,
        do_sample=True,
        top_k=10,
        top_p=0.9,
        temperature=0.3,
        repetition_penalty=1.15,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
# Only one sequence is requested, so decode the first (and only) row.
op = tokenizer.decode(generation_output[0], skip_special_tokens=True)
print(op)

List five materials:
 Metal oxide with low density of states at the Fermi level
 Materials:  1. MgO 2. ZnO 3. Al_2O_3 4. GaAs 5. SiO_2 


In [72]:
# Low-magnetic-damping query again, this time sampled at temperature 0.2.
input_prompt = generate_prompt('low magnetic damping constant. e.g. ')
# Keep the whole encoding rather than only the ids: it also carries the
# attention mask, which is passed explicitly so generate() need not infer it.
encoded_prompt = tokenizer(input_prompt, return_tensors="pt").to("cuda")
input_tokens = encoded_prompt["input_ids"]
with torch.cuda.amp.autocast():
    generation_output = peft_model.generate(
        input_ids=input_tokens,
        attention_mask=encoded_prompt["attention_mask"],
        max_new_tokens=100,
        do_sample=True,
        top_k=10,
        top_p=0.9,
        temperature=0.2,
        repetition_penalty=1.15,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
# Only one sequence is requested, so decode the first (and only) row.
op = tokenizer.decode(generation_output[0], skip_special_tokens=True)
print(op)

List five materials:
 Metal oxide with low magnetic damping constant. e.g. 
 Materials:  
$${\text{MnBi}}_{2}{\text{O}}_{4}$$ MnBi 2 O 4 , $${\text{CuTiO}}_{3}$$ CuTiO 3 .  
Materials:  
$${\text{Fe}}_{3} {\text{O}}_{4}$$ Fe 3 O 4 , $${\text{Nd}_{1 - x}Dy_{x}} {\text{FeO}}_{4}$$ Nd


In [6]:
# LoRA adapter settings: rank 128, alpha 256, 5% dropout, applied to the four
# attention projection modules (q/k/v/o), no bias terms, causal-LM task type.
lora_config = LoraConfig(
        r=128,
        lora_alpha=256,
        lora_dropout=0.05,
        target_modules=["q_proj","k_proj","v_proj","o_proj"],
        bias="none",
        task_type="CAUSAL_LM",
    )

# A pad token should be set for fine-tuning and batched inference.
# The embedding matrix is resized so it covers the newly added token.
tokenizer.add_special_tokens({"pad_token": "<PAD>"})
model.resize_token_embeddings(len(tokenizer))

# The model was loaded in 8 bit: prepare it for k-bit training, then wrap it
# with the LoRA adapters described by lora_config.
# NOTE(review): both lines rebind `model`, so re-running this cell would wrap an
# already wrapped model — restart the kernel before re-executing.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)

In [11]:
# Training hyper-parameters, kept as module-level names so later cells
# (e.g. the save path built from output_dir) can reuse them.
output_dir = "cp"
# 4 samples per step x 4 accumulation steps = 16 samples per optimizer update per device.
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
per_device_eval_batch_size = 4
eval_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 500
logging_steps = 500
learning_rate = 1e-4
max_grad_norm = 0.2
#max_steps = 50
# NOTE(review): the plain "constant" scheduler in transformers takes no warmup
# steps, so warmup_ratio is probably ignored here — confirm, or switch to
# "constant_with_warmup" if a warmup phase is intended.
warmup_ratio = 0.03
evaluation_strategy="epoch"
lr_scheduler_type = "constant"

# Bundle the values above into the TrainingArguments object handed to the trainer.
training_args = transformers.TrainingArguments(
            output_dir=output_dir,
            per_device_train_batch_size=per_device_train_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            optim=optim,
            evaluation_strategy=evaluation_strategy,
            save_steps=save_steps,
            learning_rate=learning_rate,
            logging_steps=logging_steps,
            max_grad_norm=max_grad_norm,
            #max_steps=max_steps,
            warmup_ratio=warmup_ratio,
            group_by_length=True,
            lr_scheduler_type=lr_scheduler_type,
            ddp_find_unused_parameters=False,
            eval_accumulation_steps=eval_accumulation_steps,
            per_device_eval_batch_size=per_device_eval_batch_size,
        )

In [10]:
def formatting_func(prompt):
  """Turn a batch of dataset rows into training prompts.

  Args:
    prompt: Mapping with parallel "title" and "abstract" sequences.

  Returns:
    A list holding one ``generate_prompt(title, abstract)`` string per row.
  """
  return [
      generate_prompt(title, abstract)
      for title, abstract in zip(prompt["title"], prompt["abstract"])
  ]


# Build the supervised fine-tuning trainer; prompts are produced by formatting_func.
# NOTE(review): data_train / data_val are not defined in any cell visible in this
# notebook — confirm where they are loaded before a Restart & Run All.
trainer = SFTTrainer(
    model=model,
    train_dataset=data_train,
    eval_dataset=data_val,
    peft_config=lora_config,
    formatting_func=formatting_func,
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_args
)

# Upcast every submodule whose qualified name contains "norm" to float32
# (the layer norms) for more stable training.
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

# Run training, then save the resulting model under "<output_dir>/final".
trainer.train()
trainer.save_model(f"{output_dir}/final")

# Step Training Loss Validation Loss
# 10 1.848200 1.746341
# 20 1.688300 1.696681
# 30 1.654500 1.698127
# 40 1.579400 1.652010
# 50 1.492600 1.701877

Map:   0%|          | 0/278 [00:00<?, ? examples/s]

Map:   0%|          | 0/278 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
10,2.0984,1.891765
20,1.8911,1.811098
30,1.8143,1.744567
40,1.7057,1.667625
50,1.7178,1.575743




In [10]:
# A pad token should be set for fine-tuning and batched inference.
# The embedding matrix is resized so it covers the newly added token.
tokenizer.add_special_tokens({"pad_token": "<PAD>"})
model.resize_token_embeddings(len(tokenizer))

Embedding(32001, 4096)

In [None]:
# The model was loaded in 8 bit: prepare it for k-bit training.
# NOTE(review): if `peft_model` was created with PeftModel.from_pretrained(model, ...),
# it presumably shares this `model` object — confirm this call does not leave
# the adapter weights frozen before training starts.
model = prepare_model_for_kbit_training(model)
# get_peft_model is left disabled in this cell; the trainer below is given `peft_model`.
#model = get_peft_model(model, lora_config)

# Training hyper-parameters for the continued-training run, kept as module-level
# names so the rest of the cell (e.g. the save path built from output_dir) can reuse them.
output_dir = "cp"
# 4 samples per step x 4 accumulation steps = 16 samples per optimizer update per device.
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
per_device_eval_batch_size = 4
eval_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 200
logging_steps = 200
learning_rate = 1e-4
max_grad_norm = 0.2
#max_steps = 50
# NOTE(review): the plain "constant" scheduler in transformers takes no warmup
# steps, so warmup_ratio is probably ignored here — confirm, or switch to
# "constant_with_warmup" if a warmup phase is intended.
warmup_ratio = 0.03
evaluation_strategy="epoch"
lr_scheduler_type = "constant"

# Bundle the values above into the TrainingArguments object handed to the trainer.
training_args = transformers.TrainingArguments(
            output_dir=output_dir,
            per_device_train_batch_size=per_device_train_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            optim=optim,
            evaluation_strategy=evaluation_strategy,
            save_steps=save_steps,
            learning_rate=learning_rate,
            logging_steps=logging_steps,
            max_grad_norm=max_grad_norm,
            #max_steps=max_steps,
            warmup_ratio=warmup_ratio,
            group_by_length=True,
            lr_scheduler_type=lr_scheduler_type,
            ddp_find_unused_parameters=False,
            eval_accumulation_steps=eval_accumulation_steps,
            per_device_eval_batch_size=per_device_eval_batch_size,
        )

def formatting_func(prompt):
  """Turn a batch of dataset rows into training prompts.

  Args:
    prompt: Mapping with parallel "title" and "abstract" sequences.

  Returns:
    A list holding one ``generate_prompt(title, abstract)`` string per row.
  """
  return [
      generate_prompt(title, abstract)
      for title, abstract in zip(prompt["title"], prompt["abstract"])
  ]


# PeftModel.from_pretrained defaults to is_trainable=False, which loads the
# adapter frozen for inference only. Re-enable gradients on the LoRA weights so
# the trainer actually has parameters to optimise when continuing training.
lora_param_count = 0
for param_name, param in peft_model.named_parameters():
    if "lora_" in param_name:
        param.requires_grad_(True)
        lora_param_count += 1
if lora_param_count == 0:
    raise RuntimeError("peft_model exposes no LoRA parameters to train")

# Continue supervised fine-tuning of the already adapter-wrapped model.
# No peft_config is passed because peft_model already carries its adapter.
# NOTE(review): data_train / data_val are not defined in any cell visible in this
# notebook — confirm where they are loaded before a Restart & Run All.
trainer = SFTTrainer(
    model=peft_model,
    train_dataset=data_train,
    eval_dataset=data_val,
    formatting_func=formatting_func,
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_args
)

# Upcast every submodule whose qualified name contains "norm" to float32
# (the layer norms) for more stable training. Module.to() converts in place,
# so the result does not need to be rebound.
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module.to(torch.float32)

# Run training, then save the resulting model under "<output_dir>/final".
trainer.train()
trainer.save_model(f"{output_dir}/final")

# Step Training Loss Validation Loss
# 10 1.848200 1.746341
# 20 1.688300 1.696681
# 30 1.654500 1.698127
# 40 1.579400 1.652010
# 50 1.492600 1.701877