In [1]:
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    logging,
)
from typing import List
import torch
from torch import cuda, bfloat16
from datasets import load_dataset
import os
 
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

'cuda'

In [1]:
# model_id = 'meta-llama/Llama-2-7b-chat-hf'
model_id = 'meta-llama/Llama-2-13b-chat-hf'
# model_id = 'codellama/CodeLlama-7b-hf'
# model_id = "meta-llama/Meta-Llama-3-70B-Instruct"

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

bnb_config = transformers.BitsAndBytesConfig(
    load_in_8bit=True,
)


# Need auth token for these
hf_auth = os.environ.get('hf_token')
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# device_map = {"": 0}

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map="auto",
    use_auth_token=hf_auth
)

model.eval()
print(f"Model loaded on {device}")

NameError: name 'cuda' is not defined

In [None]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)
 
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
# tokenizer.padding_side = "left"

In [None]:
data = load_dataset("json", data_files="/home/hb/LLM-research/finetune_main/finetuning_tabular/table_read/llm_table_bgp_data_train_split20_135.json")
data["train"]

Found cached dataset json (/home/hb/.cache/huggingface/datasets/json/default-eed7dec0331e737e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)


  0%|          | 0/1 [00:00<?, ?it/s]

Dataset({
    features: ['instruction', 'input_seg', 'question', 'output'],
    num_rows: 132
})

In [2]:
CUTOFF_LEN = 4090

def generate_prompt(data):
    return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.  # noqa: E501
### Instruction:
{data['instruction']}
### ASN data table:
{data['input_seg']}
### Question:
{data['question']}
### Output:
{data['output']}"""
 
 
def tokenize(prompt, add_eos_token=True):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding=False,
        return_tensors=None,
    )
    if (
        result["input_ids"][-1] != tokenizer.eos_token_id
        and len(result["input_ids"]) < CUTOFF_LEN
        and add_eos_token
    ):
        result["input_ids"].append(tokenizer.eos_token_id)
        result["attention_mask"].append(1)
 
    result["labels"] = result["input_ids"].copy()
 
    return result
 
def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenize(full_prompt)
    return tokenized_full_prompt

In [3]:
train_val = data["train"].train_test_split(
    test_size=10, shuffle=False, seed=42
)
train_data = (
    train_val["train"]
)
val_data = (
    train_val["test"]
)

# train_data = (
#     train_val["train"].map(generate_and_tokenize_prompt)
# )
# val_data = (
#     train_val["test"].map(generate_and_tokenize_prompt)
# )

NameError: name 'data' is not defined

In [4]:
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments

lora_alpha = 16
lora_dropout = 0
# lora_dropout = 0.1
lora_r = 8
# lora_r = 64

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM"
)

output_dir = "/home/hb/dataset_bgp/llm_finetuned/runs"
# per_device_train_batch_size = 4
per_device_train_batch_size = 3
# per_device_train_batch_size = 2
gradient_accumulation_steps = 1
optim = "paged_adamw_32bit"
save_steps = 200
logging_steps = 500
# learning_rate = 2e-5
learning_rate = 1e-4
max_grad_norm = 0.3
max_steps = 2000
warmup_ratio = 0.03
lr_scheduler_type = "cosine"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    num_train_epochs=3
)

model = get_peft_model(model, peft_config)
# model.print_trainable_parameters()

In [None]:
data_collator = transformers.DataCollatorForSeq2Seq(
    tokenizer, return_tensors="pt", padding=True
)

In [None]:
from trl import SFTTrainer

max_seq_length = 4090

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    peft_config=peft_config,
    dataset_text_field="output",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

trainer.train()

Loading cached processed dataset at /home/hb/.cache/huggingface/datasets/json/default-eed7dec0331e737e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-a76684d30fef775f.arrow
Loading cached processed dataset at /home/hb/.cache/huggingface/datasets/json/default-eed7dec0331e737e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-d7b307f82e689f70.arrow
You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mmkkanhb[0m ([33mdnlab_2023[0m). Use [1m`wandb login --relogin`[0m to force relogin


OutOfMemoryError: CUDA out of memory. Tried to allocate 138.00 MiB. GPU 1 has a total capacity of 47.52 GiB of which 166.69 MiB is free. Including non-PyTorch memory, this process has 46.72 GiB memory in use. Of the allocated memory 46.18 GiB is allocated by PyTorch, and 234.61 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
new_model = "/home/hb/dataset_bgp/llm_finetuned/llama2-13b-table135-20split-2k-table-llama"

trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)

('/home/hb/dataset_bgp/llm_finetuned/llama2-13b-table135-20split-5k-newparam/tokenizer_config.json',
 '/home/hb/dataset_bgp/llm_finetuned/llama2-13b-table135-20split-5k-newparam/special_tokens_map.json',
 '/home/hb/dataset_bgp/llm_finetuned/llama2-13b-table135-20split-5k-newparam/tokenizer.model',
 '/home/hb/dataset_bgp/llm_finetuned/llama2-13b-table135-20split-5k-newparam/added_tokens.json',
 '/home/hb/dataset_bgp/llm_finetuned/llama2-13b-table135-20split-5k-newparam/tokenizer.json')