In [None]:
# !¸ bitsandbytes accelerate
# !pip install git+https://github.com/huggingface/peft.git@main
# !pip install datasets
# import locale # colab workaround
# locale.getpreferredencoding = lambda: "UTF-8" # colab workaround
# !pip install wandb

In [1]:
from datetime import datetime
import os
import sys
import time

from datasets import load_dataset
import torch
from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
    set_peft_model_state_dict,
)
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,
    DataCollatorForSeq2Seq, pipeline,
    TrainingArguments, Trainer,
)

'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [4]:
base_model = "codellama/CodeLlama-7b-python-hf"
tokenizer = AutoTokenizer.from_pretrained(base_model)

In [3]:
orig_model = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype = torch.float16,
    device_map = {"":0},
    low_cpu_mem_usage = True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
data = load_dataset("csv", data_files={"train": ["./dataset/dataset.csv"], "validation": "./dataset/dataset.csv", "test": "./dataset/dataset.csv"})

In [5]:
tokenizer.add_eos_token = True
tokenizer.pad_token_id = 0
tokenizer.padding_side = "left"

In [6]:
def tokenize(prompt):
    result = tokenizer(
        prompt,
        truncation=True,
        max_length=512,
        padding=False,
        return_tensors=None,
    )

    # "self-supervised learning" means the labels are also the inputs:
    result["labels"] = result["input_ids"].copy()

    return result

def generate_and_tokenize_prompt(data_point):
    full_prompt = f'Requirements: \n\n{data_point["input_ids"]}\n\nDiff: {data_point["labels"]}'
    return tokenize(full_prompt)

# The dataset actually contains 3 diff splits: train, validation, test.
# The tokenize_function code is handling all data across all splits in batches.
tokenized_datasets = data.map(generate_and_tokenize_prompt)
print(tokenized_datasets)

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels', 'attention_mask'],
        num_rows: 5
    })
    validation: Dataset({
        features: ['input_ids', 'labels', 'attention_mask'],
        num_rows: 5
    })
    test: Dataset({
        features: ['input_ids', 'labels', 'attention_mask'],
        num_rows: 5
    })
})


In [7]:
orig_model.train()
orig_model = prepare_model_for_int8_training(orig_model)
config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
    ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
peft_model = get_peft_model(orig_model, config)

In [9]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(peft_model))

trainable model parameters: 16777216
all model parameters: 6755192832
percentage of trainable model parameters: 0.25%


In [10]:
output_dir = "./peft-code-llama-training"

# Set Training parameters
batch_size = 128
per_device_train_batch_size = 32
gradient_accumulation_steps = batch_size // per_device_train_batch_size

training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    warmup_steps=100,
    max_steps=5,
    learning_rate=3e-4,
    fp16=False,
    logging_steps=10,
    optim="adamw_torch",
    evaluation_strategy="no", # if val_set_size > 0 else "no",
    save_strategy="steps",
    save_steps=20,
    load_best_model_at_end=False,
    group_by_length=True, # group sequences of roughly the same length together to speed up training
    report_to="none", # if use_wandb else "none",
    # run_name=f"codellama-{datetime.now().strftime('%Y-%m-%d-%H-%M')}", # if use_wandb else None,
)

trainer = Trainer(
    model=peft_model,
    train_dataset=tokenized_datasets["train"],
    args=training_args,
    data_collator=DataCollatorForSeq2Seq(
        tokenizer, pad_to_multiple_of=8, return_tensors="pt", padding=True
    ),
)



In [11]:
os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
trainer.train()

Step,Training Loss


TrainOutput(global_step=5, training_loss=0.32601046562194824, metrics={'train_runtime': 843.505, 'train_samples_per_second': 0.759, 'train_steps_per_second': 0.006, 'total_flos': 397447249920000.0, 'train_loss': 0.32601046562194824, 'epoch': 5.0})

In [5]:
from peft import PeftModel

base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map={"":0},
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
peft_model = PeftModel.from_pretrained(
    base_model_reload,
    output_dir,
    offload_dir="./CodeLlama-7b-python-hf-peft-trained/model",
)

  adapters_weights = torch.load(
Some parameters are on the meta device because they were offloaded to the disk.


In [18]:
eval_prompt = """Requirements: 
input params to POST changed to building and floor
Diff: """

model_input = tokenizer(eval_prompt, return_tensors="pt")

peft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(peft_model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=False))

# pipe = pipeline(task="text-generation", model=peft_model, tokenizer=tokenizer, max_length=200)
# result = pipe(eval_prompt)
# print(result[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


KeyError: 'base_model.model.model.layers.11.input_layernorm.weight'

In [1]:
# save trained model
trainer.model.save_pretrained(output_dir)
# merged_model = peft_model.merge_and_unload()

NameError: name 'trainer' is not defined

In [4]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer

new_model = "./peft-code-llama-training"
model = AutoModelForCausalLM.from_pretrained(
    new_model,
    torch_dtype=torch.float16,
    device_map={"":0},
)
tokenizer = AutoTokenizer.from_pretrained(new_model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

ImportError: cannot import name 'inject_adapter_in_model' from 'peft' (/opt/homebrew/anaconda3/envs/conda-venv/lib/python3.9/site-packages/peft/__init__.py)

In [None]:
#Use fine-tuned Llama model for running text generation pipelines
prompt = "Can coronaviruses mutate to an extent to get as dangerous as fliovirusues such as Marburg viruses, Ravn or Ebola Zaire?"
pipe = pipeline(task="text-generation",
                model=model,
                tokenizer=tokenizer, max_length=300)



result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'].split("[/INST]")[1])